Diffstat:
-rw-r--r--net/6lowpan/core.c1
-rw-r--r--net/6lowpan/nhc.c103
-rw-r--r--net/6lowpan/nhc.h38
-rw-r--r--net/6lowpan/nhc_dest.c9
-rw-r--r--net/6lowpan/nhc_fragment.c9
-rw-r--r--net/6lowpan/nhc_ghc_ext_dest.c9
-rw-r--r--net/6lowpan/nhc_ghc_ext_frag.c11
-rw-r--r--net/6lowpan/nhc_ghc_ext_hop.c9
-rw-r--r--net/6lowpan/nhc_ghc_ext_route.c9
-rw-r--r--net/6lowpan/nhc_ghc_icmpv6.c9
-rw-r--r--net/6lowpan/nhc_ghc_udp.c9
-rw-r--r--net/6lowpan/nhc_hop.c9
-rw-r--r--net/6lowpan/nhc_ipv6.c11
-rw-r--r--net/6lowpan/nhc_mobility.c9
-rw-r--r--net/6lowpan/nhc_routing.c9
-rw-r--r--net/6lowpan/nhc_udp.c9
-rw-r--r--net/802/garp.c2
-rw-r--r--net/802/mrp.c2
-rw-r--r--net/8021q/vlan.c3
-rw-r--r--net/8021q/vlan.h2
-rw-r--r--net/8021q/vlan_core.c15
-rw-r--r--net/8021q/vlan_dev.c38
-rw-r--r--net/8021q/vlan_netlink.c7
-rw-r--r--net/8021q/vlanproc.c2
-rw-r--r--net/9p/Kconfig7
-rw-r--r--net/9p/Makefile5
-rw-r--r--net/9p/client.c348
-rw-r--r--net/9p/mod.c15
-rw-r--r--net/9p/protocol.c170
-rw-r--r--net/9p/protocol.h2
-rw-r--r--net/9p/trans_fd.c77
-rw-r--r--net/9p/trans_rdma.c3
-rw-r--r--net/9p/trans_virtio.c46
-rw-r--r--net/9p/trans_xen.c30
-rw-r--r--net/Kconfig15
-rw-r--r--net/Kconfig.debug11
-rw-r--r--net/Makefile1
-rw-r--r--net/appletalk/ddp.c3
-rw-r--r--net/atm/common.c4
-rw-r--r--net/atm/mpoa_proc.c3
-rw-r--r--net/atm/proc.c4
-rw-r--r--net/ax25/af_ax25.c106
-rw-r--r--net/ax25/ax25_dev.c50
-rw-r--r--net/ax25/ax25_route.c18
-rw-r--r--net/ax25/ax25_subr.c20
-rw-r--r--net/ax25/ax25_timer.c4
-rw-r--r--net/batman-adv/bat_iv_ogm.c2
-rw-r--r--net/batman-adv/bat_v_elp.c3
-rw-r--r--net/batman-adv/bat_v_ogm.c2
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c7
-rw-r--r--net/batman-adv/distributed-arp-table.c2
-rw-r--r--net/batman-adv/fragmentation.c11
-rw-r--r--net/batman-adv/gateway_client.c1
-rw-r--r--net/batman-adv/hard-interface.c41
-rw-r--r--net/batman-adv/main.c2
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/multicast.c3
-rw-r--r--net/batman-adv/netlink.c1
-rw-r--r--net/batman-adv/network-coding.c2
-rw-r--r--net/batman-adv/originator.c2
-rw-r--r--net/batman-adv/send.c2
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/batman-adv/tp_meter.c2
-rw-r--r--net/batman-adv/trace.h11
-rw-r--r--net/batman-adv/translation-table.c14
-rw-r--r--net/batman-adv/tvlv.c2
-rw-r--r--net/batman-adv/types.h39
-rw-r--r--net/bluetooth/6lowpan.c3
-rw-r--r--net/bluetooth/Kconfig1
-rw-r--r--net/bluetooth/Makefile1
-rw-r--r--net/bluetooth/af_bluetooth.c23
-rw-r--r--net/bluetooth/aosp.c15
-rw-r--r--net/bluetooth/bnep/core.c4
-rw-r--r--net/bluetooth/cmtp/core.c2
-rw-r--r--net/bluetooth/eir.c93
-rw-r--r--net/bluetooth/eir.h25
-rw-r--r--net/bluetooth/hci_conn.c1099
-rw-r--r--net/bluetooth/hci_core.c648
-rw-r--r--net/bluetooth/hci_debugfs.c2
-rw-r--r--net/bluetooth/hci_event.c912
-rw-r--r--net/bluetooth/hci_request.c1923
-rw-r--r--net/bluetooth/hci_request.h51
-rw-r--r--net/bluetooth/hci_sock.c12
-rw-r--r--net/bluetooth/hci_sync.c1321
-rw-r--r--net/bluetooth/hci_sysfs.c3
-rw-r--r--net/bluetooth/hidp/core.c8
-rw-r--r--net/bluetooth/iso.c1860
-rw-r--r--net/bluetooth/l2cap_core.c170
-rw-r--r--net/bluetooth/lib.c71
-rw-r--r--net/bluetooth/mgmt.c1321
-rw-r--r--net/bluetooth/mgmt_util.c79
-rw-r--r--net/bluetooth/mgmt_util.h18
-rw-r--r--net/bluetooth/msft.c447
-rw-r--r--net/bluetooth/msft.h6
-rw-r--r--net/bluetooth/rfcomm/sock.c3
-rw-r--r--net/bluetooth/rfcomm/tty.c4
-rw-r--r--net/bluetooth/sco.c23
-rw-r--r--net/bpf/bpf_dummy_struct_ops.c30
-rw-r--r--net/bpf/test_run.c731
-rw-r--r--net/bpfilter/bpfilter_kern.c2
-rw-r--r--net/bridge/Makefile2
-rw-r--r--net/bridge/br.c20
-rw-r--r--net/bridge/br_arp_nd_proxy.c4
-rw-r--r--net/bridge/br_device.c9
-rw-r--r--net/bridge/br_fdb.c160
-rw-r--r--net/bridge/br_forward.c2
-rw-r--r--net/bridge/br_if.c52
-rw-r--r--net/bridge/br_input.c35
-rw-r--r--net/bridge/br_mdb.c27
-rw-r--r--net/bridge/br_mst.c357
-rw-r--r--net/bridge/br_multicast.c4
-rw-r--r--net/bridge/br_netfilter_hooks.c23
-rw-r--r--net/bridge/br_netfilter_ipv6.c1
-rw-r--r--net/bridge/br_netlink.c75
-rw-r--r--net/bridge/br_private.h88
-rw-r--r--net/bridge/br_stp.c6
-rw-r--r--net/bridge/br_switchdev.c144
-rw-r--r--net/bridge/br_sysfs_br.c6
-rw-r--r--net/bridge/br_sysfs_if.c4
-rw-r--r--net/bridge/br_vlan.c182
-rw-r--r--net/bridge/br_vlan_options.c20
-rw-r--r--net/bridge/netfilter/ebtable_broute.c8
-rw-r--r--net/bridge/netfilter/ebtable_filter.c8
-rw-r--r--net/bridge/netfilter/ebtable_nat.c8
-rw-r--r--net/bridge/netfilter/ebtables.c12
-rw-r--r--net/bridge/netfilter/nf_conntrack_bridge.c7
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c7
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c9
-rw-r--r--net/caif/caif_dev.c4
-rw-r--r--net/caif/caif_socket.c22
-rw-r--r--net/caif/caif_usb.c2
-rw-r--r--net/caif/cfcnfg.c4
-rw-r--r--net/caif/cfctrl.c2
-rw-r--r--net/caif/chnl_net.c2
-rw-r--r--net/can/Kconfig5
-rw-r--r--net/can/af_can.c81
-rw-r--r--net/can/bcm.c61
-rw-r--r--net/can/gw.c29
-rw-r--r--net/can/isotp.c482
-rw-r--r--net/can/j1939/main.c7
-rw-r--r--net/can/j1939/socket.c9
-rw-r--r--net/can/j1939/transport.c14
-rw-r--r--net/can/proc.c2
-rw-r--r--net/can/raw.c102
-rw-r--r--net/ceph/buffer.c4
-rw-r--r--net/ceph/ceph_common.c52
-rw-r--r--net/ceph/crush/mapper.c5
-rw-r--r--net/ceph/crypto.c2
-rw-r--r--net/ceph/messenger.c61
-rw-r--r--net/ceph/messenger_v1.c58
-rw-r--r--net/ceph/messenger_v2.c246
-rw-r--r--net/ceph/mon_client.c2
-rw-r--r--net/ceph/osd_client.c334
-rw-r--r--net/ceph/osdmap.c44
-rw-r--r--net/ceph/pagelist.c2
-rw-r--r--net/compat.c41
-rw-r--r--net/core/Makefile3
-rw-r--r--net/core/bpf_sk_storage.c51
-rw-r--r--net/core/datagram.c29
-rw-r--r--net/core/datagram.h15
-rw-r--r--net/core/dev.c1015
-rw-r--r--net/core/dev.h112
-rw-r--r--net/core/dev_addr_lists.c2
-rw-r--r--net/core/dev_ioctl.c6
-rw-r--r--net/core/devlink.c2723
-rw-r--r--net/core/drop_monitor.c126
-rw-r--r--net/core/dst.c8
-rw-r--r--net/core/failover.c4
-rw-r--r--net/core/filter.c1454
-rw-r--r--net/core/flow_dissector.c143
-rw-r--r--net/core/flow_offload.c27
-rw-r--r--net/core/gen_stats.c2
-rw-r--r--net/core/gro.c67
-rw-r--r--net/core/gro_cells.c43
-rw-r--r--net/core/link_watch.c9
-rw-r--r--net/core/lwt_bpf.c7
-rw-r--r--net/core/lwtunnel.c1
-rw-r--r--net/core/neighbour.c135
-rw-r--r--net/core/net-procfs.c40
-rw-r--r--net/core/net-sysfs.c97
-rw-r--r--net/core/net_namespace.c31
-rw-r--r--net/core/netclassid_cgroup.c2
-rw-r--r--net/core/netpoll.c6
-rw-r--r--net/core/of_net.c33
-rw-r--r--net/core/page_pool.c188
-rw-r--r--net/core/pktgen.c59
-rw-r--r--net/core/ptp_classifier.c12
-rw-r--r--net/core/rtnetlink.c1029
-rw-r--r--net/core/secure_seq.c20
-rw-r--r--net/core/skbuff.c424
-rw-r--r--net/core/skmsg.c126
-rw-r--r--net/core/sock.c343
-rw-r--r--net/core/sock_map.c147
-rw-r--r--net/core/sock_reuseport.c20
-rw-r--r--net/core/stream.c11
-rw-r--r--net/core/sysctl_net_core.c65
-rw-r--r--net/core/utils.c4
-rw-r--r--net/core/xdp.c92
-rw-r--r--net/dcb/dcbnl.c44
-rw-r--r--net/dccp/dccp.h9
-rw-r--r--net/dccp/ipv4.c36
-rw-r--r--net/dccp/ipv6.c24
-rw-r--r--net/dccp/minisocks.c1
-rw-r--r--net/dccp/proto.c53
-rw-r--r--net/decnet/Kconfig43
-rw-r--r--net/decnet/Makefile10
-rw-r--r--net/decnet/README8
-rw-r--r--net/decnet/af_decnet.c2400
-rw-r--r--net/decnet/dn_dev.c1433
-rw-r--r--net/decnet/dn_fib.c798
-rw-r--r--net/decnet/dn_neigh.c605
-rw-r--r--net/decnet/dn_nsp_in.c907
-rw-r--r--net/decnet/dn_nsp_out.c695
-rw-r--r--net/decnet/dn_route.c1922
-rw-r--r--net/decnet/dn_rules.c253
-rw-r--r--net/decnet/dn_table.c929
-rw-r--r--net/decnet/dn_timer.c104
-rw-r--r--net/decnet/netfilter/Kconfig17
-rw-r--r--net/decnet/netfilter/Makefile6
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c158
-rw-r--r--net/decnet/sysctl_net_decnet.c362
-rw-r--r--net/dsa/Kconfig11
-rw-r--r--net/dsa/Makefile11
-rw-r--r--net/dsa/dsa.c79
-rw-r--r--net/dsa/dsa2.c506
-rw-r--r--net/dsa/dsa_priv.h168
-rw-r--r--net/dsa/master.c110
-rw-r--r--net/dsa/netlink.c63
-rw-r--r--net/dsa/port.c1031
-rw-r--r--net/dsa/slave.c1332
-rw-r--r--net/dsa/switch.c561
-rw-r--r--net/dsa/tag_8021q.c341
-rw-r--r--net/dsa/tag_brcm.c4
-rw-r--r--net/dsa/tag_dsa.c19
-rw-r--r--net/dsa/tag_hellcreek.c10
-rw-r--r--net/dsa/tag_ksz.c59
-rw-r--r--net/dsa/tag_lan9303.c21
-rw-r--r--net/dsa/tag_ocelot_8021q.c11
-rw-r--r--net/dsa/tag_qca.c85
-rw-r--r--net/dsa/tag_rtl8_4.c152
-rw-r--r--net/dsa/tag_rzn1_a5psw.c113
-rw-r--r--net/dsa/tag_sja1105.c28
-rw-r--r--net/ethernet/eth.c11
-rw-r--r--net/ethtool/Makefile3
-rw-r--r--net/ethtool/cabletest.c2
-rw-r--r--net/ethtool/common.c3
-rw-r--r--net/ethtool/common.h1
-rw-r--r--net/ethtool/eeprom.c4
-rw-r--r--net/ethtool/ioctl.c30
-rw-r--r--net/ethtool/linkmodes.c5
-rw-r--r--net/ethtool/netlink.c27
-rw-r--r--net/ethtool/netlink.h8
-rw-r--r--net/ethtool/pse-pd.c185
-rw-r--r--net/ethtool/rings.c62
-rw-r--r--net/ethtool/strset.c2
-rw-r--r--net/ethtool/tunnels.c2
-rw-r--r--net/hsr/hsr_debugfs.c50
-rw-r--r--net/hsr/hsr_device.c12
-rw-r--r--net/hsr/hsr_forward.c19
-rw-r--r--net/hsr/hsr_framereg.c209
-rw-r--r--net/hsr/hsr_framereg.h14
-rw-r--r--net/hsr/hsr_main.h30
-rw-r--r--net/hsr/hsr_netlink.c5
-rw-r--r--net/ieee802154/6lowpan/core.c1
-rw-r--r--net/ieee802154/6lowpan/reassembly.c1
-rw-r--r--net/ieee802154/netlink.c1
-rw-r--r--net/ieee802154/nl-phy.c4
-rw-r--r--net/ieee802154/nl802154.c9
-rw-r--r--net/ieee802154/socket.c62
-rw-r--r--net/ipv4/Kconfig1
-rw-r--r--net/ipv4/af_inet.c111
-rw-r--r--net/ipv4/ah4.c23
-rw-r--r--net/ipv4/arp.c45
-rw-r--r--net/ipv4/bpf_tcp_ca.c81
-rw-r--r--net/ipv4/cipso_ipv4.c12
-rw-r--r--net/ipv4/datagram.c13
-rw-r--r--net/ipv4/devinet.c33
-rw-r--r--net/ipv4/esp4.c74
-rw-r--r--net/ipv4/esp4_offload.c9
-rw-r--r--net/ipv4/fib_frontend.c49
-rw-r--r--net/ipv4/fib_lookup.h10
-rw-r--r--net/ipv4/fib_rules.c21
-rw-r--r--net/ipv4/fib_semantics.c166
-rw-r--r--net/ipv4/fib_trie.c96
-rw-r--r--net/ipv4/fou.c11
-rw-r--r--net/ipv4/gre_offload.c9
-rw-r--r--net/ipv4/icmp.c190
-rw-r--r--net/ipv4/igmp.c90
-rw-r--r--net/ipv4/inet_connection_sock.c310
-rw-r--r--net/ipv4/inet_diag.c5
-rw-r--r--net/ipv4/inet_fragment.c11
-rw-r--r--net/ipv4/inet_hashtables.c543
-rw-r--r--net/ipv4/inet_timewait_sock.c31
-rw-r--r--net/ipv4/inetpeer.c12
-rw-r--r--net/ipv4/ip_forward.c17
-rw-r--r--net/ipv4/ip_fragment.c4
-rw-r--r--net/ipv4/ip_gre.c93
-rw-r--r--net/ipv4/ip_input.c70
-rw-r--r--net/ipv4/ip_options.c31
-rw-r--r--net/ipv4/ip_output.c109
-rw-r--r--net/ipv4/ip_sockglue.c128
-rw-r--r--net/ipv4/ip_tunnel.c33
-rw-r--r--net/ipv4/ip_tunnel_core.c69
-rw-r--r--net/ipv4/ipcomp.c10
-rw-r--r--net/ipv4/ipconfig.c14
-rw-r--r--net/ipv4/ipip.c62
-rw-r--r--net/ipv4/ipmr.c252
-rw-r--r--net/ipv4/ipmr_base.c53
-rw-r--r--net/ipv4/netfilter.c3
-rw-r--r--net/ipv4/netfilter/Kconfig4
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c6
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c4
-rw-r--r--net/ipv4/netfilter/nf_flow_table_ipv4.c0
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c110
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c24
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c14
-rw-r--r--net/ipv4/netfilter/nf_socket_ipv4.c4
-rw-r--r--net/ipv4/netfilter/nf_tproxy_ipv4.c16
-rw-r--r--net/ipv4/netfilter/nft_dup_ipv4.c1
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c10
-rw-r--r--net/ipv4/netfilter/nft_reject_ipv4.c1
-rw-r--r--net/ipv4/nexthop.c19
-rw-r--r--net/ipv4/ping.c149
-rw-r--r--net/ipv4/proc.c6
-rw-r--r--net/ipv4/raw.c185
-rw-r--r--net/ipv4/raw_diag.c57
-rw-r--r--net/ipv4/route.c177
-rw-r--r--net/ipv4/syncookies.c19
-rw-r--r--net/ipv4/sysctl_net_ipv4.c153
-rw-r--r--net/ipv4/tcp.c568
-rw-r--r--net/ipv4/tcp_bbr.c56
-rw-r--r--net/ipv4/tcp_bic.c14
-rw-r--r--net/ipv4/tcp_bpf.c39
-rw-r--r--net/ipv4/tcp_cdg.c34
-rw-r--r--net/ipv4/tcp_cong.c32
-rw-r--r--net/ipv4/tcp_cubic.c55
-rw-r--r--net/ipv4/tcp_dctcp.c41
-rw-r--r--net/ipv4/tcp_diag.c18
-rw-r--r--net/ipv4/tcp_fastopen.c12
-rw-r--r--net/ipv4/tcp_highspeed.c18
-rw-r--r--net/ipv4/tcp_htcp.c10
-rw-r--r--net/ipv4/tcp_hybla.c18
-rw-r--r--net/ipv4/tcp_illinois.c12
-rw-r--r--net/ipv4/tcp_input.c392
-rw-r--r--net/ipv4/tcp_ipv4.c367
-rw-r--r--net/ipv4/tcp_lp.c6
-rw-r--r--net/ipv4/tcp_metrics.c26
-rw-r--r--net/ipv4/tcp_minisocks.c41
-rw-r--r--net/ipv4/tcp_nv.c24
-rw-r--r--net/ipv4/tcp_offload.c26
-rw-r--r--net/ipv4/tcp_output.c220
-rw-r--r--net/ipv4/tcp_rate.c13
-rw-r--r--net/ipv4/tcp_recovery.c21
-rw-r--r--net/ipv4/tcp_scalable.c4
-rw-r--r--net/ipv4/tcp_timer.c51
-rw-r--r--net/ipv4/tcp_ulp.c3
-rw-r--r--net/ipv4/tcp_vegas.c21
-rw-r--r--net/ipv4/tcp_veno.c24
-rw-r--r--net/ipv4/tcp_westwood.c3
-rw-r--r--net/ipv4/tcp_yeah.c30
-rw-r--r--net/ipv4/udp.c131
-rw-r--r--net/ipv4/udp_bpf.c21
-rw-r--r--net/ipv4/udp_impl.h4
-rw-r--r--net/ipv4/udp_tunnel_core.c1
-rw-r--r--net/ipv4/udp_tunnel_nic.c2
-rw-r--r--net/ipv4/udplite.c11
-rw-r--r--net/ipv4/xfrm4_policy.c6
-rw-r--r--net/ipv4/xfrm4_protocol.c1
-rw-r--r--net/ipv4/xfrm4_tunnel.c10
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/addrconf.c425
-rw-r--r--net/ipv6/addrconf_core.c2
-rw-r--r--net/ipv6/addrlabel.c1
-rw-r--r--net/ipv6/af_inet6.c55
-rw-r--r--net/ipv6/ah6.c23
-rw-r--r--net/ipv6/datagram.c12
-rw-r--r--net/ipv6/esp6.c77
-rw-r--r--net/ipv6/esp6_offload.c9
-rw-r--r--net/ipv6/exthdrs.c52
-rw-r--r--net/ipv6/fib6_rules.c30
-rw-r--r--net/ipv6/icmp.c95
-rw-r--r--net/ipv6/ila/ila_main.c1
-rw-r--r--net/ipv6/inet6_hashtables.c24
-rw-r--r--net/ipv6/ioam6.c20
-rw-r--r--net/ipv6/ioam6_iptunnel.c59
-rw-r--r--net/ipv6/ip6_fib.c23
-rw-r--r--net/ipv6/ip6_flowlabel.c6
-rw-r--r--net/ipv6/ip6_gre.c142
-rw-r--r--net/ipv6/ip6_input.c67
-rw-r--r--net/ipv6/ip6_offload.c74
-rw-r--r--net/ipv6/ip6_output.c242
-rw-r--r--net/ipv6/ip6_tunnel.c107
-rw-r--r--net/ipv6/ip6_vti.c8
-rw-r--r--net/ipv6/ip6mr.c369
-rw-r--r--net/ipv6/ipcomp6.c10
-rw-r--r--net/ipv6/ipv6_sockglue.c153
-rw-r--r--net/ipv6/mcast.c74
-rw-r--r--net/ipv6/mip6.c14
-rw-r--r--net/ipv6/ndisc.c108
-rw-r--r--net/ipv6/netfilter.c14
-rw-r--r--net/ipv6/netfilter/Kconfig4
-rw-r--r--net/ipv6/netfilter/Makefile3
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c10
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c2
-rw-r--r--net/ipv6/netfilter/nf_flow_table_ipv6.c0
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c4
-rw-r--r--net/ipv6/netfilter/nf_socket_ipv6.c4
-rw-r--r--net/ipv6/netfilter/nf_tproxy_ipv6.c8
-rw-r--r--net/ipv6/netfilter/nft_dup_ipv6.c1
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c13
-rw-r--r--net/ipv6/netfilter/nft_reject_ipv6.c1
-rw-r--r--net/ipv6/output_core.c2
-rw-r--r--net/ipv6/ping.c61
-rw-r--r--net/ipv6/raw.c126
-rw-r--r--net/ipv6/reassembly.c1
-rw-r--r--net/ipv6/route.c137
-rw-r--r--net/ipv6/seg6.c6
-rw-r--r--net/ipv6/seg6_hmac.c2
-rw-r--r--net/ipv6/seg6_iptunnel.c145
-rw-r--r--net/ipv6/seg6_local.c392
-rw-r--r--net/ipv6/sit.c97
-rw-r--r--net/ipv6/syncookies.c6
-rw-r--r--net/ipv6/sysctl_net_ipv6.c6
-rw-r--r--net/ipv6/tcp_ipv6.c211
-rw-r--r--net/ipv6/udp.c192
-rw-r--r--net/ipv6/udp_impl.h5
-rw-r--r--net/ipv6/udplite.c12
-rw-r--r--net/ipv6/xfrm6_output.c16
-rw-r--r--net/ipv6/xfrm6_policy.c8
-rw-r--r--net/ipv6/xfrm6_tunnel.c10
-rw-r--r--net/iucv/af_iucv.c5
-rw-r--r--net/iucv/iucv.c2
-rw-r--r--net/kcm/kcmsock.c42
-rw-r--r--net/key/af_key.c27
-rw-r--r--net/l2tp/l2tp_debugfs.c6
-rw-r--r--net/l2tp/l2tp_eth.c4
-rw-r--r--net/l2tp/l2tp_ip.c8
-rw-r--r--net/l2tp/l2tp_ip6.c17
-rw-r--r--net/l2tp/l2tp_netlink.c1
-rw-r--r--net/l2tp/l2tp_ppp.c5
-rw-r--r--net/l3mdev/l3mdev.c45
-rw-r--r--net/llc/af_llc.c53
-rw-r--r--net/mac80211/Makefile4
-rw-r--r--net/mac80211/agg-rx.c32
-rw-r--r--net/mac80211/agg-tx.c22
-rw-r--r--net/mac80211/airtime.c19
-rw-r--r--net/mac80211/cfg.c1173
-rw-r--r--net/mac80211/chan.c697
-rw-r--r--net/mac80211/debug.h33
-rw-r--r--net/mac80211/debugfs.c107
-rw-r--r--net/mac80211/debugfs_key.c10
-rw-r--r--net/mac80211/debugfs_netdev.c80
-rw-r--r--net/mac80211/debugfs_sta.c38
-rw-r--r--net/mac80211/driver-ops.c180
-rw-r--r--net/mac80211/driver-ops.h141
-rw-r--r--net/mac80211/eht.c79
-rw-r--r--net/mac80211/ethtool.c26
-rw-r--r--net/mac80211/he.c29
-rw-r--r--net/mac80211/ht.c70
-rw-r--r--net/mac80211/ibss.c137
-rw-r--r--net/mac80211/ieee80211_i.h770
-rw-r--r--net/mac80211/iface.c225
-rw-r--r--net/mac80211/key.c289
-rw-r--r--net/mac80211/key.h25
-rw-r--r--net/mac80211/link.c473
-rw-r--r--net/mac80211/main.c262
-rw-r--r--net/mac80211/mesh.c59
-rw-r--r--net/mac80211/mesh_hwmp.c15
-rw-r--r--net/mac80211/mesh_pathtbl.c2
-rw-r--r--net/mac80211/mesh_plink.c44
-rw-r--r--net/mac80211/mlme.c4364
-rw-r--r--net/mac80211/ocb.c17
-rw-r--r--net/mac80211/offchannel.c88
-rw-r--r--net/mac80211/rate.c36
-rw-r--r--net/mac80211/rate.h10
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c190
-rw-r--r--net/mac80211/rc80211_minstrel_ht.h2
-rw-r--r--net/mac80211/rx.c601
-rw-r--r--net/mac80211/s1g.c7
-rw-r--r--net/mac80211/scan.c49
-rw-r--r--net/mac80211/spectmgmt.c16
-rw-r--r--net/mac80211/sta_info.c688
-rw-r--r--net/mac80211/sta_info.h208
-rw-r--r--net/mac80211/status.c230
-rw-r--r--net/mac80211/tdls.c70
-rw-r--r--net/mac80211/trace.h1175
-rw-r--r--net/mac80211/trace_msg.h6
-rw-r--r--net/mac80211/tx.c1164
-rw-r--r--net/mac80211/util.c812
-rw-r--r--net/mac80211/vht.c243
-rw-r--r--net/mac80211/wme.c7
-rw-r--r--net/mac80211/wpa.c240
-rw-r--r--net/mac80211/wpa.h5
-rw-r--r--net/mac802154/cfg.c1
-rw-r--r--net/mac802154/ieee802154_i.h2
-rw-r--r--net/mac802154/main.c54
-rw-r--r--net/mac802154/rx.c7
-rw-r--r--net/mac802154/util.c22
-rw-r--r--net/mctp/af_mctp.c252
-rw-r--r--net/mctp/device.c36
-rw-r--r--net/mctp/neigh.c2
-rw-r--r--net/mctp/route.c180
-rw-r--r--net/mctp/test/route-test.c169
-rw-r--r--net/mctp/test/utils.c1
-rw-r--r--net/mpls/af_mpls.c9
-rw-r--r--net/mptcp/Makefile4
-rw-r--r--net/mptcp/bpf.c21
-rw-r--r--net/mptcp/ctrl.c21
-rw-r--r--net/mptcp/mib.c11
-rw-r--r--net/mptcp/mib.h13
-rw-r--r--net/mptcp/mptcp_diag.c108
-rw-r--r--net/mptcp/options.c187
-rw-r--r--net/mptcp/pm.c128
-rw-r--r--net/mptcp/pm_netlink.c644
-rw-r--r--net/mptcp/pm_userspace.c454
-rw-r--r--net/mptcp/protocol.c641
-rw-r--r--net/mptcp/protocol.h212
-rw-r--r--net/mptcp/sockopt.c42
-rw-r--r--net/mptcp/subflow.c331
-rw-r--r--net/ncsi/ncsi-manage.c7
-rw-r--r--net/ncsi/ncsi-netlink.c1
-rw-r--r--net/netfilter/Kconfig9
-rw-r--r--net/netfilter/Makefile12
-rw-r--r--net/netfilter/core.c20
-rw-r--r--net/netfilter/ipset/ip_set_core.c12
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h30
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c34
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c13
-rw-r--r--net/netfilter/ipvs/ip_vs_mh.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_twos.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c6
-rw-r--r--net/netfilter/nf_conncount.c11
-rw-r--r--net/netfilter/nf_conntrack_acct.c19
-rw-r--r--net/netfilter/nf_conntrack_bpf.c513
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c6
-rw-r--r--net/netfilter/nf_conntrack_core.c604
-rw-r--r--net/netfilter/nf_conntrack_ecache.c225
-rw-r--r--net/netfilter/nf_conntrack_extend.c148
-rw-r--r--net/netfilter/nf_conntrack_ftp.c20
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c270
-rw-r--r--net/netfilter/nf_conntrack_helper.c98
-rw-r--r--net/netfilter/nf_conntrack_irc.c51
-rw-r--r--net/netfilter/nf_conntrack_labels.c20
-rw-r--r--net/netfilter/nf_conntrack_netbios_ns.c5
-rw-r--r--net/netfilter/nf_conntrack_netlink.c242
-rw-r--r--net/netfilter/nf_conntrack_pptp.c60
-rw-r--r--net/netfilter/nf_conntrack_proto.c10
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c9
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c423
-rw-r--r--net/netfilter/nf_conntrack_sane.c68
-rw-r--r--net/netfilter/nf_conntrack_seqadj.c16
-rw-r--r--net/netfilter/nf_conntrack_sip.c13
-rw-r--r--net/netfilter/nf_conntrack_standalone.c17
-rw-r--r--net/netfilter/nf_conntrack_timeout.c71
-rw-r--r--net/netfilter/nf_conntrack_timestamp.c20
-rw-r--r--net/netfilter/nf_dup_netdev.c27
-rw-r--r--net/netfilter/nf_flow_table_core.c172
-rw-r--r--net/netfilter/nf_flow_table_inet.c17
-rw-r--r--net/netfilter/nf_flow_table_ip.c103
-rw-r--r--net/netfilter/nf_flow_table_offload.c64
-rw-r--r--net/netfilter/nf_flow_table_procfs.c80
-rw-r--r--net/netfilter/nf_log.c4
-rw-r--r--net/netfilter/nf_log_syslog.c144
-rw-r--r--net/netfilter/nf_nat_amanda.c14
-rw-r--r--net/netfilter/nf_nat_bpf.c79
-rw-r--r--net/netfilter/nf_nat_core.c88
-rw-r--r--net/netfilter/nf_nat_ftp.c17
-rw-r--r--net/netfilter/nf_nat_helper.c31
-rw-r--r--net/netfilter/nf_nat_irc.c16
-rw-r--r--net/netfilter/nf_nat_masquerade.c5
-rw-r--r--net/netfilter/nf_nat_sip.c14
-rw-r--r--net/netfilter/nf_queue.c36
-rw-r--r--net/netfilter/nf_synproxy_core.c26
-rw-r--r--net/netfilter/nf_tables_api.c698
-rw-r--r--net/netfilter/nf_tables_core.c47
-rw-r--r--net/netfilter/nf_tables_offload.c26
-rw-r--r--net/netfilter/nf_tables_trace.c44
-rw-r--r--net/netfilter/nfnetlink.c88
-rw-r--r--net/netfilter/nfnetlink_cthelper.c10
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c73
-rw-r--r--net/netfilter/nfnetlink_hook.c7
-rw-r--r--net/netfilter/nfnetlink_log.c6
-rw-r--r--net/netfilter/nfnetlink_osf.c4
-rw-r--r--net/netfilter/nfnetlink_queue.c40
-rw-r--r--net/netfilter/nft_bitwise.c97
-rw-r--r--net/netfilter/nft_byteorder.c14
-rw-r--r--net/netfilter/nft_cmp.c161
-rw-r--r--net/netfilter/nft_compat.c10
-rw-r--r--net/netfilter/nft_connlimit.c16
-rw-r--r--net/netfilter/nft_counter.c3
-rw-r--r--net/netfilter/nft_ct.c57
-rw-r--r--net/netfilter/nft_dup_netdev.c7
-rw-r--r--net/netfilter/nft_dynset.c3
-rw-r--r--net/netfilter/nft_exthdr.c141
-rw-r--r--net/netfilter/nft_fib.c46
-rw-r--r--net/netfilter/nft_fib_inet.c1
-rw-r--r--net/netfilter/nft_fib_netdev.c1
-rw-r--r--net/netfilter/nft_flow_offload.c52
-rw-r--r--net/netfilter/nft_fwd_netdev.c10
-rw-r--r--net/netfilter/nft_hash.c36
-rw-r--r--net/netfilter/nft_immediate.c46
-rw-r--r--net/netfilter/nft_last.c5
-rw-r--r--net/netfilter/nft_limit.c26
-rw-r--r--net/netfilter/nft_log.c1
-rw-r--r--net/netfilter/nft_lookup.c12
-rw-r--r--net/netfilter/nft_masq.c3
-rw-r--r--net/netfilter/nft_meta.c32
-rw-r--r--net/netfilter/nft_nat.c5
-rw-r--r--net/netfilter/nft_numgen.c34
-rw-r--r--net/netfilter/nft_objref.c2
-rw-r--r--net/netfilter/nft_osf.c47
-rw-r--r--net/netfilter/nft_payload.c56
-rw-r--r--net/netfilter/nft_queue.c29
-rw-r--r--net/netfilter/nft_quota.c5
-rw-r--r--net/netfilter/nft_range.c28
-rw-r--r--net/netfilter/nft_redir.c3
-rw-r--r--net/netfilter/nft_reject_inet.c1
-rw-r--r--net/netfilter/nft_reject_netdev.c1
-rw-r--r--net/netfilter/nft_rt.c1
-rw-r--r--net/netfilter/nft_set_bitmap.c4
-rw-r--r--net/netfilter/nft_set_hash.c2
-rw-r--r--net/netfilter/nft_set_pipapo.c48
-rw-r--r--net/netfilter/nft_set_rbtree.c6
-rw-r--r--net/netfilter/nft_socket.c104
-rw-r--r--net/netfilter/nft_synproxy.c5
-rw-r--r--net/netfilter/nft_tproxy.c15
-rw-r--r--net/netfilter/nft_tunnel.c32
-rw-r--r--net/netfilter/nft_xfrm.c36
-rw-r--r--net/netfilter/x_tables.c30
-rw-r--r--net/netfilter/xt_CT.c23
-rw-r--r--net/netfilter/xt_DSCP.c8
-rw-r--r--net/netfilter/xt_RATEEST.c2
-rw-r--r--net/netfilter/xt_TCPMSS.c4
-rw-r--r--net/netfilter/xt_TPROXY.c25
-rw-r--r--net/netfilter/xt_connlimit.c6
-rw-r--r--net/netfilter/xt_hashlimit.c18
-rw-r--r--net/netfilter/xt_recent.c4
-rw-r--r--net/netfilter/xt_socket.c4
-rw-r--r--net/netfilter/xt_statistic.c2
-rw-r--r--net/netlabel/netlabel_calipso.c1
-rw-r--r--net/netlabel/netlabel_cipso_v4.c1
-rw-r--r--net/netlabel/netlabel_kapi.c2
-rw-r--r--net/netlabel/netlabel_mgmt.c1
-rw-r--r--net/netlabel/netlabel_unlabeled.c3
-rw-r--r--net/netlink/af_netlink.c118
-rw-r--r--net/netlink/genetlink.c69
-rw-r--r--net/netlink/policy.c14
-rw-r--r--net/netrom/af_netrom.c3
-rw-r--r--net/nfc/core.c34
-rw-r--r--net/nfc/hci/hcp.c12
-rw-r--r--net/nfc/llcp.h1
-rw-r--r--net/nfc/llcp_core.c9
-rw-r--r--net/nfc/llcp_sock.c57
-rw-r--r--net/nfc/nci/core.c4
-rw-r--r--net/nfc/nci/data.c2
-rw-r--r--net/nfc/nci/hci.c4
-rw-r--r--net/nfc/nci/uart.c5
-rw-r--r--net/nfc/netlink.c5
-rw-r--r--net/nfc/rawsock.c3
-rw-r--r--net/openvswitch/actions.c60
-rw-r--r--net/openvswitch/conntrack.c139
-rw-r--r--net/openvswitch/datapath.c68
-rw-r--r--net/openvswitch/datapath.h2
-rw-r--r--net/openvswitch/flow.c145
-rw-r--r--net/openvswitch/flow.h14
-rw-r--r--net/openvswitch/flow_netlink.c144
-rw-r--r--net/openvswitch/meter.c15
-rw-r--r--net/openvswitch/vport-internal_dev.c5
-rw-r--r--net/openvswitch/vport-netdev.c6
-rw-r--r--net/openvswitch/vport.c2
-rw-r--r--net/openvswitch/vport.h4
-rw-r--r--net/packet/af_packet.c110
-rw-r--r--net/phonet/af_phonet.c8
-rw-r--r--net/phonet/datagram.c4
-rw-r--r--net/phonet/pep.c7
-rw-r--r--net/psample/psample.c1
-rw-r--r--net/qrtr/af_qrtr.c3
-rw-r--r--net/qrtr/mhi.c12
-rw-r--r--net/rds/af_rds.c2
-rw-r--r--net/rds/bind.c2
-rw-r--r--net/rds/ib.c4
-rw-r--r--net/rds/ib_recv.c1
-rw-r--r--net/rds/message.c5
-rw-r--r--net/rds/rdma.c2
-rw-r--r--net/rds/rdma_transport.c4
-rw-r--r--net/rds/tcp.c22
-rw-r--r--net/rds/tcp.h2
-rw-r--r--net/rds/tcp_connect.c5
-rw-r--r--net/rds/tcp_listen.c5
-rw-r--r--net/rfkill/core.c48
-rw-r--r--net/rose/af_rose.c20
-rw-r--r--net/rose/rose_link.c3
-rw-r--r--net/rose/rose_loopback.c3
-rw-r--r--net/rose/rose_route.c31
-rw-r--r--net/rose/rose_timer.c34
-rw-r--r--net/rxrpc/af_rxrpc.c2
-rw-r--r--net/rxrpc/ar-internal.h55
-rw-r--r--net/rxrpc/call_accept.c10
-rw-r--r--net/rxrpc/call_event.c17
-rw-r--r--net/rxrpc/call_object.c100
-rw-r--r--net/rxrpc/conn_client.c30
-rw-r--r--net/rxrpc/conn_object.c51
-rw-r--r--net/rxrpc/conn_service.c8
-rw-r--r--net/rxrpc/input.c62
-rw-r--r--net/rxrpc/local_object.c75
-rw-r--r--net/rxrpc/net_ns.c9
-rw-r--r--net/rxrpc/output.c22
-rw-r--r--net/rxrpc/peer_event.c293
-rw-r--r--net/rxrpc/peer_object.c40
-rw-r--r--net/rxrpc/proc.c85
-rw-r--r--net/rxrpc/protocol.h2
-rw-r--r--net/rxrpc/recvmsg.c51
-rw-r--r--net/rxrpc/rxkad.c4
-rw-r--r--net/rxrpc/sendmsg.c98
-rw-r--r--net/rxrpc/server_key.c7
-rw-r--r--net/rxrpc/skbuff.c1
-rw-r--r--net/rxrpc/sysctl.c4
-rw-r--r--net/sched/act_api.c76
-rw-r--r--net/sched/act_bpf.c32
-rw-r--r--net/sched/act_connmark.c28
-rw-r--r--net/sched/act_csum.c31
-rw-r--r--net/sched/act_ct.c209
-rw-r--r--net/sched/act_ctinfo.c28
-rw-r--r--net/sched/act_gact.c43
-rw-r--r--net/sched/act_gate.c31
-rw-r--r--net/sched/act_ife.c28
-rw-r--r--net/sched/act_ipt.c61
-rw-r--r--net/sched/act_mirred.c41
-rw-r--r--net/sched/act_mpls.c38
-rw-r--r--net/sched/act_nat.c28
-rw-r--r--net/sched/act_pedit.c62
-rw-r--r--net/sched/act_police.c96
-rw-r--r--net/sched/act_sample.c33
-rw-r--r--net/sched/act_simple.c28
-rw-r--r--net/sched/act_skbedit.c93
-rw-r--r--net/sched/act_skbmod.c28
-rw-r--r--net/sched/act_tunnel_key.c32
-rw-r--r--net/sched/act_vlan.c45
-rw-r--r--net/sched/cls_api.c119
-rw-r--r--net/sched/cls_basic.c16
-rw-r--r--net/sched/cls_bpf.c17
-rw-r--r--net/sched/cls_flow.c8
-rw-r--r--net/sched/cls_flower.c325
-rw-r--r--net/sched/cls_fw.c16
-rw-r--r--net/sched/cls_matchall.c31
-rw-r--r--net/sched/cls_route.c32
-rw-r--r--net/sched/cls_rsvp.h16
-rw-r--r--net/sched/cls_tcindex.c25
-rw-r--r--net/sched/cls_u32.c57
-rw-r--r--net/sched/em_meta.c7
-rw-r--r--net/sched/sch_api.c74
-rw-r--r--net/sched/sch_atm.c7
-rw-r--r--net/sched/sch_cake.c24
-rw-r--r--net/sched/sch_cbq.c92
-rw-r--r--net/sched/sch_cbs.c8
-rw-r--r--net/sched/sch_choke.c4
-rw-r--r--net/sched/sch_codel.c3
-rw-r--r--net/sched/sch_drr.c11
-rw-r--r--net/sched/sch_dsmark.c16
-rw-r--r--net/sched/sch_etf.c6
-rw-r--r--net/sched/sch_ets.c16
-rw-r--r--net/sched/sch_fq.c3
-rw-r--r--net/sched/sch_fq_codel.c13
-rw-r--r--net/sched/sch_fq_pie.c6
-rw-r--r--net/sched/sch_generic.c95
-rw-r--r--net/sched/sch_gred.c13
-rw-r--r--net/sched/sch_hfsc.c13
-rw-r--r--net/sched/sch_hhf.c3
-rw-r--r--net/sched/sch_htb.c69
-rw-r--r--net/sched/sch_mq.c5
-rw-r--r--net/sched/sch_mqprio.c5
-rw-r--r--net/sched/sch_multiq.c10
-rw-r--r--net/sched/sch_netem.c37
-rw-r--r--net/sched/sch_pie.c5
-rw-r--r--net/sched/sch_plug.c3
-rw-r--r--net/sched/sch_prio.c13
-rw-r--r--net/sched/sch_qfq.c11
-rw-r--r--net/sched/sch_red.c17
-rw-r--r--net/sched/sch_sfb.c27
-rw-r--r--net/sched/sch_sfq.c8
-rw-r--r--net/sched/sch_skbprio.c12
-rw-r--r--net/sched/sch_taprio.c309
-rw-r--r--net/sched/sch_tbf.c13
-rw-r--r--net/sched/sch_teql.c3
-rw-r--r--net/sctp/associola.c5
-rw-r--r--net/sctp/auth.c18
-rw-r--r--net/sctp/diag.c9
-rw-r--r--net/sctp/input.c4
-rw-r--r--net/sctp/ipv6.c4
-rw-r--r--net/sctp/output.c3
-rw-r--r--net/sctp/outqueue.c19
-rw-r--r--net/sctp/protocol.c6
-rw-r--r--net/sctp/sm_sideeffect.c4
-rw-r--r--net/sctp/sm_statefuns.c14
-rw-r--r--net/sctp/socket.c36
-rw-r--r--net/sctp/stream.c19
-rw-r--r--net/sctp/stream_interleave.c2
-rw-r--r--net/sctp/stream_sched.c11
-rw-r--r--net/sctp/ulpevent.c2
-rw-r--r--net/sctp/ulpqueue.c4
-rw-r--r--net/smc/Makefile1
-rw-r--r--net/smc/af_smc.c655
-rw-r--r--net/smc/smc.h67
-rw-r--r--net/smc/smc_cdc.c29
-rw-r--r--net/smc/smc_clc.c16
-rw-r--r--net/smc/smc_clc.h2
-rw-r--r--net/smc/smc_close.c8
-rw-r--r--net/smc/smc_core.c401
-rw-r--r--net/smc/smc_core.h34
-rw-r--r--net/smc/smc_diag.c9
-rw-r--r--net/smc/smc_ib.c45
-rw-r--r--net/smc/smc_ib.h2
-rw-r--r--net/smc/smc_ism.c19
-rw-r--r--net/smc/smc_ism.h20
-rw-r--r--net/smc/smc_llc.c35
-rw-r--r--net/smc/smc_llc.h1
-rw-r--r--net/smc/smc_netlink.c18
-rw-r--r--net/smc/smc_pnet.c67
-rw-r--r--net/smc/smc_pnet.h2
-rw-r--r--net/smc/smc_rx.c96
-rw-r--r--net/smc/smc_sysctl.c111
-rw-r--r--net/smc/smc_sysctl.h33
-rw-r--r--net/smc/smc_tx.c187
-rw-r--r--net/smc/smc_tx.h3
-rw-r--r--net/smc/smc_wr.c10
-rw-r--r--net/smc/smc_wr.h9
-rw-r--r--net/socket.c207
-rw-r--r--net/strparser/strparser.c3
-rw-r--r--net/sunrpc/auth.c12
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c39
-rw-r--r--net/sunrpc/auth_gss/auth_gss_internal.h2
-rw-r--r--net/sunrpc/auth_gss/gss_generic_token.c6
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c10
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seqnum.c4
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c8
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_upcall.c1
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c8
-rw-r--r--net/sunrpc/auth_unix.c16
-rw-r--r--net/sunrpc/backchannel_rqst.c38
-rw-r--r--net/sunrpc/cache.c44
-rw-r--r--net/sunrpc/clnt.c350
-rw-r--r--net/sunrpc/debugfs.c3
-rw-r--r--net/sunrpc/fail.h2
-rw-r--r--net/sunrpc/rpc_pipe.c6
-rw-r--r--net/sunrpc/rpcb_clnt.c4
-rw-r--r--net/sunrpc/sched.c125
-rw-r--r--net/sunrpc/socklib.c7
-rw-r--r--net/sunrpc/stats.c2
-rw-r--r--net/sunrpc/sunrpc.h16
-rw-r--r--net/sunrpc/svc.c283
-rw-r--r--net/sunrpc/svc_xprt.c105
-rw-r--r--net/sunrpc/svcauth.c2
-rw-r--r--net/sunrpc/svcauth_unix.c60
-rw-r--r--net/sunrpc/svcsock.c34
-rw-r--r--net/sunrpc/sysfs.c128
-rw-r--r--net/sunrpc/xdr.c229
-rw-r--r--net/sunrpc/xprt.c119
-rw-r--r--net/sunrpc/xprtmultipath.c115
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c6
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c26
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c9
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c6
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c1
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c4
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c2
-rw-r--r--net/sunrpc/xprtrdma/transport.c19
-rw-r--r--net/sunrpc/xprtrdma/verbs.c78
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h10
-rw-r--r--net/sunrpc/xprtsock.c291
-rw-r--r--net/switchdev/switchdev.c236
-rw-r--r--net/tipc/bearer.c21
-rw-r--r--net/tipc/core.c3
-rw-r--r--net/tipc/crypto.c2
-rw-r--r--net/tipc/discover.c2
-rw-r--r--net/tipc/link.c14
-rw-r--r--net/tipc/monitor.c4
-rw-r--r--net/tipc/msg.h23
-rw-r--r--net/tipc/name_distr.c10
-rw-r--r--net/tipc/name_table.c13
-rw-r--r--net/tipc/name_table.h1
-rw-r--r--net/tipc/netlink.c1
-rw-r--r--net/tipc/netlink_compat.c3
-rw-r--r--net/tipc/node.c54
-rw-r--r--net/tipc/socket.c10
-rw-r--r--net/tipc/topsrv.c18
-rw-r--r--net/tls/Makefile2
-rw-r--r--net/tls/tls.h321
-rw-r--r--net/tls/tls_device.c345
-rw-r--r--net/tls/tls_device_fallback.c90
-rw-r--r--net/tls/tls_main.c267
-rw-r--r--net/tls/tls_proc.c4
-rw-r--r--net/tls/tls_strp.c518
-rw-r--r--net/tls/tls_sw.c1145
-rw-r--r--net/tls/tls_toe.c2
-rw-r--r--net/unix/af_unix.c652
-rw-r--r--net/unix/diag.c49
-rw-r--r--net/unix/garbage.c34
-rw-r--r--net/unix/scm.c6
-rw-r--r--net/unix/sysctl_net_unix.c19
-rw-r--r--net/unix/unix_bpf.c13
-rw-r--r--net/vmw_vsock/af_vsock.c60
-rw-r--r--net/vmw_vsock/hyperv_transport.c28
-rw-r--r--net/vmw_vsock/virtio_transport.c207
-rw-r--r--net/vmw_vsock/virtio_transport_common.c9
-rw-r--r--net/vmw_vsock/vmci_transport.c12
-rw-r--r--net/vmw_vsock/vmci_transport_notify.c10
-rw-r--r--net/vmw_vsock/vmci_transport_notify_qstate.c12
-rw-r--r--net/wireless/Makefile4
-rw-r--r--net/wireless/ap.c46
-rw-r--r--net/wireless/chan.c390
-rw-r--r--net/wireless/core.c70
-rw-r--r--net/wireless/core.h45
-rw-r--r--net/wireless/debugfs.c3
-rw-r--r--net/wireless/ethtool.c12
-rw-r--r--net/wireless/ibss.c63
-rw-r--r--net/wireless/lib80211_crypt_ccmp.c2
-rw-r--r--net/wireless/mesh.c31
-rw-r--r--net/wireless/mlme.c308
-rw-r--r--net/wireless/nl80211.c2252
-rw-r--r--net/wireless/nl80211.h9
-rw-r--r--net/wireless/ocb.c5
-rw-r--r--net/wireless/pmsr.c4
-rw-r--r--net/wireless/rdev-ops.h182
-rw-r--r--net/wireless/reg.c165
-rw-r--r--net/wireless/scan.c102
-rw-r--r--net/wireless/sme.c524
-rw-r--r--net/wireless/trace.h666
-rw-r--r--net/wireless/util.c286
-rw-r--r--net/wireless/wext-compat.c66
-rw-r--r--net/wireless/wext-core.c17
-rw-r--r--net/wireless/wext-sme.c29
-rw-r--r--net/x25/af_x25.c19
-rw-r--r--net/x25/x25_proc.c3
-rw-r--r--net/xdp/xdp_umem.c8
-rw-r--r--net/xdp/xsk.c139
-rw-r--r--net/xdp/xsk_buff_pool.c45
-rw-r--r--net/xdp/xsk_queue.h47
-rw-r--r--net/xdp/xskmap.c6
-rw-r--r--net/xfrm/espintcp.c8
-rw-r--r--net/xfrm/xfrm_device.c53
-rw-r--r--net/xfrm/xfrm_input.c28
-rw-r--r--net/xfrm/xfrm_interface.c215
-rw-r--r--net/xfrm/xfrm_ipcomp.c11
-rw-r--r--net/xfrm/xfrm_output.c4
-rw-r--r--net/xfrm/xfrm_policy.c66
-rw-r--r--net/xfrm/xfrm_replay.c10
-rw-r--r--net/xfrm/xfrm_state.c74
-rw-r--r--net/xfrm/xfrm_user.c451
950 files changed, 57643 insertions, 38402 deletions
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index a068757eabaf..7b3341cef926 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -5,6 +5,7 @@
* (C) 2015 Pengutronix, Alexander Aring <aar@pengutronix.de>
*/
+#include <linux/if_arp.h>
#include <linux/module.h>
#include <net/6lowpan.h>
diff --git a/net/6lowpan/nhc.c b/net/6lowpan/nhc.c
index d6bbbd4ab38b..7b374595328d 100644
--- a/net/6lowpan/nhc.c
+++ b/net/6lowpan/nhc.c
@@ -12,77 +12,26 @@
#include "nhc.h"
-static struct rb_root rb_root = RB_ROOT;
-static struct lowpan_nhc *lowpan_nexthdr_nhcs[NEXTHDR_MAX + 1];
+static const struct lowpan_nhc *lowpan_nexthdr_nhcs[NEXTHDR_MAX + 1];
static DEFINE_SPINLOCK(lowpan_nhc_lock);
-static int lowpan_nhc_insert(struct lowpan_nhc *nhc)
+static const struct lowpan_nhc *lowpan_nhc_by_nhcid(struct sk_buff *skb)
{
- struct rb_node **new = &rb_root.rb_node, *parent = NULL;
-
- /* Figure out where to put new node */
- while (*new) {
- struct lowpan_nhc *this = rb_entry(*new, struct lowpan_nhc,
- node);
- int result, len_dif, len;
-
- len_dif = nhc->idlen - this->idlen;
-
- if (nhc->idlen < this->idlen)
- len = nhc->idlen;
- else
- len = this->idlen;
-
- result = memcmp(nhc->id, this->id, len);
- if (!result)
- result = len_dif;
-
- parent = *new;
- if (result < 0)
- new = &((*new)->rb_left);
- else if (result > 0)
- new = &((*new)->rb_right);
- else
- return -EEXIST;
- }
+ const struct lowpan_nhc *nhc;
+ int i;
+ u8 id;
- /* Add new node and rebalance tree. */
- rb_link_node(&nhc->node, parent, new);
- rb_insert_color(&nhc->node, &rb_root);
+ if (!pskb_may_pull(skb, 1))
+ return NULL;
- return 0;
-}
+ id = *skb->data;
-static void lowpan_nhc_remove(struct lowpan_nhc *nhc)
-{
- rb_erase(&nhc->node, &rb_root);
-}
+ for (i = 0; i < NEXTHDR_MAX + 1; i++) {
+ nhc = lowpan_nexthdr_nhcs[i];
+ if (!nhc)
+ continue;
-static struct lowpan_nhc *lowpan_nhc_by_nhcid(const struct sk_buff *skb)
-{
- struct rb_node *node = rb_root.rb_node;
- const u8 *nhcid_skb_ptr = skb->data;
-
- while (node) {
- struct lowpan_nhc *nhc = rb_entry(node, struct lowpan_nhc,
- node);
- u8 nhcid_skb_ptr_masked[LOWPAN_NHC_MAX_ID_LEN];
- int result, i;
-
- if (nhcid_skb_ptr + nhc->idlen > skb->data + skb->len)
- return NULL;
-
- /* copy and mask afterwards the nhid value from skb */
- memcpy(nhcid_skb_ptr_masked, nhcid_skb_ptr, nhc->idlen);
- for (i = 0; i < nhc->idlen; i++)
- nhcid_skb_ptr_masked[i] &= nhc->idmask[i];
-
- result = memcmp(nhcid_skb_ptr_masked, nhc->id, nhc->idlen);
- if (result < 0)
- node = node->rb_left;
- else if (result > 0)
- node = node->rb_right;
- else
+ if ((id & nhc->idmask) == nhc->id)
return nhc;
}
@@ -92,7 +41,7 @@ static struct lowpan_nhc *lowpan_nhc_by_nhcid(const struct sk_buff *skb)
int lowpan_nhc_check_compression(struct sk_buff *skb,
const struct ipv6hdr *hdr, u8 **hc_ptr)
{
- struct lowpan_nhc *nhc;
+ const struct lowpan_nhc *nhc;
int ret = 0;
spin_lock_bh(&lowpan_nhc_lock);
@@ -110,7 +59,7 @@ int lowpan_nhc_do_compression(struct sk_buff *skb, const struct ipv6hdr *hdr,
u8 **hc_ptr)
{
int ret;
- struct lowpan_nhc *nhc;
+ const struct lowpan_nhc *nhc;
spin_lock_bh(&lowpan_nhc_lock);
@@ -153,7 +102,7 @@ int lowpan_nhc_do_uncompression(struct sk_buff *skb,
const struct net_device *dev,
struct ipv6hdr *hdr)
{
- struct lowpan_nhc *nhc;
+ const struct lowpan_nhc *nhc;
int ret;
spin_lock_bh(&lowpan_nhc_lock);
@@ -189,18 +138,9 @@ int lowpan_nhc_do_uncompression(struct sk_buff *skb,
return 0;
}
-int lowpan_nhc_add(struct lowpan_nhc *nhc)
+int lowpan_nhc_add(const struct lowpan_nhc *nhc)
{
- int ret;
-
- if (!nhc->idlen || !nhc->idsetup)
- return -EINVAL;
-
- WARN_ONCE(nhc->idlen > LOWPAN_NHC_MAX_ID_LEN,
- "LOWPAN_NHC_MAX_ID_LEN should be updated to %zd.\n",
- nhc->idlen);
-
- nhc->idsetup(nhc);
+ int ret = 0;
spin_lock_bh(&lowpan_nhc_lock);
@@ -209,10 +149,6 @@ int lowpan_nhc_add(struct lowpan_nhc *nhc)
goto out;
}
- ret = lowpan_nhc_insert(nhc);
- if (ret < 0)
- goto out;
-
lowpan_nexthdr_nhcs[nhc->nexthdr] = nhc;
out:
spin_unlock_bh(&lowpan_nhc_lock);
@@ -220,11 +156,10 @@ out:
}
EXPORT_SYMBOL(lowpan_nhc_add);
-void lowpan_nhc_del(struct lowpan_nhc *nhc)
+void lowpan_nhc_del(const struct lowpan_nhc *nhc)
{
spin_lock_bh(&lowpan_nhc_lock);
- lowpan_nhc_remove(nhc);
lowpan_nexthdr_nhcs[nhc->nexthdr] = NULL;
spin_unlock_bh(&lowpan_nhc_lock);
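
The nhc.c conversion above drops the rb-tree keyed on variable-length masked IDs in favor of a flat array indexed by the IPv6 nexthdr value plus a linear scan that matches a single masked byte, and it now verifies with pskb_may_pull() that this byte is actually present before reading skb->data. Because every NHC ID used here fits in one byte (all the removed *_IDLEN constants below were 1), a fixed id/idmask pair suffices and the rb-tree ordering machinery becomes unnecessary. A minimal, self-contained sketch of the masked-ID dispatch; the table entries reuse ID/mask constants that appear in later hunks of this patch, while the function and type names are illustrative only:

/* Userspace sketch of the one-byte masked-ID match this patch switches to. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct nhc {
	const char *name;
	uint8_t id;
	uint8_t idmask;
};

static const struct nhc table[] = {
	{ "hop-by-hop", 0xe0, 0xfe },	/* LOWPAN_NHC_HOP_ID_0/MASK_0 */
	{ "routing",    0xe2, 0xfe },	/* LOWPAN_NHC_ROUTING_ID_0/MASK_0 */
	{ "udp",        0xf0, 0xf8 },	/* LOWPAN_NHC_UDP_ID/MASK */
};

static const struct nhc *nhc_by_id(uint8_t byte)
{
	/* a byte matches an entry when its bits agree under the mask */
	for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if ((byte & table[i].idmask) == table[i].id)
			return &table[i];
	return NULL;
}

int main(void)
{
	const struct nhc *nhc = nhc_by_id(0xf3);	/* 0xf3 & 0xf8 == 0xf0 */

	printf("%s\n", nhc ? nhc->name : "none");
	return 0;
}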
diff --git a/net/6lowpan/nhc.h b/net/6lowpan/nhc.h
index 67951c40734b..ab7b4977c32b 100644
--- a/net/6lowpan/nhc.h
+++ b/net/6lowpan/nhc.h
@@ -16,24 +16,20 @@
* @_name: const char * of common header compression name.
* @_nexthdr: ipv6 nexthdr field for the header compression.
* @_nexthdrlen: ipv6 nexthdr len for the reserved space.
- * @_idsetup: callback to setup id and mask values.
- * @_idlen: len for the next header id and mask, should be always the same.
+ * @_id: one byte nhc id value.
+ * @_idmask: one byte nhc id mask value.
* @_uncompress: callback for uncompression call.
* @_compress: callback for compression call.
*/
#define LOWPAN_NHC(__nhc, _name, _nexthdr, \
- _hdrlen, _idsetup, _idlen, \
+ _hdrlen, _id, _idmask, \
_uncompress, _compress) \
-static u8 __nhc##_val[_idlen]; \
-static u8 __nhc##_mask[_idlen]; \
-static struct lowpan_nhc __nhc = { \
+static const struct lowpan_nhc __nhc = { \
.name = _name, \
.nexthdr = _nexthdr, \
.nexthdrlen = _hdrlen, \
- .id = __nhc##_val, \
- .idmask = __nhc##_mask, \
- .idlen = _idlen, \
- .idsetup = _idsetup, \
+ .id = _id, \
+ .idmask = _idmask, \
.uncompress = _uncompress, \
.compress = _compress, \
}
@@ -53,27 +49,21 @@ module_exit(__nhc##_exit);
/**
 * struct lowpan_nhc - hold 6lowpan next hdr compression information
*
- * @node: holder for the rbtree.
* @name: name of the specific next header compression
* @nexthdr: next header value of the protocol which should be compressed.
* @nexthdrlen: ipv6 nexthdr len for the reserved space.
- * @id: array for nhc id. Note this need to be in network byteorder.
- * @mask: array for nhc id mask. Note this need to be in network byteorder.
- * @len: the length of the next header id and mask.
- * @setup: callback to setup fill the next header id value and mask.
+ * @id: one byte nhc id value.
+ * @idmask: one byte nhc id mask value.
* @compress: callback to do the header compression.
* @uncompress: callback to do the header uncompression.
*/
struct lowpan_nhc {
- struct rb_node node;
const char *name;
- const u8 nexthdr;
- const size_t nexthdrlen;
- u8 *id;
- u8 *idmask;
- const size_t idlen;
+ u8 nexthdr;
+ size_t nexthdrlen;
+ u8 id;
+ u8 idmask;
- void (*idsetup)(struct lowpan_nhc *nhc);
int (*uncompress)(struct sk_buff *skb, size_t needed);
int (*compress)(struct sk_buff *skb, u8 **hc_ptr);
};
@@ -126,14 +116,14 @@ int lowpan_nhc_do_uncompression(struct sk_buff *skb,
*
 * @nhc: nhc which should be added.
*/
-int lowpan_nhc_add(struct lowpan_nhc *nhc);
+int lowpan_nhc_add(const struct lowpan_nhc *nhc);
/**
* lowpan_nhc_del - delete a next header compression from framework
*
 * @nhc: nhc which should be deleted.
*/
-void lowpan_nhc_del(struct lowpan_nhc *nhc);
+void lowpan_nhc_del(const struct lowpan_nhc *nhc);
/**
* lowpan_nhc_init - adding all default nhcs
diff --git a/net/6lowpan/nhc_dest.c b/net/6lowpan/nhc_dest.c
index 4768a9459212..0cbcc7806469 100644
--- a/net/6lowpan/nhc_dest.c
+++ b/net/6lowpan/nhc_dest.c
@@ -6,18 +6,11 @@
#include "nhc.h"
-#define LOWPAN_NHC_DEST_IDLEN 1
#define LOWPAN_NHC_DEST_ID_0 0xe6
#define LOWPAN_NHC_DEST_MASK_0 0xfe
-static void dest_nhid_setup(struct lowpan_nhc *nhc)
-{
- nhc->id[0] = LOWPAN_NHC_DEST_ID_0;
- nhc->idmask[0] = LOWPAN_NHC_DEST_MASK_0;
-}
-
LOWPAN_NHC(nhc_dest, "RFC6282 Destination Options", NEXTHDR_DEST, 0,
- dest_nhid_setup, LOWPAN_NHC_DEST_IDLEN, NULL, NULL);
+ LOWPAN_NHC_DEST_ID_0, LOWPAN_NHC_DEST_MASK_0, NULL, NULL);
module_lowpan_nhc(nhc_dest);
MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Destination Options compression");
diff --git a/net/6lowpan/nhc_fragment.c b/net/6lowpan/nhc_fragment.c
index be85f07715bd..9414552df0ac 100644
--- a/net/6lowpan/nhc_fragment.c
+++ b/net/6lowpan/nhc_fragment.c
@@ -5,18 +5,11 @@
#include "nhc.h"
-#define LOWPAN_NHC_FRAGMENT_IDLEN 1
#define LOWPAN_NHC_FRAGMENT_ID_0 0xe4
#define LOWPAN_NHC_FRAGMENT_MASK_0 0xfe
-static void fragment_nhid_setup(struct lowpan_nhc *nhc)
-{
- nhc->id[0] = LOWPAN_NHC_FRAGMENT_ID_0;
- nhc->idmask[0] = LOWPAN_NHC_FRAGMENT_MASK_0;
-}
-
LOWPAN_NHC(nhc_fragment, "RFC6282 Fragment", NEXTHDR_FRAGMENT, 0,
- fragment_nhid_setup, LOWPAN_NHC_FRAGMENT_IDLEN, NULL, NULL);
+ LOWPAN_NHC_FRAGMENT_ID_0, LOWPAN_NHC_FRAGMENT_MASK_0, NULL, NULL);
module_lowpan_nhc(nhc_fragment);
MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Fragment compression");
diff --git a/net/6lowpan/nhc_ghc_ext_dest.c b/net/6lowpan/nhc_ghc_ext_dest.c
index a9137f1733be..e4745ddd10a8 100644
--- a/net/6lowpan/nhc_ghc_ext_dest.c
+++ b/net/6lowpan/nhc_ghc_ext_dest.c
@@ -5,18 +5,11 @@
#include "nhc.h"
-#define LOWPAN_GHC_EXT_DEST_IDLEN 1
#define LOWPAN_GHC_EXT_DEST_ID_0 0xb6
#define LOWPAN_GHC_EXT_DEST_MASK_0 0xfe
-static void dest_ghid_setup(struct lowpan_nhc *nhc)
-{
- nhc->id[0] = LOWPAN_GHC_EXT_DEST_ID_0;
- nhc->idmask[0] = LOWPAN_GHC_EXT_DEST_MASK_0;
-}
-
LOWPAN_NHC(ghc_ext_dest, "RFC7400 Destination Extension Header", NEXTHDR_DEST,
- 0, dest_ghid_setup, LOWPAN_GHC_EXT_DEST_IDLEN, NULL, NULL);
+ 0, LOWPAN_GHC_EXT_DEST_ID_0, LOWPAN_GHC_EXT_DEST_MASK_0, NULL, NULL);
module_lowpan_nhc(ghc_ext_dest);
MODULE_DESCRIPTION("6LoWPAN generic header destination extension compression");
diff --git a/net/6lowpan/nhc_ghc_ext_frag.c b/net/6lowpan/nhc_ghc_ext_frag.c
index d49b745918e0..220e5abfa946 100644
--- a/net/6lowpan/nhc_ghc_ext_frag.c
+++ b/net/6lowpan/nhc_ghc_ext_frag.c
@@ -5,19 +5,12 @@
#include "nhc.h"
-#define LOWPAN_GHC_EXT_FRAG_IDLEN 1
#define LOWPAN_GHC_EXT_FRAG_ID_0 0xb4
#define LOWPAN_GHC_EXT_FRAG_MASK_0 0xfe
-static void frag_ghid_setup(struct lowpan_nhc *nhc)
-{
- nhc->id[0] = LOWPAN_GHC_EXT_FRAG_ID_0;
- nhc->idmask[0] = LOWPAN_GHC_EXT_FRAG_MASK_0;
-}
-
LOWPAN_NHC(ghc_ext_frag, "RFC7400 Fragmentation Extension Header",
- NEXTHDR_FRAGMENT, 0, frag_ghid_setup,
- LOWPAN_GHC_EXT_FRAG_IDLEN, NULL, NULL);
+ NEXTHDR_FRAGMENT, 0, LOWPAN_GHC_EXT_FRAG_ID_0,
+ LOWPAN_GHC_EXT_FRAG_MASK_0, NULL, NULL);
module_lowpan_nhc(ghc_ext_frag);
MODULE_DESCRIPTION("6LoWPAN generic header fragmentation extension compression");
diff --git a/net/6lowpan/nhc_ghc_ext_hop.c b/net/6lowpan/nhc_ghc_ext_hop.c
index 3beedf5140a3..9b0de4da7379 100644
--- a/net/6lowpan/nhc_ghc_ext_hop.c
+++ b/net/6lowpan/nhc_ghc_ext_hop.c
@@ -5,18 +5,11 @@
#include "nhc.h"
-#define LOWPAN_GHC_EXT_HOP_IDLEN 1
#define LOWPAN_GHC_EXT_HOP_ID_0 0xb0
#define LOWPAN_GHC_EXT_HOP_MASK_0 0xfe
-static void hop_ghid_setup(struct lowpan_nhc *nhc)
-{
- nhc->id[0] = LOWPAN_GHC_EXT_HOP_ID_0;
- nhc->idmask[0] = LOWPAN_GHC_EXT_HOP_MASK_0;
-}
-
LOWPAN_NHC(ghc_ext_hop, "RFC7400 Hop-by-Hop Extension Header", NEXTHDR_HOP, 0,
- hop_ghid_setup, LOWPAN_GHC_EXT_HOP_IDLEN, NULL, NULL);
+ LOWPAN_GHC_EXT_HOP_ID_0, LOWPAN_GHC_EXT_HOP_MASK_0, NULL, NULL);
module_lowpan_nhc(ghc_ext_hop);
MODULE_DESCRIPTION("6LoWPAN generic header hop-by-hop extension compression");
diff --git a/net/6lowpan/nhc_ghc_ext_route.c b/net/6lowpan/nhc_ghc_ext_route.c
index 70dc0ea3cf66..3e86faec59c9 100644
--- a/net/6lowpan/nhc_ghc_ext_route.c
+++ b/net/6lowpan/nhc_ghc_ext_route.c
@@ -5,18 +5,11 @@
#include "nhc.h"
-#define LOWPAN_GHC_EXT_ROUTE_IDLEN 1
#define LOWPAN_GHC_EXT_ROUTE_ID_0 0xb2
#define LOWPAN_GHC_EXT_ROUTE_MASK_0 0xfe
-static void route_ghid_setup(struct lowpan_nhc *nhc)
-{
- nhc->id[0] = LOWPAN_GHC_EXT_ROUTE_ID_0;
- nhc->idmask[0] = LOWPAN_GHC_EXT_ROUTE_MASK_0;
-}
-
LOWPAN_NHC(ghc_ext_route, "RFC7400 Routing Extension Header", NEXTHDR_ROUTING,
- 0, route_ghid_setup, LOWPAN_GHC_EXT_ROUTE_IDLEN, NULL, NULL);
+ 0, LOWPAN_GHC_EXT_ROUTE_ID_0, LOWPAN_GHC_EXT_ROUTE_MASK_0, NULL, NULL);
module_lowpan_nhc(ghc_ext_route);
MODULE_DESCRIPTION("6LoWPAN generic header routing extension compression");
diff --git a/net/6lowpan/nhc_ghc_icmpv6.c b/net/6lowpan/nhc_ghc_icmpv6.c
index 339ceffc25a9..1634f3eb0be8 100644
--- a/net/6lowpan/nhc_ghc_icmpv6.c
+++ b/net/6lowpan/nhc_ghc_icmpv6.c
@@ -5,18 +5,11 @@
#include "nhc.h"
-#define LOWPAN_GHC_ICMPV6_IDLEN 1
#define LOWPAN_GHC_ICMPV6_ID_0 0xdf
#define LOWPAN_GHC_ICMPV6_MASK_0 0xff
-static void icmpv6_ghid_setup(struct lowpan_nhc *nhc)
-{
- nhc->id[0] = LOWPAN_GHC_ICMPV6_ID_0;
- nhc->idmask[0] = LOWPAN_GHC_ICMPV6_MASK_0;
-}
-
LOWPAN_NHC(ghc_icmpv6, "RFC7400 ICMPv6", NEXTHDR_ICMP, 0,
- icmpv6_ghid_setup, LOWPAN_GHC_ICMPV6_IDLEN, NULL, NULL);
+ LOWPAN_GHC_ICMPV6_ID_0, LOWPAN_GHC_ICMPV6_MASK_0, NULL, NULL);
module_lowpan_nhc(ghc_icmpv6);
MODULE_DESCRIPTION("6LoWPAN generic header ICMPv6 compression");
diff --git a/net/6lowpan/nhc_ghc_udp.c b/net/6lowpan/nhc_ghc_udp.c
index f47fec601e73..4ac4813b77ad 100644
--- a/net/6lowpan/nhc_ghc_udp.c
+++ b/net/6lowpan/nhc_ghc_udp.c
@@ -5,18 +5,11 @@
#include "nhc.h"
-#define LOWPAN_GHC_UDP_IDLEN 1
#define LOWPAN_GHC_UDP_ID_0 0xd0
#define LOWPAN_GHC_UDP_MASK_0 0xf8
-static void udp_ghid_setup(struct lowpan_nhc *nhc)
-{
- nhc->id[0] = LOWPAN_GHC_UDP_ID_0;
- nhc->idmask[0] = LOWPAN_GHC_UDP_MASK_0;
-}
-
LOWPAN_NHC(ghc_udp, "RFC7400 UDP", NEXTHDR_UDP, 0,
- udp_ghid_setup, LOWPAN_GHC_UDP_IDLEN, NULL, NULL);
+ LOWPAN_GHC_UDP_ID_0, LOWPAN_GHC_UDP_MASK_0, NULL, NULL);
module_lowpan_nhc(ghc_udp);
MODULE_DESCRIPTION("6LoWPAN generic header UDP compression");
diff --git a/net/6lowpan/nhc_hop.c b/net/6lowpan/nhc_hop.c
index 158fc1906327..182087dfd09d 100644
--- a/net/6lowpan/nhc_hop.c
+++ b/net/6lowpan/nhc_hop.c
@@ -5,18 +5,11 @@
#include "nhc.h"
-#define LOWPAN_NHC_HOP_IDLEN 1
#define LOWPAN_NHC_HOP_ID_0 0xe0
#define LOWPAN_NHC_HOP_MASK_0 0xfe
-static void hop_nhid_setup(struct lowpan_nhc *nhc)
-{
- nhc->id[0] = LOWPAN_NHC_HOP_ID_0;
- nhc->idmask[0] = LOWPAN_NHC_HOP_MASK_0;
-}
-
LOWPAN_NHC(nhc_hop, "RFC6282 Hop-by-Hop Options", NEXTHDR_HOP, 0,
- hop_nhid_setup, LOWPAN_NHC_HOP_IDLEN, NULL, NULL);
+ LOWPAN_NHC_HOP_ID_0, LOWPAN_NHC_HOP_MASK_0, NULL, NULL);
module_lowpan_nhc(nhc_hop);
MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Hop-by-Hop Options compression");
diff --git a/net/6lowpan/nhc_ipv6.c b/net/6lowpan/nhc_ipv6.c
index 08b7589e5b38..20242360b1d4 100644
--- a/net/6lowpan/nhc_ipv6.c
+++ b/net/6lowpan/nhc_ipv6.c
@@ -5,18 +5,11 @@
#include "nhc.h"
-#define LOWPAN_NHC_IPV6_IDLEN 1
#define LOWPAN_NHC_IPV6_ID_0 0xee
#define LOWPAN_NHC_IPV6_MASK_0 0xfe
-static void ipv6_nhid_setup(struct lowpan_nhc *nhc)
-{
- nhc->id[0] = LOWPAN_NHC_IPV6_ID_0;
- nhc->idmask[0] = LOWPAN_NHC_IPV6_MASK_0;
-}
-
-LOWPAN_NHC(nhc_ipv6, "RFC6282 IPv6", NEXTHDR_IPV6, 0, ipv6_nhid_setup,
- LOWPAN_NHC_IPV6_IDLEN, NULL, NULL);
+LOWPAN_NHC(nhc_ipv6, "RFC6282 IPv6", NEXTHDR_IPV6, 0, LOWPAN_NHC_IPV6_ID_0,
+ LOWPAN_NHC_IPV6_MASK_0, NULL, NULL);
module_lowpan_nhc(nhc_ipv6);
MODULE_DESCRIPTION("6LoWPAN next header RFC6282 IPv6 compression");
diff --git a/net/6lowpan/nhc_mobility.c b/net/6lowpan/nhc_mobility.c
index ac8fca689828..1c31d872c804 100644
--- a/net/6lowpan/nhc_mobility.c
+++ b/net/6lowpan/nhc_mobility.c
@@ -5,18 +5,11 @@
#include "nhc.h"
-#define LOWPAN_NHC_MOBILITY_IDLEN 1
#define LOWPAN_NHC_MOBILITY_ID_0 0xe8
#define LOWPAN_NHC_MOBILITY_MASK_0 0xfe
-static void mobility_nhid_setup(struct lowpan_nhc *nhc)
-{
- nhc->id[0] = LOWPAN_NHC_MOBILITY_ID_0;
- nhc->idmask[0] = LOWPAN_NHC_MOBILITY_MASK_0;
-}
-
LOWPAN_NHC(nhc_mobility, "RFC6282 Mobility", NEXTHDR_MOBILITY, 0,
- mobility_nhid_setup, LOWPAN_NHC_MOBILITY_IDLEN, NULL, NULL);
+ LOWPAN_NHC_MOBILITY_ID_0, LOWPAN_NHC_MOBILITY_MASK_0, NULL, NULL);
module_lowpan_nhc(nhc_mobility);
MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Mobility compression");
diff --git a/net/6lowpan/nhc_routing.c b/net/6lowpan/nhc_routing.c
index 1c174023de42..dae03ebf7021 100644
--- a/net/6lowpan/nhc_routing.c
+++ b/net/6lowpan/nhc_routing.c
@@ -5,18 +5,11 @@
#include "nhc.h"
-#define LOWPAN_NHC_ROUTING_IDLEN 1
#define LOWPAN_NHC_ROUTING_ID_0 0xe2
#define LOWPAN_NHC_ROUTING_MASK_0 0xfe
-static void routing_nhid_setup(struct lowpan_nhc *nhc)
-{
- nhc->id[0] = LOWPAN_NHC_ROUTING_ID_0;
- nhc->idmask[0] = LOWPAN_NHC_ROUTING_MASK_0;
-}
-
LOWPAN_NHC(nhc_routing, "RFC6282 Routing", NEXTHDR_ROUTING, 0,
- routing_nhid_setup, LOWPAN_NHC_ROUTING_IDLEN, NULL, NULL);
+ LOWPAN_NHC_ROUTING_ID_0, LOWPAN_NHC_ROUTING_MASK_0, NULL, NULL);
module_lowpan_nhc(nhc_routing);
MODULE_DESCRIPTION("6LoWPAN next header RFC6282 Routing compression");
diff --git a/net/6lowpan/nhc_udp.c b/net/6lowpan/nhc_udp.c
index 33f17bd8cda7..0a506c77283d 100644
--- a/net/6lowpan/nhc_udp.c
+++ b/net/6lowpan/nhc_udp.c
@@ -14,7 +14,6 @@
#define LOWPAN_NHC_UDP_MASK 0xF8
#define LOWPAN_NHC_UDP_ID 0xF0
-#define LOWPAN_NHC_UDP_IDLEN 1
#define LOWPAN_NHC_UDP_4BIT_PORT 0xF0B0
#define LOWPAN_NHC_UDP_4BIT_MASK 0xFFF0
@@ -169,14 +168,8 @@ static int udp_compress(struct sk_buff *skb, u8 **hc_ptr)
return 0;
}
-static void udp_nhid_setup(struct lowpan_nhc *nhc)
-{
- nhc->id[0] = LOWPAN_NHC_UDP_ID;
- nhc->idmask[0] = LOWPAN_NHC_UDP_MASK;
-}
-
LOWPAN_NHC(nhc_udp, "RFC6282 UDP", NEXTHDR_UDP, sizeof(struct udphdr),
- udp_nhid_setup, LOWPAN_NHC_UDP_IDLEN, udp_uncompress, udp_compress);
+ LOWPAN_NHC_UDP_ID, LOWPAN_NHC_UDP_MASK, udp_uncompress, udp_compress);
module_lowpan_nhc(nhc_udp);
MODULE_DESCRIPTION("6LoWPAN next header RFC6282 UDP compression");
diff --git a/net/802/garp.c b/net/802/garp.c
index f6012f8e59f0..fc9eb02a912f 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -407,7 +407,7 @@ static void garp_join_timer_arm(struct garp_applicant *app)
{
unsigned long delay;
- delay = (u64)msecs_to_jiffies(garp_join_time) * prandom_u32() >> 32;
+ delay = prandom_u32_max(msecs_to_jiffies(garp_join_time));
mod_timer(&app->join_timer, jiffies + delay);
}
diff --git a/net/802/mrp.c b/net/802/mrp.c
index 35e04cc5390c..155f74d8b14f 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -592,7 +592,7 @@ static void mrp_join_timer_arm(struct mrp_applicant *app)
{
unsigned long delay;
- delay = (u64)msecs_to_jiffies(mrp_join_time) * prandom_u32() >> 32;
+ delay = prandom_u32_max(msecs_to_jiffies(mrp_join_time));
mod_timer(&app->join_timer, jiffies + delay);
}
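
Both timer helpers previously open-coded the multiply-shift trick for drawing a uniform value in [0, bound): multiply a full 32-bit random value by the bound and keep the high 32 bits. prandom_u32_max() wraps exactly that computation. A standalone sketch, where prandom_u32() below is a plain rand()-based stand-in for the kernel PRNG and the _sketch name is illustrative:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in only: rand() is not uniform over the full 32-bit range. */
static uint32_t prandom_u32(void)
{
	return (uint32_t)rand();
}

/* Multiply-shift range reduction into [0, bound): the exact expression
 * the old garp/mrp code open-coded. */
static uint32_t prandom_u32_max_sketch(uint32_t bound)
{
	return (uint32_t)(((uint64_t)prandom_u32() * bound) >> 32);
}

int main(void)
{
	printf("%u\n", prandom_u32_max_sketch(1000));	/* value in [0, 1000) */
	return 0;
}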
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 788076b002b3..e40aa3e3641c 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -319,8 +319,7 @@ static void vlan_transfer_features(struct net_device *dev,
{
struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
- netif_set_gso_max_size(vlandev, dev->gso_max_size);
- netif_set_gso_max_segs(vlandev, dev->gso_max_segs);
+ netif_inherit_tso_max(vlandev, dev);
if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto))
vlandev->hard_header_len = dev->hard_header_len;
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 1a705a4ef7fa..5eaf38875554 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -129,6 +129,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev,
u32 skb_prio, u16 vlan_prio);
int vlan_dev_set_egress_priority(const struct net_device *dev,
u32 skb_prio, u16 vlan_prio);
+void vlan_dev_free_egress_priority(const struct net_device *dev);
int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask);
void vlan_dev_get_realdev_name(const struct net_device *dev, char *result,
size_t size);
@@ -139,7 +140,6 @@ int vlan_check_real_dev(struct net_device *real_dev,
void vlan_setup(struct net_device *dev);
int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack);
void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
-void vlan_dev_uninit(struct net_device *dev);
bool vlan_dev_inherit_address(struct net_device *dev,
struct net_device *real_dev);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index acf8c791f320..0beb44f2fe1f 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -63,10 +63,10 @@ bool vlan_do_receive(struct sk_buff **skbp)
rx_stats = this_cpu_ptr(vlan_dev_priv(vlan_dev)->vlan_pcpu_stats);
u64_stats_update_begin(&rx_stats->syncp);
- rx_stats->rx_packets++;
- rx_stats->rx_bytes += skb->len;
+ u64_stats_inc(&rx_stats->rx_packets);
+ u64_stats_add(&rx_stats->rx_bytes, skb->len);
if (skb->pkt_type == PACKET_MULTICAST)
- rx_stats->rx_multicast++;
+ u64_stats_inc(&rx_stats->rx_multicast);
u64_stats_update_end(&rx_stats->syncp);
return true;
@@ -467,12 +467,9 @@ static struct sk_buff *vlan_gro_receive(struct list_head *head,
off_vlan = skb_gro_offset(skb);
hlen = off_vlan + sizeof(*vhdr);
- vhdr = skb_gro_header_fast(skb, off_vlan);
- if (skb_gro_header_hard(skb, hlen)) {
- vhdr = skb_gro_header_slow(skb, hlen, off_vlan);
- if (unlikely(!vhdr))
- goto out;
- }
+ vhdr = skb_gro_header(skb, hlen, off_vlan);
+ if (unlikely(!vhdr))
+ goto out;
type = vhdr->h_vlan_encapsulated_proto;
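
The GRO hunk above replaces the open-coded fast/slow header lookup with the single skb_gro_header() helper. Condensed from the removed lines, the logic it subsumes looks like this (a kernel-context fragment, not standalone code): try the fast path first, then fall back to the slow path when the requested length exceeds what the fast path can reach.

vhdr = skb_gro_header_fast(skb, off_vlan);
if (skb_gro_header_hard(skb, hlen))
	vhdr = skb_gro_header_slow(skb, hlen, off_vlan);	/* NULL on failure */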
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 26d031a43cc1..e1bb41a443c4 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -128,8 +128,8 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
stats = this_cpu_ptr(vlan->vlan_pcpu_stats);
u64_stats_update_begin(&stats->syncp);
- stats->tx_packets++;
- stats->tx_bytes += len;
+ u64_stats_inc(&stats->tx_packets);
+ u64_stats_add(&stats->tx_bytes, len);
u64_stats_update_end(&stats->syncp);
} else {
this_cpu_inc(vlan->vlan_pcpu_stats->tx_dropped);
@@ -573,8 +573,7 @@ static int vlan_dev_init(struct net_device *dev)
NETIF_F_ALL_FCOE;
dev->features |= dev->hw_features | NETIF_F_LLTX;
- netif_set_gso_max_size(dev, real_dev->gso_max_size);
- netif_set_gso_max_segs(dev, real_dev->gso_max_segs);
+ netif_inherit_tso_max(dev, real_dev);
if (dev->features & NETIF_F_VLAN_FEATURES)
netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n");
@@ -616,13 +615,13 @@ static int vlan_dev_init(struct net_device *dev)
return -ENOMEM;
/* Get vlan's reference to real_dev */
- dev_hold_track(real_dev, &vlan->dev_tracker, GFP_KERNEL);
+ netdev_hold(real_dev, &vlan->dev_tracker, GFP_KERNEL);
return 0;
}
/* Note: this function might be called multiple times for the same device. */
-void vlan_dev_uninit(struct net_device *dev)
+void vlan_dev_free_egress_priority(const struct net_device *dev)
{
struct vlan_priority_tci_mapping *pm;
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
@@ -636,6 +635,11 @@ void vlan_dev_uninit(struct net_device *dev)
}
}
+static void vlan_dev_uninit(struct net_device *dev)
+{
+ vlan_dev_free_egress_priority(dev);
+}
+
static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
netdev_features_t features)
{
@@ -670,9 +674,9 @@ static int vlan_ethtool_get_link_ksettings(struct net_device *dev,
static void vlan_ethtool_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *info)
{
- strlcpy(info->driver, vlan_fullname, sizeof(info->driver));
- strlcpy(info->version, vlan_version, sizeof(info->version));
- strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
+ strscpy(info->driver, vlan_fullname, sizeof(info->driver));
+ strscpy(info->version, vlan_version, sizeof(info->version));
+ strscpy(info->fw_version, "N/A", sizeof(info->fw_version));
}
static int vlan_ethtool_get_ts_info(struct net_device *dev,
@@ -709,11 +713,11 @@ static void vlan_dev_get_stats64(struct net_device *dev,
p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i);
do {
start = u64_stats_fetch_begin_irq(&p->syncp);
- rxpackets = p->rx_packets;
- rxbytes = p->rx_bytes;
- rxmulticast = p->rx_multicast;
- txpackets = p->tx_packets;
- txbytes = p->tx_bytes;
+ rxpackets = u64_stats_read(&p->rx_packets);
+ rxbytes = u64_stats_read(&p->rx_bytes);
+ rxmulticast = u64_stats_read(&p->rx_multicast);
+ txpackets = u64_stats_read(&p->tx_packets);
+ txbytes = u64_stats_read(&p->tx_bytes);
} while (u64_stats_fetch_retry_irq(&p->syncp, start));
stats->rx_packets += rxpackets;
@@ -722,8 +726,8 @@ static void vlan_dev_get_stats64(struct net_device *dev,
stats->tx_packets += txpackets;
stats->tx_bytes += txbytes;
/* rx_errors & tx_dropped are u32 */
- rx_errors += p->rx_errors;
- tx_dropped += p->tx_dropped;
+ rx_errors += READ_ONCE(p->rx_errors);
+ tx_dropped += READ_ONCE(p->tx_dropped);
}
stats->rx_errors = rx_errors;
stats->tx_dropped = tx_dropped;
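
The get_stats64 hunk shows the matching reader side: u64_stats_read() only yields a coherent value inside a fetch_begin/fetch_retry loop, which retries until it observes a snapshot not torn by a concurrent writer (the loop compiles away on 64-bit). A sketch reusing the hypothetical demo_pcpu_stats from above:

static void demo_fetch(const struct demo_pcpu_stats *s,
		       u64 *packets, u64 *bytes)
{
	unsigned int start;

	do {
		start = u64_stats_fetch_begin_irq(&s->syncp);
		*packets = u64_stats_read(&s->rx_packets);
		*bytes = u64_stats_read(&s->rx_bytes);
	} while (u64_stats_fetch_retry_irq(&s->syncp, start));
}
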
@@ -848,7 +852,7 @@ static void vlan_dev_free(struct net_device *dev)
vlan->vlan_pcpu_stats = NULL;
/* Get rid of the vlan's reference to real_dev */
- dev_put_track(vlan->real_dev, &vlan->dev_tracker);
+ netdev_put(vlan->real_dev, &vlan->dev_tracker);
}
void vlan_setup(struct net_device *dev)
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 0db85aeb119b..214532173536 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -182,11 +182,16 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
else if (dev->mtu > max_mtu)
return -EINVAL;
+ /* Note: If this initial vlan_changelink() fails, we need
+ * to call vlan_dev_free_egress_priority() to free memory.
+ */
err = vlan_changelink(dev, tb, data, extack);
+
if (!err)
err = register_vlan_dev(dev, extack);
+
if (err)
- vlan_dev_uninit(dev);
+ vlan_dev_free_egress_priority(dev);
return err;
}
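
Calling vlan_dev_free_egress_priority() from the newlink error path works because the free helper unlinks each mapping before freeing it, so the cleanup is idempotent: it stays safe whether or not ndo_uninit() also runs later during rollback. A generic sketch of that pattern (all names hypothetical):

#include <linux/kernel.h>	/* ARRAY_SIZE */
#include <linux/slab.h>		/* kfree */

struct demo_map {
	struct demo_map *next;
};

struct demo_priv {
	struct demo_map *maps[8];
};

static void demo_free_maps(struct demo_priv *p)
{
	struct demo_map *m;
	int i;

	for (i = 0; i < ARRAY_SIZE(p->maps); i++) {
		while ((m = p->maps[i]) != NULL) {
			p->maps[i] = m->next;	/* unlink first ... */
			kfree(m);		/* ... so a 2nd pass is a no-op */
		}
	}
}
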
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 08bf6c839e25..7825c129742a 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -280,7 +280,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
const struct vlan_priority_tci_mapping *mp
= vlan->egress_priority_map[i];
while (mp) {
- seq_printf(seq, "%u:%hu ",
+ seq_printf(seq, "%u:%d ",
mp->priority, ((mp->vlan_qos >> 13) & 0x7));
mp = mp->next;
}
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index 64468c49791f..deabbd376cb1 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -15,6 +15,13 @@ menuconfig NET_9P
if NET_9P
+config NET_9P_FD
+ default NET_9P
+ tristate "9P FD Transport"
+ help
+ This builds support for transports over TCP, Unix sockets and
+ file descriptors.
+
config NET_9P_VIRTIO
depends on VIRTIO
tristate "9P Virtio Transport"
diff --git a/net/9p/Makefile b/net/9p/Makefile
index aa0a5641e5d0..1df9b344c30b 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_NET_9P) := 9pnet.o
+obj-$(CONFIG_NET_9P_FD) += 9pnet_fd.o
obj-$(CONFIG_NET_9P_XEN) += 9pnet_xen.o
obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
@@ -9,9 +10,11 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
client.o \
error.o \
protocol.o \
- trans_fd.o \
trans_common.o \
+9pnet_fd-objs := \
+ trans_fd.o \
+
9pnet_virtio-objs := \
trans_virtio.o \
diff --git a/net/9p/client.c b/net/9p/client.c
index d062f1e5bfb0..aaa37b07e30a 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -255,24 +255,42 @@ static struct kmem_cache *p9_req_cache;
* p9_tag_alloc - Allocate a new request.
* @c: Client session.
* @type: Transaction type.
- * @max_size: Maximum packet size for this request.
+ * @t_size: Buffer size for holding this request
+ * (automatic calculation by format template if 0).
+ * @r_size: Buffer size for holding server's reply on this request
+ * (automatic calculation by format template if 0).
+ * @fmt: Format template for assembling 9p request message
+ * (see p9pdu_vwritef).
+ * @ap: Variable arguments to be fed to passed format template
+ * (see p9pdu_vwritef).
*
* Context: Process context.
* Return: Pointer to new request.
*/
static struct p9_req_t *
-p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
+p9_tag_alloc(struct p9_client *c, int8_t type, uint t_size, uint r_size,
+ const char *fmt, va_list ap)
{
struct p9_req_t *req = kmem_cache_alloc(p9_req_cache, GFP_NOFS);
- int alloc_msize = min(c->msize, max_size);
+ int alloc_tsize;
+ int alloc_rsize;
int tag;
+ va_list apc;
+
+ va_copy(apc, ap);
+ alloc_tsize = min_t(size_t, c->msize,
+ t_size ?: p9_msg_buf_size(c, type, fmt, apc));
+ va_end(apc);
+
+ alloc_rsize = min_t(size_t, c->msize,
+ r_size ?: p9_msg_buf_size(c, type + 1, fmt, ap));
if (!req)
return ERR_PTR(-ENOMEM);
- if (p9_fcall_init(c, &req->tc, alloc_msize))
+ if (p9_fcall_init(c, &req->tc, alloc_tsize))
goto free_req;
- if (p9_fcall_init(c, &req->rc, alloc_msize))
+ if (p9_fcall_init(c, &req->rc, alloc_rsize))
goto free;
p9pdu_reset(&req->tc);
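
p9_tag_alloc() now walks the argument list twice (once per direction) before p9pdu_vwritef() consumes it for real, which is why each sizing pass operates on a va_copy. A standalone C illustration of the underlying rule that a va_list may only be traversed once:

#include <stdarg.h>
#include <stddef.h>

static size_t sum_ints(int n, va_list ap)
{
	size_t s = 0;

	while (n--)
		s += va_arg(ap, int);
	return s;
}

static size_t sum_twice(int n, ...)
{
	va_list ap, apc;
	size_t s;

	va_start(ap, n);
	va_copy(apc, ap);	/* first pass runs on the copy */
	s = sum_ints(n, apc);
	va_end(apc);
	s += sum_ints(n, ap);	/* the original is still pristine */
	va_end(ap);
	return s;
}
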
@@ -298,14 +316,14 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
/* Init ref to two because in the general case there is one ref
* that is put asynchronously by a writer thread, one ref
* temporarily given by p9_tag_lookup and put by p9_client_cb
- * in the recv thread, and one ref put by p9_tag_remove in the
+ * in the recv thread, and one ref put by p9_req_put in the
* main thread. The only exception is virtio that does not use
* p9_tag_lookup but does not have a writer thread either
* (the write happens synchronously in the request/zc_request
* callback), so p9_client_cb eats the second ref there
* as the pointer is duplicated directly by virtqueue_add_sgs()
*/
- refcount_set(&req->refcount.refcount, 2);
+ refcount_set(&req->refcount, 2);
return req;
@@ -341,7 +359,7 @@ again:
if (!p9_req_try_get(req))
goto again;
if (req->tc.tag != tag) {
- p9_req_put(req);
+ p9_req_put(c, req);
goto again;
}
}
@@ -358,30 +376,28 @@ EXPORT_SYMBOL(p9_tag_lookup);
*
* Context: Any context.
*/
-static int p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
+static void p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
{
unsigned long flags;
u16 tag = r->tc.tag;
- p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag);
+ p9_debug(P9_DEBUG_MUX, "freeing clnt %p req %p tag: %d\n", c, r, tag);
spin_lock_irqsave(&c->lock, flags);
idr_remove(&c->reqs, tag);
spin_unlock_irqrestore(&c->lock, flags);
- return p9_req_put(r);
}
-static void p9_req_free(struct kref *ref)
+int p9_req_put(struct p9_client *c, struct p9_req_t *r)
{
- struct p9_req_t *r = container_of(ref, struct p9_req_t, refcount);
-
- p9_fcall_fini(&r->tc);
- p9_fcall_fini(&r->rc);
- kmem_cache_free(p9_req_cache, r);
-}
+ if (refcount_dec_and_test(&r->refcount)) {
+ p9_tag_remove(c, r);
-int p9_req_put(struct p9_req_t *r)
-{
- return kref_put(&r->refcount, p9_req_free);
+ p9_fcall_fini(&r->tc);
+ p9_fcall_fini(&r->rc);
+ kmem_cache_free(p9_req_cache, r);
+ return 1;
+ }
+ return 0;
}
EXPORT_SYMBOL(p9_req_put);
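
Open-coding the release in p9_req_put() instead of going through kref_put() lets the final put take the extra client argument that p9_tag_remove() needs, something a kref release callback (which only receives the kref) cannot carry. The general shape of such a kref-to-refcount_t conversion, sketched with a hypothetical object:

#include <linux/refcount.h>
#include <linux/slab.h>

struct demo_ctx { /* owner that tracks objects */ };

struct demo_obj {
	refcount_t ref;
};

static void demo_unregister(struct demo_ctx *ctx, struct demo_obj *o)
{
	/* drop 'o' from ctx's tables; stand-in for p9_tag_remove() */
}

static int demo_obj_put(struct demo_ctx *ctx, struct demo_obj *o)
{
	if (refcount_dec_and_test(&o->ref)) {
		demo_unregister(ctx, o);	/* needs ctx: no kref callback fits */
		kfree(o);
		return 1;
	}
	return 0;
}
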
@@ -400,7 +416,7 @@ static void p9_tag_cleanup(struct p9_client *c)
rcu_read_lock();
idr_for_each_entry(&c->reqs, req, id) {
pr_info("Tag %d still in use\n", id);
- if (p9_tag_remove(c, req) == 0)
+ if (p9_req_put(c, req) == 0)
pr_warn("Packet with tag %d has still references",
req->tc.tag);
}
@@ -426,7 +442,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
wake_up(&req->wq);
p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag);
- p9_req_put(req);
+ p9_req_put(c, req);
}
EXPORT_SYMBOL(p9_client_cb);
@@ -550,90 +566,6 @@ out_err:
return err;
}
-/**
- * p9_check_zc_errors - check 9p packet for error return and process it
- * @c: current client instance
- * @req: request to parse and check for error conditions
- * @uidata: external buffer containing error
- * @in_hdrlen: Size of response protocol buffer.
- *
- * returns error code if one is discovered, otherwise returns 0
- *
- * this will have to be more complicated if we have multiple
- * error packet types
- */
-
-static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
- struct iov_iter *uidata, int in_hdrlen)
-{
- int err;
- int ecode;
- s8 type;
- char *ename = NULL;
-
- err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
- /* dump the response from server
- * This should be after parse_header which poplulate pdu_fcall.
- */
- trace_9p_protocol_dump(c, &req->rc);
- if (err) {
- p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
- return err;
- }
-
- if (type != P9_RERROR && type != P9_RLERROR)
- return 0;
-
- if (!p9_is_proto_dotl(c)) {
- /* Error is reported in string format */
- int len;
- /* 7 = header size for RERROR; */
- int inline_len = in_hdrlen - 7;
-
- len = req->rc.size - req->rc.offset;
- if (len > (P9_ZC_HDR_SZ - 7)) {
- err = -EFAULT;
- goto out_err;
- }
-
- ename = &req->rc.sdata[req->rc.offset];
- if (len > inline_len) {
- /* We have error in external buffer */
- if (!copy_from_iter_full(ename + inline_len,
- len - inline_len, uidata)) {
- err = -EFAULT;
- goto out_err;
- }
- }
- ename = NULL;
- err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
- &ename, &ecode);
- if (err)
- goto out_err;
-
- if (p9_is_proto_dotu(c) && ecode < 512)
- err = -ecode;
-
- if (!err) {
- err = p9_errstr2errno(ename, strlen(ename));
-
- p9_debug(P9_DEBUG_9P, "<<< RERROR (%d) %s\n",
- -ecode, ename);
- }
- kfree(ename);
- } else {
- err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
- err = -ecode;
-
- p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
- }
- return err;
-
-out_err:
- p9_debug(P9_DEBUG_ERROR, "couldn't parse error%d\n", err);
- return err;
-}
-
static struct p9_req_t *
p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...);
@@ -673,16 +605,17 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
c->trans_mod->cancelled(c, oldreq);
}
- p9_tag_remove(c, req);
+ p9_req_put(c, req);
return 0;
}
static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
- int8_t type, int req_size,
+ int8_t type, uint t_size, uint r_size,
const char *fmt, va_list ap)
{
int err;
struct p9_req_t *req;
+ va_list apc;
p9_debug(P9_DEBUG_MUX, "client %p op %d\n", c, type);
@@ -694,7 +627,9 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
if (c->status == BeginDisconnect && type != P9_TCLUNK)
return ERR_PTR(-EIO);
- req = p9_tag_alloc(c, type, req_size);
+ va_copy(apc, ap);
+ req = p9_tag_alloc(c, type, t_size, r_size, fmt, apc);
+ va_end(apc);
if (IS_ERR(req))
return req;
@@ -707,9 +642,9 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
trace_9p_client_req(c, type, req->tc.tag);
return req;
reterr:
- p9_tag_remove(c, req);
+ p9_req_put(c, req);
/* We also have to put the 2nd reference as it won't be used */
- p9_req_put(req);
+ p9_req_put(c, req);
return ERR_PTR(err);
}
@@ -719,7 +654,7 @@ reterr:
* @type: type of request
* @fmt: protocol format string (see protocol.c)
*
- * Returns request structure (which client must free using p9_tag_remove)
+ * Returns request structure (which client must free using p9_req_put)
*/
static struct p9_req_t *
@@ -729,9 +664,18 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
int sigpending, err;
unsigned long flags;
struct p9_req_t *req;
+ /* Passing zero for tsize/rsize to p9_client_prepare_req() tells it to
+ * auto determine an appropriate (small) request/response size
+ * according to actual message data being sent. Currently RDMA
+ * transport is excluded from this response message size optimization,
+ * as it would not cope with it, due to its pooled response buffers
+ * (using an optimized request size for RDMA as well though).
+ */
+ const uint tsize = 0;
+ const uint rsize = c->trans_mod->pooled_rbuffers ? c->msize : 0;
va_start(ap, fmt);
- req = p9_client_prepare_req(c, type, c->msize, fmt, ap);
+ req = p9_client_prepare_req(c, type, tsize, rsize, fmt, ap);
va_end(ap);
if (IS_ERR(req))
return req;
@@ -746,7 +690,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
err = c->trans_mod->request(c, req);
if (err < 0) {
/* write won't happen */
- p9_req_put(req);
+ p9_req_put(c, req);
if (err != -ERESTARTSYS && err != -EFAULT)
c->status = Disconnected;
goto recalc_sigpending;
@@ -797,7 +741,7 @@ recalc_sigpending:
if (!err)
return req;
reterr:
- p9_tag_remove(c, req);
+ p9_req_put(c, req);
return ERR_PTR(safe_errno(err));
}
@@ -812,7 +756,7 @@ reterr:
* @in_hdrlen: reader header size. This is the size of the response protocol data.
* @fmt: protocol format string (see protocol.c)
*
- * Returns request structure (which client must free using p9_tag_remove)
+ * Returns request structure (which client must free using p9_req_put)
*/
static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
struct iov_iter *uidata,
@@ -829,7 +773,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
/* We allocate an inline protocol data buffer of only 4k bytes.
* The actual content is passed in zero-copy fashion.
*/
- req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap);
+ req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, P9_ZC_HDR_SZ, fmt, ap);
va_end(ap);
if (IS_ERR(req))
return req;
@@ -874,12 +818,12 @@ recalc_sigpending:
if (err < 0)
goto reterr;
- err = p9_check_zc_errors(c, req, uidata, in_hdrlen);
+ err = p9_check_errors(c, req);
trace_9p_client_res(c, type, req->rc.tag, err);
if (!err)
return req;
reterr:
- p9_tag_remove(c, req);
+ p9_req_put(c, req);
return ERR_PTR(safe_errno(err));
}
@@ -889,16 +833,13 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt)
struct p9_fid *fid;
p9_debug(P9_DEBUG_FID, "clnt %p\n", clnt);
- fid = kmalloc(sizeof(*fid), GFP_KERNEL);
+ fid = kzalloc(sizeof(*fid), GFP_KERNEL);
if (!fid)
return NULL;
- memset(&fid->qid, 0, sizeof(fid->qid));
fid->mode = -1;
fid->uid = current_fsuid();
fid->clnt = clnt;
- fid->rdir = NULL;
- fid->fid = 0;
refcount_set(&fid->count, 1);
idr_preload(GFP_KERNEL);
@@ -907,8 +848,10 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt)
GFP_NOWAIT);
spin_unlock_irq(&clnt->lock);
idr_preload_end();
- if (!ret)
+ if (!ret) {
+ trace_9p_fid_ref(fid, P9_FID_REF_CREATE);
return fid;
+ }
kfree(fid);
return NULL;
@@ -920,6 +863,7 @@ static void p9_fid_destroy(struct p9_fid *fid)
unsigned long flags;
p9_debug(P9_DEBUG_FID, "fid %d\n", fid->fid);
+ trace_9p_fid_ref(fid, P9_FID_REF_DESTROY);
clnt = fid->clnt;
spin_lock_irqsave(&clnt->lock, flags);
idr_remove(&clnt->fids, fid->fid);
@@ -928,6 +872,21 @@ static void p9_fid_destroy(struct p9_fid *fid)
kfree(fid);
}
+/* We also need to export tracepoint symbols for tracepoint_enabled() */
+EXPORT_TRACEPOINT_SYMBOL(9p_fid_ref);
+
+void do_trace_9p_fid_get(struct p9_fid *fid)
+{
+ trace_9p_fid_ref(fid, P9_FID_REF_GET);
+}
+EXPORT_SYMBOL(do_trace_9p_fid_get);
+
+void do_trace_9p_fid_put(struct p9_fid *fid)
+{
+ trace_9p_fid_ref(fid, P9_FID_REF_PUT);
+}
+EXPORT_SYMBOL(do_trace_9p_fid_put);
+
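
These wrappers exist because the fid get/put helpers live in a header: a static inline in other modules cannot reference the tracepoint statics directly, so the header presumably tests tracepoint_enabled() and calls the exported wrapper. A hedged sketch of what the header-side inline likely looks like (the actual inline lives in include/net/9p/client.h):

static inline struct p9_fid *p9_fid_get(struct p9_fid *fid)
{
	if (tracepoint_enabled(9p_fid_ref))	/* static-key test, nearly free */
		do_trace_9p_fid_get(fid);

	refcount_inc(&fid->count);
	return fid;
}
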
static int p9_client_version(struct p9_client *c)
{
int err = 0;
@@ -990,7 +949,7 @@ static int p9_client_version(struct p9_client *c)
error:
kfree(version);
- p9_tag_remove(c, req);
+ p9_req_put(c, req);
return err;
}
@@ -1038,8 +997,13 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
if (err)
goto put_trans;
- if (clnt->msize > clnt->trans_mod->maxsize)
+ if (clnt->msize > clnt->trans_mod->maxsize) {
clnt->msize = clnt->trans_mod->maxsize;
+ pr_info("Limiting 'msize' to %d as this is the maximum supported by transport %s\n",
+ clnt->msize, clnt->trans_mod->name);
+ }
if (clnt->msize < 4096) {
p9_debug(P9_DEBUG_ERROR,
@@ -1139,7 +1103,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", &qid);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
goto error;
}
@@ -1148,7 +1112,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
memmove(&fid->qid, &qid, sizeof(struct p9_qid));
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
return fid;
error:
@@ -1195,10 +1159,10 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
err = p9pdu_readf(&req->rc, clnt->proto_version, "R", &nwqids, &wqids);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
goto clunk_fid;
}
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
p9_debug(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids);
@@ -1223,7 +1187,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
clunk_fid:
kfree(wqids);
- p9_client_clunk(fid);
+ p9_fid_put(fid);
fid = NULL;
error:
@@ -1274,7 +1238,7 @@ int p9_client_open(struct p9_fid *fid, int mode)
fid->iounit = iounit;
free_and_error:
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
error:
return err;
}
@@ -1318,7 +1282,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags,
ofid->iounit = iounit;
free_and_error:
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
error:
return err;
}
@@ -1362,7 +1326,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
fid->iounit = iounit;
free_and_error:
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
error:
return err;
}
@@ -1396,7 +1360,7 @@ int p9_client_symlink(struct p9_fid *dfid, const char *name,
qid->type, qid->path, qid->version);
free_and_error:
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
error:
return err;
}
@@ -1416,7 +1380,7 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, const char *newna
return PTR_ERR(req);
p9_debug(P9_DEBUG_9P, "<<< RLINK\n");
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
return 0;
}
EXPORT_SYMBOL(p9_client_link);
@@ -1440,7 +1404,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync)
p9_debug(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
error:
return err;
@@ -1454,15 +1418,6 @@ int p9_client_clunk(struct p9_fid *fid)
struct p9_req_t *req;
int retries = 0;
- if (!fid || IS_ERR(fid)) {
- pr_warn("%s (%d): Trying to clunk with invalid fid\n",
- __func__, task_pid_nr(current));
- dump_stack();
- return 0;
- }
- if (!refcount_dec_and_test(&fid->count))
- return 0;
-
again:
p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n",
fid->fid, retries);
@@ -1477,7 +1432,7 @@ again:
p9_debug(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
error:
/* Fid is not valid even after a failed clunk
* If interrupted, retry once then give up and
@@ -1511,10 +1466,10 @@ int p9_client_remove(struct p9_fid *fid)
p9_debug(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
error:
if (err == -ERESTARTSYS)
- p9_client_clunk(fid);
+ p9_fid_put(fid);
else
p9_fid_destroy(fid);
return err;
@@ -1538,7 +1493,7 @@ int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags)
}
p9_debug(P9_DEBUG_9P, "<<< RUNLINKAT fid %d %s\n", dfid->fid, name);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
error:
return err;
}
@@ -1570,7 +1525,7 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
struct p9_client *clnt = fid->clnt;
struct p9_req_t *req;
int count = iov_iter_count(to);
- int rsize, non_zc = 0;
+ int rsize, received, non_zc = 0;
char *dataptr;
*err = 0;
@@ -1599,36 +1554,40 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
}
if (IS_ERR(req)) {
*err = PTR_ERR(req);
+ if (!non_zc)
+ iov_iter_revert(to, count - iov_iter_count(to));
return 0;
}
*err = p9pdu_readf(&req->rc, clnt->proto_version,
- "D", &count, &dataptr);
+ "D", &received, &dataptr);
if (*err) {
+ if (!non_zc)
+ iov_iter_revert(to, count - iov_iter_count(to));
trace_9p_protocol_dump(clnt, &req->rc);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
return 0;
}
- if (rsize < count) {
- pr_err("bogus RREAD count (%d > %d)\n", count, rsize);
- count = rsize;
+ if (rsize < received) {
+ pr_err("bogus RREAD count (%d > %d)\n", received, rsize);
+ received = rsize;
}
p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
if (non_zc) {
- int n = copy_to_iter(dataptr, count, to);
+ int n = copy_to_iter(dataptr, received, to);
- if (n != count) {
+ if (n != received) {
*err = -EFAULT;
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
return n;
}
} else {
- iov_iter_advance(to, count);
+ iov_iter_revert(to, count - received - iov_iter_count(to));
}
- p9_tag_remove(clnt, req);
- return count;
+ p9_req_put(clnt, req);
+ return received;
}
EXPORT_SYMBOL(p9_client_read_once);
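
The revert expression above deserves unpacking: before the call the iterator held count bytes, and the transport may have advanced it by A = count - iov_iter_count(to); reverting A - received leaves it advanced by exactly the bytes that actually arrived. A userspace model of the arithmetic:

#include <assert.h>
#include <stddef.h>

/* 'remaining' models iov_iter_count(to) after the transport ran */
static size_t bytes_to_revert(size_t count, size_t received,
			      size_t remaining)
{
	/* advanced so far: count - remaining; target: received */
	return count - received - remaining;
}

static void demo(void)
{
	/* 8 KiB requested, iterator fully consumed, 4 KiB received:
	 * revert 4 KiB so the iterator reflects the actual data. */
	assert(bytes_to_revert(8192, 4096, 0) == 4096);
}
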
@@ -1646,6 +1605,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
while (iov_iter_count(from)) {
int count = iov_iter_count(from);
int rsize = fid->iounit;
+ int written;
if (!rsize || rsize > clnt->msize - P9_IOHDRSZ)
rsize = clnt->msize - P9_IOHDRSZ;
@@ -1663,27 +1623,29 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
offset, rsize, from);
}
if (IS_ERR(req)) {
+ iov_iter_revert(from, count - iov_iter_count(from));
*err = PTR_ERR(req);
break;
}
- *err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &count);
+ *err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &written);
if (*err) {
+ iov_iter_revert(from, count - iov_iter_count(from));
trace_9p_protocol_dump(clnt, &req->rc);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
break;
}
- if (rsize < count) {
- pr_err("bogus RWRITE count (%d > %d)\n", count, rsize);
- count = rsize;
+ if (rsize < written) {
+ pr_err("bogus RWRITE count (%d > %d)\n", written, rsize);
+ written = rsize;
}
p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
- p9_tag_remove(clnt, req);
- iov_iter_advance(from, count);
- total += count;
- offset += count;
+ p9_req_put(clnt, req);
+ iov_iter_revert(from, count - written - iov_iter_count(from));
+ total += written;
+ offset += written;
}
return total;
}
@@ -1715,7 +1677,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
err = p9pdu_readf(&req->rc, clnt->proto_version, "wS", &ignored, ret);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
goto error;
}
@@ -1732,7 +1694,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
from_kgid(&init_user_ns, ret->n_gid),
from_kuid(&init_user_ns, ret->n_muid));
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
return ret;
error:
@@ -1768,7 +1730,7 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
err = p9pdu_readf(&req->rc, clnt->proto_version, "A", ret);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
goto error;
}
@@ -1794,7 +1756,7 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
ret->st_btime_sec, ret->st_btime_nsec,
ret->st_gen, ret->st_data_version);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
return ret;
error:
@@ -1866,7 +1828,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
p9_debug(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
error:
return err;
}
@@ -1898,7 +1860,7 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
error:
return err;
}
@@ -1926,7 +1888,7 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
&sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
goto error;
}
@@ -1935,7 +1897,7 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
fid->fid, sb->type, sb->bsize, sb->blocks, sb->bfree,
sb->bavail, sb->files, sb->ffree, sb->fsid, sb->namelen);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
error:
return err;
}
@@ -1963,7 +1925,7 @@ int p9_client_rename(struct p9_fid *fid,
p9_debug(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
error:
return err;
}
@@ -1993,7 +1955,7 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name,
p9_debug(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n",
newdirfid->fid, new_name);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
error:
return err;
}
@@ -2029,15 +1991,15 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
err = p9pdu_readf(&req->rc, clnt->proto_version, "q", attr_size);
if (err) {
trace_9p_protocol_dump(clnt, &req->rc);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
goto clunk_fid;
}
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
p9_debug(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n",
attr_fid->fid, *attr_size);
return attr_fid;
clunk_fid:
- p9_client_clunk(attr_fid);
+ p9_fid_put(attr_fid);
attr_fid = NULL;
error:
if (attr_fid && attr_fid != file_fid)
@@ -2066,7 +2028,7 @@ int p9_client_xattrcreate(struct p9_fid *fid, const char *name,
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
error:
return err;
}
@@ -2128,11 +2090,11 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
if (non_zc)
memmove(data, dataptr, count);
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
return count;
free_and_error:
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
error:
return err;
}
@@ -2164,7 +2126,7 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
qid->type, qid->path, qid->version);
error:
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
return err;
}
EXPORT_SYMBOL(p9_client_mknod_dotl);
@@ -2194,7 +2156,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
qid->path, qid->version);
error:
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
return err;
}
EXPORT_SYMBOL(p9_client_mkdir_dotl);
@@ -2226,7 +2188,7 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
}
p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status);
error:
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
return err;
}
EXPORT_SYMBOL(p9_client_lock_dotl);
@@ -2263,7 +2225,7 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
glock->type, glock->start, glock->length,
glock->proc_id, glock->client_id);
error:
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
return err;
}
EXPORT_SYMBOL(p9_client_getlock_dotl);
@@ -2289,7 +2251,7 @@ int p9_client_readlink(struct p9_fid *fid, char **target)
}
p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target);
error:
- p9_tag_remove(clnt, req);
+ p9_req_put(clnt, req);
return err;
}
EXPORT_SYMBOL(p9_client_readlink);
diff --git a/net/9p/mod.c b/net/9p/mod.c
index c37fc201a944..55576c1866fa 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -83,7 +83,7 @@ void v9fs_unregister_trans(struct p9_trans_module *m)
}
EXPORT_SYMBOL(v9fs_unregister_trans);
-static struct p9_trans_module *_p9_get_trans_by_name(char *s)
+static struct p9_trans_module *_p9_get_trans_by_name(const char *s)
{
struct p9_trans_module *t, *found = NULL;
@@ -106,7 +106,7 @@ static struct p9_trans_module *_p9_get_trans_by_name(char *s)
* @s: string identifying transport
*
*/
-struct p9_trans_module *v9fs_get_trans_by_name(char *s)
+struct p9_trans_module *v9fs_get_trans_by_name(const char *s)
{
struct p9_trans_module *found = NULL;
@@ -123,6 +123,10 @@ struct p9_trans_module *v9fs_get_trans_by_name(char *s)
}
EXPORT_SYMBOL(v9fs_get_trans_by_name);
+static const char * const v9fs_default_transports[] = {
+ "virtio", "tcp", "fd", "unix", "xen", "rdma",
+};
+
/**
* v9fs_get_default_trans - get the default transport
*
@@ -131,6 +135,7 @@ EXPORT_SYMBOL(v9fs_get_trans_by_name);
struct p9_trans_module *v9fs_get_default_trans(void)
{
struct p9_trans_module *t, *found = NULL;
+ int i;
spin_lock(&v9fs_trans_lock);
@@ -148,6 +153,10 @@ struct p9_trans_module *v9fs_get_default_trans(void)
}
spin_unlock(&v9fs_trans_lock);
+
+ for (i = 0; !found && i < ARRAY_SIZE(v9fs_default_transports); i++)
+ found = v9fs_get_trans_by_name(v9fs_default_transports[i]);
+
return found;
}
EXPORT_SYMBOL(v9fs_get_default_trans);
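
The fallback list pairs with the MODULE_ALIAS_9P() annotations added to the transports: when nothing is registered yet, looking a transport up by name can trigger a module load. A sketch of that resolution path; the request_module() fallback inside the lookup is an assumption based on the "9p-<name>" aliases:

static struct p9_trans_module *demo_lookup_trans(const char *name)
{
	struct p9_trans_module *t = _p9_get_trans_by_name(name);

#ifdef CONFIG_MODULES
	if (!t) {
		request_module("9p-%s", name);	/* e.g. "9p-tcp" */
		t = _p9_get_trans_by_name(name);
	}
#endif
	return t;
}

With the transports modular, a mount such as "mount -t 9p -o trans=tcp 10.0.0.1 /mnt" can then pull in 9pnet_fd on demand instead of requiring it built in.
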
@@ -177,7 +186,6 @@ static int __init init_p9(void)
p9_error_init();
pr_info("Installing 9P2000 support\n");
- p9_trans_fd_init();
return ret;
}
@@ -191,7 +199,6 @@ static void __exit exit_p9(void)
{
pr_info("Unloading 9P2000 support\n");
- p9_trans_fd_exit();
p9_client_exit();
}
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 3754c33e2974..4e3a2a1ffcb3 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -23,6 +23,173 @@
#include <trace/events/9p.h>
+/* len[2] text[len] */
+#define P9_STRLEN(s) \
+ (2 + min_t(size_t, (s) ? strlen(s) : 0, USHRT_MAX))
+
+/**
+ * p9_msg_buf_size - Returns a buffer size sufficiently large to hold the
+ * intended 9p message.
+ * @c: client
+ * @type: message type
+ * @fmt: format template for assembling request message
+ * (see p9pdu_vwritef)
+ * @ap: variable arguments to be fed to passed format template
+ * (see p9pdu_vwritef)
+ *
+ * Note: Even for response types (P9_R*) the format template and variable
+ * arguments must always be for the originating request type (P9_T*).
+ */
+size_t p9_msg_buf_size(struct p9_client *c, enum p9_msg_t type,
+ const char *fmt, va_list ap)
+{
+ /* size[4] type[1] tag[2] */
+ const int hdr = 4 + 1 + 2;
+ /* ename[s] errno[4] */
+ const int rerror_size = hdr + P9_ERRMAX + 4;
+ /* ecode[4] */
+ const int rlerror_size = hdr + 4;
+ const int err_size =
+ c->proto_version == p9_proto_2000L ? rlerror_size : rerror_size;
+
+ static_assert(NAME_MAX <= 4*1024, "p9_msg_buf_size() currently assumes "
+ "a max. allowed directory entry name length of 4k");
+
+ switch (type) {
+
+ /* message types not used at all */
+ case P9_TERROR:
+ case P9_TLERROR:
+ case P9_TAUTH:
+ case P9_RAUTH:
+ BUG();
+
+ /* variable length & potentially large message types */
+ case P9_TATTACH:
+ BUG_ON(strcmp("ddss?u", fmt));
+ va_arg(ap, int32_t);
+ va_arg(ap, int32_t);
+ {
+ const char *uname = va_arg(ap, const char *);
+ const char *aname = va_arg(ap, const char *);
+ /* fid[4] afid[4] uname[s] aname[s] n_uname[4] */
+ return hdr + 4 + 4 + P9_STRLEN(uname) + P9_STRLEN(aname) + 4;
+ }
+ case P9_TWALK:
+ BUG_ON(strcmp("ddT", fmt));
+ va_arg(ap, int32_t);
+ va_arg(ap, int32_t);
+ {
+ uint i, nwname = va_arg(ap, int);
+ size_t wname_all;
+ const char **wnames = va_arg(ap, const char **);
+ for (i = 0, wname_all = 0; i < nwname; ++i) {
+ wname_all += P9_STRLEN(wnames[i]);
+ }
+ /* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
+ return hdr + 4 + 4 + 2 + wname_all;
+ }
+ case P9_RWALK:
+ BUG_ON(strcmp("ddT", fmt));
+ va_arg(ap, int32_t);
+ va_arg(ap, int32_t);
+ {
+ uint nwname = va_arg(ap, int);
+ /* nwqid[2] nwqid*(wqid[13]) */
+ return max_t(size_t, hdr + 2 + nwname * 13, err_size);
+ }
+ case P9_TCREATE:
+ BUG_ON(strcmp("dsdb?s", fmt));
+ va_arg(ap, int32_t);
+ {
+ const char *name = va_arg(ap, const char *);
+ if (c->proto_version == p9_proto_legacy) {
+ /* fid[4] name[s] perm[4] mode[1] */
+ return hdr + 4 + P9_STRLEN(name) + 4 + 1;
+ } else {
+ va_arg(ap, int32_t);
+ va_arg(ap, int);
+ {
+ const char *ext = va_arg(ap, const char *);
+ /* fid[4] name[s] perm[4] mode[1] extension[s] */
+ return hdr + 4 + P9_STRLEN(name) + 4 + 1 + P9_STRLEN(ext);
+ }
+ }
+ }
+ case P9_TLCREATE:
+ BUG_ON(strcmp("dsddg", fmt));
+ va_arg(ap, int32_t);
+ {
+ const char *name = va_arg(ap, const char *);
+ /* fid[4] name[s] flags[4] mode[4] gid[4] */
+ return hdr + 4 + P9_STRLEN(name) + 4 + 4 + 4;
+ }
+ case P9_RREAD:
+ case P9_RREADDIR:
+ BUG_ON(strcmp("dqd", fmt));
+ va_arg(ap, int32_t);
+ va_arg(ap, int64_t);
+ {
+ const int32_t count = va_arg(ap, int32_t);
+ /* count[4] data[count] */
+ return max_t(size_t, hdr + 4 + count, err_size);
+ }
+ case P9_TWRITE:
+ BUG_ON(strcmp("dqV", fmt));
+ va_arg(ap, int32_t);
+ va_arg(ap, int64_t);
+ {
+ const int32_t count = va_arg(ap, int32_t);
+ /* fid[4] offset[8] count[4] data[count] */
+ return hdr + 4 + 8 + 4 + count;
+ }
+ case P9_TRENAMEAT:
+ BUG_ON(strcmp("dsds", fmt));
+ va_arg(ap, int32_t);
+ {
+ const char *oldname, *newname;
+ oldname = va_arg(ap, const char *);
+ va_arg(ap, int32_t);
+ newname = va_arg(ap, const char *);
+ /* olddirfid[4] oldname[s] newdirfid[4] newname[s] */
+ return hdr + 4 + P9_STRLEN(oldname) + 4 + P9_STRLEN(newname);
+ }
+ case P9_TSYMLINK:
+ BUG_ON(strcmp("dssg", fmt));
+ va_arg(ap, int32_t);
+ {
+ const char *name = va_arg(ap, const char *);
+ const char *symtgt = va_arg(ap, const char *);
+ /* fid[4] name[s] symtgt[s] gid[4] */
+ return hdr + 4 + P9_STRLEN(name) + P9_STRLEN(symtgt) + 4;
+ }
+
+ case P9_RERROR:
+ return rerror_size;
+ case P9_RLERROR:
+ return rlerror_size;
+
+ /* small message types */
+ case P9_TWSTAT:
+ case P9_RSTAT:
+ case P9_RREADLINK:
+ case P9_TXATTRWALK:
+ case P9_TXATTRCREATE:
+ case P9_TLINK:
+ case P9_TMKDIR:
+ case P9_TMKNOD:
+ case P9_TRENAME:
+ case P9_TUNLINKAT:
+ case P9_TLOCK:
+ return 8 * 1024;
+
+ /* tiny message types */
+ default:
+ return 4 * 1024;
+
+ }
+}
+
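
A worked example of the sizing: for a TWALK of the two path components "usr" and "share", the estimate is hdr(7) + fid(4) + newfid(4) + nwname(2) + (2+3) + (2+5) = 29 bytes, each wname costing len[2] plus its text, instead of a full c->msize buffer. The same arithmetic as a standalone sketch:

#include <stddef.h>
#include <string.h>

static size_t twalk_size(size_t nwname, const char *const *wnames)
{
	size_t i, sz = 7 + 4 + 4 + 2;	/* size[4] type[1] tag[2] + fid + newfid + nwname */

	for (i = 0; i < nwname; i++)
		sz += 2 + strlen(wnames[i]);	/* len[2] text[len] */
	return sz;
}

/* twalk_size(2, (const char *[]){ "usr", "share" }) == 29 */
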
static int
p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
@@ -63,9 +230,8 @@ static size_t
pdu_write_u(struct p9_fcall *pdu, struct iov_iter *from, size_t size)
{
size_t len = min(pdu->capacity - pdu->size, size);
- struct iov_iter i = *from;
- if (!copy_from_iter_full(&pdu->sdata[pdu->size], len, &i))
+ if (!copy_from_iter_full(&pdu->sdata[pdu->size], len, from))
len = 0;
pdu->size += len;
diff --git a/net/9p/protocol.h b/net/9p/protocol.h
index 6d719c30331a..ad2283d1f96b 100644
--- a/net/9p/protocol.h
+++ b/net/9p/protocol.h
@@ -8,6 +8,8 @@
* Copyright (C) 2008 by IBM, Corp.
*/
+size_t p9_msg_buf_size(struct p9_client *c, enum p9_msg_t type,
+ const char *fmt, va_list ap);
int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
va_list ap);
int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 827c47620fc0..56a186768750 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -91,6 +91,7 @@ struct p9_poll_wait {
* @mux_list: list link for mux to manage multiple connections (?)
* @client: reference to client instance for this connection
* @err: error state
+ * @req_lock: lock protecting req_list and request statuses
* @req_list: accounting for requests which have been sent
* @unsent_req_list: accounting for requests that haven't been sent
* @rreq: read request
@@ -114,6 +115,7 @@ struct p9_conn {
struct list_head mux_list;
struct p9_client *client;
int err;
+ spinlock_t req_lock;
struct list_head req_list;
struct list_head unsent_req_list;
struct p9_req_t *rreq;
@@ -189,10 +191,10 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);
- spin_lock(&m->client->lock);
+ spin_lock(&m->req_lock);
if (m->err) {
- spin_unlock(&m->client->lock);
+ spin_unlock(&m->req_lock);
return;
}
@@ -205,6 +207,8 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
list_move(&req->req_list, &cancel_list);
}
+ spin_unlock(&m->req_lock);
+
list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req);
list_del(&req->req_list);
@@ -212,7 +216,6 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
req->t_err = err;
p9_client_cb(m->client, req, REQ_STATUS_ERROR);
}
- spin_unlock(&m->client->lock);
}
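
The cancel path above shows the locking discipline the new req_lock enables: collect the victims under the list lock, drop it, then run the callbacks, so p9_client_cb() (whose put path takes client->lock, per the p9_tag_remove() hunk earlier) never nests inside the request-list lock. A simplified in-file sketch of the shape, with the per-request status filtering omitted:

static void demo_cancel_all(struct p9_conn *m, int err)
{
	struct p9_req_t *req, *rtmp;
	LIST_HEAD(cancel_list);

	spin_lock(&m->req_lock);
	list_splice_init(&m->req_list, &cancel_list);
	list_splice_init(&m->unsent_req_list, &cancel_list);
	spin_unlock(&m->req_lock);

	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
		list_del(&req->req_list);
		req->t_err = err;
		p9_client_cb(m->client, req, REQ_STATUS_ERROR);	/* no list lock held */
	}
}
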
static __poll_t
@@ -343,6 +346,7 @@ static void p9_read_work(struct work_struct *work)
p9_debug(P9_DEBUG_ERROR,
"No recv fcall for tag %d (req %p), disconnecting!\n",
m->rc.tag, m->rreq);
+ p9_req_put(m->client, m->rreq);
m->rreq = NULL;
err = -EIO;
goto error;
@@ -358,7 +362,7 @@ static void p9_read_work(struct work_struct *work)
if ((m->rreq) && (m->rc.offset == m->rc.capacity)) {
p9_debug(P9_DEBUG_TRANS, "got new packet\n");
m->rreq->rc.size = m->rc.offset;
- spin_lock(&m->client->lock);
+ spin_lock(&m->req_lock);
if (m->rreq->status == REQ_STATUS_SENT) {
list_del(&m->rreq->req_list);
p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD);
@@ -367,18 +371,18 @@ static void p9_read_work(struct work_struct *work)
p9_debug(P9_DEBUG_TRANS,
"Ignore replies associated with a cancelled request\n");
} else {
- spin_unlock(&m->client->lock);
+ spin_unlock(&m->req_lock);
p9_debug(P9_DEBUG_ERROR,
"Request tag %d errored out while we were reading the reply\n",
m->rc.tag);
err = -EIO;
goto error;
}
- spin_unlock(&m->client->lock);
+ spin_unlock(&m->req_lock);
m->rc.sdata = NULL;
m->rc.offset = 0;
m->rc.capacity = 0;
- p9_req_put(m->rreq);
+ p9_req_put(m->client, m->rreq);
m->rreq = NULL;
}
@@ -452,10 +456,10 @@ static void p9_write_work(struct work_struct *work)
}
if (!m->wsize) {
- spin_lock(&m->client->lock);
+ spin_lock(&m->req_lock);
if (list_empty(&m->unsent_req_list)) {
clear_bit(Wworksched, &m->wsched);
- spin_unlock(&m->client->lock);
+ spin_unlock(&m->req_lock);
return;
}
@@ -470,7 +474,7 @@ static void p9_write_work(struct work_struct *work)
m->wpos = 0;
p9_req_get(req);
m->wreq = req;
- spin_unlock(&m->client->lock);
+ spin_unlock(&m->req_lock);
}
p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n",
@@ -492,7 +496,7 @@ static void p9_write_work(struct work_struct *work)
m->wpos += err;
if (m->wpos == m->wsize) {
m->wpos = m->wsize = 0;
- p9_req_put(m->wreq);
+ p9_req_put(m->client, m->wreq);
m->wreq = NULL;
}
@@ -587,6 +591,7 @@ static void p9_conn_create(struct p9_client *client)
INIT_LIST_HEAD(&m->mux_list);
m->client = client;
+ spin_lock_init(&m->req_lock);
INIT_LIST_HEAD(&m->req_list);
INIT_LIST_HEAD(&m->unsent_req_list);
INIT_WORK(&m->rq, p9_read_work);
@@ -668,10 +673,10 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
if (m->err < 0)
return m->err;
- spin_lock(&client->lock);
+ spin_lock(&m->req_lock);
req->status = REQ_STATUS_UNSENT;
list_add_tail(&req->req_list, &m->unsent_req_list);
- spin_unlock(&client->lock);
+ spin_unlock(&m->req_lock);
if (test_and_clear_bit(Wpending, &m->wsched))
n = EPOLLOUT;
@@ -686,33 +691,38 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
{
+ struct p9_trans_fd *ts = client->trans;
+ struct p9_conn *m = &ts->conn;
int ret = 1;
p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
- spin_lock(&client->lock);
+ spin_lock(&m->req_lock);
if (req->status == REQ_STATUS_UNSENT) {
list_del(&req->req_list);
req->status = REQ_STATUS_FLSHD;
- p9_req_put(req);
+ p9_req_put(client, req);
ret = 0;
}
- spin_unlock(&client->lock);
+ spin_unlock(&m->req_lock);
return ret;
}
static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
{
+ struct p9_trans_fd *ts = client->trans;
+ struct p9_conn *m = &ts->conn;
+
p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
- spin_lock(&client->lock);
+ spin_lock(&m->req_lock);
 /* Ignore the cancelled request if the message was received
 * before we took the lock.
*/
if (req->status == REQ_STATUS_RCVD) {
- spin_unlock(&client->lock);
+ spin_unlock(&m->req_lock);
return 0;
}
@@ -721,8 +731,9 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
*/
list_del(&req->req_list);
req->status = REQ_STATUS_FLSHD;
- spin_unlock(&client->lock);
- p9_req_put(req);
+ spin_unlock(&m->req_lock);
+
+ p9_req_put(client, req);
return 0;
}
@@ -820,11 +831,14 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
goto out_free_ts;
if (!(ts->rd->f_mode & FMODE_READ))
goto out_put_rd;
+ /* prevent workers from hanging on IO when fd is a pipe */
+ ts->rd->f_flags |= O_NONBLOCK;
ts->wr = fget(wfd);
if (!ts->wr)
goto out_put_rd;
if (!(ts->wr->f_mode & FMODE_WRITE))
goto out_put_wr;
+ ts->wr->f_flags |= O_NONBLOCK;
client->trans = ts;
client->status = Connected;
@@ -883,12 +897,12 @@ static void p9_conn_destroy(struct p9_conn *m)
p9_mux_poll_stop(m);
cancel_work_sync(&m->rq);
if (m->rreq) {
- p9_req_put(m->rreq);
+ p9_req_put(m->client, m->rreq);
m->rreq = NULL;
}
cancel_work_sync(&m->wq);
if (m->wreq) {
- p9_req_put(m->wreq);
+ p9_req_put(m->client, m->wreq);
m->wreq = NULL;
}
@@ -1060,7 +1074,9 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args)
int err;
struct p9_fd_opts opts;
- parse_opts(args, &opts);
+ err = parse_opts(args, &opts);
+ if (err < 0)
+ return err;
client->trans_opts.fd.rfd = opts.rfd;
client->trans_opts.fd.wfd = opts.wfd;
@@ -1081,6 +1097,7 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args)
static struct p9_trans_module p9_tcp_trans = {
.name = "tcp",
.maxsize = MAX_SOCK_BUF,
+ .pooled_rbuffers = false,
.def = 0,
.create = p9_fd_create_tcp,
.close = p9_fd_close,
@@ -1090,6 +1107,7 @@ static struct p9_trans_module p9_tcp_trans = {
.show_options = p9_fd_show_options,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_9P("tcp");
static struct p9_trans_module p9_unix_trans = {
.name = "unix",
@@ -1103,6 +1121,7 @@ static struct p9_trans_module p9_unix_trans = {
.show_options = p9_fd_show_options,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_9P("unix");
static struct p9_trans_module p9_fd_trans = {
.name = "fd",
@@ -1116,6 +1135,7 @@ static struct p9_trans_module p9_fd_trans = {
.show_options = p9_fd_show_options,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_9P("fd");
/**
* p9_poll_workfn - poll worker thread
@@ -1149,7 +1169,7 @@ static void p9_poll_workfn(struct work_struct *work)
p9_debug(P9_DEBUG_TRANS, "finish\n");
}
-int p9_trans_fd_init(void)
+static int __init p9_trans_fd_init(void)
{
v9fs_register_trans(&p9_tcp_trans);
v9fs_register_trans(&p9_unix_trans);
@@ -1158,10 +1178,17 @@ int p9_trans_fd_init(void)
return 0;
}
-void p9_trans_fd_exit(void)
+static void __exit p9_trans_fd_exit(void)
{
flush_work(&p9_poll_work);
v9fs_unregister_trans(&p9_tcp_trans);
v9fs_unregister_trans(&p9_unix_trans);
v9fs_unregister_trans(&p9_fd_trans);
}
+
+module_init(p9_trans_fd_init);
+module_exit(p9_trans_fd_exit);
+
+MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
+MODULE_DESCRIPTION("File Descriptor Transport for 9P");
+MODULE_LICENSE("GPL");
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 88e563826674..6ff706760676 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -350,7 +350,7 @@ send_done(struct ib_cq *cq, struct ib_wc *wc)
c->busa, c->req->tc.size,
DMA_TO_DEVICE);
up(&rdma->sq_sem);
- p9_req_put(c->req);
+ p9_req_put(client, c->req);
kfree(c);
}
@@ -739,6 +739,7 @@ error:
static struct p9_trans_module p9_rdma_trans = {
.name = "rdma",
.maxsize = P9_RDMA_MAXSIZE,
+ .pooled_rbuffers = true,
.def = 0,
.owner = THIS_MODULE,
.create = rdma_create_trans,
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index bd5a89c4960d..e757f0601304 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -199,7 +199,7 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
/* Reply won't come, so drop req ref */
static int p9_virtio_cancelled(struct p9_client *client, struct p9_req_t *req)
{
- p9_req_put(req);
+ p9_req_put(client, req);
return 0;
}
@@ -331,7 +331,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
if (err == -ERESTARTSYS)
return err;
}
- n = iov_iter_get_pages_alloc(data, pages, count, offs);
+ n = iov_iter_get_pages_alloc2(data, pages, count, offs);
if (n < 0)
return n;
*need_drop = 1;
@@ -373,10 +373,40 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
(*pages)[index] = kmap_to_page(p);
p += PAGE_SIZE;
}
+ iov_iter_advance(data, len);
return len;
}
}
+static void handle_rerror(struct p9_req_t *req, int in_hdr_len,
+ size_t offs, struct page **pages)
+{
+ unsigned size, n;
+ void *to = req->rc.sdata + in_hdr_len;
+
+ // Fits entirely into the static data? Nothing to do.
+ if (req->rc.size < in_hdr_len)
+ return;
+
+ // Really long error message? Truncate the reply. It might get
+ // rejected (we don't bother adjusting the size encoded in the
+ // header, or the string size for that matter), but it wouldn't
+ // be anything valid anyway.
+ if (unlikely(req->rc.size > P9_ZC_HDR_SZ))
+ req->rc.size = P9_ZC_HDR_SZ;
+
+ // data won't span more than two pages
+ size = req->rc.size - in_hdr_len;
+ n = PAGE_SIZE - offs;
+ if (size > n) {
+ memcpy_from_page(to, *pages++, offs, n);
+ offs = 0;
+ to += n;
+ size -= n;
+ }
+ memcpy_from_page(to, *pages, offs, size);
+}
+
/**
* p9_virtio_zc_request - issue a zero copy request
* @client: client instance issuing the request
@@ -503,6 +533,11 @@ req_retry_pinned:
kicked = 1;
p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");
err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD);
+ // RERROR needs reply (== error string) in static data
+ if (req->status == REQ_STATUS_RCVD &&
+ unlikely(req->rc.sdata[4] == P9_RERROR))
+ handle_rerror(req, in_hdr_len, offs, in_pages);
+
/*
* Non kernel buffers are pinned, unpin them
*/
@@ -523,7 +558,7 @@ err_out:
kvfree(out_pages);
if (!kicked) {
/* reply won't come */
- p9_req_put(req);
+ p9_req_put(client, req);
}
return err;
}
@@ -648,7 +683,7 @@ fail:
* @args: args passed from sys_mount() for per-transport options (unused)
*
* This sets up a transport channel for 9p communication. Right now
- * we only match the first available channel, but eventually we couldlook up
+ * we only match the first available channel, but eventually we could look up
* alternate channels by matching devname versus a virtio_config entry.
* We use a simple reference count mechanism to ensure that only a single
* mount has a channel open at a time.
@@ -721,7 +756,7 @@ static void p9_virtio_remove(struct virtio_device *vdev)
mutex_unlock(&virtio_9p_lock);
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
vdev->config->del_vqs(vdev);
sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
@@ -767,6 +802,7 @@ static struct p9_trans_module p9_virtio_trans = {
* page in zero copy.
*/
.maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3),
+ .pooled_rbuffers = false,
.def = 1,
.owner = THIS_MODULE,
};
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 2418fa0b58f3..b15c64128c3e 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -163,7 +163,7 @@ again:
ring->intf->out_prod = prod;
spin_unlock_irqrestore(&ring->lock, flags);
notify_remote_via_irq(ring->irq);
- p9_req_put(p9_req);
+ p9_req_put(client, p9_req);
return 0;
}
@@ -246,6 +246,7 @@ static irqreturn_t xen_9pfs_front_event_handler(int irq, void *r)
static struct p9_trans_module p9_xen_trans = {
.name = "xen",
.maxsize = 1 << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT - 2),
+ .pooled_rbuffers = false,
.def = 1,
.create = p9_xen_create,
.close = p9_xen_close,
@@ -279,13 +280,13 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
grant_ref_t ref;
ref = priv->rings[i].intf->ref[j];
- gnttab_end_foreign_access(ref, 0, 0);
+ gnttab_end_foreign_access(ref, NULL);
}
- free_pages((unsigned long)priv->rings[i].data.in,
- priv->rings[i].intf->ring_order -
- (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ free_pages_exact(priv->rings[i].data.in,
+ 1UL << (priv->rings[i].intf->ring_order +
+ XEN_PAGE_SHIFT));
}
- gnttab_end_foreign_access(priv->rings[i].ref, 0, 0);
+ gnttab_end_foreign_access(priv->rings[i].ref, NULL);
free_page((unsigned long)priv->rings[i].intf);
}
kfree(priv->rings);
@@ -322,8 +323,8 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
if (ret < 0)
goto out;
ring->ref = ret;
- bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- order - (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ bytes = alloc_pages_exact(1UL << (order + XEN_PAGE_SHIFT),
+ GFP_KERNEL | __GFP_ZERO);
if (!bytes) {
ret = -ENOMEM;
goto out;
@@ -353,12 +354,10 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
out:
if (bytes) {
for (i--; i >= 0; i--)
- gnttab_end_foreign_access(ring->intf->ref[i], 0, 0);
- free_pages((unsigned long)bytes,
- ring->intf->ring_order -
- (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ gnttab_end_foreign_access(ring->intf->ref[i], NULL);
+ free_pages_exact(bytes, 1UL << (order + XEN_PAGE_SHIFT));
}
- gnttab_end_foreign_access(ring->ref, 0, 0);
+ gnttab_end_foreign_access(ring->ref, NULL);
free_page((unsigned long)ring->intf);
return ret;
}
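
The alloc_pages_exact() conversion keeps the ring the same size while dropping the order arithmetic that mixed the Xen ring order with the host PAGE_SHIFT (the old order - (PAGE_SHIFT - XEN_PAGE_SHIFT) breaks down when the two shifts differ). A small check of the byte arithmetic, assuming 4 KiB Xen pages:

#include <assert.h>

/* ring payload size in bytes for a given Xen ring order */
static unsigned long ring_bytes(unsigned int ring_order)
{
	return 1UL << (ring_order + 12);	/* XEN_PAGE_SHIFT == 12 */
}

static void demo(void)
{
	/* order 6 -> 256 KiB, independent of the host's PAGE_SHIFT */
	assert(ring_bytes(6) == 256 * 1024);
}
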
@@ -512,7 +511,7 @@ static struct xenbus_driver xen_9pfs_front_driver = {
.otherend_changed = xen_9pfs_front_changed,
};
-static int p9_trans_xen_init(void)
+static int __init p9_trans_xen_init(void)
{
int rc;
@@ -531,13 +530,14 @@ static int p9_trans_xen_init(void)
module_init(p9_trans_xen_init);
MODULE_ALIAS_9P("xen");
-static void p9_trans_xen_exit(void)
+static void __exit p9_trans_xen_exit(void)
{
v9fs_unregister_trans(&p9_xen_trans);
return xenbus_unregister_driver(&xen_9pfs_front_driver);
}
module_exit(p9_trans_xen_exit);
+MODULE_ALIAS("xen:9pfs");
MODULE_AUTHOR("Stefano Stabellini <stefano@aporeto.com>");
MODULE_DESCRIPTION("Xen Transport for 9P");
MODULE_LICENSE("GPL");
diff --git a/net/Kconfig b/net/Kconfig
index 8a1f9d0287de..48c33c222199 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -204,7 +204,6 @@ config BRIDGE_NETFILTER
source "net/netfilter/Kconfig"
source "net/ipv4/netfilter/Kconfig"
source "net/ipv6/netfilter/Kconfig"
-source "net/decnet/netfilter/Kconfig"
source "net/bridge/netfilter/Kconfig"
endif
@@ -221,7 +220,6 @@ source "net/802/Kconfig"
source "net/bridge/Kconfig"
source "net/dsa/Kconfig"
source "net/8021q/Kconfig"
-source "net/decnet/Kconfig"
source "net/llc/Kconfig"
source "drivers/net/appletalk/Kconfig"
source "net/x25/Kconfig"
@@ -434,6 +432,19 @@ config NET_DEVLINK
config PAGE_POOL
bool
+config PAGE_POOL_STATS
+ default n
+ bool "Page pool stats"
+ depends on PAGE_POOL
+ help
+ Enable page pool statistics to track page allocation and recycling
+ in page pools. This option incurs additional CPU cost in allocation
+ and recycle paths and additional memory cost to store the statistics.
+ These statistics are only available if this option is enabled and if
+ the driver using the page pool supports exporting this data.
+
+ If unsure, say N.
+
config FAILOVER
tristate "Generic failover module"
help
diff --git a/net/Kconfig.debug b/net/Kconfig.debug
index 2f50611df858..5e3fffe707dd 100644
--- a/net/Kconfig.debug
+++ b/net/Kconfig.debug
@@ -2,7 +2,7 @@
config NET_DEV_REFCNT_TRACKER
bool "Enable net device refcount tracking"
- depends on DEBUG_KERNEL && STACKTRACE_SUPPORT
+ depends on DEBUG_KERNEL && STACKTRACE_SUPPORT && NET
select REF_TRACKER
default n
help
@@ -11,9 +11,16 @@ config NET_DEV_REFCNT_TRACKER
config NET_NS_REFCNT_TRACKER
bool "Enable networking namespace refcount tracking"
- depends on DEBUG_KERNEL && STACKTRACE_SUPPORT
+ depends on DEBUG_KERNEL && STACKTRACE_SUPPORT && NET
select REF_TRACKER
default n
help
Enable debugging feature to track netns references.
This adds memory and cpu costs.
+
+config DEBUG_NET
+ bool "Add generic networking debug"
+ depends on DEBUG_KERNEL && NET
+ help
+ Enable extra sanity checks in networking.
+ This is mostly used by fuzzers, but is safe to select.
diff --git a/net/Makefile b/net/Makefile
index fbfeb8a0bb37..6a62e5b27378 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -38,7 +38,6 @@ obj-$(CONFIG_AF_KCM) += kcm/
obj-$(CONFIG_STREAM_PARSER) += strparser/
obj-$(CONFIG_ATM) += atm/
obj-$(CONFIG_L2TP) += l2tp/
-obj-$(CONFIG_DECNET) += decnet/
obj-$(CONFIG_PHONET) += phonet/
ifneq ($(CONFIG_VLAN_8021Q),)
obj-y += 8021q/
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index bf5736c1d458..a06f4d4a6f47 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1753,8 +1753,7 @@ static int atalk_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int err = 0;
struct sk_buff *skb;
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
lock_sock(sk);
if (!skb)
diff --git a/net/atm/common.c b/net/atm/common.c
index 1cfa9bf1d187..f7019df41c3e 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -540,7 +540,7 @@ int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
!test_bit(ATM_VF_READY, &vcc->flags))
return 0;
- skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &error);
+ skb = skb_recv_datagram(sk, flags, &error);
if (!skb)
return error;
@@ -553,7 +553,7 @@ int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
error = skb_copy_datagram_msg(skb, 0, msg, copied);
if (error)
return error;
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (!(flags & MSG_PEEK)) {
pr_debug("%d -= %d\n", atomic_read(&sk->sk_rmem_alloc),
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
index 829db9eba0cb..aaf64b953915 100644
--- a/net/atm/mpoa_proc.c
+++ b/net/atm/mpoa_proc.c
@@ -219,11 +219,12 @@ static ssize_t proc_mpc_write(struct file *file, const char __user *buff,
if (!page)
return -ENOMEM;
- for (p = page, len = 0; len < nbytes; p++, len++) {
+ for (p = page, len = 0; len < nbytes; p++) {
if (get_user(*p, buff++)) {
free_page((unsigned long)page);
return -EFAULT;
}
+ len += 1;
if (*p == '\0' || *p == '\n')
break;
}
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 4369ffa3302a..9bf736290e48 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -108,7 +108,7 @@ out:
static inline void *vcc_walk(struct seq_file *seq, loff_t l)
{
struct vcc_state *state = seq->private;
- int family = (uintptr_t)(PDE_DATA(file_inode(seq->file)));
+ int family = (uintptr_t)(pde_data(file_inode(seq->file)));
return __vcc_walk(&state->sk, family, &state->bucket, l) ?
state : NULL;
@@ -324,7 +324,7 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf,
page = get_zeroed_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
- dev = PDE_DATA(file_inode(file));
+ dev = pde_data(file_inode(file));
if (!dev->ops->proc_read)
length = -EINVAL;
else {
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 02f43f3e2c56..6b4c25a92377 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -62,12 +62,12 @@ static void ax25_free_sock(struct sock *sk)
*/
static void ax25_cb_del(ax25_cb *ax25)
{
+ spin_lock_bh(&ax25_list_lock);
if (!hlist_unhashed(&ax25->ax25_node)) {
- spin_lock_bh(&ax25_list_lock);
hlist_del_init(&ax25->ax25_node);
- spin_unlock_bh(&ax25_list_lock);
ax25_cb_put(ax25);
}
+ spin_unlock_bh(&ax25_list_lock);
}
/*
@@ -77,21 +77,39 @@ static void ax25_kill_by_device(struct net_device *dev)
{
ax25_dev *ax25_dev;
ax25_cb *s;
+ struct sock *sk;
if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
return;
+ ax25_dev->device_up = false;
spin_lock_bh(&ax25_list_lock);
again:
ax25_for_each(s, &ax25_list) {
if (s->ax25_dev == ax25_dev) {
+ sk = s->sk;
+ if (!sk) {
+ spin_unlock_bh(&ax25_list_lock);
+ ax25_disconnect(s, ENETUNREACH);
+ s->ax25_dev = NULL;
+ ax25_cb_del(s);
+ spin_lock_bh(&ax25_list_lock);
+ goto again;
+ }
+ sock_hold(sk);
spin_unlock_bh(&ax25_list_lock);
- lock_sock(s->sk);
- s->ax25_dev = NULL;
- release_sock(s->sk);
+ lock_sock(sk);
ax25_disconnect(s, ENETUNREACH);
+ s->ax25_dev = NULL;
+ if (sk->sk_socket) {
+ netdev_put(ax25_dev->dev,
+ &ax25_dev->dev_tracker);
+ ax25_dev_put(ax25_dev);
+ }
+ ax25_cb_del(s);
+ release_sock(sk);
spin_lock_bh(&ax25_list_lock);
-
+ sock_put(sk);
/* The entry could have been deleted from the
* list meanwhile and thus the next pointer is
* no longer valid. Play it safe and restart
@@ -355,21 +373,25 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
if (copy_from_user(&ax25_ctl, arg, sizeof(ax25_ctl)))
return -EFAULT;
- if ((ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr)) == NULL)
- return -ENODEV;
-
if (ax25_ctl.digi_count > AX25_MAX_DIGIS)
return -EINVAL;
if (ax25_ctl.arg > ULONG_MAX / HZ && ax25_ctl.cmd != AX25_KILL)
return -EINVAL;
+ ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr);
+ if (!ax25_dev)
+ return -ENODEV;
+
digi.ndigi = ax25_ctl.digi_count;
for (k = 0; k < digi.ndigi; k++)
digi.calls[k] = ax25_ctl.digi_addr[k];
- if ((ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev)) == NULL)
+ ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev);
+ if (!ax25) {
+ ax25_dev_put(ax25_dev);
return -ENOTCONN;
+ }
switch (ax25_ctl.cmd) {
case AX25_KILL:
@@ -436,6 +458,7 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
}
out_put:
+ ax25_dev_put(ax25_dev);
ax25_cb_put(ax25);
return ret;
@@ -755,7 +778,7 @@ static int ax25_getsockopt(struct socket *sock, int level, int optname,
ax25_dev = ax25->ax25_dev;
if (ax25_dev != NULL && ax25_dev->dev != NULL) {
- strlcpy(devname, ax25_dev->dev->name, sizeof(devname));
+ strscpy(devname, ax25_dev->dev->name, sizeof(devname));
length = strlen(devname) + 1;
} else {
*devname = '\0';
@@ -962,21 +985,25 @@ static int ax25_release(struct socket *sock)
{
struct sock *sk = sock->sk;
ax25_cb *ax25;
+ ax25_dev *ax25_dev;
if (sk == NULL)
return 0;
sock_hold(sk);
- sock_orphan(sk);
lock_sock(sk);
+ sock_orphan(sk);
ax25 = sk_to_ax25(sk);
+ ax25_dev = ax25->ax25_dev;
if (sk->sk_type == SOCK_SEQPACKET) {
switch (ax25->state) {
case AX25_STATE_0:
- release_sock(sk);
- ax25_disconnect(ax25, 0);
- lock_sock(sk);
+ if (!sock_flag(ax25->sk, SOCK_DEAD)) {
+ release_sock(sk);
+ ax25_disconnect(ax25, 0);
+ lock_sock(sk);
+ }
ax25_destroy_socket(ax25);
break;
@@ -1031,6 +1058,17 @@ static int ax25_release(struct socket *sock)
sk->sk_state_change(sk);
ax25_destroy_socket(ax25);
}
+ if (ax25_dev) {
+ if (!ax25_dev->device_up) {
+ del_timer_sync(&ax25->timer);
+ del_timer_sync(&ax25->t1timer);
+ del_timer_sync(&ax25->t2timer);
+ del_timer_sync(&ax25->t3timer);
+ del_timer_sync(&ax25->idletimer);
+ }
+ netdev_put(ax25_dev->dev, &ax25->dev_tracker);
+ ax25_dev_put(ax25_dev);
+ }
sock->sk = NULL;
release_sock(sk);
@@ -1107,8 +1145,10 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
}
}
- if (ax25_dev != NULL)
+ if (ax25_dev) {
ax25_fillin_cb(ax25, ax25_dev);
+ netdev_hold(ax25_dev->dev, &ax25->dev_tracker, GFP_ATOMIC);
+ }
done:
ax25_cb_add(ax25);
@@ -1622,9 +1662,12 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
struct sock *sk = sock->sk;
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
+ struct sk_buff_head *sk_queue;
int copied;
int err = 0;
+ int off = 0;
+ long timeo;
lock_sock(sk);
/*
@@ -1636,11 +1679,29 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
goto out;
}
- /* Now we can treat all alike */
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &err);
- if (skb == NULL)
- goto out;
+ /* We need support for non-blocking reads. */
+ sk_queue = &sk->sk_receive_queue;
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off, &err, &last);
+ /* If no packet is available, release_sock(sk) and try again. */
+ if (!skb) {
+ if (err != -EAGAIN)
+ goto out;
+ release_sock(sk);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ while (timeo && !__skb_wait_for_more_packets(sk, sk_queue, &err,
+ &timeo, last)) {
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off,
+ &err, &last);
+ if (skb)
+ break;
+
+ if (err != -EAGAIN)
+ goto done;
+ }
+ if (!skb)
+ goto done;
+ lock_sock(sk);
+ }
if (!sk_to_ax25(sk)->pidincl)
skb_pull(skb, 1); /* Remove PID */
@@ -1687,6 +1748,7 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
out:
release_sock(sk);
+done:
return err;
}
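The recvmsg() rework swaps the blocking skb_recv_datagram() call for the __skb_try_recv_datagram()/__skb_wait_for_more_packets() pair so the function never sleeps while holding the socket lock. A condensed sketch of the idiom (not the exact control flow above):

	skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off, &err, &last);
	if (!skb && err == -EAGAIN) {
		release_sock(sk);	/* never sleep with the lock held */
		timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
		while (timeo && !__skb_wait_for_more_packets(sk, sk_queue,
							     &err, &timeo,
							     last)) {
			skb = __skb_try_recv_datagram(sk, sk_queue, flags,
						      &off, &err, &last);
			if (skb || err != -EAGAIN)
				break;	/* got a packet or a real error */
		}
		lock_sock(sk);
	}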
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index 256fadb94df3..c5462486dbca 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -37,6 +37,7 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr)
for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next)
if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) {
res = ax25_dev;
+ ax25_dev_hold(ax25_dev);
}
spin_unlock_bh(&ax25_dev_lock);
@@ -51,15 +52,18 @@ void ax25_dev_device_up(struct net_device *dev)
{
ax25_dev *ax25_dev;
- if ((ax25_dev = kzalloc(sizeof(*ax25_dev), GFP_ATOMIC)) == NULL) {
+ ax25_dev = kzalloc(sizeof(*ax25_dev), GFP_KERNEL);
+ if (!ax25_dev) {
printk(KERN_ERR "AX.25: ax25_dev_device_up - out of memory\n");
return;
}
+ refcount_set(&ax25_dev->refcount, 1);
dev->ax25_ptr = ax25_dev;
ax25_dev->dev = dev;
- dev_hold_track(dev, &ax25_dev->dev_tracker, GFP_ATOMIC);
+ netdev_hold(dev, &ax25_dev->dev_tracker, GFP_KERNEL);
ax25_dev->forward = NULL;
+ ax25_dev->device_up = true;
ax25_dev->values[AX25_VALUES_IPDEFMODE] = AX25_DEF_IPDEFMODE;
ax25_dev->values[AX25_VALUES_AXDEFMODE] = AX25_DEF_AXDEFMODE;
@@ -84,6 +88,7 @@ void ax25_dev_device_up(struct net_device *dev)
ax25_dev->next = ax25_dev_list;
ax25_dev_list = ax25_dev;
spin_unlock_bh(&ax25_dev_lock);
+ ax25_dev_hold(ax25_dev);
ax25_register_dev_sysctl(ax25_dev);
}
@@ -112,27 +117,28 @@ void ax25_dev_device_down(struct net_device *dev)
if ((s = ax25_dev_list) == ax25_dev) {
ax25_dev_list = s->next;
- spin_unlock_bh(&ax25_dev_lock);
- dev->ax25_ptr = NULL;
- dev_put_track(dev, &ax25_dev->dev_tracker);
- kfree(ax25_dev);
- return;
+ goto unlock_put;
}
while (s != NULL && s->next != NULL) {
if (s->next == ax25_dev) {
s->next = ax25_dev->next;
- spin_unlock_bh(&ax25_dev_lock);
- dev->ax25_ptr = NULL;
- dev_put_track(dev, &ax25_dev->dev_tracker);
- kfree(ax25_dev);
- return;
+ goto unlock_put;
}
s = s->next;
}
spin_unlock_bh(&ax25_dev_lock);
dev->ax25_ptr = NULL;
+ ax25_dev_put(ax25_dev);
+ return;
+
+unlock_put:
+ spin_unlock_bh(&ax25_dev_lock);
+ ax25_dev_put(ax25_dev);
+ dev->ax25_ptr = NULL;
+ netdev_put(dev, &ax25_dev->dev_tracker);
+ ax25_dev_put(ax25_dev);
}
int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd)
@@ -144,20 +150,32 @@ int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd)
switch (cmd) {
case SIOCAX25ADDFWD:
- if ((fwd_dev = ax25_addr_ax25dev(&fwd->port_to)) == NULL)
+ fwd_dev = ax25_addr_ax25dev(&fwd->port_to);
+ if (!fwd_dev) {
+ ax25_dev_put(ax25_dev);
return -EINVAL;
- if (ax25_dev->forward != NULL)
+ }
+ if (ax25_dev->forward) {
+ ax25_dev_put(fwd_dev);
+ ax25_dev_put(ax25_dev);
return -EINVAL;
+ }
ax25_dev->forward = fwd_dev->dev;
+ ax25_dev_put(fwd_dev);
+ ax25_dev_put(ax25_dev);
break;
case SIOCAX25DELFWD:
- if (ax25_dev->forward == NULL)
+ if (!ax25_dev->forward) {
+ ax25_dev_put(ax25_dev);
return -EINVAL;
+ }
ax25_dev->forward = NULL;
+ ax25_dev_put(ax25_dev);
break;
default:
+ ax25_dev_put(ax25_dev);
return -EINVAL;
}
@@ -188,7 +206,7 @@ void __exit ax25_dev_free(void)
ax25_dev = ax25_dev_list;
while (ax25_dev != NULL) {
s = ax25_dev;
- dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker);
+ netdev_put(ax25_dev->dev, &ax25_dev->dev_tracker);
ax25_dev = ax25_dev->next;
kfree(s);
}
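The ax25_dev_hold()/ax25_dev_put() calls above rely on refcount_t helpers added to the ax25 headers as part of this change (not visible in this hunk); they are roughly:

	static inline void ax25_dev_hold(ax25_dev *ax25_dev)
	{
		refcount_inc(&ax25_dev->refcount);
	}

	static inline void ax25_dev_put(ax25_dev *ax25_dev)
	{
		if (refcount_dec_and_test(&ax25_dev->refcount))
			kfree(ax25_dev);
	}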
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index d0b2e094bd55..b7c4d656a94b 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -75,11 +75,13 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
ax25_dev *ax25_dev;
int i;
- if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL)
- return -EINVAL;
if (route->digi_count > AX25_MAX_DIGIS)
return -EINVAL;
+ ax25_dev = ax25_addr_ax25dev(&route->port_addr);
+ if (!ax25_dev)
+ return -EINVAL;
+
write_lock_bh(&ax25_route_lock);
ax25_rt = ax25_route_list;
@@ -91,6 +93,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
if (route->digi_count != 0) {
if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return -ENOMEM;
}
ax25_rt->digipeat->lastrepeat = -1;
@@ -101,6 +104,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
}
}
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return 0;
}
ax25_rt = ax25_rt->next;
@@ -108,10 +112,10 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
if ((ax25_rt = kmalloc(sizeof(ax25_route), GFP_ATOMIC)) == NULL) {
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return -ENOMEM;
}
- refcount_set(&ax25_rt->refcount, 1);
ax25_rt->callsign = route->dest_addr;
ax25_rt->dev = ax25_dev->dev;
ax25_rt->digipeat = NULL;
@@ -120,6 +124,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
write_unlock_bh(&ax25_route_lock);
kfree(ax25_rt);
+ ax25_dev_put(ax25_dev);
return -ENOMEM;
}
ax25_rt->digipeat->lastrepeat = -1;
@@ -132,6 +137,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
ax25_rt->next = ax25_route_list;
ax25_route_list = ax25_rt;
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return 0;
}
@@ -160,12 +166,12 @@ static int ax25_rt_del(struct ax25_routes_struct *route)
ax25cmp(&route->dest_addr, &s->callsign) == 0) {
if (ax25_route_list == s) {
ax25_route_list = s->next;
- ax25_put_route(s);
+ __ax25_put_route(s);
} else {
for (t = ax25_route_list; t != NULL; t = t->next) {
if (t->next == s) {
t->next = s->next;
- ax25_put_route(s);
+ __ax25_put_route(s);
break;
}
}
@@ -173,6 +179,7 @@ static int ax25_rt_del(struct ax25_routes_struct *route)
}
}
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return 0;
}
@@ -215,6 +222,7 @@ static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option)
out:
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return err;
}
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 15ab812c4fe4..9ff98f46dc6b 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -261,12 +261,20 @@ void ax25_disconnect(ax25_cb *ax25, int reason)
{
ax25_clear_queues(ax25);
- if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY))
- ax25_stop_heartbeat(ax25);
- ax25_stop_t1timer(ax25);
- ax25_stop_t2timer(ax25);
- ax25_stop_t3timer(ax25);
- ax25_stop_idletimer(ax25);
+ if (reason == ENETUNREACH) {
+ del_timer_sync(&ax25->timer);
+ del_timer_sync(&ax25->t1timer);
+ del_timer_sync(&ax25->t2timer);
+ del_timer_sync(&ax25->t3timer);
+ del_timer_sync(&ax25->idletimer);
+ } else {
+ if (ax25->sk && !sock_flag(ax25->sk, SOCK_DESTROY))
+ ax25_stop_heartbeat(ax25);
+ ax25_stop_t1timer(ax25);
+ ax25_stop_t2timer(ax25);
+ ax25_stop_t3timer(ax25);
+ ax25_stop_idletimer(ax25);
+ }
ax25->state = AX25_STATE_0;
diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c
index 85865ebfdfa2..9f7cb0a7c73f 100644
--- a/net/ax25/ax25_timer.c
+++ b/net/ax25/ax25_timer.c
@@ -108,10 +108,12 @@ int ax25_t1timer_running(ax25_cb *ax25)
unsigned long ax25_display_timer(struct timer_list *timer)
{
+ long delta = timer->expires - jiffies;
+
if (!timer_pending(timer))
return 0;
- return timer->expires - jiffies;
+ return max(0L, delta);
}
EXPORT_SYMBOL(ax25_display_timer);
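The clamp matters because timer->expires and jiffies are unsigned longs: for a timer that has expired but not yet fired, the old subtraction wrapped around to a huge positive value. Computing the difference as a signed long first makes the just-expired case come out as 0. Illustration:

	unsigned long expires = jiffies - 1;	/* expired one jiffy ago */
	long delta = expires - jiffies;		/* -1 once viewed as signed */
	unsigned long shown = max(0L, delta);	/* 0, not ULONG_MAX */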
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index f94f538fa382..7f6a7c96ac92 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -13,13 +13,13 @@
#include <linux/bug.h>
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/init.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 71999e13f729..f1741fbfb617 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -10,13 +10,13 @@
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/minmax.h>
#include <linux/netdevice.h>
@@ -125,7 +125,6 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
/* if not a wifi interface, check if this device provides data via
* ethtool (e.g. an Ethernet adapter)
*/
- memset(&link_settings, 0, sizeof(link_settings));
rtnl_lock();
ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings);
rtnl_unlock();
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 1d750f3cb2e4..033639df96d8 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -9,12 +9,12 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 2ed9496fc41f..37ce6cfb3520 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -10,6 +10,7 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
#include <linux/compiler.h>
+#include <linux/container_of.h>
#include <linux/crc16.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
@@ -64,7 +65,7 @@ batadv_bla_send_announce(struct batadv_priv *bat_priv,
*/
static inline u32 batadv_choose_claim(const void *data, u32 size)
{
- struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
+ const struct batadv_bla_claim *claim = data;
u32 hash = 0;
hash = jhash(&claim->addr, sizeof(claim->addr), hash);
@@ -85,7 +86,7 @@ static inline u32 batadv_choose_backbone_gw(const void *data, u32 size)
const struct batadv_bla_backbone_gw *gw;
u32 hash = 0;
- gw = (struct batadv_bla_backbone_gw *)data;
+ gw = data;
hash = jhash(&gw->orig, sizeof(gw->orig), hash);
hash = jhash(&gw->vid, sizeof(gw->vid), hash);
@@ -443,7 +444,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, const u8 *mac,
batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES,
skb->len + ETH_HLEN);
- netif_rx_any_context(skb);
+ netif_rx(skb);
out:
batadv_hardif_put(primary_if);
}
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 2f008e329007..fefb51a5f606 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -11,6 +11,7 @@
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
@@ -20,7 +21,6 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/netlink.h>
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 0899a729a23f..c120c7c6d25f 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -475,6 +475,17 @@ int batadv_frag_send_packet(struct sk_buff *skb,
goto free_skb;
}
+ /* GRO might have added fragments to the fragment list instead of
+ * frags[]. skb_split() does not handle that case, so the skb must be
+ * linearized here to avoid incorrect length information once all
+ * batman-adv fragments have been created and submitted to the
+ * hard-interface.
+ */
+ if (skb_has_frag_list(skb) && __skb_linearize(skb)) {
+ ret = -ENOMEM;
+ goto free_skb;
+ }
+
/* Create one header to be copied to all fragments */
frag_header.packet_type = BATADV_UNICAST_FRAG;
frag_header.version = BATADV_COMPAT_VERSION;
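skb_split() only redistributes the page fragments in skb_shinfo(skb)->frags[]; skbs chained via the frag_list, as GRO builds them, keep their full length and would corrupt the fragment sizes, hence the linearization above. Note that __skb_linearize() allocates and can fail, which is where the -ENOMEM path comes from:

	/* a GRO skb can look like:  head -> frag_list -> skb -> skb ... */
	if (skb_has_frag_list(skb) && __skb_linearize(skb))
		return -ENOMEM;	/* copying everything into the head failed */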
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index b7466136e292..d26124bc27e1 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -9,6 +9,7 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 8a2b78f9c4b2..41c1ad33d009 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -9,11 +9,12 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
+#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/if.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/limits.h>
#include <linux/list.h>
@@ -149,25 +150,28 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
struct net *net = dev_net(net_dev);
struct net_device *parent_dev;
struct net *parent_net;
+ int iflink;
bool ret;
/* check if this is a batman-adv mesh interface */
if (batadv_softif_is_valid(net_dev))
return true;
- /* no more parents..stop recursion */
- if (dev_get_iflink(net_dev) == 0 ||
- dev_get_iflink(net_dev) == net_dev->ifindex)
+ iflink = dev_get_iflink(net_dev);
+ if (iflink == 0)
return false;
parent_net = batadv_getlink_net(net_dev, net);
+ /* iflink to itself, most likely physical device */
+ if (net == parent_net && iflink == net_dev->ifindex)
+ return false;
+
/* recurse over the parent device */
- parent_dev = __dev_get_by_index((struct net *)parent_net,
- dev_get_iflink(net_dev));
- /* if we got a NULL parent_dev there is something broken.. */
+ parent_dev = __dev_get_by_index((struct net *)parent_net, iflink);
if (!parent_dev) {
- pr_err("Cannot find parent device\n");
+ pr_warn("Cannot find parent device. Skipping batadv-on-batadv check for %s\n",
+ net_dev->name);
return false;
}
@@ -214,14 +218,15 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
struct net_device *real_netdev = NULL;
struct net *real_net;
struct net *net;
- int ifindex;
+ int iflink;
ASSERT_RTNL();
if (!netdev)
return NULL;
- if (netdev->ifindex == dev_get_iflink(netdev)) {
+ iflink = dev_get_iflink(netdev);
+ if (iflink == 0) {
dev_hold(netdev);
return netdev;
}
@@ -231,9 +236,16 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
goto out;
net = dev_net(hard_iface->soft_iface);
- ifindex = dev_get_iflink(netdev);
real_net = batadv_getlink_net(netdev, net);
- real_netdev = dev_get_by_index(real_net, ifindex);
+
+ /* iflink to itself, most likely physical device */
+ if (net == real_net && netdev->ifindex == iflink) {
+ real_netdev = netdev;
+ dev_hold(real_netdev);
+ goto out;
+ }
+
+ real_netdev = dev_get_by_index(real_net, iflink);
out:
batadv_hardif_put(hard_iface);
@@ -296,9 +308,11 @@ static bool batadv_is_cfg80211_netdev(struct net_device *net_device)
if (!net_device)
return false;
+#if IS_ENABLED(CONFIG_CFG80211)
/* cfg80211 drivers have to set ieee80211_ptr */
if (net_device->ieee80211_ptr)
return true;
+#endif
return false;
}
@@ -687,6 +701,9 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
int max_header_len = batadv_max_header_len();
int ret;
+ if (hard_iface->net_dev->mtu < ETH_MIN_MTU + max_header_len)
+ return -EINVAL;
+
if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
goto out;
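dev_get_iflink() has three outcomes that both hunks above now handle explicitly: 0 means the driver has no lower device, a value equal to the interface's own ifindex means a self-referencing (physical) device, and anything else is a real parent whose ifindex is only meaningful inside the parent's netns. In sketch form:

	iflink = dev_get_iflink(net_dev);
	if (iflink == 0)
		return false;		/* no parent link at all */

	parent_net = batadv_getlink_net(net_dev, dev_net(net_dev));

	/* a self-reference only means "physical device" within one netns */
	if (dev_net(net_dev) == parent_net && iflink == net_dev->ifindex)
		return false;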
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 5207cd8d6ad8..e8a449915566 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -9,6 +9,7 @@
#include <linux/atomic.h>
#include <linux/build_bug.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/crc32c.h>
#include <linux/device.h>
#include <linux/errno.h>
@@ -132,7 +133,6 @@ static void __exit batadv_exit(void)
rtnl_link_unregister(&batadv_link_ops);
unregister_netdevice_notifier(&batadv_hard_if_notifier);
- flush_workqueue(batadv_event_workqueue);
destroy_workqueue(batadv_event_workqueue);
batadv_event_workqueue = NULL;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 494d1ebecac2..c48803b32bb0 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2022.0"
+#define BATADV_SOURCE_VERSION "2022.3"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index f4004cf0ff6f..b238455913df 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -11,6 +11,7 @@
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
@@ -134,7 +135,7 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev)
{
struct inet6_dev *in6_dev = __in6_dev_get(dev);
- if (in6_dev && in6_dev->cnf.mc_forwarding)
+ if (in6_dev && atomic_read(&in6_dev->cnf.mc_forwarding))
return BATADV_NO_FLAGS;
else
return BATADV_MCAST_WANT_NO_RTR6;
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 00875e1d8c44..a5e4a4e976cf 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -1493,6 +1493,7 @@ struct genl_family batadv_netlink_family __ro_after_init = {
.module = THIS_MODULE,
.small_ops = batadv_netlink_ops,
.n_small_ops = ARRAY_SIZE(batadv_netlink_ops),
+ .resv_start_op = BATADV_CMD_SET_VLAN + 1,
.mcgrps = batadv_netlink_mcgrps,
.n_mcgrps = ARRAY_SIZE(batadv_netlink_mcgrps),
};
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 974d726fabb9..5f4aeeb60dc4 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -11,6 +11,7 @@
#include <linux/bitops.h>
#include <linux/byteorder/generic.h>
#include <linux/compiler.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
@@ -19,7 +20,6 @@
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index aadc653ca1d8..34903df4fe93 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -8,11 +8,11 @@
#include "main.h"
#include <linux/atomic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 477d85a3b558..0379b126865d 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -10,13 +10,13 @@
#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/if.h>
#include <linux/if_ether.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/netdevice.h>
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 2dbbe6c19609..0f5c0679b55a 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -11,6 +11,7 @@
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
#include <linux/compiler.h>
+#include <linux/container_of.h>
#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
@@ -19,7 +20,6 @@
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 93730d30af54..7f3dd3c393e0 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -12,13 +12,13 @@
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
#include <linux/compiler.h>
+#include <linux/container_of.h>
#include <linux/err.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/init.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/kthread.h>
#include <linux/limits.h>
diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h
index d673ebdd0426..5dd52bc5cabb 100644
--- a/net/batman-adv/trace.h
+++ b/net/batman-adv/trace.h
@@ -9,8 +9,6 @@
#include "main.h"
-#include <linux/bug.h>
-#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/percpu.h>
#include <linux/printk.h>
@@ -28,8 +26,6 @@
#endif /* CONFIG_BATMAN_ADV_TRACING */
-#define BATADV_MAX_MSG_LEN 256
-
TRACE_EVENT(batadv_dbg,
TP_PROTO(struct batadv_priv *bat_priv,
@@ -40,16 +36,13 @@ TRACE_EVENT(batadv_dbg,
TP_STRUCT__entry(
__string(device, bat_priv->soft_iface->name)
__string(driver, KBUILD_MODNAME)
- __dynamic_array(char, msg, BATADV_MAX_MSG_LEN)
+ __vstring(msg, vaf->fmt, vaf->va)
),
TP_fast_assign(
__assign_str(device, bat_priv->soft_iface->name);
__assign_str(driver, KBUILD_MODNAME);
- WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
- BATADV_MAX_MSG_LEN,
- vaf->fmt,
- *vaf->va) >= BATADV_MAX_MSG_LEN);
+ __assign_vstr(msg, vaf->fmt, vaf->va);
),
TP_printk(
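The __vstring()/__assign_vstr() helpers size the trace record's dynamic array from the format string and va_list at recording time, which removes both the fixed 256-byte buffer and the truncation WARN_ON_ONCE(). A minimal sketch of the usage (my_event is hypothetical):

	TRACE_EVENT(my_event,
		TP_PROTO(struct va_format *vaf),
		TP_ARGS(vaf),
		TP_STRUCT__entry(
			__vstring(msg, vaf->fmt, vaf->va)  /* sized per record */
		),
		TP_fast_assign(
			__assign_vstr(msg, vaf->fmt, vaf->va);
		),
		TP_printk("%s", __get_str(msg))
	);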
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 4b7ad6684bc4..01d30c1e412c 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -13,6 +13,7 @@
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
#include <linux/compiler.h>
+#include <linux/container_of.h>
#include <linux/crc32c.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
@@ -21,7 +22,6 @@
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
@@ -103,10 +103,10 @@ static bool batadv_compare_tt(const struct hlist_node *node, const void *data2)
*/
static inline u32 batadv_choose_tt(const void *data, u32 size)
{
- struct batadv_tt_common_entry *tt;
+ const struct batadv_tt_common_entry *tt;
u32 hash = 0;
- tt = (struct batadv_tt_common_entry *)data;
+ tt = data;
hash = jhash(&tt->addr, ETH_ALEN, hash);
hash = jhash(&tt->vid, sizeof(tt->vid), hash);
@@ -2766,7 +2766,7 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
u32 i;
tt_tot = batadv_tt_entries(tt_len);
- tt_change = (struct batadv_tvlv_tt_change *)tvlv_buff;
+ tt_change = tvlv_buff;
if (!valid_cb)
return;
@@ -3994,7 +3994,7 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
if (tvlv_value_len < sizeof(*tt_data))
return;
- tt_data = (struct batadv_tvlv_tt_data *)tvlv_value;
+ tt_data = tvlv_value;
tvlv_value_len -= sizeof(*tt_data);
num_vlan = ntohs(tt_data->num_vlan);
@@ -4037,7 +4037,7 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
if (tvlv_value_len < sizeof(*tt_data))
return NET_RX_SUCCESS;
- tt_data = (struct batadv_tvlv_tt_data *)tvlv_value;
+ tt_data = tvlv_value;
tvlv_value_len -= sizeof(*tt_data);
tt_vlan_len = sizeof(struct batadv_tvlv_tt_vlan_data);
@@ -4129,7 +4129,7 @@ static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
goto out;
batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX);
- roaming_adv = (struct batadv_tvlv_roam_adv *)tvlv_value;
+ roaming_adv = tvlv_value;
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Received ROAMING_ADV from %pM (client %pM)\n",
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 0cb58eb04093..7ec2e2343884 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -7,10 +7,10 @@
#include "main.h"
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 2be5d4a712c5..758cd797a063 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1740,45 +1740,6 @@ struct batadv_priv {
#endif
};
-/**
- * struct batadv_socket_client - layer2 icmp socket client data
- */
-struct batadv_socket_client {
- /**
- * @queue_list: packet queue for packets destined for this socket client
- */
- struct list_head queue_list;
-
- /** @queue_len: number of packets in the packet queue (queue_list) */
- unsigned int queue_len;
-
- /** @index: socket client's index in the batadv_socket_client_hash */
- unsigned char index;
-
- /** @lock: lock protecting queue_list, queue_len & index */
- spinlock_t lock;
-
- /** @queue_wait: socket client's wait queue */
- wait_queue_head_t queue_wait;
-
- /** @bat_priv: pointer to soft_iface this client belongs to */
- struct batadv_priv *bat_priv;
-};
-
-/**
- * struct batadv_socket_packet - layer2 icmp packet for socket client
- */
-struct batadv_socket_packet {
- /** @list: list node for &batadv_socket_client.queue_list */
- struct list_head list;
-
- /** @icmp_len: size of the layer2 icmp packet */
- size_t icmp_len;
-
- /** @icmp_packet: layer2 icmp packet */
- u8 icmp_packet[BATADV_ICMP_MAX_PACKET_SIZE];
-};
-
#ifdef CONFIG_BATMAN_ADV_BLA
/**
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 133d7ea063fb..215af9b3b589 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -240,7 +240,7 @@ static int give_skb_to_upper(struct sk_buff *skb, struct net_device *dev)
if (!skb_cp)
return NET_RX_DROP;
- return netif_rx_ni(skb_cp);
+ return netif_rx(skb_cp);
}
static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev,
@@ -641,7 +641,6 @@ static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan,
return NULL;
peer->chan = chan;
- memset(&peer->peer_addr, 0, sizeof(struct in6_addr));
baswap((void *)peer->lladdr, &chan->dst);
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index e0ab4cd7afc3..ae3bdc6dfc92 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -29,6 +29,7 @@ menuconfig BT
SCO audio links
L2CAP (Logical Link Control and Adaptation Protocol)
SMP (Security Manager Protocol) on LE (Low Energy) links
+ ISO isochronous links
HCI Device drivers (Interface to the hardware)
RFCOMM Module (RFCOMM Protocol)
BNEP Module (Bluetooth Network Encapsulation Protocol)
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index a52bba8500e1..0e7b7db42750 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -18,6 +18,7 @@ bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \
eir.o hci_sync.o
bluetooth-$(CONFIG_BT_BREDR) += sco.o
+bluetooth-$(CONFIG_BT_LE) += iso.o
bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o
bluetooth-$(CONFIG_BT_LEDS) += leds.o
bluetooth-$(CONFIG_BT_MSFTEXT) += msft.o
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 1661979b6a6e..dc65974f5adb 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -38,7 +38,7 @@
#include "selftest.h"
/* Bluetooth sockets */
-#define BT_MAX_PROTO 8
+#define BT_MAX_PROTO (BTPROTO_LAST + 1)
static const struct net_proto_family *bt_proto[BT_MAX_PROTO];
static DEFINE_RWLOCK(bt_proto_lock);
@@ -52,6 +52,7 @@ static const char *const bt_key_strings[BT_MAX_PROTO] = {
"sk_lock-AF_BLUETOOTH-BTPROTO_CMTP",
"sk_lock-AF_BLUETOOTH-BTPROTO_HIDP",
"sk_lock-AF_BLUETOOTH-BTPROTO_AVDTP",
+ "sk_lock-AF_BLUETOOTH-BTPROTO_ISO",
};
static struct lock_class_key bt_slock_key[BT_MAX_PROTO];
@@ -64,6 +65,7 @@ static const char *const bt_slock_key_strings[BT_MAX_PROTO] = {
"slock-AF_BLUETOOTH-BTPROTO_CMTP",
"slock-AF_BLUETOOTH-BTPROTO_HIDP",
"slock-AF_BLUETOOTH-BTPROTO_AVDTP",
+ "slock-AF_BLUETOOTH-BTPROTO_ISO",
};
void bt_sock_reclassify_lock(struct sock *sk, int proto)
@@ -251,7 +253,6 @@ EXPORT_SYMBOL(bt_accept_dequeue);
int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags)
{
- int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
struct sk_buff *skb;
size_t copied;
@@ -263,7 +264,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (flags & MSG_OOB)
return -EOPNOTSUPP;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb) {
if (sk->sk_shutdown & RCV_SHUTDOWN)
return 0;
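skb_recv_datagram() dropped its separate noblock argument; the non-blocking decision now travels inside flags, so callers that used to split MSG_DONTWAIT out simply stop doing so:

	/* old: skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
	 *			        flags & MSG_DONTWAIT, &err);
	 */
	skb = skb_recv_datagram(sk, flags, &err);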
@@ -281,7 +282,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
skb_reset_transport_header(skb);
err = skb_copy_datagram_msg(skb, 0, msg, copied);
if (err == 0) {
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (msg->msg_name && bt_sk(sk)->skb_msg_name)
bt_sk(sk)->skb_msg_name(skb, msg->msg_name,
@@ -385,7 +386,7 @@ int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg,
copied += chunk;
size -= chunk;
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (!(flags & MSG_PEEK)) {
int skb_len = skb_headlen(skb);
@@ -568,7 +569,7 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo)
EXPORT_SYMBOL(bt_sock_wait_state);
/* This function expects the sk lock to be held when called */
-int bt_sock_wait_ready(struct sock *sk, unsigned long flags)
+int bt_sock_wait_ready(struct sock *sk, unsigned int msg_flags)
{
DECLARE_WAITQUEUE(wait, current);
unsigned long timeo;
@@ -576,7 +577,7 @@ int bt_sock_wait_ready(struct sock *sk, unsigned long flags)
BT_DBG("sk %p", sk);
- timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_sndtimeo(sk, !!(msg_flags & MSG_DONTWAIT));
add_wait_queue(sk_sleep(sk), &wait);
set_current_state(TASK_INTERRUPTIBLE);
@@ -611,7 +612,7 @@ EXPORT_SYMBOL(bt_sock_wait_ready);
static void *bt_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(seq->private->l->lock)
{
- struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
+ struct bt_sock_list *l = pde_data(file_inode(seq->file));
read_lock(&l->lock);
return seq_hlist_start_head(&l->head, *pos);
@@ -619,7 +620,7 @@ static void *bt_seq_start(struct seq_file *seq, loff_t *pos)
static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
+ struct bt_sock_list *l = pde_data(file_inode(seq->file));
return seq_hlist_next(v, &l->head, pos);
}
@@ -627,14 +628,14 @@ static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void bt_seq_stop(struct seq_file *seq, void *v)
__releases(seq->private->l->lock)
{
- struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
+ struct bt_sock_list *l = pde_data(file_inode(seq->file));
read_unlock(&l->lock);
}
static int bt_seq_show(struct seq_file *seq, void *v)
{
- struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
+ struct bt_sock_list *l = pde_data(file_inode(seq->file));
if (v == SEQ_START_TOKEN) {
seq_puts(seq, "sk RefCnt Rmem Wmem User Inode Parent");
diff --git a/net/bluetooth/aosp.c b/net/bluetooth/aosp.c
index 432ae3aac9e3..1d67836e95e1 100644
--- a/net/bluetooth/aosp.c
+++ b/net/bluetooth/aosp.c
@@ -54,7 +54,10 @@ void aosp_do_open(struct hci_dev *hdev)
/* LE Get Vendor Capabilities Command */
skb = __hci_cmd_sync(hdev, hci_opcode_pack(0x3f, 0x153), 0, NULL,
HCI_CMD_TIMEOUT);
- if (IS_ERR(skb)) {
+ if (IS_ERR_OR_NULL(skb)) {
+ if (!skb)
+ skb = ERR_PTR(-EIO);
+
bt_dev_err(hdev, "AOSP get vendor capabilities (%ld)",
PTR_ERR(skb));
return;
@@ -152,7 +155,10 @@ static int enable_quality_report(struct hci_dev *hdev)
skb = __hci_cmd_sync(hdev, BQR_OPCODE, sizeof(cp), &cp,
HCI_CMD_TIMEOUT);
- if (IS_ERR(skb)) {
+ if (IS_ERR_OR_NULL(skb)) {
+ if (!skb)
+ skb = ERR_PTR(-EIO);
+
bt_dev_err(hdev, "Enabling Android BQR failed (%ld)",
PTR_ERR(skb));
return PTR_ERR(skb);
@@ -171,7 +177,10 @@ static int disable_quality_report(struct hci_dev *hdev)
skb = __hci_cmd_sync(hdev, BQR_OPCODE, sizeof(cp), &cp,
HCI_CMD_TIMEOUT);
- if (IS_ERR(skb)) {
+ if (IS_ERR_OR_NULL(skb)) {
+ if (!skb)
+ skb = ERR_PTR(-EIO);
+
bt_dev_err(hdev, "Disabling Android BQR failed (%ld)",
PTR_ERR(skb));
return PTR_ERR(skb);
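__hci_cmd_sync() can return NULL (the command completed without a response to hand back) as well as an ERR_PTR(); normalizing NULL to ERR_PTR(-EIO) lets the existing PTR_ERR()-based logging and return paths work unchanged. The shape of the check:

	skb = __hci_cmd_sync(hdev, opcode, plen, param, HCI_CMD_TIMEOUT);
	if (IS_ERR_OR_NULL(skb)) {
		if (!skb)			/* no response event */
			skb = ERR_PTR(-EIO);
		bt_dev_err(hdev, "command failed (%ld)", PTR_ERR(skb));
		return PTR_ERR(skb);
	}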
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index c9add7753b9f..5a6a49885ab6 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -400,7 +400,7 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
dev->stats.rx_packets++;
nskb->ip_summed = CHECKSUM_NONE;
nskb->protocol = eth_type_trans(nskb, dev);
- netif_rx_ni(nskb);
+ netif_rx(nskb);
return 0;
badframe:
@@ -535,7 +535,7 @@ static int bnep_session(void *arg)
up_write(&bnep_session_sem);
free_netdev(dev);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
return 0;
}
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 83eb84e8e688..90d130588a3e 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -323,7 +323,7 @@ static int cmtp_session(void *arg)
up_write(&cmtp_session_sem);
kfree(session);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
return 0;
}
diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c
index 7e930f77ecab..8a85f6cdfbc1 100644
--- a/net/bluetooth/eir.c
+++ b/net/bluetooth/eir.c
@@ -13,6 +13,20 @@
#define PNP_INFO_SVCLASS_ID 0x1200
+static u8 eir_append_name(u8 *eir, u16 eir_len, u8 type, u8 *data, u8 data_len)
+{
+ u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1];
+
+ /* If data is already NULL terminated just pass it directly */
+ if (data[data_len - 1] == '\0')
+ return eir_append_data(eir, eir_len, type, data, data_len);
+
+ memcpy(name, data, HCI_MAX_SHORT_NAME_LENGTH);
+ name[HCI_MAX_SHORT_NAME_LENGTH] = '\0';
+
+ return eir_append_data(eir, eir_len, type, name, sizeof(name));
+}
+
u8 eir_append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
{
size_t short_len;
@@ -23,29 +37,26 @@ u8 eir_append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
return ad_len;
/* use complete name if present and fits */
- complete_len = strlen(hdev->dev_name);
+ complete_len = strnlen(hdev->dev_name, sizeof(hdev->dev_name));
if (complete_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH)
- return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE,
+ return eir_append_name(ptr, ad_len, EIR_NAME_COMPLETE,
hdev->dev_name, complete_len + 1);
/* use short name if present */
- short_len = strlen(hdev->short_name);
+ short_len = strnlen(hdev->short_name, sizeof(hdev->short_name));
if (short_len)
- return eir_append_data(ptr, ad_len, EIR_NAME_SHORT,
- hdev->short_name, short_len + 1);
+ return eir_append_name(ptr, ad_len, EIR_NAME_SHORT,
+ hdev->short_name,
+ short_len == HCI_MAX_SHORT_NAME_LENGTH ?
+ short_len : short_len + 1);
/* use shortened full name if present; we already know that the name
* is longer than HCI_MAX_SHORT_NAME_LENGTH
*/
- if (complete_len) {
- u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1];
-
- memcpy(name, hdev->dev_name, HCI_MAX_SHORT_NAME_LENGTH);
- name[HCI_MAX_SHORT_NAME_LENGTH] = '\0';
-
- return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, name,
- sizeof(name));
- }
+ if (complete_len)
+ return eir_append_name(ptr, ad_len, EIR_NAME_SHORT,
+ hdev->dev_name,
+ HCI_MAX_SHORT_NAME_LENGTH);
return ad_len;
}
@@ -55,6 +66,19 @@ u8 eir_append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
return eir_append_le16(ptr, ad_len, EIR_APPEARANCE, hdev->appearance);
}
+u8 eir_append_service_data(u8 *eir, u16 eir_len, u16 uuid, u8 *data,
+ u8 data_len)
+{
+ eir[eir_len++] = sizeof(u8) + sizeof(uuid) + data_len;
+ eir[eir_len++] = EIR_SERVICE_DATA;
+ put_unaligned_le16(uuid, &eir[eir_len]);
+ eir_len += sizeof(uuid);
+ memcpy(&eir[eir_len], data, data_len);
+ eir_len += data_len;
+
+ return eir_len;
+}
+
static u8 *create_uuid16_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len)
{
u8 *ptr = data, *uuids_start = NULL;
@@ -168,7 +192,7 @@ void eir_create(struct hci_dev *hdev, u8 *data)
u8 *ptr = data;
size_t name_len;
- name_len = strlen(hdev->dev_name);
+ name_len = strnlen(hdev->dev_name, sizeof(hdev->dev_name));
if (name_len > 0) {
/* EIR Data type */
@@ -212,6 +236,27 @@ void eir_create(struct hci_dev *hdev, u8 *data)
ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data));
}
+u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
+{
+ struct adv_info *adv = NULL;
+ u8 ad_len = 0;
+
+ /* Return 0 when the current instance identifier is invalid. */
+ if (instance) {
+ adv = hci_find_adv_instance(hdev, instance);
+ if (!adv)
+ return 0;
+ }
+
+ if (adv) {
+ memcpy(ptr, adv->per_adv_data, adv->per_adv_data_len);
+ ad_len += adv->per_adv_data_len;
+ ptr += adv->per_adv_data_len;
+ }
+
+ return ad_len;
+}
+
u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
{
struct adv_info *adv = NULL;
@@ -333,3 +378,21 @@ u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr)
return scan_rsp_len;
}
+
+void *eir_get_service_data(u8 *eir, size_t eir_len, u16 uuid, size_t *len)
+{
+ while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, len))) {
+ u16 value = get_unaligned_le16(eir);
+
+ if (uuid == value) {
+ if (len)
+ *len -= 2;
+ return &eir[2];
+ }
+
+ eir += *len;
+ eir_len -= *len;
+ }
+
+ return NULL;
+}
diff --git a/net/bluetooth/eir.h b/net/bluetooth/eir.h
index 05e2e917fc25..0df19f2f4af9 100644
--- a/net/bluetooth/eir.h
+++ b/net/bluetooth/eir.h
@@ -11,9 +11,17 @@ void eir_create(struct hci_dev *hdev, u8 *data);
u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr);
u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr);
+u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr);
u8 eir_append_local_name(struct hci_dev *hdev, u8 *eir, u8 ad_len);
u8 eir_append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len);
+u8 eir_append_service_data(u8 *eir, u16 eir_len, u16 uuid, u8 *data,
+ u8 data_len);
+
+static inline u16 eir_precalc_len(u8 data_len)
+{
+ return sizeof(u8) * 2 + data_len;
+}
static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type,
u8 *data, u8 data_len)
@@ -36,6 +44,21 @@ static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data)
return eir_len;
}
+static inline u16 eir_skb_put_data(struct sk_buff *skb, u8 type, u8 *data, u8 data_len)
+{
+ u8 *eir;
+ u16 eir_len;
+
+ eir_len = eir_precalc_len(data_len);
+ eir = skb_put(skb, eir_len);
+ WARN_ON(sizeof(type) + data_len > U8_MAX);
+ eir[0] = sizeof(type) + data_len;
+ eir[1] = type;
+ memcpy(&eir[2], data, data_len);
+
+ return eir_len;
+}
+
static inline void *eir_get_data(u8 *eir, size_t eir_len, u8 type,
size_t *data_len)
{
@@ -72,3 +95,5 @@ static inline void *eir_get_data(u8 *eir, size_t eir_len, u8 type,
return NULL;
}
+
+void *eir_get_service_data(u8 *eir, size_t eir_len, u16 uuid, size_t *len);
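EIR/AD entries are length-prefixed TLVs: a length byte covering everything after it, a type byte, then the payload, so eir_precalc_len() is exactly that two-byte overhead plus the data. Worked example for a service-data entry with a 3-byte payload:

	/* on the wire: 1 (len) + 1 (type) + 2 (uuid) + 3 (data) = 7 bytes */
	eir[0] = sizeof(u8) + sizeof(u16) + 3;	/* length byte stores 6 */
	eir[1] = EIR_SERVICE_DATA;		/* type 0x16 */
	put_unaligned_le16(uuid, &eir[2]);	/* little-endian 16-bit UUID */
	memcpy(&eir[4], data, 3);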
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 04ebe901e86f..a6c12863a253 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -30,10 +30,13 @@
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
#include <net/bluetooth/l2cap.h>
+#include <net/bluetooth/iso.h>
+#include <net/bluetooth/mgmt.h>
#include "hci_request.h"
#include "smp.h"
#include "a2mp.h"
+#include "eir.h"
struct sco_param {
u16 pkt_type;
@@ -41,6 +44,11 @@ struct sco_param {
u8 retrans_effort;
};
+struct conn_handle_t {
+ struct hci_conn *conn;
+ __u16 handle;
+};
+
static const struct sco_param esco_param_cvsd[] = {
{ EDR_ESCO_MASK & ~ESCO_2EV3, 0x000a, 0x01 }, /* S3 */
{ EDR_ESCO_MASK & ~ESCO_2EV3, 0x0007, 0x01 }, /* S2 */
@@ -118,10 +126,16 @@ static void hci_conn_cleanup(struct hci_conn *conn)
if (test_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags))
hci_conn_params_del(conn->hdev, &conn->dst, conn->dst_type);
+ if (test_and_clear_bit(HCI_CONN_FLUSH_KEY, &conn->flags))
+ hci_remove_link_key(hdev, &conn->dst);
+
hci_chan_list_flush(conn);
hci_conn_hash_del(hdev, conn);
+ if (conn->cleanup)
+ conn->cleanup(conn);
+
if (conn->type == SCO_LINK || conn->type == ESCO_LINK) {
switch (conn->setting & SCO_AIRMODE_MASK) {
case SCO_AIRMODE_CVSD:
@@ -307,17 +321,60 @@ static bool find_next_esco_param(struct hci_conn *conn,
return conn->attempt <= size;
}
-static bool hci_enhanced_setup_sync_conn(struct hci_conn *conn, __u16 handle)
+static int configure_datapath_sync(struct hci_dev *hdev, struct bt_codec *codec)
{
- struct hci_dev *hdev = conn->hdev;
+ int err;
+ __u8 vnd_len, *vnd_data = NULL;
+ struct hci_op_configure_data_path *cmd = NULL;
+
+ err = hdev->get_codec_config_data(hdev, ESCO_LINK, codec, &vnd_len,
+ &vnd_data);
+ if (err < 0)
+ goto error;
+
+ cmd = kzalloc(sizeof(*cmd) + vnd_len, GFP_KERNEL);
+ if (!cmd) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ err = hdev->get_data_path_id(hdev, &cmd->data_path_id);
+ if (err < 0)
+ goto error;
+
+ cmd->vnd_len = vnd_len;
+ memcpy(cmd->vnd_data, vnd_data, vnd_len);
+
+ cmd->direction = 0x00;
+ __hci_cmd_sync_status(hdev, HCI_CONFIGURE_DATA_PATH,
+ sizeof(*cmd) + vnd_len, cmd, HCI_CMD_TIMEOUT);
+
+ cmd->direction = 0x01;
+ err = __hci_cmd_sync_status(hdev, HCI_CONFIGURE_DATA_PATH,
+ sizeof(*cmd) + vnd_len, cmd,
+ HCI_CMD_TIMEOUT);
+error:
+
+ kfree(cmd);
+ kfree(vnd_data);
+ return err;
+}
+
+static int hci_enhanced_setup_sync(struct hci_dev *hdev, void *data)
+{
+ struct conn_handle_t *conn_handle = data;
+ struct hci_conn *conn = conn_handle->conn;
+ __u16 handle = conn_handle->handle;
struct hci_cp_enhanced_setup_sync_conn cp;
const struct sco_param *param;
+ kfree(conn_handle);
+
bt_dev_dbg(hdev, "hcon %p", conn);
/* for the offload use case, the codec needs to be configured before opening SCO */
if (conn->codec.data_path)
- hci_req_configure_datapath(hdev, &conn->codec);
+ configure_datapath_sync(hdev, &conn->codec);
conn->state = BT_CONNECT;
conn->out = true;
@@ -335,7 +392,7 @@ static bool hci_enhanced_setup_sync_conn(struct hci_conn *conn, __u16 handle)
case BT_CODEC_MSBC:
if (!find_next_esco_param(conn, esco_param_msbc,
ARRAY_SIZE(esco_param_msbc)))
- return false;
+ return -EINVAL;
param = &esco_param_msbc[conn->attempt - 1];
cp.tx_coding_format.id = 0x05;
@@ -387,11 +444,11 @@ static bool hci_enhanced_setup_sync_conn(struct hci_conn *conn, __u16 handle)
if (lmp_esco_capable(conn->link)) {
if (!find_next_esco_param(conn, esco_param_cvsd,
ARRAY_SIZE(esco_param_cvsd)))
- return false;
+ return -EINVAL;
param = &esco_param_cvsd[conn->attempt - 1];
} else {
if (conn->attempt > ARRAY_SIZE(sco_param_cvsd))
- return false;
+ return -EINVAL;
param = &sco_param_cvsd[conn->attempt - 1];
}
cp.tx_coding_format.id = 2;
@@ -414,7 +471,7 @@ static bool hci_enhanced_setup_sync_conn(struct hci_conn *conn, __u16 handle)
cp.out_transport_unit_size = 16;
break;
default:
- return false;
+ return -EINVAL;
}
cp.retrans_effort = param->retrans_effort;
@@ -422,9 +479,9 @@ static bool hci_enhanced_setup_sync_conn(struct hci_conn *conn, __u16 handle)
cp.max_latency = __cpu_to_le16(param->max_latency);
if (hci_send_cmd(hdev, HCI_OP_ENHANCED_SETUP_SYNC_CONN, sizeof(cp), &cp) < 0)
- return false;
+ return -EIO;
- return true;
+ return 0;
}
static bool hci_setup_sync_conn(struct hci_conn *conn, __u16 handle)
@@ -481,8 +538,24 @@ static bool hci_setup_sync_conn(struct hci_conn *conn, __u16 handle)
bool hci_setup_sync(struct hci_conn *conn, __u16 handle)
{
- if (enhanced_sco_capable(conn->hdev))
- return hci_enhanced_setup_sync_conn(conn, handle);
+ int result;
+ struct conn_handle_t *conn_handle;
+
+ if (enhanced_sync_conn_capable(conn->hdev)) {
+ conn_handle = kzalloc(sizeof(*conn_handle), GFP_KERNEL);
+
+ if (!conn_handle)
+ return false;
+
+ conn_handle->conn = conn;
+ conn_handle->handle = handle;
+ result = hci_cmd_sync_queue(conn->hdev, hci_enhanced_setup_sync,
+ conn_handle, NULL);
+ if (result < 0)
+ kfree(conn_handle);
+
+ return result == 0;
+ }
return hci_setup_sync_conn(conn, handle);
}
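hci_cmd_sync_queue() runs the given callback from the hdev command-sync work. Ownership of the context pointer transfers on success (hci_enhanced_setup_sync() kfree()s the conn_handle as its first action), so the caller frees it only when queueing fails. The general pattern, with a hypothetical my_sync_cb():

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return false;

	ctx->conn = conn;
	ctx->handle = handle;

	if (hci_cmd_sync_queue(hdev, my_sync_cb, ctx, NULL) < 0) {
		kfree(ctx);		/* callback never ran, still ours */
		return false;
	}

	return true;			/* my_sync_cb() now owns and frees ctx */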
@@ -669,13 +742,208 @@ static void le_conn_timeout(struct work_struct *work)
if (conn->role == HCI_ROLE_SLAVE) {
/* Disable LE Advertising */
le_disable_advertising(hdev);
- hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT);
+ hci_dev_lock(hdev);
+ hci_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT);
+ hci_dev_unlock(hdev);
return;
}
hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM);
}
+struct iso_list_data {
+ union {
+ u8 cig;
+ u8 big;
+ };
+ union {
+ u8 cis;
+ u8 bis;
+ u16 sync_handle;
+ };
+ int count;
+ struct {
+ struct hci_cp_le_set_cig_params cp;
+ struct hci_cis_params cis[0x11];
+ } pdu;
+};
+
+static void bis_list(struct hci_conn *conn, void *data)
+{
+ struct iso_list_data *d = data;
+
+ /* Skip if not broadcast/ANY address */
+ if (bacmp(&conn->dst, BDADDR_ANY))
+ return;
+
+ if (d->big != conn->iso_qos.big || d->bis == BT_ISO_QOS_BIS_UNSET ||
+ d->bis != conn->iso_qos.bis)
+ return;
+
+ d->count++;
+}
+
+static void find_bis(struct hci_conn *conn, void *data)
+{
+ struct iso_list_data *d = data;
+
+ /* Ignore unicast */
+ if (bacmp(&conn->dst, BDADDR_ANY))
+ return;
+
+ d->count++;
+}
+
+static int terminate_big_sync(struct hci_dev *hdev, void *data)
+{
+ struct iso_list_data *d = data;
+
+ bt_dev_dbg(hdev, "big 0x%2.2x bis 0x%2.2x", d->big, d->bis);
+
+ hci_remove_ext_adv_instance_sync(hdev, d->bis, NULL);
+
+ /* Check if ISO connection is a BIS and terminate BIG if there are
+ * no other connections using it.
+ */
+ hci_conn_hash_list_state(hdev, find_bis, ISO_LINK, BT_CONNECTED, d);
+ if (d->count)
+ return 0;
+
+ return hci_le_terminate_big_sync(hdev, d->big,
+ HCI_ERROR_LOCAL_HOST_TERM);
+}
+
+static void terminate_big_destroy(struct hci_dev *hdev, void *data, int err)
+{
+ kfree(data);
+}
+
+static int hci_le_terminate_big(struct hci_dev *hdev, u8 big, u8 bis)
+{
+ struct iso_list_data *d;
+
+ bt_dev_dbg(hdev, "big 0x%2.2x bis 0x%2.2x", big, bis);
+
+ d = kmalloc(sizeof(*d), GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+
+ memset(d, 0, sizeof(*d));
+ d->big = big;
+ d->bis = bis;
+
+ return hci_cmd_sync_queue(hdev, terminate_big_sync, d,
+ terminate_big_destroy);
+}
+
+static int big_terminate_sync(struct hci_dev *hdev, void *data)
+{
+ struct iso_list_data *d = data;
+
+ bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", d->big,
+ d->sync_handle);
+
+ /* Check if ISO connection is a BIS and terminate BIG if there are
+ * no other connections using it.
+ */
+ hci_conn_hash_list_state(hdev, find_bis, ISO_LINK, BT_CONNECTED, d);
+ if (d->count)
+ return 0;
+
+ hci_le_big_terminate_sync(hdev, d->big);
+
+ return hci_le_pa_terminate_sync(hdev, d->sync_handle);
+}
+
+static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, u16 sync_handle)
+{
+ struct iso_list_data *d;
+
+ bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", big, sync_handle);
+
+ d = kmalloc(sizeof(*d), GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+
+ memset(d, 0, sizeof(*d));
+ d->big = big;
+ d->sync_handle = sync_handle;
+
+ return hci_cmd_sync_queue(hdev, big_terminate_sync, d,
+ terminate_big_destroy);
+}
+
+/* Cleanup BIS connection
+ *
+ * Detects if there are any BIS left connected in a BIG.
+ * broadcaster: Remove advertising instance and terminate BIG.
+ * broadcast receiver: Terminate BIG sync and terminate PA sync.
+ */
+static void bis_cleanup(struct hci_conn *conn)
+{
+ struct hci_dev *hdev = conn->hdev;
+
+ bt_dev_dbg(hdev, "conn %p", conn);
+
+ if (conn->role == HCI_ROLE_MASTER) {
+ if (!test_and_clear_bit(HCI_CONN_PER_ADV, &conn->flags))
+ return;
+
+ hci_le_terminate_big(hdev, conn->iso_qos.big,
+ conn->iso_qos.bis);
+ } else {
+ hci_le_big_terminate(hdev, conn->iso_qos.big,
+ conn->sync_handle);
+ }
+}
+
+static int remove_cig_sync(struct hci_dev *hdev, void *data)
+{
+ u8 handle = PTR_ERR(data);
+
+ return hci_le_remove_cig_sync(hdev, handle);
+}
+
+static int hci_le_remove_cig(struct hci_dev *hdev, u8 handle)
+{
+ bt_dev_dbg(hdev, "handle 0x%2.2x", handle);
+
+ return hci_cmd_sync_queue(hdev, remove_cig_sync, ERR_PTR(handle), NULL);
+}
+
+static void find_cis(struct hci_conn *conn, void *data)
+{
+ struct iso_list_data *d = data;
+
+ /* Ignore broadcast */
+ if (!bacmp(&conn->dst, BDADDR_ANY))
+ return;
+
+ d->count++;
+}
+
+/* Cleanup CIS connection:
+ *
+ * Detects if there are any CIS left connected in a CIG and removes the
+ * CIG if there are none.
+ */
+static void cis_cleanup(struct hci_conn *conn)
+{
+ struct hci_dev *hdev = conn->hdev;
+ struct iso_list_data d;
+
+ memset(&d, 0, sizeof(d));
+ d.cig = conn->iso_qos.cig;
+
+ /* Check if ISO connection is a CIS and remove CIG if there are
+ * no other connections using it.
+ */
+ hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECTED, &d);
+ if (d.count)
+ return;
+
+ hci_le_remove_cig(hdev, conn->iso_qos.cig);
+}
+
struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
u8 role)
{
@@ -689,6 +957,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
bacpy(&conn->dst, dst);
bacpy(&conn->src, &hdev->bdaddr);
+ conn->handle = HCI_CONN_HANDLE_UNSET;
conn->hdev = hdev;
conn->type = type;
conn->role = role;
@@ -719,6 +988,17 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
/* conn->src should reflect the local identity address */
hci_copy_identity_address(hdev, &conn->src, &conn->src_type);
break;
+ case ISO_LINK:
+ /* conn->src should reflect the local identity address */
+ hci_copy_identity_address(hdev, &conn->src, &conn->src_type);
+
+ /* set proper cleanup function */
+ if (!bacmp(dst, BDADDR_ANY))
+ conn->cleanup = bis_cleanup;
+ else if (conn->role == HCI_ROLE_MASTER)
+ conn->cleanup = cis_cleanup;
+
+ break;
case SCO_LINK:
if (lmp_esco_capable(hdev))
conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) |
@@ -787,10 +1067,21 @@ int hci_conn_del(struct hci_conn *conn)
hdev->acl_cnt += conn->sent;
} else {
struct hci_conn *acl = conn->link;
+
if (acl) {
acl->link = NULL;
hci_conn_drop(acl);
}
+
+ /* Unacked ISO frames */
+ if (conn->type == ISO_LINK) {
+ if (hdev->iso_pkts)
+ hdev->iso_cnt += conn->sent;
+ else if (hdev->le_pkts)
+ hdev->le_cnt += conn->sent;
+ else
+ hdev->acl_cnt += conn->sent;
+ }
}
if (conn->amp_mgr)
@@ -870,7 +1161,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type)
EXPORT_SYMBOL(hci_get_route);
/* This function requires the caller holds hdev->lock */
-void hci_le_conn_failed(struct hci_conn *conn, u8 status)
+static void hci_le_conn_failed(struct hci_conn *conn, u8 status)
{
struct hci_dev *hdev = conn->hdev;
struct hci_conn_params *params;
@@ -883,8 +1174,6 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
params->conn = NULL;
}
- conn->state = BT_CLOSED;
-
/* If the status indicates successful cancellation of
* the attempt (i.e. Unknown Connection Id) there's no point of
* notifying failure since we'll go back to keep trying to
@@ -896,10 +1185,6 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
mgmt_connect_failed(hdev, &conn->dst, conn->type,
conn->dst_type, status);
- hci_connect_cfm(conn, status);
-
- hci_conn_del(conn);
-
/* Since we may have temporarily stopped the background scanning in
* favor of connection establishment, we should restart it.
*/
@@ -911,6 +1196,28 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
hci_enable_advertising(hdev);
}
+/* This function requires the caller holds hdev->lock */
+void hci_conn_failed(struct hci_conn *conn, u8 status)
+{
+ struct hci_dev *hdev = conn->hdev;
+
+ bt_dev_dbg(hdev, "status 0x%2.2x", status);
+
+ switch (conn->type) {
+ case LE_LINK:
+ hci_le_conn_failed(conn, status);
+ break;
+ case ACL_LINK:
+ mgmt_connect_failed(hdev, &conn->dst, conn->type,
+ conn->dst_type, status);
+ break;
+ }
+
+ conn->state = BT_CLOSED;
+ hci_connect_cfm(conn, status);
+ hci_conn_del(conn);
+}
+
static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err)
{
struct hci_conn *conn = data;
@@ -924,10 +1231,11 @@ static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_err(hdev, "request failed to create LE connection: err %d", err);
- if (!conn)
+ /* Check if connection is still pending */
+ if (conn != hci_lookup_le_connect(hdev))
goto done;
- hci_le_conn_failed(conn, err);
+ hci_conn_failed(conn, bt_status(err));
done:
hci_dev_unlock(hdev);
@@ -1073,6 +1381,108 @@ static int hci_explicit_conn_params_set(struct hci_dev *hdev,
return 0;
}
+static int qos_set_big(struct hci_dev *hdev, struct bt_iso_qos *qos)
+{
+ struct iso_list_data data;
+
+ /* Allocate a BIG if not set */
+ if (qos->big == BT_ISO_QOS_BIG_UNSET) {
+ for (data.big = 0x00; data.big < 0xef; data.big++) {
+ data.count = 0;
+ data.bis = 0xff;
+
+ hci_conn_hash_list_state(hdev, bis_list, ISO_LINK,
+ BT_BOUND, &data);
+ if (!data.count)
+ break;
+ }
+
+ if (data.big == 0xef)
+ return -EADDRNOTAVAIL;
+
+ /* Update BIG */
+ qos->big = data.big;
+ }
+
+ return 0;
+}
+
+static int qos_set_bis(struct hci_dev *hdev, struct bt_iso_qos *qos)
+{
+ struct iso_list_data data;
+
+ /* Allocate BIS if not set */
+ if (qos->bis == BT_ISO_QOS_BIS_UNSET) {
+ /* Find an unused adv set to advertise BIS, skip instance 0x00
+ * since it is reserved as general purpose set.
+ */
+ for (data.bis = 0x01; data.bis < hdev->le_num_of_adv_sets;
+ data.bis++) {
+ data.count = 0;
+
+ hci_conn_hash_list_state(hdev, bis_list, ISO_LINK,
+ BT_BOUND, &data);
+ if (!data.count)
+ break;
+ }
+
+ if (data.bis == hdev->le_num_of_adv_sets)
+ return -EADDRNOTAVAIL;
+
+ /* Update BIS */
+ qos->bis = data.bis;
+ }
+
+ return 0;
+}
+
+/* This function requires the caller holds hdev->lock */
+static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst,
+ struct bt_iso_qos *qos)
+{
+ struct hci_conn *conn;
+ struct iso_list_data data;
+ int err;
+
+ /* Let's make sure that LE is enabled. */
+ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) {
+ if (lmp_le_capable(hdev))
+ return ERR_PTR(-ECONNREFUSED);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ err = qos_set_big(hdev, qos);
+ if (err)
+ return ERR_PTR(err);
+
+ err = qos_set_bis(hdev, qos);
+ if (err)
+ return ERR_PTR(err);
+
+ data.big = qos->big;
+ data.bis = qos->bis;
+ data.count = 0;
+
+ /* Check if there is already a matching BIG/BIS */
+ hci_conn_hash_list_state(hdev, bis_list, ISO_LINK, BT_BOUND, &data);
+ if (data.count)
+ return ERR_PTR(-EADDRINUSE);
+
+ conn = hci_conn_hash_lookup_bis(hdev, dst, qos->big, qos->bis);
+ if (conn)
+ return ERR_PTR(-EADDRINUSE);
+
+ conn = hci_conn_add(hdev, ISO_LINK, dst, HCI_ROLE_MASTER);
+ if (!conn)
+ return ERR_PTR(-ENOMEM);
+
+ set_bit(HCI_CONN_PER_ADV, &conn->flags);
+ conn->state = BT_CONNECT;
+
+ hci_conn_hold(conn);
+ return conn;
+}
+
/* This function requires the caller holds hdev->lock */
struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
u8 dst_type, u8 sec_level,
@@ -1209,6 +1619,577 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
return sco;
}
+static void cis_add(struct iso_list_data *d, struct bt_iso_qos *qos)
+{
+ struct hci_cis_params *cis = &d->pdu.cis[d->pdu.cp.num_cis];
+
+ cis->cis_id = qos->cis;
+ cis->c_sdu = cpu_to_le16(qos->out.sdu);
+ cis->p_sdu = cpu_to_le16(qos->in.sdu);
+ cis->c_phy = qos->out.phy ? qos->out.phy : qos->in.phy;
+ cis->p_phy = qos->in.phy ? qos->in.phy : qos->out.phy;
+ cis->c_rtn = qos->out.rtn;
+ cis->p_rtn = qos->in.rtn;
+
+ d->pdu.cp.num_cis++;
+}
+
+static void cis_list(struct hci_conn *conn, void *data)
+{
+ struct iso_list_data *d = data;
+
+ /* Skip if broadcast/ANY address */
+ if (!bacmp(&conn->dst, BDADDR_ANY))
+ return;
+
+ if (d->cig != conn->iso_qos.cig || d->cis == BT_ISO_QOS_CIS_UNSET ||
+ d->cis != conn->iso_qos.cis)
+ return;
+
+ d->count++;
+
+ if (d->pdu.cp.cig_id == BT_ISO_QOS_CIG_UNSET ||
+ d->count >= ARRAY_SIZE(d->pdu.cis))
+ return;
+
+ cis_add(d, &conn->iso_qos);
+}
+
+static int hci_le_create_big(struct hci_conn *conn, struct bt_iso_qos *qos)
+{
+ struct hci_dev *hdev = conn->hdev;
+ struct hci_cp_le_create_big cp;
+
+ memset(&cp, 0, sizeof(cp));
+
+ cp.handle = qos->big;
+ cp.adv_handle = qos->bis;
+ cp.num_bis = 0x01;
+ hci_cpu_to_le24(qos->out.interval, cp.bis.sdu_interval);
+ cp.bis.sdu = cpu_to_le16(qos->out.sdu);
+ cp.bis.latency = cpu_to_le16(qos->out.latency);
+ cp.bis.rtn = qos->out.rtn;
+ cp.bis.phy = qos->out.phy;
+ cp.bis.packing = qos->packing;
+ cp.bis.framing = qos->framing;
+ cp.bis.encryption = 0x00;
+ memset(&cp.bis.bcode, 0, sizeof(cp.bis.bcode));
+
+ return hci_send_cmd(hdev, HCI_OP_LE_CREATE_BIG, sizeof(cp), &cp);
+}
+
+static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos)
+{
+ struct hci_dev *hdev = conn->hdev;
+ struct iso_list_data data;
+
+ memset(&data, 0, sizeof(data));
+
+ /* Allocate a CIG if not set */
+ if (qos->cig == BT_ISO_QOS_CIG_UNSET) {
+ for (data.cig = 0x00; data.cig < 0xff; data.cig++) {
+ data.count = 0;
+ data.cis = 0xff;
+
+ hci_conn_hash_list_state(hdev, cis_list, ISO_LINK,
+ BT_BOUND, &data);
+ if (data.count)
+ continue;
+
+ hci_conn_hash_list_state(hdev, cis_list, ISO_LINK,
+ BT_CONNECTED, &data);
+ if (!data.count)
+ break;
+ }
+
+ if (data.cig == 0xff)
+ return false;
+
+ /* Update CIG */
+ qos->cig = data.cig;
+ }
+
+ data.pdu.cp.cig_id = qos->cig;
+ hci_cpu_to_le24(qos->out.interval, data.pdu.cp.c_interval);
+ hci_cpu_to_le24(qos->in.interval, data.pdu.cp.p_interval);
+ data.pdu.cp.sca = qos->sca;
+ data.pdu.cp.packing = qos->packing;
+ data.pdu.cp.framing = qos->framing;
+ data.pdu.cp.c_latency = cpu_to_le16(qos->out.latency);
+ data.pdu.cp.p_latency = cpu_to_le16(qos->in.latency);
+
+ if (qos->cis != BT_ISO_QOS_CIS_UNSET) {
+ data.count = 0;
+ data.cig = qos->cig;
+ data.cis = qos->cis;
+
+ hci_conn_hash_list_state(hdev, cis_list, ISO_LINK, BT_BOUND,
+ &data);
+ if (data.count)
+ return false;
+
+ cis_add(&data, qos);
+ }
+
+ /* Reprogram all CIS(s) with the same CIG */
+ for (data.cig = qos->cig, data.cis = 0x00; data.cis < 0x11;
+ data.cis++) {
+ data.count = 0;
+
+ hci_conn_hash_list_state(hdev, cis_list, ISO_LINK, BT_BOUND,
+ &data);
+ if (data.count)
+ continue;
+
+ /* Allocate a CIS if not set */
+ if (qos->cis == BT_ISO_QOS_CIS_UNSET) {
+ /* Update CIS */
+ qos->cis = data.cis;
+ cis_add(&data, qos);
+ }
+ }
+
+ if (qos->cis == BT_ISO_QOS_CIS_UNSET || !data.pdu.cp.num_cis)
+ return false;
+
+ if (hci_send_cmd(hdev, HCI_OP_LE_SET_CIG_PARAMS,
+ sizeof(data.pdu.cp) +
+ (data.pdu.cp.num_cis * sizeof(*data.pdu.cis)),
+ &data.pdu) < 0)
+ return false;
+
+ return true;
+}
+
+struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
+ __u8 dst_type, struct bt_iso_qos *qos)
+{
+ struct hci_conn *cis;
+
+ cis = hci_conn_hash_lookup_cis(hdev, dst, dst_type);
+ if (!cis) {
+ cis = hci_conn_add(hdev, ISO_LINK, dst, HCI_ROLE_MASTER);
+ if (!cis)
+ return ERR_PTR(-ENOMEM);
+ cis->cleanup = cis_cleanup;
+ cis->dst_type = dst_type;
+ }
+
+ if (cis->state == BT_CONNECTED)
+ return cis;
+
+ /* Check if CIS has been set and the settings match */
+ if (cis->state == BT_BOUND &&
+ !memcmp(&cis->iso_qos, qos, sizeof(*qos)))
+ return cis;
+
+ /* Update LINK PHYs according to QoS preference */
+ cis->le_tx_phy = qos->out.phy;
+ cis->le_rx_phy = qos->in.phy;
+
+ /* If output interval is not set use the input interval as it cannot be
+ * 0x000000.
+ */
+ if (!qos->out.interval)
+ qos->out.interval = qos->in.interval;
+
+ /* If input interval is not set use the output interval as it cannot be
+ * 0x000000.
+ */
+ if (!qos->in.interval)
+ qos->in.interval = qos->out.interval;
+
+ /* If output latency is not set use the input latency as it cannot be
+ * 0x0000.
+ */
+ if (!qos->out.latency)
+ qos->out.latency = qos->in.latency;
+
+ /* If input latency is not set use the output latency as it cannot be
+ * 0x0000.
+ */
+ if (!qos->in.latency)
+ qos->in.latency = qos->out.latency;
+
+ if (!hci_le_set_cig_params(cis, qos)) {
+ hci_conn_drop(cis);
+ return ERR_PTR(-EINVAL);
+ }
+
+ cis->iso_qos = *qos;
+ cis->state = BT_BOUND;
+
+ return cis;
+}
+
+bool hci_iso_setup_path(struct hci_conn *conn)
+{
+ struct hci_dev *hdev = conn->hdev;
+ struct hci_cp_le_setup_iso_path cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+
+ if (conn->iso_qos.out.sdu) {
+ cmd.handle = cpu_to_le16(conn->handle);
+ cmd.direction = 0x00; /* Input (Host to Controller) */
+ cmd.path = 0x00; /* HCI path if enabled */
+ cmd.codec = 0x03; /* Transparent Data */
+
+ if (hci_send_cmd(hdev, HCI_OP_LE_SETUP_ISO_PATH, sizeof(cmd),
+ &cmd) < 0)
+ return false;
+ }
+
+ if (conn->iso_qos.in.sdu) {
+ cmd.handle = cpu_to_le16(conn->handle);
+ cmd.direction = 0x01; /* Output (Controller to Host) */
+ cmd.path = 0x00; /* HCI path if enabled */
+ cmd.codec = 0x03; /* Transparent Data */
+
+ if (hci_send_cmd(hdev, HCI_OP_LE_SETUP_ISO_PATH, sizeof(cmd),
+ &cmd) < 0)
+ return false;
+ }
+
+ return true;
+}
+
+static int hci_create_cis_sync(struct hci_dev *hdev, void *data)
+{
+ struct {
+ struct hci_cp_le_create_cis cp;
+ struct hci_cis cis[0x1f];
+ } cmd;
+ struct hci_conn *conn = data;
+ u8 cig;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.cis[0].acl_handle = cpu_to_le16(conn->link->handle);
+ cmd.cis[0].cis_handle = cpu_to_le16(conn->handle);
+ cmd.cp.num_cis++;
+ cig = conn->iso_qos.cig;
+
+ hci_dev_lock(hdev);
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ struct hci_cis *cis = &cmd.cis[cmd.cp.num_cis];
+
+ if (conn == data || conn->type != ISO_LINK ||
+ conn->state == BT_CONNECTED || conn->iso_qos.cig != cig)
+ continue;
+
+ /* Check if all CIS(s) belonging to a CIG are ready */
+ if (conn->link->state != BT_CONNECTED ||
+ conn->state != BT_CONNECT) {
+ cmd.cp.num_cis = 0;
+ break;
+ }
+
+ /* Group all CIS with state BT_CONNECT since the spec doesn't
+ * allow sending them individually:
+ *
+ * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
+ * page 2566:
+ *
+ * If the Host issues this command before all the
+ * HCI_LE_CIS_Established events from the previous use of the
+ * command have been generated, the Controller shall return the
+ * error code Command Disallowed (0x0C).
+ */
+ cis->acl_handle = cpu_to_le16(conn->link->handle);
+ cis->cis_handle = cpu_to_le16(conn->handle);
+ cmd.cp.num_cis++;
+ }
+
+ rcu_read_unlock();
+
+ hci_dev_unlock(hdev);
+
+ if (!cmd.cp.num_cis)
+ return 0;
+
+ return hci_send_cmd(hdev, HCI_OP_LE_CREATE_CIS, sizeof(cmd.cp) +
+ sizeof(cmd.cis[0]) * cmd.cp.num_cis, &cmd);
+}
+
+int hci_le_create_cis(struct hci_conn *conn)
+{
+ struct hci_conn *cis;
+ struct hci_dev *hdev = conn->hdev;
+ int err;
+
+ switch (conn->type) {
+ case LE_LINK:
+ if (!conn->link || conn->state != BT_CONNECTED)
+ return -EINVAL;
+ cis = conn->link;
+ break;
+ case ISO_LINK:
+ cis = conn;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (cis->state == BT_CONNECT)
+ return 0;
+
+ /* Queue Create CIS */
+ err = hci_cmd_sync_queue(hdev, hci_create_cis_sync, cis, NULL);
+ if (err)
+ return err;
+
+ cis->state = BT_CONNECT;
+
+ return 0;
+}
+
+static void hci_iso_qos_setup(struct hci_dev *hdev, struct hci_conn *conn,
+ struct bt_iso_io_qos *qos, __u8 phy)
+{
+ /* Only set MTU if PHY is enabled */
+ if (!qos->sdu && qos->phy) {
+ if (hdev->iso_mtu > 0)
+ qos->sdu = hdev->iso_mtu;
+ else if (hdev->le_mtu > 0)
+ qos->sdu = hdev->le_mtu;
+ else
+ qos->sdu = hdev->acl_mtu;
+ }
+
+ /* Use the same PHY as ACL if set to any */
+ if (qos->phy == BT_ISO_PHY_ANY)
+ qos->phy = phy;
+
+ /* Use LE ACL connection interval if not set */
+ if (!qos->interval)
+ /* Convert the ACL interval from 1.25 ms units to microseconds */
+ qos->interval = conn->le_conn_interval * 1250;
+
+ /* Use LE ACL connection latency if not set */
+ if (!qos->latency)
+ qos->latency = conn->le_conn_latency;
+}
+
+static struct hci_conn *hci_bind_bis(struct hci_conn *conn,
+ struct bt_iso_qos *qos)
+{
+ /* Update LINK PHYs according to QoS preference */
+ conn->le_tx_phy = qos->out.phy;
+ conn->le_rx_phy = qos->in.phy;
+ conn->iso_qos = *qos;
+ conn->state = BT_BOUND;
+
+ return conn;
+}
+
+static int create_big_sync(struct hci_dev *hdev, void *data)
+{
+ struct hci_conn *conn = data;
+ struct bt_iso_qos *qos = &conn->iso_qos;
+ u16 interval, sync_interval = 0;
+ u32 flags = 0;
+ int err;
+
+ if (qos->out.phy == 0x02)
+ flags |= MGMT_ADV_FLAG_SEC_2M;
+
+ /* Align intervals */
+ interval = qos->out.interval / 1250;
+
+ if (qos->bis)
+ sync_interval = qos->sync_interval * 1600;
+
+ err = hci_start_per_adv_sync(hdev, qos->bis, conn->le_per_adv_data_len,
+ conn->le_per_adv_data, flags, interval,
+ interval, sync_interval);
+ if (err)
+ return err;
+
+ return hci_le_create_big(conn, &conn->iso_qos);
+}
+
+static void create_pa_complete(struct hci_dev *hdev, void *data, int err)
+{
+ struct hci_cp_le_pa_create_sync *cp = data;
+
+ bt_dev_dbg(hdev, "");
+
+ if (err)
+ bt_dev_err(hdev, "Unable to create PA: %d", err);
+
+ kfree(cp);
+}
+
+static int create_pa_sync(struct hci_dev *hdev, void *data)
+{
+ struct hci_cp_le_pa_create_sync *cp = data;
+ int err;
+
+ err = __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC,
+ sizeof(*cp), cp, HCI_CMD_TIMEOUT);
+ if (err) {
+ hci_dev_clear_flag(hdev, HCI_PA_SYNC);
+ return err;
+ }
+
+ return hci_update_passive_scan_sync(hdev);
+}
+
+int hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type,
+ __u8 sid)
+{
+ struct hci_cp_le_pa_create_sync *cp;
+
+ if (hci_dev_test_and_set_flag(hdev, HCI_PA_SYNC))
+ return -EBUSY;
+
+ cp = kmalloc(sizeof(*cp), GFP_KERNEL);
+ if (!cp) {
+ hci_dev_clear_flag(hdev, HCI_PA_SYNC);
+ return -ENOMEM;
+ }
+
+ /* Convert from ISO socket address type to HCI address type */
+ if (dst_type == BDADDR_LE_PUBLIC)
+ dst_type = ADDR_LE_DEV_PUBLIC;
+ else
+ dst_type = ADDR_LE_DEV_RANDOM;
+
+ memset(cp, 0, sizeof(*cp));
+ cp->sid = sid;
+ cp->addr_type = dst_type;
+ bacpy(&cp->addr, dst);
+
+ /* Queue start pa_create_sync and scan */
+ return hci_cmd_sync_queue(hdev, create_pa_sync, cp, create_pa_complete);
+}
+
+int hci_le_big_create_sync(struct hci_dev *hdev, struct bt_iso_qos *qos,
+ __u16 sync_handle, __u8 num_bis, __u8 bis[])
+{
+ struct {
+ struct hci_cp_le_big_create_sync cp;
+ __u8 bis[0x11];
+ } __packed pdu;
+ int err;
+
+ if (num_bis > sizeof(pdu.bis))
+ return -EINVAL;
+
+ err = qos_set_big(hdev, qos);
+ if (err)
+ return err;
+
+ memset(&pdu, 0, sizeof(pdu));
+ pdu.cp.handle = qos->big;
+ pdu.cp.sync_handle = cpu_to_le16(sync_handle);
+ pdu.cp.num_bis = num_bis;
+ memcpy(pdu.bis, bis, num_bis);
+
+ return hci_send_cmd(hdev, HCI_OP_LE_BIG_CREATE_SYNC,
+ sizeof(pdu.cp) + num_bis, &pdu);
+}
+
+static void create_big_complete(struct hci_dev *hdev, void *data, int err)
+{
+ struct hci_conn *conn = data;
+
+ bt_dev_dbg(hdev, "conn %p", conn);
+
+ if (err) {
+ bt_dev_err(hdev, "Unable to create BIG: %d", err);
+ hci_connect_cfm(conn, err);
+ hci_conn_del(conn);
+ }
+}
+
+struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
+ __u8 dst_type, struct bt_iso_qos *qos,
+ __u8 base_len, __u8 *base)
+{
+ struct hci_conn *conn;
+ int err;
+
+ /* We need an hci_conn object using BDADDR_ANY as dst */
+ conn = hci_add_bis(hdev, dst, qos);
+ if (IS_ERR(conn))
+ return conn;
+
+ conn = hci_bind_bis(conn, qos);
+ if (!conn) {
+ hci_conn_drop(conn);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* Add Basic Announcement into Periodic Adv Data if BASE is set */
+ if (base_len && base) {
+ base_len = eir_append_service_data(conn->le_per_adv_data, 0,
+ 0x1851, base, base_len);
+ conn->le_per_adv_data_len = base_len;
+ }
+
+ /* Queue start periodic advertising and create BIG */
+ err = hci_cmd_sync_queue(hdev, create_big_sync, conn,
+ create_big_complete);
+ if (err < 0) {
+ hci_conn_drop(conn);
+ return ERR_PTR(err);
+ }
+
+ hci_iso_qos_setup(hdev, conn, &qos->out,
+ conn->le_tx_phy ? conn->le_tx_phy :
+ hdev->le_tx_def_phys);
+
+ return conn;
+}
+
+struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst,
+ __u8 dst_type, struct bt_iso_qos *qos)
+{
+ struct hci_conn *le;
+ struct hci_conn *cis;
+
+ if (hci_dev_test_flag(hdev, HCI_ADVERTISING))
+ le = hci_connect_le(hdev, dst, dst_type, false,
+ BT_SECURITY_LOW,
+ HCI_LE_CONN_TIMEOUT,
+ HCI_ROLE_SLAVE);
+ else
+ le = hci_connect_le_scan(hdev, dst, dst_type,
+ BT_SECURITY_LOW,
+ HCI_LE_CONN_TIMEOUT,
+ CONN_REASON_ISO_CONNECT);
+ if (IS_ERR(le))
+ return le;
+
+ hci_iso_qos_setup(hdev, le, &qos->out,
+ le->le_tx_phy ? le->le_tx_phy : hdev->le_tx_def_phys);
+ hci_iso_qos_setup(hdev, le, &qos->in,
+ le->le_rx_phy ? le->le_rx_phy : hdev->le_rx_def_phys);
+
+ cis = hci_bind_cis(hdev, dst, dst_type, qos);
+ if (IS_ERR(cis)) {
+ hci_conn_drop(le);
+ return cis;
+ }
+
+ le->link = cis;
+ cis->link = le;
+
+ hci_conn_hold(cis);
+
+ /* If the LE link is already connected and the CIS handle is already
+ * set, proceed to create the CIS immediately.
+ */
+ if (le->state == BT_CONNECTED && cis->handle != HCI_CONN_HANDLE_UNSET)
+ hci_le_create_cis(le);
+
+ return cis;
+}
+
/* Check link security requirement */
int hci_conn_check_link_mode(struct hci_conn *conn)
{
@@ -1785,3 +2766,79 @@ u32 hci_conn_get_phy(struct hci_conn *conn)
return phys;
}
+
+int hci_abort_conn(struct hci_conn *conn, u8 reason)
+{
+ int r = 0;
+
+ switch (conn->state) {
+ case BT_CONNECTED:
+ case BT_CONFIG:
+ if (conn->type == AMP_LINK) {
+ struct hci_cp_disconn_phy_link cp;
+
+ cp.phy_handle = HCI_PHY_HANDLE(conn->handle);
+ cp.reason = reason;
+ r = hci_send_cmd(conn->hdev, HCI_OP_DISCONN_PHY_LINK,
+ sizeof(cp), &cp);
+ } else {
+ struct hci_cp_disconnect dc;
+
+ dc.handle = cpu_to_le16(conn->handle);
+ dc.reason = reason;
+ r = hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT,
+ sizeof(dc), &dc);
+ }
+
+ conn->state = BT_DISCONN;
+
+ break;
+ case BT_CONNECT:
+ if (conn->type == LE_LINK) {
+ if (test_bit(HCI_CONN_SCANNING, &conn->flags))
+ break;
+ r = hci_send_cmd(conn->hdev,
+ HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL);
+ } else if (conn->type == ACL_LINK) {
+ if (conn->hdev->hci_ver < BLUETOOTH_VER_1_2)
+ break;
+ r = hci_send_cmd(conn->hdev,
+ HCI_OP_CREATE_CONN_CANCEL,
+ 6, &conn->dst);
+ }
+ break;
+ case BT_CONNECT2:
+ if (conn->type == ACL_LINK) {
+ struct hci_cp_reject_conn_req rej;
+
+ bacpy(&rej.bdaddr, &conn->dst);
+ rej.reason = reason;
+
+ r = hci_send_cmd(conn->hdev,
+ HCI_OP_REJECT_CONN_REQ,
+ sizeof(rej), &rej);
+ } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) {
+ struct hci_cp_reject_sync_conn_req rej;
+
+ bacpy(&rej.bdaddr, &conn->dst);
+
+ /* SCO rejection has its own limited set of
+ * allowed error values (0x0D-0x0F) which isn't
+ * compatible with most values passed to this
+ * function. To be safe hard-code one of the
+ * values that's suitable for SCO.
+ */
+ rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES;
+
+ r = hci_send_cmd(conn->hdev,
+ HCI_OP_REJECT_SYNC_CONN_REQ,
+ sizeof(rej), &rej);
+ }
+ break;
+ default:
+ conn->state = BT_CLOSED;
+ break;
+ }
+
+ return r;
+}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 2b7bd3655b07..0540555b3704 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -29,6 +29,7 @@
#include <linux/rfkill.h>
#include <linux/debugfs.h>
#include <linux/crypto.h>
+#include <linux/kcov.h>
#include <linux/property.h>
#include <linux/suspend.h>
#include <linux/wait.h>
@@ -571,6 +572,7 @@ int hci_dev_close(__u16 dev)
goto done;
}
+ cancel_work_sync(&hdev->power_on);
if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF))
cancel_delayed_work(&hdev->power_off);
@@ -593,6 +595,20 @@ static int hci_dev_do_reset(struct hci_dev *hdev)
skb_queue_purge(&hdev->rx_q);
skb_queue_purge(&hdev->cmd_q);
+ /* Cancel these to avoid queueing non-chained pending work */
+ hci_dev_set_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE);
+ /* Wait for
+ *
+ * if (!hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE))
+ * queue_delayed_work(&hdev->{cmd,ncmd}_timer)
+ *
+ * inside RCU section to see the flag or complete scheduling.
+ */
+ synchronize_rcu();
+ /* Explicitly cancel works in case scheduled after setting the flag. */
+ cancel_delayed_work(&hdev->cmd_timer);
+ cancel_delayed_work(&hdev->ncmd_timer);
+
/* Avoid potential lockdep warnings from the *_flush() calls by
* ensuring the workqueue is empty up front.
*/
@@ -606,8 +622,13 @@ static int hci_dev_do_reset(struct hci_dev *hdev)
if (hdev->flush)
hdev->flush(hdev);
+ hci_dev_clear_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE);
+
atomic_set(&hdev->cmd_cnt, 1);
- hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0;
+ hdev->acl_cnt = 0;
+ hdev->sco_cnt = 0;
+ hdev->le_cnt = 0;
+ hdev->iso_cnt = 0;
ret = hci_reset_sync(hdev);
@@ -702,7 +723,7 @@ static void hci_update_passive_scan_state(struct hci_dev *hdev, u8 scan)
hci_dev_set_flag(hdev, HCI_BREDR_ENABLED);
if (hci_dev_test_flag(hdev, HCI_LE_ENABLED))
- hci_req_update_adv_data(hdev, hdev->cur_adv_instance);
+ hci_update_adv_data(hdev, hdev->cur_adv_instance);
mgmt_new_settings(hdev);
}
@@ -1690,63 +1711,83 @@ static void adv_instance_rpa_expired(struct work_struct *work)
}
/* This function requires the caller holds hdev->lock */
-int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags,
- u16 adv_data_len, u8 *adv_data,
- u16 scan_rsp_len, u8 *scan_rsp_data,
- u16 timeout, u16 duration, s8 tx_power,
- u32 min_interval, u32 max_interval)
+struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance,
+ u32 flags, u16 adv_data_len, u8 *adv_data,
+ u16 scan_rsp_len, u8 *scan_rsp_data,
+ u16 timeout, u16 duration, s8 tx_power,
+ u32 min_interval, u32 max_interval,
+ u8 mesh_handle)
{
- struct adv_info *adv_instance;
+ struct adv_info *adv;
- adv_instance = hci_find_adv_instance(hdev, instance);
- if (adv_instance) {
- memset(adv_instance->adv_data, 0,
- sizeof(adv_instance->adv_data));
- memset(adv_instance->scan_rsp_data, 0,
- sizeof(adv_instance->scan_rsp_data));
+ adv = hci_find_adv_instance(hdev, instance);
+ if (adv) {
+ memset(adv->adv_data, 0, sizeof(adv->adv_data));
+ memset(adv->scan_rsp_data, 0, sizeof(adv->scan_rsp_data));
+ memset(adv->per_adv_data, 0, sizeof(adv->per_adv_data));
} else {
if (hdev->adv_instance_cnt >= hdev->le_num_of_adv_sets ||
- instance < 1 || instance > hdev->le_num_of_adv_sets)
- return -EOVERFLOW;
+ instance < 1 || instance > hdev->le_num_of_adv_sets + 1)
+ return ERR_PTR(-EOVERFLOW);
- adv_instance = kzalloc(sizeof(*adv_instance), GFP_KERNEL);
- if (!adv_instance)
- return -ENOMEM;
+ adv = kzalloc(sizeof(*adv), GFP_KERNEL);
+ if (!adv)
+ return ERR_PTR(-ENOMEM);
- adv_instance->pending = true;
- adv_instance->instance = instance;
- list_add(&adv_instance->list, &hdev->adv_instances);
+ adv->pending = true;
+ adv->instance = instance;
+ list_add(&adv->list, &hdev->adv_instances);
hdev->adv_instance_cnt++;
}
- adv_instance->flags = flags;
- adv_instance->adv_data_len = adv_data_len;
- adv_instance->scan_rsp_len = scan_rsp_len;
- adv_instance->min_interval = min_interval;
- adv_instance->max_interval = max_interval;
- adv_instance->tx_power = tx_power;
-
- if (adv_data_len)
- memcpy(adv_instance->adv_data, adv_data, adv_data_len);
+ adv->flags = flags;
+ adv->min_interval = min_interval;
+ adv->max_interval = max_interval;
+ adv->tx_power = tx_power;
+ /* Defining a mesh_handle changes the timing units to ms,
+ * rather than seconds, and ties the instance to the requested
+ * mesh_tx queue.
+ */
+ adv->mesh = mesh_handle;
- if (scan_rsp_len)
- memcpy(adv_instance->scan_rsp_data,
- scan_rsp_data, scan_rsp_len);
+ hci_set_adv_instance_data(hdev, instance, adv_data_len, adv_data,
+ scan_rsp_len, scan_rsp_data);
- adv_instance->timeout = timeout;
- adv_instance->remaining_time = timeout;
+ adv->timeout = timeout;
+ adv->remaining_time = timeout;
if (duration == 0)
- adv_instance->duration = hdev->def_multi_adv_rotation_duration;
+ adv->duration = hdev->def_multi_adv_rotation_duration;
else
- adv_instance->duration = duration;
+ adv->duration = duration;
- INIT_DELAYED_WORK(&adv_instance->rpa_expired_cb,
- adv_instance_rpa_expired);
+ INIT_DELAYED_WORK(&adv->rpa_expired_cb, adv_instance_rpa_expired);
BT_DBG("%s for %dMR", hdev->name, instance);
- return 0;
+ return adv;
+}
+
+/* This function requires the caller holds hdev->lock */
+struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance,
+ u32 flags, u8 data_len, u8 *data,
+ u32 min_interval, u32 max_interval)
+{
+ struct adv_info *adv;
+
+ adv = hci_add_adv_instance(hdev, instance, flags, 0, NULL, 0, NULL,
+ 0, 0, HCI_ADV_TX_POWER_NO_PREFERENCE,
+ min_interval, max_interval, 0);
+ if (IS_ERR(adv))
+ return adv;
+
+ adv->periodic = true;
+ adv->per_adv_data_len = data_len;
+
+ if (data)
+ memcpy(adv->per_adv_data, data, data_len);
+
+ return adv;
}
/* This function requires the caller holds hdev->lock */
@@ -1754,29 +1795,33 @@ int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance,
u16 adv_data_len, u8 *adv_data,
u16 scan_rsp_len, u8 *scan_rsp_data)
{
- struct adv_info *adv_instance;
+ struct adv_info *adv;
- adv_instance = hci_find_adv_instance(hdev, instance);
+ adv = hci_find_adv_instance(hdev, instance);
/* If advertisement doesn't exist, we can't modify its data */
- if (!adv_instance)
+ if (!adv)
return -ENOENT;
- if (adv_data_len) {
- memset(adv_instance->adv_data, 0,
- sizeof(adv_instance->adv_data));
- memcpy(adv_instance->adv_data, adv_data, adv_data_len);
- adv_instance->adv_data_len = adv_data_len;
+ if (adv_data_len && ADV_DATA_CMP(adv, adv_data, adv_data_len)) {
+ memset(adv->adv_data, 0, sizeof(adv->adv_data));
+ memcpy(adv->adv_data, adv_data, adv_data_len);
+ adv->adv_data_len = adv_data_len;
+ adv->adv_data_changed = true;
}
- if (scan_rsp_len) {
- memset(adv_instance->scan_rsp_data, 0,
- sizeof(adv_instance->scan_rsp_data));
- memcpy(adv_instance->scan_rsp_data,
- scan_rsp_data, scan_rsp_len);
- adv_instance->scan_rsp_len = scan_rsp_len;
+ if (scan_rsp_len && SCAN_RSP_CMP(adv, scan_rsp_data, scan_rsp_len)) {
+ memset(adv->scan_rsp_data, 0, sizeof(adv->scan_rsp_data));
+ memcpy(adv->scan_rsp_data, scan_rsp_data, scan_rsp_len);
+ adv->scan_rsp_len = scan_rsp_len;
+ adv->scan_rsp_changed = true;
}
+ /* Mark as changed if there are flags which would affect it */
+ if (((adv->flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) ||
+ adv->flags & MGMT_ADV_FLAG_LOCAL_NAME)
+ adv->scan_rsp_changed = true;
+
return 0;
}
@@ -1873,151 +1918,120 @@ void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor)
kfree(monitor);
}
-int hci_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status)
-{
- return mgmt_add_adv_patterns_monitor_complete(hdev, status);
-}
-
-int hci_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status)
-{
- return mgmt_remove_adv_monitor_complete(hdev, status);
-}
-
/* Assigns handle to a monitor, and if offloading is supported and power is on,
* also attempts to forward the request to the controller.
- * Returns true if request is forwarded (result is pending), false otherwise.
- * This function requires the caller holds hdev->lock.
+ * This function requires the caller holds hci_req_sync_lock.
*/
-bool hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor,
- int *err)
+int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor)
{
int min, max, handle;
+ int status = 0;
- *err = 0;
+ if (!monitor)
+ return -EINVAL;
- if (!monitor) {
- *err = -EINVAL;
- return false;
- }
+ hci_dev_lock(hdev);
min = HCI_MIN_ADV_MONITOR_HANDLE;
max = HCI_MIN_ADV_MONITOR_HANDLE + HCI_MAX_ADV_MONITOR_NUM_HANDLES;
handle = idr_alloc(&hdev->adv_monitors_idr, monitor, min, max,
GFP_KERNEL);
- if (handle < 0) {
- *err = handle;
- return false;
- }
+
+ hci_dev_unlock(hdev);
+
+ if (handle < 0)
+ return handle;
monitor->handle = handle;
if (!hdev_is_powered(hdev))
- return false;
+ return status;
switch (hci_get_adv_monitor_offload_ext(hdev)) {
case HCI_ADV_MONITOR_EXT_NONE:
- hci_update_passive_scan(hdev);
- bt_dev_dbg(hdev, "%s add monitor status %d", hdev->name, *err);
+ bt_dev_dbg(hdev, "%s add monitor %d status %d", hdev->name,
+ monitor->handle, status);
/* Message was not forwarded to controller - not an error */
- return false;
+ break;
+
case HCI_ADV_MONITOR_EXT_MSFT:
- *err = msft_add_monitor_pattern(hdev, monitor);
- bt_dev_dbg(hdev, "%s add monitor msft status %d", hdev->name,
- *err);
+ status = msft_add_monitor_pattern(hdev, monitor);
+ bt_dev_dbg(hdev, "%s add monitor %d msft status %d", hdev->name,
+ monitor->handle, status);
break;
}
- return (*err == 0);
+ return status;
}
/* Attempts to tell the controller and free the monitor. If somehow the
* controller doesn't have a corresponding handle, remove anyway.
- * Returns true if request is forwarded (result is pending), false otherwise.
- * This function requires the caller holds hdev->lock.
+ * This function requires the caller holds hci_req_sync_lock.
*/
-static bool hci_remove_adv_monitor(struct hci_dev *hdev,
- struct adv_monitor *monitor,
- u16 handle, int *err)
+static int hci_remove_adv_monitor(struct hci_dev *hdev,
+ struct adv_monitor *monitor)
{
- *err = 0;
+ int status = 0;
switch (hci_get_adv_monitor_offload_ext(hdev)) {
case HCI_ADV_MONITOR_EXT_NONE: /* also goes here when powered off */
+ bt_dev_dbg(hdev, "%s remove monitor %d status %d", hdev->name,
+ monitor->handle, status);
goto free_monitor;
+
case HCI_ADV_MONITOR_EXT_MSFT:
- *err = msft_remove_monitor(hdev, monitor, handle);
+ status = msft_remove_monitor(hdev, monitor);
+ bt_dev_dbg(hdev, "%s remove monitor %d msft status %d",
+ hdev->name, monitor->handle, status);
break;
}
/* In case no matching handle registered, just free the monitor */
- if (*err == -ENOENT)
+ if (status == -ENOENT)
goto free_monitor;
- return (*err == 0);
+ return status;
free_monitor:
- if (*err == -ENOENT)
+ if (status == -ENOENT)
bt_dev_warn(hdev, "Removing monitor with no matching handle %d",
monitor->handle);
hci_free_adv_monitor(hdev, monitor);
- *err = 0;
- return false;
+ return status;
}
-/* Returns true if request is forwarded (result is pending), false otherwise.
- * This function requires the caller holds hdev->lock.
- */
-bool hci_remove_single_adv_monitor(struct hci_dev *hdev, u16 handle, int *err)
+/* This function requires the caller holds hci_req_sync_lock */
+int hci_remove_single_adv_monitor(struct hci_dev *hdev, u16 handle)
{
struct adv_monitor *monitor = idr_find(&hdev->adv_monitors_idr, handle);
- bool pending;
-
- if (!monitor) {
- *err = -EINVAL;
- return false;
- }
- pending = hci_remove_adv_monitor(hdev, monitor, handle, err);
- if (!*err && !pending)
- hci_update_passive_scan(hdev);
-
- bt_dev_dbg(hdev, "%s remove monitor handle %d, status %d, %spending",
- hdev->name, handle, *err, pending ? "" : "not ");
+ if (!monitor)
+ return -EINVAL;
- return pending;
+ return hci_remove_adv_monitor(hdev, monitor);
}
-/* Returns true if request is forwarded (result is pending), false otherwise.
- * This function requires the caller holds hdev->lock.
- */
-bool hci_remove_all_adv_monitor(struct hci_dev *hdev, int *err)
+/* This function requires the caller holds hci_req_sync_lock */
+int hci_remove_all_adv_monitor(struct hci_dev *hdev)
{
struct adv_monitor *monitor;
int idr_next_id = 0;
- bool pending = false;
- bool update = false;
+ int status = 0;
- *err = 0;
-
- while (!*err && !pending) {
+ while (1) {
monitor = idr_get_next(&hdev->adv_monitors_idr, &idr_next_id);
if (!monitor)
break;
- pending = hci_remove_adv_monitor(hdev, monitor, 0, err);
+ status = hci_remove_adv_monitor(hdev, monitor);
+ if (status)
+ return status;
- if (!*err && !pending)
- update = true;
+ idr_next_id++;
}
- if (update)
- hci_update_passive_scan(hdev);
-
- bt_dev_dbg(hdev, "%s remove all monitors status %d, %spending",
- hdev->name, *err, pending ? "" : "not ");
-
- return pending;
+ return status;
}
/* This function requires the caller holds hdev->lock */
@@ -2153,7 +2167,7 @@ int hci_bdaddr_list_add_with_flags(struct list_head *list, bdaddr_t *bdaddr,
bacpy(&entry->bdaddr, bdaddr);
entry->bdaddr_type = type;
- bitmap_from_u64(entry->flags, flags);
+ entry->flags = flags;
list_add(&entry->list, list);
@@ -2392,6 +2406,10 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action,
container_of(nb, struct hci_dev, suspend_notifier);
int ret = 0;
+ /* Userspace has full control of this device. Do nothing. */
+ if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL))
+ return NOTIFY_DONE;
+
if (action == PM_SUSPEND_PREPARE)
ret = hci_suspend_dev(hdev);
else if (action == PM_POST_SUSPEND)
@@ -2487,6 +2505,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
mutex_init(&hdev->lock);
mutex_init(&hdev->req_lock);
+ INIT_LIST_HEAD(&hdev->mesh_pending);
INIT_LIST_HEAD(&hdev->mgmt_pending);
INIT_LIST_HEAD(&hdev->reject_list);
INIT_LIST_HEAD(&hdev->accept_list);
@@ -2503,6 +2522,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
INIT_LIST_HEAD(&hdev->conn_hash.list);
INIT_LIST_HEAD(&hdev->adv_instances);
INIT_LIST_HEAD(&hdev->blocked_keys);
+ INIT_LIST_HEAD(&hdev->monitored_devices);
INIT_LIST_HEAD(&hdev->local_codecs);
INIT_WORK(&hdev->rx_work, hci_rx_work);
@@ -2554,10 +2574,10 @@ int hci_register_dev(struct hci_dev *hdev)
*/
switch (hdev->dev_type) {
case HCI_PRIMARY:
- id = ida_simple_get(&hci_index_ida, 0, 0, GFP_KERNEL);
+ id = ida_simple_get(&hci_index_ida, 0, HCI_MAX_ID, GFP_KERNEL);
break;
case HCI_AMP:
- id = ida_simple_get(&hci_index_ida, 1, 0, GFP_KERNEL);
+ id = ida_simple_get(&hci_index_ida, 1, HCI_MAX_ID, GFP_KERNEL);
break;
default:
return -EINVAL;
@@ -2566,7 +2586,7 @@ int hci_register_dev(struct hci_dev *hdev)
if (id < 0)
return id;
- sprintf(hdev->name, "hci%d", id);
+ snprintf(hdev->name, sizeof(hdev->name), "hci%d", id);
hdev->id = id;
BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
@@ -2633,17 +2653,14 @@ int hci_register_dev(struct hci_dev *hdev)
* callback.
*/
if (hdev->wakeup)
- set_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, hdev->conn_flags);
+ hdev->conn_flags |= HCI_CONN_FLAG_REMOTE_WAKEUP;
hci_sock_dev_event(hdev, HCI_DEV_REG);
hci_dev_hold(hdev);
- if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) {
- hdev->suspend_notifier.notifier_call = hci_suspend_notifier;
- error = register_pm_notifier(&hdev->suspend_notifier);
- if (error)
- goto err_wqueue;
- }
+ error = hci_register_suspend_notifier(hdev);
+ if (error)
+ goto err_wqueue;
queue_work(hdev->req_workqueue, &hdev->power_on);
@@ -2678,8 +2695,7 @@ void hci_unregister_dev(struct hci_dev *hdev)
hci_cmd_sync_clear(hdev);
- if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks))
- unregister_pm_notifier(&hdev->suspend_notifier);
+ hci_unregister_suspend_notifier(hdev);
msft_unregister(hdev);
@@ -2738,10 +2754,34 @@ void hci_release_dev(struct hci_dev *hdev)
hci_dev_unlock(hdev);
ida_simple_remove(&hci_index_ida, hdev->id);
+ kfree_skb(hdev->sent_cmd);
+ kfree_skb(hdev->recv_event);
kfree(hdev);
}
EXPORT_SYMBOL(hci_release_dev);
+int hci_register_suspend_notifier(struct hci_dev *hdev)
+{
+ int ret = 0;
+
+ if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) {
+ hdev->suspend_notifier.notifier_call = hci_suspend_notifier;
+ ret = register_pm_notifier(&hdev->suspend_notifier);
+ }
+
+ return ret;
+}
+
+int hci_unregister_suspend_notifier(struct hci_dev *hdev)
+{
+ int ret = 0;
+
+ if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks))
+ ret = unregister_pm_notifier(&hdev->suspend_notifier);
+
+ return ret;
+}
+
/* Suspend HCI device */
int hci_suspend_dev(struct hci_dev *hdev)
{
@@ -3022,6 +3062,37 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode)
return hdev->sent_cmd->data + HCI_COMMAND_HDR_SIZE;
}
+/* Get data from last received event */
+void *hci_recv_event_data(struct hci_dev *hdev, __u8 event)
+{
+ struct hci_event_hdr *hdr;
+ int offset;
+
+ if (!hdev->recv_event)
+ return NULL;
+
+ hdr = (void *)hdev->recv_event->data;
+ offset = sizeof(*hdr);
+
+ if (hdr->evt != event) {
+ /* In case of an LE meta event, check whether the subevent matches */
+ if (hdr->evt == HCI_EV_LE_META) {
+ struct hci_ev_le_meta *ev;
+
+ ev = (void *)hdev->recv_event->data + offset;
+ offset += sizeof(*ev);
+ if (ev->subevent == event)
+ goto found;
+ }
+ return NULL;
+ }
+
+found:
+ bt_dev_dbg(hdev, "event 0x%2.2x", event);
+
+ return hdev->recv_event->data + offset;
+}
+
/* Send ACL data */
static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags)
{
@@ -3129,9 +3200,117 @@ void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb)
queue_work(hdev->workqueue, &hdev->tx_work);
}
+/* Send ISO data */
+static void hci_add_iso_hdr(struct sk_buff *skb, __u16 handle, __u8 flags)
+{
+ struct hci_iso_hdr *hdr;
+ int len = skb->len;
+
+ skb_push(skb, HCI_ISO_HDR_SIZE);
+ skb_reset_transport_header(skb);
+ hdr = (struct hci_iso_hdr *)skb_transport_header(skb);
+ hdr->handle = cpu_to_le16(hci_handle_pack(handle, flags));
+ hdr->dlen = cpu_to_le16(len);
+}
+
+static void hci_queue_iso(struct hci_conn *conn, struct sk_buff_head *queue,
+ struct sk_buff *skb)
+{
+ struct hci_dev *hdev = conn->hdev;
+ struct sk_buff *list;
+ __u16 flags;
+
+ skb->len = skb_headlen(skb);
+ skb->data_len = 0;
+
+ hci_skb_pkt_type(skb) = HCI_ISODATA_PKT;
+
+ list = skb_shinfo(skb)->frag_list;
+
+ flags = hci_iso_flags_pack(list ? ISO_START : ISO_SINGLE, 0x00);
+ hci_add_iso_hdr(skb, conn->handle, flags);
+
+ if (!list) {
+ /* Non fragmented */
+ BT_DBG("%s nonfrag skb %p len %d", hdev->name, skb, skb->len);
+
+ skb_queue_tail(queue, skb);
+ } else {
+ /* Fragmented */
+ BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len);
+
+ skb_shinfo(skb)->frag_list = NULL;
+
+ __skb_queue_tail(queue, skb);
+
+ do {
+ skb = list; list = list->next;
+
+ hci_skb_pkt_type(skb) = HCI_ISODATA_PKT;
+ flags = hci_iso_flags_pack(list ? ISO_CONT : ISO_END,
+ 0x00);
+ hci_add_iso_hdr(skb, conn->handle, flags);
+
+ BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len);
+
+ __skb_queue_tail(queue, skb);
+ } while (list);
+ }
+}
+
+void hci_send_iso(struct hci_conn *conn, struct sk_buff *skb)
+{
+ struct hci_dev *hdev = conn->hdev;
+
+ BT_DBG("%s len %d", hdev->name, skb->len);
+
+ hci_queue_iso(conn, &conn->data_q, skb);
+
+ queue_work(hdev->workqueue, &hdev->tx_work);
+}
+
/* ---- HCI TX task (outgoing data) ---- */
/* HCI Connection scheduler */
+static inline void hci_quote_sent(struct hci_conn *conn, int num, int *quote)
+{
+ struct hci_dev *hdev;
+ int cnt, q;
+
+ if (!conn) {
+ *quote = 0;
+ return;
+ }
+
+ hdev = conn->hdev;
+
+ switch (conn->type) {
+ case ACL_LINK:
+ cnt = hdev->acl_cnt;
+ break;
+ case AMP_LINK:
+ cnt = hdev->block_cnt;
+ break;
+ case SCO_LINK:
+ case ESCO_LINK:
+ cnt = hdev->sco_cnt;
+ break;
+ case LE_LINK:
+ cnt = hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt;
+ break;
+ case ISO_LINK:
+ cnt = hdev->iso_mtu ? hdev->iso_cnt :
+ hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt;
+ break;
+ default:
+ cnt = 0;
+ bt_dev_err(hdev, "unknown link type %d", conn->type);
+ }
+
+ q = cnt / num;
+ *quote = q ? q : 1;
+}
+
static struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type,
int *quote)
{
@@ -3164,29 +3343,7 @@ static struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type,
rcu_read_unlock();
- if (conn) {
- int cnt, q;
-
- switch (conn->type) {
- case ACL_LINK:
- cnt = hdev->acl_cnt;
- break;
- case SCO_LINK:
- case ESCO_LINK:
- cnt = hdev->sco_cnt;
- break;
- case LE_LINK:
- cnt = hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt;
- break;
- default:
- cnt = 0;
- bt_dev_err(hdev, "unknown link type %d", conn->type);
- }
-
- q = cnt / num;
- *quote = q ? q : 1;
- } else
- *quote = 0;
+ hci_quote_sent(conn, num, quote);
BT_DBG("conn %p quote %d", conn, *quote);
return conn;
@@ -3220,7 +3377,7 @@ static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type,
struct hci_chan *chan = NULL;
unsigned int num = 0, min = ~0, cur_prio = 0;
struct hci_conn *conn;
- int cnt, q, conn_num = 0;
+ int conn_num = 0;
BT_DBG("%s", hdev->name);
@@ -3270,27 +3427,8 @@ static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type,
if (!chan)
return NULL;
- switch (chan->conn->type) {
- case ACL_LINK:
- cnt = hdev->acl_cnt;
- break;
- case AMP_LINK:
- cnt = hdev->block_cnt;
- break;
- case SCO_LINK:
- case ESCO_LINK:
- cnt = hdev->sco_cnt;
- break;
- case LE_LINK:
- cnt = hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt;
- break;
- default:
- cnt = 0;
- bt_dev_err(hdev, "unknown link type %d", chan->conn->type);
- }
+ hci_quote_sent(chan->conn, num, quote);
- q = cnt / num;
- *quote = q ? q : 1;
BT_DBG("chan %p quote %d", chan, *quote);
return chan;
}
@@ -3351,15 +3489,27 @@ static inline int __get_blocks(struct hci_dev *hdev, struct sk_buff *skb)
return DIV_ROUND_UP(skb->len - HCI_ACL_HDR_SIZE, hdev->block_len);
}
-static void __check_timeout(struct hci_dev *hdev, unsigned int cnt)
+static void __check_timeout(struct hci_dev *hdev, unsigned int cnt, u8 type)
{
- if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
- /* ACL tx timeout must be longer than maximum
- * link supervision timeout (40.9 seconds) */
- if (!cnt && time_after(jiffies, hdev->acl_last_tx +
- HCI_ACL_TX_TIMEOUT))
- hci_link_tx_to(hdev, ACL_LINK);
+ unsigned long last_tx;
+
+ if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
+ return;
+
+ switch (type) {
+ case LE_LINK:
+ last_tx = hdev->le_last_tx;
+ break;
+ default:
+ last_tx = hdev->acl_last_tx;
+ break;
}
+
+ /* tx timeout must be longer than maximum link supervision timeout
+ * (40.9 seconds)
+ */
+ if (!cnt && time_after(jiffies, last_tx + HCI_ACL_TX_TIMEOUT))
+ hci_link_tx_to(hdev, type);
}
/* Schedule SCO */
@@ -3417,7 +3567,7 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev)
struct sk_buff *skb;
int quote;
- __check_timeout(hdev, cnt);
+ __check_timeout(hdev, cnt, ACL_LINK);
while (hdev->acl_cnt &&
(chan = hci_chan_sent(hdev, ACL_LINK, &quote))) {
@@ -3460,8 +3610,6 @@ static void hci_sched_acl_blk(struct hci_dev *hdev)
int quote;
u8 type;
- __check_timeout(hdev, cnt);
-
BT_DBG("%s", hdev->name);
if (hdev->dev_type == HCI_AMP)
@@ -3469,6 +3617,8 @@ static void hci_sched_acl_blk(struct hci_dev *hdev)
else
type = ACL_LINK;
+ __check_timeout(hdev, cnt, type);
+
while (hdev->block_cnt > 0 &&
(chan = hci_chan_sent(hdev, type, &quote))) {
u32 priority = (skb_peek(&chan->data_q))->priority;
@@ -3542,7 +3692,7 @@ static void hci_sched_le(struct hci_dev *hdev)
cnt = hdev->le_pkts ? hdev->le_cnt : hdev->acl_cnt;
- __check_timeout(hdev, cnt);
+ __check_timeout(hdev, cnt, LE_LINK);
tmp = cnt;
while (cnt && (chan = hci_chan_sent(hdev, LE_LINK, &quote))) {
@@ -3579,18 +3729,46 @@ static void hci_sched_le(struct hci_dev *hdev)
hci_prio_recalculate(hdev, LE_LINK);
}
+/* Schedule CIS */
+static void hci_sched_iso(struct hci_dev *hdev)
+{
+ struct hci_conn *conn;
+ struct sk_buff *skb;
+ int quote, *cnt;
+
+ BT_DBG("%s", hdev->name);
+
+ if (!hci_conn_num(hdev, ISO_LINK))
+ return;
+
+ cnt = hdev->iso_pkts ? &hdev->iso_cnt :
+ hdev->le_pkts ? &hdev->le_cnt : &hdev->acl_cnt;
+ while (*cnt && (conn = hci_low_sent(hdev, ISO_LINK, &quote))) {
+ while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
+ BT_DBG("skb %p len %d", skb, skb->len);
+ hci_send_frame(hdev, skb);
+
+ conn->sent++;
+ if (conn->sent == ~0)
+ conn->sent = 0;
+ (*cnt)--;
+ }
+ }
+}
+
static void hci_tx_work(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev, tx_work);
struct sk_buff *skb;
- BT_DBG("%s acl %d sco %d le %d", hdev->name, hdev->acl_cnt,
- hdev->sco_cnt, hdev->le_cnt);
+ BT_DBG("%s acl %d sco %d le %d iso %d", hdev->name, hdev->acl_cnt,
+ hdev->sco_cnt, hdev->le_cnt, hdev->iso_cnt);
if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
/* Schedule queues and send stuff to HCI driver */
hci_sched_sco(hdev);
hci_sched_esco(hdev);
+ hci_sched_iso(hdev);
hci_sched_acl(hdev);
hci_sched_le(hdev);
}
@@ -3666,10 +3844,47 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
sco_recv_scodata(conn, skb);
return;
} else {
- bt_dev_err(hdev, "SCO packet for unknown connection handle %d",
+ bt_dev_err_ratelimited(hdev, "SCO packet for unknown connection handle %d",
+ handle);
+ }
+
+ kfree_skb(skb);
+}
+
+static void hci_isodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ struct hci_iso_hdr *hdr;
+ struct hci_conn *conn;
+ __u16 handle, flags;
+
+ hdr = skb_pull_data(skb, sizeof(*hdr));
+ if (!hdr) {
+ bt_dev_err(hdev, "ISO packet too small");
+ goto drop;
+ }
+
+ handle = __le16_to_cpu(hdr->handle);
+ flags = hci_flags(handle);
+ handle = hci_handle(handle);
+
+ bt_dev_dbg(hdev, "len %d handle 0x%4.4x flags 0x%4.4x", skb->len,
+ handle, flags);
+
+ hci_dev_lock(hdev);
+ conn = hci_conn_hash_lookup_handle(hdev, handle);
+ hci_dev_unlock(hdev);
+
+ if (!conn) {
+ bt_dev_err(hdev, "ISO packet for unknown connection handle %d",
handle);
+ goto drop;
}
+ /* Send to upper protocol */
+ iso_recv(conn, skb, flags);
+ return;
+
+drop:
kfree_skb(skb);
}
@@ -3778,7 +3993,14 @@ static void hci_rx_work(struct work_struct *work)
BT_DBG("%s", hdev->name);
- while ((skb = skb_dequeue(&hdev->rx_q))) {
+ /* The kcov_remote functions are used to collect packet-parsing
+ * coverage from this background thread and to associate that
+ * coverage with the thread of the syscall which originally injected
+ * the packet. This helps with fuzzing the kernel.
+ */
+ for (; (skb = skb_dequeue(&hdev->rx_q)); kcov_remote_stop()) {
+ kcov_remote_start_common(skb_get_kcov_handle(skb));
+
/* Send copy to monitor */
hci_send_to_monitor(hdev, skb);
@@ -3827,6 +4049,11 @@ static void hci_rx_work(struct work_struct *work)
hci_scodata_packet(hdev, skb);
break;
+ case HCI_ISODATA_PKT:
+ BT_DBG("%s ISO data packet", hdev->name);
+ hci_isodata_packet(hdev, skb);
+ break;
+
default:
kfree_skb(skb);
break;
@@ -3861,11 +4088,14 @@ static void hci_cmd_work(struct work_struct *work)
if (res < 0)
__hci_cmd_sync_cancel(hdev, -res);
- if (test_bit(HCI_RESET, &hdev->flags))
+ rcu_read_lock();
+ if (test_bit(HCI_RESET, &hdev->flags) ||
+ hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE))
cancel_delayed_work(&hdev->cmd_timer);
else
- schedule_delayed_work(&hdev->cmd_timer,
- HCI_CMD_TIMEOUT);
+ queue_delayed_work(hdev->workqueue, &hdev->cmd_timer,
+ HCI_CMD_TIMEOUT);
+ rcu_read_unlock();
} else {
skb_queue_head(&hdev->cmd_q, skb);
queue_work(hdev->workqueue, &hdev->cmd_work);
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 902b40a90b91..3f401ec5bb0c 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -1245,7 +1245,7 @@ void hci_debugfs_create_conn(struct hci_conn *conn)
struct hci_dev *hdev = conn->hdev;
char name[6];
- if (IS_ERR_OR_NULL(hdev->debugfs))
+ if (IS_ERR_OR_NULL(hdev->debugfs) || conn->debugfs)
return;
snprintf(name, sizeof(name), "%u", conn->handle);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index fc30f4c03d29..faca701bce2a 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -328,14 +328,17 @@ static u8 hci_cc_delete_stored_link_key(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
struct hci_rp_delete_stored_link_key *rp = data;
+ u16 num_keys;
bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
if (rp->status)
return rp->status;
- if (rp->num_keys <= hdev->stored_num_keys)
- hdev->stored_num_keys -= le16_to_cpu(rp->num_keys);
+ num_keys = le16_to_cpu(rp->num_keys);
+
+ if (num_keys <= hdev->stored_num_keys)
+ hdev->stored_num_keys -= num_keys;
else
hdev->stored_num_keys = 0;
@@ -709,6 +712,47 @@ static u8 hci_cc_read_local_version(struct hci_dev *hdev, void *data,
return rp->status;
}
+static u8 hci_cc_read_enc_key_size(struct hci_dev *hdev, void *data,
+ struct sk_buff *skb)
+{
+ struct hci_rp_read_enc_key_size *rp = data;
+ struct hci_conn *conn;
+ u16 handle;
+ u8 status = rp->status;
+
+ bt_dev_dbg(hdev, "status 0x%2.2x", status);
+
+ handle = le16_to_cpu(rp->handle);
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!conn) {
+ status = 0xFF;
+ goto done;
+ }
+
+ /* While unexpected, the read_enc_key_size command may fail. The most
+ * secure approach is to then assume the key size is 0 to force a
+ * disconnection.
+ */
+ if (status) {
+ bt_dev_err(hdev, "failed to read key size for handle %u",
+ handle);
+ conn->enc_key_size = 0;
+ } else {
+ conn->enc_key_size = rp->key_size;
+ status = 0;
+ }
+
+ hci_encrypt_cfm(conn, 0);
+
+done:
+ hci_dev_unlock(hdev);
+
+ return status;
+}
+
static u8 hci_cc_read_local_commands(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -1712,6 +1756,8 @@ static void le_set_scan_enable_complete(struct hci_dev *hdev, u8 enable)
hci_dev_set_flag(hdev, HCI_LE_SCAN);
if (hdev->le_scan_type == LE_SCAN_ACTIVE)
clear_pending_adv_report(hdev);
+ if (hci_dev_test_flag(hdev, HCI_MESH))
+ hci_discovery_set_state(hdev, DISCOVERY_FINDING);
break;
case LE_SCAN_DISABLE:
@@ -1726,7 +1772,7 @@ static void le_set_scan_enable_complete(struct hci_dev *hdev, u8 enable)
d->last_adv_addr_type, NULL,
d->last_adv_rssi, d->last_adv_flags,
d->last_adv_data,
- d->last_adv_data_len, NULL, 0);
+ d->last_adv_data_len, NULL, 0, 0);
}
/* Cancel this timer so that we don't try to disable scanning
@@ -1742,6 +1788,9 @@ static void le_set_scan_enable_complete(struct hci_dev *hdev, u8 enable)
*/
if (hci_dev_test_and_clear_flag(hdev, HCI_LE_SCAN_INTERRUPTED))
hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
+ else if (!hci_dev_test_flag(hdev, HCI_LE_ADV) &&
+ hdev->discovery.state == DISCOVERY_FINDING)
+ queue_work(hdev->workqueue, &hdev->reenable_adv_work);
break;
@@ -1835,7 +1884,9 @@ static u8 hci_cc_le_clear_accept_list(struct hci_dev *hdev, void *data,
if (rp->status)
return rp->status;
+ hci_dev_lock(hdev);
hci_bdaddr_list_clear(&hdev->le_accept_list);
+ hci_dev_unlock(hdev);
return rp->status;
}
@@ -1855,8 +1906,10 @@ static u8 hci_cc_le_add_to_accept_list(struct hci_dev *hdev, void *data,
if (!sent)
return rp->status;
+ hci_dev_lock(hdev);
hci_bdaddr_list_add(&hdev->le_accept_list, &sent->bdaddr,
sent->bdaddr_type);
+ hci_dev_unlock(hdev);
return rp->status;
}
@@ -1876,8 +1929,10 @@ static u8 hci_cc_le_del_from_accept_list(struct hci_dev *hdev, void *data,
if (!sent)
return rp->status;
+ hci_dev_lock(hdev);
hci_bdaddr_list_del(&hdev->le_accept_list, &sent->bdaddr,
sent->bdaddr_type);
+ hci_dev_unlock(hdev);
return rp->status;
}
@@ -1949,9 +2004,11 @@ static u8 hci_cc_le_add_to_resolv_list(struct hci_dev *hdev, void *data,
if (!sent)
return rp->status;
+ hci_dev_lock(hdev);
hci_bdaddr_list_add_with_irk(&hdev->le_resolv_list, &sent->bdaddr,
sent->bdaddr_type, sent->peer_irk,
sent->local_irk);
+ hci_dev_unlock(hdev);
return rp->status;
}
@@ -1971,8 +2028,10 @@ static u8 hci_cc_le_del_from_resolv_list(struct hci_dev *hdev, void *data,
if (!sent)
return rp->status;
+ hci_dev_lock(hdev);
hci_bdaddr_list_del_with_irk(&hdev->le_resolv_list, &sent->bdaddr,
sent->bdaddr_type);
+ hci_dev_unlock(hdev);
return rp->status;
}
@@ -1987,7 +2046,9 @@ static u8 hci_cc_le_clear_resolv_list(struct hci_dev *hdev, void *data,
if (rp->status)
return rp->status;
+ hci_dev_lock(hdev);
hci_bdaddr_list_clear(&hdev->le_resolv_list);
+ hci_dev_unlock(hdev);
return rp->status;
}
@@ -2137,7 +2198,7 @@ static u8 hci_cc_set_ext_adv_param(struct hci_dev *hdev, void *data,
adv_instance->tx_power = rp->tx_power;
}
/* Update adv data as tx power is known now */
- hci_req_update_adv_data(hdev, cp->handle);
+ hci_update_adv_data(hdev, cp->handle);
hci_dev_unlock(hdev);
@@ -2729,7 +2790,7 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
mgmt_conn = test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags);
if (conn->type == ACL_LINK) {
- if (test_bit(HCI_CONN_FLUSH_KEY, &conn->flags))
+ if (test_and_clear_bit(HCI_CONN_FLUSH_KEY, &conn->flags))
hci_remove_link_key(hdev, &conn->dst);
}
@@ -2834,7 +2895,7 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status)
bt_dev_dbg(hdev, "status 0x%2.2x", status);
/* All connection failure handling is taken care of by the
- * hci_le_conn_failed function which is triggered by the HCI
+ * hci_conn_failed function which is triggered by the HCI
* request completion callbacks used for connecting.
*/
if (status)
@@ -2859,7 +2920,7 @@ static void hci_cs_le_ext_create_conn(struct hci_dev *hdev, u8 status)
bt_dev_dbg(hdev, "status 0x%2.2x", status);
/* All connection failure handling is taken care of by the
- * hci_le_conn_failed function which is triggered by the HCI
+ * hci_conn_failed function which is triggered by the HCI
* request completion callbacks used for connecting.
*/
if (status)
@@ -3056,7 +3117,7 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, void *edata,
mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
info->dev_class, HCI_RSSI_INVALID,
- flags, NULL, 0, NULL, 0);
+ flags, NULL, 0, NULL, 0, 0);
}
hci_dev_unlock(hdev);
@@ -3067,13 +3128,20 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
{
struct hci_ev_conn_complete *ev = data;
struct hci_conn *conn;
+ u8 status = ev->status;
- bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
+ bt_dev_dbg(hdev, "status 0x%2.2x", status);
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr);
if (!conn) {
+ /* In case of an error status with no connection pending, just
+ * unlock as there is nothing to clean up.
+ */
+ if (ev->status)
+ goto unlock;
+
/* Connection may not exist if auto-connected. Check the bredr
* allowlist to see if this device is allowed to auto connect.
* If link is an ACL type, create a connection class
@@ -3106,8 +3174,25 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
}
}
- if (!ev->status) {
+ /* The HCI_Connection_Complete event is only sent once per connection.
+ * Processing it more than once per connection can corrupt kernel memory.
+ *
+ * As the connection handle is set here for the first time, it indicates
+ * whether the connection is already set up.
+ */
+ if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+ bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for existing connection");
+ goto unlock;
+ }
+
+ if (!status) {
conn->handle = __le16_to_cpu(ev->handle);
+ if (conn->handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
+ conn->handle, HCI_CONN_HANDLE_MAX);
+ status = HCI_ERROR_INVALID_PARAMETERS;
+ goto done;
+ }
if (conn->type == ACL_LINK) {
conn->state = BT_CONFIG;
@@ -3137,7 +3222,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES,
sizeof(cp), &cp);
- hci_req_update_scan(hdev);
+ hci_update_scan(hdev);
}
/* Set packet type for incoming connection */
@@ -3148,19 +3233,14 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, sizeof(cp),
&cp);
}
- } else {
- conn->state = BT_CLOSED;
- if (conn->type == ACL_LINK)
- mgmt_connect_failed(hdev, &conn->dst, conn->type,
- conn->dst_type, ev->status);
}
if (conn->type == ACL_LINK)
hci_sco_setup(conn, ev->status);
- if (ev->status) {
- hci_connect_cfm(conn, ev->status);
- hci_conn_del(conn);
+done:
+ if (status) {
+ hci_conn_failed(conn, status);
} else if (ev->link_type == SCO_LINK) {
switch (conn->setting & SCO_AIRMODE_MASK) {
case SCO_AIRMODE_CVSD:
@@ -3169,7 +3249,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
break;
}
- hci_connect_cfm(conn, ev->status);
+ hci_connect_cfm(conn, status);
}
unlock:
@@ -3206,10 +3286,12 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data,
return;
}
+ hci_dev_lock(hdev);
+
if (hci_bdaddr_list_lookup(&hdev->reject_list, &ev->bdaddr,
BDADDR_BREDR)) {
hci_reject_conn(hdev, &ev->bdaddr);
- return;
+ goto unlock;
}
/* Require HCI_CONNECTABLE or an accept list entry to accept the
@@ -3221,13 +3303,11 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data,
!hci_bdaddr_list_lookup_with_flags(&hdev->accept_list, &ev->bdaddr,
BDADDR_BREDR)) {
hci_reject_conn(hdev, &ev->bdaddr);
- return;
+ goto unlock;
}
/* Connection accepted */
- hci_dev_lock(hdev);
-
ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr);
if (ie)
memcpy(ie->data.dev_class, ev->dev_class, 3);
@@ -3239,8 +3319,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data,
HCI_ROLE_SLAVE);
if (!conn) {
bt_dev_err(hdev, "no memory for new connection");
- hci_dev_unlock(hdev);
- return;
+ goto unlock;
}
}
@@ -3280,6 +3359,10 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data,
conn->state = BT_CONNECT2;
hci_connect_cfm(conn, 0);
}
+
+ return;
+unlock:
+ hci_dev_unlock(hdev);
}
static u8 hci_to_mgmt_reason(u8 err)
@@ -3334,10 +3417,10 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data,
reason, mgmt_connected);
if (conn->type == ACL_LINK) {
- if (test_bit(HCI_CONN_FLUSH_KEY, &conn->flags))
+ if (test_and_clear_bit(HCI_CONN_FLUSH_KEY, &conn->flags))
hci_remove_link_key(hdev, &conn->dst);
- hci_req_update_scan(hdev);
+ hci_update_scan(hdev);
}
params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type);
@@ -3497,47 +3580,6 @@ unlock:
hci_dev_unlock(hdev);
}
-static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status,
- u16 opcode, struct sk_buff *skb)
-{
- const struct hci_rp_read_enc_key_size *rp;
- struct hci_conn *conn;
- u16 handle;
-
- BT_DBG("%s status 0x%02x", hdev->name, status);
-
- if (!skb || skb->len < sizeof(*rp)) {
- bt_dev_err(hdev, "invalid read key size response");
- return;
- }
-
- rp = (void *)skb->data;
- handle = le16_to_cpu(rp->handle);
-
- hci_dev_lock(hdev);
-
- conn = hci_conn_hash_lookup_handle(hdev, handle);
- if (!conn)
- goto unlock;
-
- /* While unexpected, the read_enc_key_size command may fail. The most
- * secure approach is to then assume the key size is 0 to force a
- * disconnection.
- */
- if (rp->status) {
- bt_dev_err(hdev, "failed to read key size for handle %u",
- handle);
- conn->enc_key_size = 0;
- } else {
- conn->enc_key_size = rp->key_size;
- }
-
- hci_encrypt_cfm(conn, 0);
-
-unlock:
- hci_dev_unlock(hdev);
-}
-
static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -3602,7 +3644,6 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data,
/* Try reading the encryption key size for encrypted ACL links */
if (!ev->status && ev->encrypt && conn->type == ACL_LINK) {
struct hci_cp_read_enc_key_size cp;
- struct hci_request req;
/* Only send HCI_Read_Encryption_Key_Size if the
* controller really supports it. If it doesn't, assume
@@ -3613,12 +3654,9 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data,
goto notify;
}
- hci_req_init(&req, hdev);
-
cp.handle = cpu_to_le16(conn->handle);
- hci_req_add(&req, HCI_OP_READ_ENC_KEY_SIZE, sizeof(cp), &cp);
-
- if (hci_req_run_skb(&req, read_enc_key_size_complete)) {
+ if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE,
+ sizeof(cp), &cp)) {
bt_dev_err(hdev, "sending read key size failed");
conn->enc_key_size = HCI_LINK_KEY_SIZE;
goto notify;
@@ -3729,15 +3767,187 @@ static inline void handle_cmd_cnt_and_timer(struct hci_dev *hdev, u8 ncmd)
{
cancel_delayed_work(&hdev->cmd_timer);
+ rcu_read_lock();
if (!test_bit(HCI_RESET, &hdev->flags)) {
if (ncmd) {
cancel_delayed_work(&hdev->ncmd_timer);
atomic_set(&hdev->cmd_cnt, 1);
} else {
- schedule_delayed_work(&hdev->ncmd_timer,
- HCI_NCMD_TIMEOUT);
+ if (!hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE))
+ queue_delayed_work(hdev->workqueue, &hdev->ncmd_timer,
+ HCI_NCMD_TIMEOUT);
+ }
+ }
+ rcu_read_unlock();
+}
+
+static u8 hci_cc_le_read_buffer_size_v2(struct hci_dev *hdev, void *data,
+ struct sk_buff *skb)
+{
+ struct hci_rp_le_read_buffer_size_v2 *rp = data;
+
+ bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
+
+ if (rp->status)
+ return rp->status;
+
+ hdev->le_mtu = __le16_to_cpu(rp->acl_mtu);
+ hdev->le_pkts = rp->acl_max_pkt;
+ hdev->iso_mtu = __le16_to_cpu(rp->iso_mtu);
+ hdev->iso_pkts = rp->iso_max_pkt;
+
+ hdev->le_cnt = hdev->le_pkts;
+ hdev->iso_cnt = hdev->iso_pkts;
+
+ BT_DBG("%s acl mtu %d:%d iso mtu %d:%d", hdev->name, hdev->acl_mtu,
+ hdev->acl_pkts, hdev->iso_mtu, hdev->iso_pkts);
+
+ return rp->status;
+}
+
+static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data,
+ struct sk_buff *skb)
+{
+ struct hci_rp_le_set_cig_params *rp = data;
+ struct hci_conn *conn;
+ int i = 0;
+
+ bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
+
+ hci_dev_lock(hdev);
+
+ if (rp->status) {
+ while ((conn = hci_conn_hash_lookup_cig(hdev, rp->cig_id))) {
+ conn->state = BT_CLOSED;
+ hci_connect_cfm(conn, rp->status);
+ hci_conn_del(conn);
}
+ goto unlock;
+ }
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ if (conn->type != ISO_LINK || conn->iso_qos.cig != rp->cig_id ||
+ conn->state == BT_CONNECTED)
+ continue;
+
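+ /* The reply carries one handle per requested CIS, in order */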
+ conn->handle = __le16_to_cpu(rp->handle[i++]);
+
+ bt_dev_dbg(hdev, "%p handle 0x%4.4x link %p", conn,
+ conn->handle, conn->link);
+
+ /* Create CIS if LE is already connected */
+ if (conn->link && conn->link->state == BT_CONNECTED)
+ hci_le_create_cis(conn->link);
+
+ if (i == rp->num_handles)
+ break;
+ }
+
+ rcu_read_unlock();
+
+unlock:
+ hci_dev_unlock(hdev);
+
+ return rp->status;
+}
+
+static u8 hci_cc_le_setup_iso_path(struct hci_dev *hdev, void *data,
+ struct sk_buff *skb)
+{
+ struct hci_rp_le_setup_iso_path *rp = data;
+ struct hci_cp_le_setup_iso_path *cp;
+ struct hci_conn *conn;
+
+ bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
+
+ cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SETUP_ISO_PATH);
+ if (!cp)
+ return rp->status;
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
+ if (!conn)
+ goto unlock;
+
+ if (rp->status) {
+ hci_connect_cfm(conn, rp->status);
+ hci_conn_del(conn);
+ goto unlock;
+ }
+
+ switch (cp->direction) {
+ /* Input (Host to Controller) */
+ case 0x00:
+ /* Only confirm connection if output only */
+ if (conn->iso_qos.out.sdu && !conn->iso_qos.in.sdu)
+ hci_connect_cfm(conn, rp->status);
+ break;
+ /* Output (Controller to Host) */
+ case 0x01:
+ /* Confirm connection since conn->iso_qos is always configured
+ * last.
+ */
+ hci_connect_cfm(conn, rp->status);
+ break;
}
+
+unlock:
+ hci_dev_unlock(hdev);
+ return rp->status;
+}
+
+static void hci_cs_le_create_big(struct hci_dev *hdev, u8 status)
+{
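+ /* Status only; the outcome is delivered via the LE Create BIG Complete event */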
+ bt_dev_dbg(hdev, "status 0x%2.2x", status);
+}
+
+static u8 hci_cc_set_per_adv_param(struct hci_dev *hdev, void *data,
+ struct sk_buff *skb)
+{
+ struct hci_ev_status *rp = data;
+ struct hci_cp_le_set_per_adv_params *cp;
+
+ bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
+
+ if (rp->status)
+ return rp->status;
+
+ cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_PER_ADV_PARAMS);
+ if (!cp)
+ return rp->status;
+
+ /* TODO: set the conn state */
+ return rp->status;
+}
+
+static u8 hci_cc_le_set_per_adv_enable(struct hci_dev *hdev, void *data,
+ struct sk_buff *skb)
+{
+ struct hci_ev_status *rp = data;
+ __u8 *sent;
+
+ bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
+
+ if (rp->status)
+ return rp->status;
+
+ sent = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_PER_ADV_ENABLE);
+ if (!sent)
+ return rp->status;
+
+ hci_dev_lock(hdev);
+
+ if (*sent)
+ hci_dev_set_flag(hdev, HCI_LE_PER_ADV);
+ else
+ hci_dev_clear_flag(hdev, HCI_LE_PER_ADV);
+
+ hci_dev_unlock(hdev);
+
+ return rp->status;
}
#define HCI_CC_VL(_op, _func, _min, _max) \
@@ -3830,6 +4040,8 @@ static const struct hci_cc {
sizeof(struct hci_rp_read_local_amp_info)),
HCI_CC(HCI_OP_READ_CLOCK, hci_cc_read_clock,
sizeof(struct hci_rp_read_clock)),
+ HCI_CC(HCI_OP_READ_ENC_KEY_SIZE, hci_cc_read_enc_key_size,
+ sizeof(struct hci_rp_read_enc_key_size)),
HCI_CC(HCI_OP_READ_INQ_RSP_TX_POWER, hci_cc_read_inq_rsp_tx_power,
sizeof(struct hci_rp_read_inq_rsp_tx_power)),
HCI_CC(HCI_OP_READ_DEF_ERR_DATA_REPORTING,
@@ -3913,9 +4125,18 @@ static const struct hci_cc {
hci_cc_le_set_adv_set_random_addr),
HCI_CC_STATUS(HCI_OP_LE_REMOVE_ADV_SET, hci_cc_le_remove_adv_set),
HCI_CC_STATUS(HCI_OP_LE_CLEAR_ADV_SETS, hci_cc_le_clear_adv_sets),
+ HCI_CC_STATUS(HCI_OP_LE_SET_PER_ADV_PARAMS, hci_cc_set_per_adv_param),
+ HCI_CC_STATUS(HCI_OP_LE_SET_PER_ADV_ENABLE,
+ hci_cc_le_set_per_adv_enable),
HCI_CC(HCI_OP_LE_READ_TRANSMIT_POWER, hci_cc_le_read_transmit_power,
sizeof(struct hci_rp_le_read_transmit_power)),
- HCI_CC_STATUS(HCI_OP_LE_SET_PRIVACY_MODE, hci_cc_le_set_privacy_mode)
+ HCI_CC_STATUS(HCI_OP_LE_SET_PRIVACY_MODE, hci_cc_le_set_privacy_mode),
+ HCI_CC(HCI_OP_LE_READ_BUFFER_SIZE_V2, hci_cc_le_read_buffer_size_v2,
+ sizeof(struct hci_rp_le_read_buffer_size_v2)),
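+ /* Variable length: one connection handle is returned per CIS in the CIG */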
+ HCI_CC_VL(HCI_OP_LE_SET_CIG_PARAMS, hci_cc_le_set_cig_params,
+ sizeof(struct hci_rp_le_set_cig_params), HCI_MAX_EVENT_SIZE),
+ HCI_CC(HCI_OP_LE_SETUP_ISO_PATH, hci_cc_le_setup_iso_path,
+ sizeof(struct hci_rp_le_setup_iso_path)),
};
static u8 hci_cc_func(struct hci_dev *hdev, const struct hci_cc *cc,
@@ -3963,6 +4184,17 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, void *data,
}
}
+ if (i == ARRAY_SIZE(hci_cc_table)) {
+ /* Unknown opcode, assume byte 0 contains the status, so
+ * that e.g. __hci_cmd_sync() properly returns errors
+ * for vendor-specific commands sent by HCI drivers.
+ * If a vendor doesn't actually follow this convention we may
+ * need to introduce a vendor CC table in order to properly set
+ * the status.
+ */
+ *status = skb->data[0];
+ }
+
handle_cmd_cnt_and_timer(hdev, ev->ncmd);
hci_req_cmd_complete(hdev, *opcode, *status, req_complete,
@@ -3978,6 +4210,40 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, void *data,
queue_work(hdev->workqueue, &hdev->cmd_work);
}
+static void hci_cs_le_create_cis(struct hci_dev *hdev, u8 status)
+{
+ struct hci_cp_le_create_cis *cp;
+ int i;
+
+ bt_dev_dbg(hdev, "status 0x%2.2x", status);
+
+ if (!status)
+ return;
+
+ cp = hci_sent_cmd_data(hdev, HCI_OP_LE_CREATE_CIS);
+ if (!cp)
+ return;
+
+ hci_dev_lock(hdev);
+
+ /* Remove connection if command failed */
+ for (i = 0; cp->num_cis; cp->num_cis--, i++) {
+ struct hci_conn *conn;
+ u16 handle;
+
+ handle = __le16_to_cpu(cp->cis[i].cis_handle);
+
+ conn = hci_conn_hash_lookup_handle(hdev, handle);
+ if (conn) {
+ conn->state = BT_CLOSED;
+ hci_connect_cfm(conn, status);
+ hci_conn_del(conn);
+ }
+ }
+
+ hci_dev_unlock(hdev);
+}
+
#define HCI_CS(_op, _func) \
{ \
.op = _op, \
@@ -4007,7 +4273,9 @@ static const struct hci_cs {
HCI_CS(HCI_OP_LE_CREATE_CONN, hci_cs_le_create_conn),
HCI_CS(HCI_OP_LE_READ_REMOTE_FEATURES, hci_cs_le_read_remote_features),
HCI_CS(HCI_OP_LE_START_ENC, hci_cs_le_start_enc),
- HCI_CS(HCI_OP_LE_EXT_CREATE_CONN, hci_cs_le_ext_create_conn)
+ HCI_CS(HCI_OP_LE_EXT_CREATE_CONN, hci_cs_le_ext_create_conn),
+ HCI_CS(HCI_OP_LE_CREATE_CIS, hci_cs_le_create_cis),
+ HCI_CS(HCI_OP_LE_CREATE_BIG, hci_cs_le_create_big),
};
static void hci_cmd_status_evt(struct hci_dev *hdev, void *data,
@@ -4143,6 +4411,22 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data,
hdev->sco_cnt = hdev->sco_pkts;
break;
+ case ISO_LINK:
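+ /* Use ISO buffers if available, otherwise fall back to LE and then ACL accounting */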
+ if (hdev->iso_pkts) {
+ hdev->iso_cnt += count;
+ if (hdev->iso_cnt > hdev->iso_pkts)
+ hdev->iso_cnt = hdev->iso_pkts;
+ } else if (hdev->le_pkts) {
+ hdev->le_cnt += count;
+ if (hdev->le_cnt > hdev->le_pkts)
+ hdev->le_cnt = hdev->le_pkts;
+ } else {
+ hdev->acl_cnt += count;
+ if (hdev->acl_cnt > hdev->acl_pkts)
+ hdev->acl_cnt = hdev->acl_pkts;
+ }
+ break;
+
default:
bt_dev_err(hdev, "unknown type %d conn %p",
conn->type, conn);
@@ -4534,7 +4818,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata,
if (!info) {
bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x",
HCI_EV_INQUIRY_RESULT_WITH_RSSI);
- return;
+ goto unlock;
}
bacpy(&data.bdaddr, &info->bdaddr);
@@ -4550,7 +4834,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata,
mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
info->dev_class, info->rssi,
- flags, NULL, 0, NULL, 0);
+ flags, NULL, 0, NULL, 0, 0);
}
} else if (skb->len == array_size(ev->num,
sizeof(struct inquiry_info_rssi))) {
@@ -4565,7 +4849,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata,
if (!info) {
bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x",
HCI_EV_INQUIRY_RESULT_WITH_RSSI);
- return;
+ goto unlock;
}
bacpy(&data.bdaddr, &info->bdaddr);
@@ -4581,13 +4865,13 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata,
mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
info->dev_class, info->rssi,
- flags, NULL, 0, NULL, 0);
+ flags, NULL, 0, NULL, 0, 0);
}
} else {
bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x",
HCI_EV_INQUIRY_RESULT_WITH_RSSI);
}
-
+unlock:
hci_dev_unlock(hdev);
}
@@ -4660,8 +4944,22 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
{
struct hci_ev_sync_conn_complete *ev = data;
struct hci_conn *conn;
+ u8 status = ev->status;
- bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
+ switch (ev->link_type) {
+ case SCO_LINK:
+ case ESCO_LINK:
+ break;
+ default:
+ /* As per Core 5.3 Vol 4 Part E 7.7.35 (p.2219), Link_Type
+ * for HCI_Synchronous_Connection_Complete is limited to
+ * either SCO or eSCO
+ */
+ bt_dev_err(hdev, "Ignoring connect complete event for invalid link type");
+ return;
+ }
+
+ bt_dev_dbg(hdev, "status 0x%2.2x", status);
hci_dev_lock(hdev);
@@ -4684,24 +4982,28 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
goto unlock;
}
- switch (ev->status) {
+ /* The HCI_Synchronous_Connection_Complete event is only sent once per connection.
+ * Processing it more than once per connection can corrupt kernel memory.
+ *
+ * As the connection handle is set here for the first time, it indicates
+ * whether the connection is already set up.
+ */
+ if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+ bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete event for existing connection");
+ goto unlock;
+ }
+
+ switch (status) {
case 0x00:
- /* The synchronous connection complete event should only be
- * sent once per new connection. Receiving a successful
- * complete event when the connection status is already
- * BT_CONNECTED means that the device is misbehaving and sent
- * multiple complete event packets for the same new connection.
- *
- * Registering the device more than once can corrupt kernel
- * memory, hence upon detecting this invalid event, we report
- * an error and ignore the packet.
- */
- if (conn->state == BT_CONNECTED) {
- bt_dev_err(hdev, "Ignoring connect complete event for existing connection");
- goto unlock;
+ conn->handle = __le16_to_cpu(ev->handle);
+ if (conn->handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
+ conn->handle, HCI_CONN_HANDLE_MAX);
+ status = HCI_ERROR_INVALID_PARAMETERS;
+ conn->state = BT_CLOSED;
+ break;
}
- conn->handle = __le16_to_cpu(ev->handle);
conn->state = BT_CONNECTED;
conn->type = ev->link_type;
@@ -4745,8 +5047,8 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
}
}
- hci_connect_cfm(conn, ev->status);
- if (ev->status)
+ hci_connect_cfm(conn, status);
+ if (status)
hci_conn_del(conn);
unlock:
@@ -4819,7 +5121,7 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, void *edata,
mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
info->dev_class, info->rssi,
- flags, info->data, eir_len, NULL, 0);
+ flags, info->data, eir_len, NULL, 0, 0);
}
hci_dev_unlock(hdev);
@@ -5423,8 +5725,9 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
- if (hcon) {
+ if (hcon && hcon->type == AMP_LINK) {
hcon->state = BT_CLOSED;
+ hci_disconn_cfm(hcon, ev->reason);
hci_conn_del(hcon);
}
@@ -5503,8 +5806,14 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
*/
hci_dev_clear_flag(hdev, HCI_LE_ADV);
- conn = hci_lookup_le_connect(hdev);
+ conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, bdaddr);
if (!conn) {
+ /* In case of error status and there is no connection pending
+ * just unlock as there is nothing to cleanup.
+ */
+ if (status)
+ goto unlock;
+
conn = hci_conn_add(hdev, LE_LINK, bdaddr, role);
if (!conn) {
bt_dev_err(hdev, "no memory for new connection");
@@ -5537,6 +5846,17 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
cancel_delayed_work(&conn->le_conn_timeout);
}
+ /* The HCI_LE_Connection_Complete event is only sent once per connection.
+ * Processing it more than once per connection can corrupt kernel memory.
+ *
+ * As the connection handle is set here for the first time, it indicates
+ * whether the connection is already set up.
+ */
+ if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+ bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for existing connection");
+ goto unlock;
+ }
+
le_conn_update_addr(conn, bdaddr, bdaddr_type, local_rpa);
/* Lookup the identity address from the stored connection
@@ -5556,11 +5876,19 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
conn->dst_type = ev_bdaddr_type(hdev, conn->dst_type, NULL);
- if (status) {
- hci_le_conn_failed(conn, status);
- goto unlock;
+ if (handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x", handle,
+ HCI_CONN_HANDLE_MAX);
+ status = HCI_ERROR_INVALID_PARAMETERS;
}
+ /* All connection failure handling is taken care of by the
+ * hci_conn_failed function which is triggered by the HCI
+ * request completion callbacks used for connecting.
+ */
+ if (status)
+ goto unlock;
+
if (conn->dst_type == ADDR_LE_DEV_PUBLIC)
addr_type = BDADDR_LE_PUBLIC;
else
@@ -5670,8 +5998,6 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
- adv = hci_find_adv_instance(hdev, ev->handle);
-
/* The Bluetooth Core 5.3 specification clearly states that this event
* shall not be sent when the Host disables the advertising set. So in
* case of HCI_ERROR_CANCELLED_BY_HOST, just ignore the event.
@@ -5684,9 +6010,13 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, void *data,
return;
}
+ hci_dev_lock(hdev);
+
+ adv = hci_find_adv_instance(hdev, ev->handle);
+
if (ev->status) {
if (!adv)
- return;
+ goto unlock;
/* Remove advertising as it has been terminated */
hci_remove_adv_instance(hdev, ev->handle);
@@ -5694,12 +6024,12 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, void *data,
list_for_each_entry_safe(adv, n, &hdev->adv_instances, list) {
if (adv->enabled)
- return;
+ goto unlock;
}
/* We are no longer advertising, clear HCI_LE_ADV */
hci_dev_clear_flag(hdev, HCI_LE_ADV);
- return;
+ goto unlock;
}
if (adv)
@@ -5714,16 +6044,19 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, void *data,
if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM ||
bacmp(&conn->resp_addr, BDADDR_ANY))
- return;
+ goto unlock;
if (!ev->handle) {
bacpy(&conn->resp_addr, &hdev->random_addr);
- return;
+ goto unlock;
}
if (adv)
bacpy(&conn->resp_addr, &adv->random_addr);
}
+
+unlock:
+ hci_dev_unlock(hdev);
}
static void hci_le_conn_update_complete_evt(struct hci_dev *hdev, void *data,
@@ -5844,7 +6177,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
u8 bdaddr_type, bdaddr_t *direct_addr,
u8 direct_addr_type, s8 rssi, u8 *data, u8 len,
- bool ext_adv)
+ bool ext_adv, bool ctl_time, u64 instant)
{
struct discovery_state *d = &hdev->discovery;
struct smp_irk *irk;
@@ -5892,7 +6225,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
* important to see if the address is matching the local
* controller address.
*/
- if (direct_addr) {
+ if (!hci_dev_test_flag(hdev, HCI_MESH) && direct_addr) {
direct_addr_type = ev_bdaddr_type(hdev, direct_addr_type,
&bdaddr_resolved);
@@ -5940,6 +6273,18 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
conn->le_adv_data_len = len;
}
+ if (type == LE_ADV_NONCONN_IND || type == LE_ADV_SCAN_IND)
+ flags = MGMT_DEV_FOUND_NOT_CONNECTABLE;
+ else
+ flags = 0;
+
+ /* All scan results should be sent up for Mesh systems */
+ if (hci_dev_test_flag(hdev, HCI_MESH)) {
+ mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL,
+ rssi, flags, data, len, NULL, 0, instant);
+ return;
+ }
+
/* Passive scanning shouldn't trigger any device found events,
* except for devices marked as CONN_REPORT for which we do send
* device found events, or advertisement monitoring requested.
@@ -5953,12 +6298,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
idr_is_empty(&hdev->adv_monitors_idr))
return;
- if (type == LE_ADV_NONCONN_IND || type == LE_ADV_SCAN_IND)
- flags = MGMT_DEV_FOUND_NOT_CONNECTABLE;
- else
- flags = 0;
mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL,
- rssi, flags, data, len, NULL, 0);
+ rssi, flags, data, len, NULL, 0, 0);
return;
}
@@ -5977,11 +6318,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
* and just sends a scan response event, then it is marked as
* not connectable as well.
*/
- if (type == LE_ADV_NONCONN_IND || type == LE_ADV_SCAN_IND ||
- type == LE_ADV_SCAN_RSP)
+ if (type == LE_ADV_SCAN_RSP)
flags = MGMT_DEV_FOUND_NOT_CONNECTABLE;
- else
- flags = 0;
/* If there's nothing pending either store the data from this
* event or send an immediate device found event if the data
@@ -5998,7 +6336,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
}
mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL,
- rssi, flags, data, len, NULL, 0);
+ rssi, flags, data, len, NULL, 0, 0);
return;
}
@@ -6017,7 +6355,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
d->last_adv_addr_type, NULL,
d->last_adv_rssi, d->last_adv_flags,
d->last_adv_data,
- d->last_adv_data_len, NULL, 0);
+ d->last_adv_data_len, NULL, 0, 0);
/* If the new report will trigger a SCAN_REQ store it for
* later merging.
@@ -6034,7 +6372,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
*/
clear_pending_adv_report(hdev);
mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL,
- rssi, flags, data, len, NULL, 0);
+ rssi, flags, data, len, NULL, 0, 0);
return;
}
@@ -6044,7 +6382,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
*/
mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK,
d->last_adv_addr_type, NULL, rssi, d->last_adv_flags,
- d->last_adv_data, d->last_adv_data_len, data, len);
+ d->last_adv_data, d->last_adv_data_len, data, len, 0);
clear_pending_adv_report(hdev);
}
@@ -6052,6 +6390,7 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
struct hci_ev_le_advertising_report *ev = data;
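+ /* Reception timestamp, forwarded with reports for the Mesh receive path */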
+ u64 instant = jiffies;
if (!ev->num)
return;
@@ -6076,7 +6415,8 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data,
rssi = info->data[info->length];
process_adv_report(hdev, info->type, &info->bdaddr,
info->bdaddr_type, NULL, 0, rssi,
- info->data, info->length, false);
+ info->data, info->length, false,
+ false, instant);
} else {
bt_dev_err(hdev, "Dropping invalid advertising data");
}
@@ -6133,6 +6473,7 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
struct hci_ev_le_ext_adv_report *ev = data;
+ u64 instant = jiffies;
if (!ev->num)
return;
@@ -6159,13 +6500,47 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data,
process_adv_report(hdev, legacy_evt_type, &info->bdaddr,
info->bdaddr_type, NULL, 0,
info->rssi, info->data, info->length,
- !(evt_type & LE_EXT_ADV_LEGACY_PDU));
+ !(evt_type & LE_EXT_ADV_LEGACY_PDU),
+ false, instant);
}
}
hci_dev_unlock(hdev);
}
+static int hci_le_pa_term_sync(struct hci_dev *hdev, __le16 handle)
+{
+ struct hci_cp_le_pa_term_sync cp;
+
+ memset(&cp, 0, sizeof(cp));
+ cp.handle = handle;
+
+ return hci_send_cmd(hdev, HCI_OP_LE_PA_TERM_SYNC, sizeof(cp), &cp);
+}
+
+static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data,
+ struct sk_buff *skb)
+{
+ struct hci_ev_le_pa_sync_established *ev = data;
+ int mask = hdev->link_mode;
+ __u8 flags = 0;
+
+ bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
+
+ if (ev->status)
+ return;
+
+ hci_dev_lock(hdev);
+
+ hci_dev_clear_flag(hdev, HCI_PA_SYNC);
+
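+ /* Drop the sync again if no listener (e.g. an ISO socket) accepts it */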
+ mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ISO_LINK, &flags);
+ if (!(mask & HCI_LM_ACCEPT))
+ hci_le_pa_term_sync(hdev, ev->handle);
+
+ hci_dev_unlock(hdev);
+}
+
static void hci_le_remote_feat_complete_evt(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -6349,6 +6724,7 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
struct hci_ev_le_direct_adv_report *ev = data;
+ u64 instant = jiffies;
int i;
if (!hci_le_ev_skb_pull(hdev, skb, HCI_EV_LE_DIRECT_ADV_REPORT,
@@ -6366,7 +6742,7 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, void *data,
process_adv_report(hdev, info->type, &info->bdaddr,
info->bdaddr_type, &info->direct_addr,
info->direct_addr_type, info->rssi, NULL, 0,
- false);
+ false, false, instant);
}
hci_dev_unlock(hdev);
@@ -6396,6 +6772,240 @@ unlock:
hci_dev_unlock(hdev);
}
+static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
+ struct sk_buff *skb)
+{
+ struct hci_evt_le_cis_established *ev = data;
+ struct hci_conn *conn;
+ u16 handle = __le16_to_cpu(ev->handle);
+
+ bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!conn) {
+ bt_dev_err(hdev,
+ "Unable to find connection with handle 0x%4.4x",
+ handle);
+ goto unlock;
+ }
+
+ if (conn->type != ISO_LINK) {
+ bt_dev_err(hdev,
+ "Invalid connection link type handle 0x%4.4x",
+ handle);
+ goto unlock;
+ }
+
+ if (conn->role == HCI_ROLE_SLAVE) {
+ __le32 interval;
+
+ memset(&interval, 0, sizeof(interval));
+
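+ /* c_latency and p_latency are 24-bit little-endian values, so
+ * widen them through a zeroed __le32 before conversion.
+ */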
+ memcpy(&interval, ev->c_latency, sizeof(ev->c_latency));
+ conn->iso_qos.in.interval = le32_to_cpu(interval);
+ memcpy(&interval, ev->p_latency, sizeof(ev->p_latency));
+ conn->iso_qos.out.interval = le32_to_cpu(interval);
+ conn->iso_qos.in.latency = le16_to_cpu(ev->interval);
+ conn->iso_qos.out.latency = le16_to_cpu(ev->interval);
+ conn->iso_qos.in.sdu = le16_to_cpu(ev->c_mtu);
+ conn->iso_qos.out.sdu = le16_to_cpu(ev->p_mtu);
+ conn->iso_qos.in.phy = ev->c_phy;
+ conn->iso_qos.out.phy = ev->p_phy;
+ }
+
+ if (!ev->status) {
+ conn->state = BT_CONNECTED;
+ hci_debugfs_create_conn(conn);
+ hci_conn_add_sysfs(conn);
+ hci_iso_setup_path(conn);
+ goto unlock;
+ }
+
+ hci_connect_cfm(conn, ev->status);
+ hci_conn_del(conn);
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
+static void hci_le_reject_cis(struct hci_dev *hdev, __le16 handle)
+{
+ struct hci_cp_le_reject_cis cp;
+
+ memset(&cp, 0, sizeof(cp));
+ cp.handle = handle;
+ cp.reason = HCI_ERROR_REJ_BAD_ADDR;
+ hci_send_cmd(hdev, HCI_OP_LE_REJECT_CIS, sizeof(cp), &cp);
+}
+
+static void hci_le_accept_cis(struct hci_dev *hdev, __le16 handle)
+{
+ struct hci_cp_le_accept_cis cp;
+
+ memset(&cp, 0, sizeof(cp));
+ cp.handle = handle;
+ hci_send_cmd(hdev, HCI_OP_LE_ACCEPT_CIS, sizeof(cp), &cp);
+}
+
+static void hci_le_cis_req_evt(struct hci_dev *hdev, void *data,
+ struct sk_buff *skb)
+{
+ struct hci_evt_le_cis_req *ev = data;
+ u16 acl_handle, cis_handle;
+ struct hci_conn *acl, *cis;
+ int mask;
+ __u8 flags = 0;
+
+ acl_handle = __le16_to_cpu(ev->acl_handle);
+ cis_handle = __le16_to_cpu(ev->cis_handle);
+
+ bt_dev_dbg(hdev, "acl 0x%4.4x handle 0x%4.4x cig 0x%2.2x cis 0x%2.2x",
+ acl_handle, cis_handle, ev->cig_id, ev->cis_id);
+
+ hci_dev_lock(hdev);
+
+ acl = hci_conn_hash_lookup_handle(hdev, acl_handle);
+ if (!acl)
+ goto unlock;
+
+ mask = hci_proto_connect_ind(hdev, &acl->dst, ISO_LINK, &flags);
+ if (!(mask & HCI_LM_ACCEPT)) {
+ hci_le_reject_cis(hdev, ev->cis_handle);
+ goto unlock;
+ }
+
+ cis = hci_conn_hash_lookup_handle(hdev, cis_handle);
+ if (!cis) {
+ cis = hci_conn_add(hdev, ISO_LINK, &acl->dst, HCI_ROLE_SLAVE);
+ if (!cis) {
+ hci_le_reject_cis(hdev, ev->cis_handle);
+ goto unlock;
+ }
+ cis->handle = cis_handle;
+ }
+
+ cis->iso_qos.cig = ev->cig_id;
+ cis->iso_qos.cis = ev->cis_id;
+
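+ /* Accept right away unless the socket layer asked to defer to userspace */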
+ if (!(flags & HCI_PROTO_DEFER)) {
+ hci_le_accept_cis(hdev, ev->cis_handle);
+ } else {
+ cis->state = BT_CONNECT2;
+ hci_connect_cfm(cis, 0);
+ }
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
+static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data,
+ struct sk_buff *skb)
+{
+ struct hci_evt_le_create_big_complete *ev = data;
+ struct hci_conn *conn;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
+
+ if (!hci_le_ev_skb_pull(hdev, skb, HCI_EVT_LE_CREATE_BIG_COMPLETE,
+ flex_array_size(ev, bis_handle, ev->num_bis)))
+ return;
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_big(hdev, ev->handle);
+ if (!conn)
+ goto unlock;
+
+ if (conn->type != ISO_LINK) {
+ bt_dev_err(hdev,
+ "Invalid connection link type handle 0x%2.2x",
+ ev->handle);
+ goto unlock;
+ }
+
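+ /* Only the first BIS handle is tracked on this connection */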
+ if (ev->num_bis)
+ conn->handle = __le16_to_cpu(ev->bis_handle[0]);
+
+ if (!ev->status) {
+ conn->state = BT_CONNECTED;
+ hci_debugfs_create_conn(conn);
+ hci_conn_add_sysfs(conn);
+ hci_iso_setup_path(conn);
+ goto unlock;
+ }
+
+ hci_connect_cfm(conn, ev->status);
+ hci_conn_del(conn);
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
+static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
+ struct sk_buff *skb)
+{
+ struct hci_evt_le_big_sync_estabilished *ev = data;
+ struct hci_conn *bis;
+ int i;
+
+ bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
+
+ if (!hci_le_ev_skb_pull(hdev, skb, HCI_EVT_LE_BIG_SYNC_ESTABILISHED,
+ flex_array_size(ev, bis, ev->num_bis)))
+ return;
+
+ if (ev->status)
+ return;
+
+ hci_dev_lock(hdev);
+
+ for (i = 0; i < ev->num_bis; i++) {
+ u16 handle = le16_to_cpu(ev->bis[i]);
+ __le32 interval;
+
+ bis = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!bis) {
+ bis = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY,
+ HCI_ROLE_SLAVE);
+ if (!bis)
+ continue;
+ bis->handle = handle;
+ }
+
+ bis->iso_qos.big = ev->handle;
+ memset(&interval, 0, sizeof(interval));
+ memcpy(&interval, ev->latency, sizeof(ev->latency));
+ bis->iso_qos.in.interval = le32_to_cpu(interval);
+ /* Convert ISO Interval (1.25 ms slots) to latency (ms) */
+ bis->iso_qos.in.latency = le16_to_cpu(ev->interval) * 125 / 100;
+ bis->iso_qos.in.sdu = le16_to_cpu(ev->max_pdu);
+
+ hci_connect_cfm(bis, ev->status);
+ }
+
+ hci_dev_unlock(hdev);
+}
+
+static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data,
+ struct sk_buff *skb)
+{
+ struct hci_evt_le_big_info_adv_report *ev = data;
+ int mask = hdev->link_mode;
+ __u8 flags = 0;
+
+ bt_dev_dbg(hdev, "sync_handle 0x%4.4x", le16_to_cpu(ev->sync_handle));
+
+ hci_dev_lock(hdev);
+
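+ /* Terminate the PA sync if no ISO listener is interested in this BIG */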
+ mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags);
+ if (!(mask & HCI_LM_ACCEPT))
+ hci_le_pa_term_sync(hdev, ev->sync_handle);
+
+ hci_dev_unlock(hdev);
+}
+
#define HCI_LE_EV_VL(_op, _func, _min_len, _max_len) \
[_op] = { \
.func = _func, \
@@ -6456,9 +7066,34 @@ static const struct hci_le_ev {
HCI_LE_EV_VL(HCI_EV_LE_EXT_ADV_REPORT, hci_le_ext_adv_report_evt,
sizeof(struct hci_ev_le_ext_adv_report),
HCI_MAX_EVENT_SIZE),
+ /* [0x0e = HCI_EV_LE_PA_SYNC_ESTABLISHED] */
+ HCI_LE_EV(HCI_EV_LE_PA_SYNC_ESTABLISHED,
+ hci_le_pa_sync_estabilished_evt,
+ sizeof(struct hci_ev_le_pa_sync_established)),
/* [0x12 = HCI_EV_LE_EXT_ADV_SET_TERM] */
HCI_LE_EV(HCI_EV_LE_EXT_ADV_SET_TERM, hci_le_ext_adv_term_evt,
sizeof(struct hci_evt_le_ext_adv_set_term)),
+ /* [0x19 = HCI_EVT_LE_CIS_ESTABLISHED] */
+ HCI_LE_EV(HCI_EVT_LE_CIS_ESTABLISHED, hci_le_cis_estabilished_evt,
+ sizeof(struct hci_evt_le_cis_established)),
+ /* [0x1a = HCI_EVT_LE_CIS_REQ] */
+ HCI_LE_EV(HCI_EVT_LE_CIS_REQ, hci_le_cis_req_evt,
+ sizeof(struct hci_evt_le_cis_req)),
+ /* [0x1b = HCI_EVT_LE_CREATE_BIG_COMPLETE] */
+ HCI_LE_EV_VL(HCI_EVT_LE_CREATE_BIG_COMPLETE,
+ hci_le_create_big_complete_evt,
+ sizeof(struct hci_evt_le_create_big_complete),
+ HCI_MAX_EVENT_SIZE),
+ /* [0x1d = HCI_EVT_LE_BIG_SYNC_ESTABILISHED] */
+ HCI_LE_EV_VL(HCI_EVT_LE_BIG_SYNC_ESTABILISHED,
+ hci_le_big_sync_established_evt,
+ sizeof(struct hci_evt_le_big_sync_estabilished),
+ HCI_MAX_EVENT_SIZE),
+ /* [0x22 = HCI_EVT_LE_BIG_INFO_ADV_REPORT] */
+ HCI_LE_EV_VL(HCI_EVT_LE_BIG_INFO_ADV_REPORT,
+ hci_le_big_info_adv_report_evt,
+ sizeof(struct hci_evt_le_big_info_adv_report),
+ HCI_MAX_EVENT_SIZE),
};
static void hci_le_meta_evt(struct hci_dev *hdev, void *data,
@@ -6497,7 +7132,6 @@ static void hci_le_meta_evt(struct hci_dev *hdev, void *data,
if (skb->len > subev->max_len)
bt_dev_warn(hdev, "unexpected subevent 0x%2.2x length: %u > %u",
ev->subevent, skb->len, subev->max_len);
-
data = hci_le_ev_skb_pull(hdev, skb, ev->subevent, subev->min_len);
if (!data)
return;
@@ -6798,7 +7432,7 @@ static const struct hci_ev {
HCI_EV(HCI_EV_NUM_COMP_BLOCKS, hci_num_comp_blocks_evt,
sizeof(struct hci_ev_num_comp_blocks)),
/* [0xff = HCI_EV_VENDOR] */
- HCI_EV(HCI_EV_VENDOR, msft_vendor_evt, 0),
+ HCI_EV_VL(HCI_EV_VENDOR, msft_vendor_evt, 0, HCI_MAX_EVENT_SIZE),
};
static void hci_event_func(struct hci_dev *hdev, u8 event, struct sk_buff *skb,
@@ -6823,8 +7457,9 @@ static void hci_event_func(struct hci_dev *hdev, u8 event, struct sk_buff *skb,
* decide if that is acceptable.
*/
if (skb->len > ev->max_len)
- bt_dev_warn(hdev, "unexpected event 0x%2.2x length: %u > %u",
- event, skb->len, ev->max_len);
+ bt_dev_warn_ratelimited(hdev,
+ "unexpected event 0x%2.2x length: %u > %u",
+ event, skb->len, ev->max_len);
data = hci_ev_skb_pull(hdev, skb, event, ev->min_len);
if (!data)
@@ -6851,6 +7486,9 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
goto done;
}
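+ /* Cache the latest event for later inspection, e.g. via hci_recv_event_data() */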
+ kfree_skb(hdev->recv_event);
+ hdev->recv_event = skb_clone(skb, GFP_KERNEL);
+
event = hdr->evt;
if (!event) {
bt_dev_warn(hdev, "Received unexpected HCI Event 0x%2.2x",
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 42c8047a9897..5a0296a4352e 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -261,7 +261,7 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
if (skb_queue_empty(&req->cmd_q))
bt_cb(skb)->hci.req_flags |= HCI_REQ_START;
- bt_cb(skb)->hci.req_event = event;
+ hci_skb_event(skb) = event;
skb_queue_tail(&req->cmd_q, skb);
}
@@ -269,42 +269,10 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
void hci_req_add(struct hci_request *req, u16 opcode, u32 plen,
const void *param)
{
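+ /* Log any remaining user of HCI requests while the machinery is phased out */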
+ bt_dev_err(req->hdev, "HCI_REQ-0x%4.4x", opcode);
hci_req_add_ev(req, opcode, plen, param, 0);
}
-void __hci_req_write_fast_connectable(struct hci_request *req, bool enable)
-{
- struct hci_dev *hdev = req->hdev;
- struct hci_cp_write_page_scan_activity acp;
- u8 type;
-
- if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
- return;
-
- if (hdev->hci_ver < BLUETOOTH_VER_1_2)
- return;
-
- if (enable) {
- type = PAGE_SCAN_TYPE_INTERLACED;
-
- /* 160 msec page scan interval */
- acp.interval = cpu_to_le16(0x0100);
- } else {
- type = hdev->def_page_scan_type;
- acp.interval = cpu_to_le16(hdev->def_page_scan_int);
- }
-
- acp.window = cpu_to_le16(hdev->def_page_scan_window);
-
- if (__cpu_to_le16(hdev->page_scan_interval) != acp.interval ||
- __cpu_to_le16(hdev->page_scan_window) != acp.window)
- hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY,
- sizeof(acp), &acp);
-
- if (hdev->page_scan_type != type)
- hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type);
-}
-
static void start_interleave_scan(struct hci_dev *hdev)
{
hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER;
@@ -357,45 +325,6 @@ static bool __hci_update_interleaved_scan(struct hci_dev *hdev)
return false;
}
-void __hci_req_update_name(struct hci_request *req)
-{
- struct hci_dev *hdev = req->hdev;
- struct hci_cp_write_local_name cp;
-
- memcpy(cp.name, hdev->dev_name, sizeof(cp.name));
-
- hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp);
-}
-
-void __hci_req_update_eir(struct hci_request *req)
-{
- struct hci_dev *hdev = req->hdev;
- struct hci_cp_write_eir cp;
-
- if (!hdev_is_powered(hdev))
- return;
-
- if (!lmp_ext_inq_capable(hdev))
- return;
-
- if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED))
- return;
-
- if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE))
- return;
-
- memset(&cp, 0, sizeof(cp));
-
- eir_create(hdev, cp.data);
-
- if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0)
- return;
-
- memcpy(hdev->eir, cp.data, sizeof(cp.data));
-
- hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
-}
-
void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn)
{
struct hci_dev *hdev = req->hdev;
@@ -482,7 +411,7 @@ static int add_to_accept_list(struct hci_request *req,
/* During suspend, only wakeable devices can be in accept list */
if (hdev->suspended &&
- !test_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, params->flags))
+ !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP))
return 0;
*num_entries += 1;
@@ -721,6 +650,96 @@ static inline bool hci_is_le_conn_scanning(struct hci_dev *hdev)
return false;
}
+static void set_random_addr(struct hci_request *req, bdaddr_t *rpa);
+static int hci_update_random_address(struct hci_request *req,
+ bool require_privacy, bool use_rpa,
+ u8 *own_addr_type)
+{
+ struct hci_dev *hdev = req->hdev;
+ int err;
+
+ /* If privacy is enabled use a resolvable private address. If
+ * current RPA has expired or there is something else than
+ * the current RPA in use, then generate a new one.
+ */
+ if (use_rpa) {
+ /* If the controller supports LL Privacy, use own address type
+ * 0x03 (ADDR_LE_DEV_RANDOM_RESOLVED)
+ */
+ if (use_ll_privacy(hdev))
+ *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED;
+ else
+ *own_addr_type = ADDR_LE_DEV_RANDOM;
+
+ if (rpa_valid(hdev))
+ return 0;
+
+ err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa);
+ if (err < 0) {
+ bt_dev_err(hdev, "failed to generate new RPA");
+ return err;
+ }
+
+ set_random_addr(req, &hdev->rpa);
+
+ return 0;
+ }
+
+ /* In case of required privacy without resolvable private address,
+ * use a non-resolvable private address. This is useful for active
+ * scanning and non-connectable advertising.
+ */
+ if (require_privacy) {
+ bdaddr_t nrpa;
+
+ while (true) {
+ /* The non-resolvable private address is generated
+ * from random six bytes with the two most significant
+ * bits cleared.
+ */
+ get_random_bytes(&nrpa, 6);
+ nrpa.b[5] &= 0x3f;
+
+ /* The non-resolvable private address shall not be
+ * equal to the public address.
+ */
+ if (bacmp(&hdev->bdaddr, &nrpa))
+ break;
+ }
+
+ *own_addr_type = ADDR_LE_DEV_RANDOM;
+ set_random_addr(req, &nrpa);
+ return 0;
+ }
+
+ /* If forcing static address is in use or there is no public
+ * address use the static address as random address (but skip
+ * the HCI command if the current random address is already the
+ * static one).
+ *
+ * In case BR/EDR has been disabled on a dual-mode controller
+ * and a static address has been configured, then use that
+ * address instead of the public BR/EDR address.
+ */
+ if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) ||
+ !bacmp(&hdev->bdaddr, BDADDR_ANY) ||
+ (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) &&
+ bacmp(&hdev->static_addr, BDADDR_ANY))) {
+ *own_addr_type = ADDR_LE_DEV_RANDOM;
+ if (bacmp(&hdev->static_addr, &hdev->random_addr))
+ hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6,
+ &hdev->static_addr);
+ return 0;
+ }
+
+ /* Neither privacy nor static address is being used so use a
+ * public address.
+ */
+ *own_addr_type = ADDR_LE_DEV_PUBLIC;
+
+ return 0;
+}
+
/* Ensure to call hci_req_add_le_scan_disable() first to disable the
* controller based address resolution to be able to reconfigure
* resolving list.
@@ -810,367 +829,6 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
addr_resolv);
}
-static void cancel_adv_timeout(struct hci_dev *hdev)
-{
- if (hdev->adv_instance_timeout) {
- hdev->adv_instance_timeout = 0;
- cancel_delayed_work(&hdev->adv_instance_expire);
- }
-}
-
-static bool adv_cur_instance_is_scannable(struct hci_dev *hdev)
-{
- return hci_adv_instance_is_scannable(hdev, hdev->cur_adv_instance);
-}
-
-void __hci_req_disable_advertising(struct hci_request *req)
-{
- if (ext_adv_capable(req->hdev)) {
- __hci_req_disable_ext_adv_instance(req, 0x00);
-
- } else {
- u8 enable = 0x00;
-
- hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
- }
-}
-
-static bool adv_use_rpa(struct hci_dev *hdev, uint32_t flags)
-{
- /* If privacy is not enabled don't use RPA */
- if (!hci_dev_test_flag(hdev, HCI_PRIVACY))
- return false;
-
- /* If basic privacy mode is enabled use RPA */
- if (!hci_dev_test_flag(hdev, HCI_LIMITED_PRIVACY))
- return true;
-
- /* If limited privacy mode is enabled don't use RPA if we're
- * both discoverable and bondable.
- */
- if ((flags & MGMT_ADV_FLAG_DISCOV) &&
- hci_dev_test_flag(hdev, HCI_BONDABLE))
- return false;
-
- /* We're neither bondable nor discoverable in the limited
- * privacy mode, therefore use RPA.
- */
- return true;
-}
-
-static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
-{
- /* If there is no connection we are OK to advertise. */
- if (hci_conn_num(hdev, LE_LINK) == 0)
- return true;
-
- /* Check le_states if there is any connection in peripheral role. */
- if (hdev->conn_hash.le_num_peripheral > 0) {
- /* Peripheral connection state and non connectable mode bit 20.
- */
- if (!connectable && !(hdev->le_states[2] & 0x10))
- return false;
-
- /* Peripheral connection state and connectable mode bit 38
- * and scannable bit 21.
- */
- if (connectable && (!(hdev->le_states[4] & 0x40) ||
- !(hdev->le_states[2] & 0x20)))
- return false;
- }
-
- /* Check le_states if there is any connection in central role. */
- if (hci_conn_num(hdev, LE_LINK) != hdev->conn_hash.le_num_peripheral) {
- /* Central connection state and non connectable mode bit 18. */
- if (!connectable && !(hdev->le_states[2] & 0x02))
- return false;
-
- /* Central connection state and connectable mode bit 35 and
- * scannable 19.
- */
- if (connectable && (!(hdev->le_states[4] & 0x08) ||
- !(hdev->le_states[2] & 0x08)))
- return false;
- }
-
- return true;
-}
-
-void __hci_req_enable_advertising(struct hci_request *req)
-{
- struct hci_dev *hdev = req->hdev;
- struct adv_info *adv;
- struct hci_cp_le_set_adv_param cp;
- u8 own_addr_type, enable = 0x01;
- bool connectable;
- u16 adv_min_interval, adv_max_interval;
- u32 flags;
-
- flags = hci_adv_instance_flags(hdev, hdev->cur_adv_instance);
- adv = hci_find_adv_instance(hdev, hdev->cur_adv_instance);
-
- /* If the "connectable" instance flag was not set, then choose between
- * ADV_IND and ADV_NONCONN_IND based on the global connectable setting.
- */
- connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) ||
- mgmt_get_connectable(hdev);
-
- if (!is_advertising_allowed(hdev, connectable))
- return;
-
- if (hci_dev_test_flag(hdev, HCI_LE_ADV))
- __hci_req_disable_advertising(req);
-
- /* Clear the HCI_LE_ADV bit temporarily so that the
- * hci_update_random_address knows that it's safe to go ahead
- * and write a new random address. The flag will be set back on
- * as soon as the SET_ADV_ENABLE HCI command completes.
- */
- hci_dev_clear_flag(hdev, HCI_LE_ADV);
-
- /* Set require_privacy to true only when non-connectable
- * advertising is used. In that case it is fine to use a
- * non-resolvable private address.
- */
- if (hci_update_random_address(req, !connectable,
- adv_use_rpa(hdev, flags),
- &own_addr_type) < 0)
- return;
-
- memset(&cp, 0, sizeof(cp));
-
- if (adv) {
- adv_min_interval = adv->min_interval;
- adv_max_interval = adv->max_interval;
- } else {
- adv_min_interval = hdev->le_adv_min_interval;
- adv_max_interval = hdev->le_adv_max_interval;
- }
-
- if (connectable) {
- cp.type = LE_ADV_IND;
- } else {
- if (adv_cur_instance_is_scannable(hdev))
- cp.type = LE_ADV_SCAN_IND;
- else
- cp.type = LE_ADV_NONCONN_IND;
-
- if (!hci_dev_test_flag(hdev, HCI_DISCOVERABLE) ||
- hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) {
- adv_min_interval = DISCOV_LE_FAST_ADV_INT_MIN;
- adv_max_interval = DISCOV_LE_FAST_ADV_INT_MAX;
- }
- }
-
- cp.min_interval = cpu_to_le16(adv_min_interval);
- cp.max_interval = cpu_to_le16(adv_max_interval);
- cp.own_address_type = own_addr_type;
- cp.channel_map = hdev->le_adv_channel_map;
-
- hci_req_add(req, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp);
-
- hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
-}
-
-void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance)
-{
- struct hci_dev *hdev = req->hdev;
- u8 len;
-
- if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
- return;
-
- if (ext_adv_capable(hdev)) {
- struct {
- struct hci_cp_le_set_ext_scan_rsp_data cp;
- u8 data[HCI_MAX_EXT_AD_LENGTH];
- } pdu;
-
- memset(&pdu, 0, sizeof(pdu));
-
- len = eir_create_scan_rsp(hdev, instance, pdu.data);
-
- if (hdev->scan_rsp_data_len == len &&
- !memcmp(pdu.data, hdev->scan_rsp_data, len))
- return;
-
- memcpy(hdev->scan_rsp_data, pdu.data, len);
- hdev->scan_rsp_data_len = len;
-
- pdu.cp.handle = instance;
- pdu.cp.length = len;
- pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
- pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
-
- hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_RSP_DATA,
- sizeof(pdu.cp) + len, &pdu.cp);
- } else {
- struct hci_cp_le_set_scan_rsp_data cp;
-
- memset(&cp, 0, sizeof(cp));
-
- len = eir_create_scan_rsp(hdev, instance, cp.data);
-
- if (hdev->scan_rsp_data_len == len &&
- !memcmp(cp.data, hdev->scan_rsp_data, len))
- return;
-
- memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data));
- hdev->scan_rsp_data_len = len;
-
- cp.length = len;
-
- hci_req_add(req, HCI_OP_LE_SET_SCAN_RSP_DATA, sizeof(cp), &cp);
- }
-}
-
-void __hci_req_update_adv_data(struct hci_request *req, u8 instance)
-{
- struct hci_dev *hdev = req->hdev;
- u8 len;
-
- if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
- return;
-
- if (ext_adv_capable(hdev)) {
- struct {
- struct hci_cp_le_set_ext_adv_data cp;
- u8 data[HCI_MAX_EXT_AD_LENGTH];
- } pdu;
-
- memset(&pdu, 0, sizeof(pdu));
-
- len = eir_create_adv_data(hdev, instance, pdu.data);
-
- /* There's nothing to do if the data hasn't changed */
- if (hdev->adv_data_len == len &&
- memcmp(pdu.data, hdev->adv_data, len) == 0)
- return;
-
- memcpy(hdev->adv_data, pdu.data, len);
- hdev->adv_data_len = len;
-
- pdu.cp.length = len;
- pdu.cp.handle = instance;
- pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
- pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
-
- hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_DATA,
- sizeof(pdu.cp) + len, &pdu.cp);
- } else {
- struct hci_cp_le_set_adv_data cp;
-
- memset(&cp, 0, sizeof(cp));
-
- len = eir_create_adv_data(hdev, instance, cp.data);
-
- /* There's nothing to do if the data hasn't changed */
- if (hdev->adv_data_len == len &&
- memcmp(cp.data, hdev->adv_data, len) == 0)
- return;
-
- memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
- hdev->adv_data_len = len;
-
- cp.length = len;
-
- hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp);
- }
-}
-
-int hci_req_update_adv_data(struct hci_dev *hdev, u8 instance)
-{
- struct hci_request req;
-
- hci_req_init(&req, hdev);
- __hci_req_update_adv_data(&req, instance);
-
- return hci_req_run(&req, NULL);
-}
-
-static void enable_addr_resolution_complete(struct hci_dev *hdev, u8 status,
- u16 opcode)
-{
- BT_DBG("%s status %u", hdev->name, status);
-}
-
-void hci_req_disable_address_resolution(struct hci_dev *hdev)
-{
- struct hci_request req;
- __u8 enable = 0x00;
-
- if (!hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION))
- return;
-
- hci_req_init(&req, hdev);
-
- hci_req_add(&req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable);
-
- hci_req_run(&req, enable_addr_resolution_complete);
-}
-
-static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode)
-{
- bt_dev_dbg(hdev, "status %u", status);
-}
-
-void hci_req_reenable_advertising(struct hci_dev *hdev)
-{
- struct hci_request req;
-
- if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) &&
- list_empty(&hdev->adv_instances))
- return;
-
- hci_req_init(&req, hdev);
-
- if (hdev->cur_adv_instance) {
- __hci_req_schedule_adv_instance(&req, hdev->cur_adv_instance,
- true);
- } else {
- if (ext_adv_capable(hdev)) {
- __hci_req_start_ext_adv(&req, 0x00);
- } else {
- __hci_req_update_adv_data(&req, 0x00);
- __hci_req_update_scan_rsp_data(&req, 0x00);
- __hci_req_enable_advertising(&req);
- }
- }
-
- hci_req_run(&req, adv_enable_complete);
-}
-
-static void adv_timeout_expire(struct work_struct *work)
-{
- struct hci_dev *hdev = container_of(work, struct hci_dev,
- adv_instance_expire.work);
-
- struct hci_request req;
- u8 instance;
-
- bt_dev_dbg(hdev, "");
-
- hci_dev_lock(hdev);
-
- hdev->adv_instance_timeout = 0;
-
- instance = hdev->cur_adv_instance;
- if (instance == 0x00)
- goto unlock;
-
- hci_req_init(&req, hdev);
-
- hci_req_clear_adv_instance(hdev, NULL, &req, instance, false);
-
- if (list_empty(&hdev->adv_instances))
- __hci_req_disable_advertising(&req);
-
- hci_req_run(&req, NULL);
-
-unlock:
- hci_dev_unlock(hdev);
-}
-
static int hci_req_add_le_interleaved_scan(struct hci_request *req,
unsigned long opt)
{
@@ -1227,84 +885,6 @@ static void interleave_scan_work(struct work_struct *work)
&hdev->interleave_scan, timeout);
}
-int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
- bool use_rpa, struct adv_info *adv_instance,
- u8 *own_addr_type, bdaddr_t *rand_addr)
-{
- int err;
-
- bacpy(rand_addr, BDADDR_ANY);
-
- /* If privacy is enabled use a resolvable private address. If
- * current RPA has expired then generate a new one.
- */
- if (use_rpa) {
- /* If Controller supports LL Privacy use own address type is
- * 0x03
- */
- if (use_ll_privacy(hdev))
- *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED;
- else
- *own_addr_type = ADDR_LE_DEV_RANDOM;
-
- if (adv_instance) {
- if (adv_rpa_valid(adv_instance))
- return 0;
- } else {
- if (rpa_valid(hdev))
- return 0;
- }
-
- err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa);
- if (err < 0) {
- bt_dev_err(hdev, "failed to generate new RPA");
- return err;
- }
-
- bacpy(rand_addr, &hdev->rpa);
-
- return 0;
- }
-
- /* In case of required privacy without resolvable private address,
- * use an non-resolvable private address. This is useful for
- * non-connectable advertising.
- */
- if (require_privacy) {
- bdaddr_t nrpa;
-
- while (true) {
- /* The non-resolvable private address is generated
- * from random six bytes with the two most significant
- * bits cleared.
- */
- get_random_bytes(&nrpa, 6);
- nrpa.b[5] &= 0x3f;
-
- /* The non-resolvable private address shall not be
- * equal to the public address.
- */
- if (bacmp(&hdev->bdaddr, &nrpa))
- break;
- }
-
- *own_addr_type = ADDR_LE_DEV_RANDOM;
- bacpy(rand_addr, &nrpa);
-
- return 0;
- }
-
- /* No privacy so use a public address. */
- *own_addr_type = ADDR_LE_DEV_PUBLIC;
-
- return 0;
-}
-
-void __hci_req_clear_ext_adv_sets(struct hci_request *req)
-{
- hci_req_add(req, HCI_OP_LE_CLEAR_ADV_SETS, 0, NULL);
-}
-
static void set_random_addr(struct hci_request *req, bdaddr_t *rpa)
{
struct hci_dev *hdev = req->hdev;
@@ -1329,1314 +909,8 @@ static void set_random_addr(struct hci_request *req, bdaddr_t *rpa)
hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa);
}
-int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
-{
- struct hci_cp_le_set_ext_adv_params cp;
- struct hci_dev *hdev = req->hdev;
- bool connectable;
- u32 flags;
- bdaddr_t random_addr;
- u8 own_addr_type;
- int err;
- struct adv_info *adv_instance;
- bool secondary_adv;
-
- if (instance > 0) {
- adv_instance = hci_find_adv_instance(hdev, instance);
- if (!adv_instance)
- return -EINVAL;
- } else {
- adv_instance = NULL;
- }
-
- flags = hci_adv_instance_flags(hdev, instance);
-
- /* If the "connectable" instance flag was not set, then choose between
- * ADV_IND and ADV_NONCONN_IND based on the global connectable setting.
- */
- connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) ||
- mgmt_get_connectable(hdev);
-
- if (!is_advertising_allowed(hdev, connectable))
- return -EPERM;
-
- /* Set require_privacy to true only when non-connectable
- * advertising is used. In that case it is fine to use a
- * non-resolvable private address.
- */
- err = hci_get_random_address(hdev, !connectable,
- adv_use_rpa(hdev, flags), adv_instance,
- &own_addr_type, &random_addr);
- if (err < 0)
- return err;
-
- memset(&cp, 0, sizeof(cp));
-
- if (adv_instance) {
- hci_cpu_to_le24(adv_instance->min_interval, cp.min_interval);
- hci_cpu_to_le24(adv_instance->max_interval, cp.max_interval);
- cp.tx_power = adv_instance->tx_power;
- } else {
- hci_cpu_to_le24(hdev->le_adv_min_interval, cp.min_interval);
- hci_cpu_to_le24(hdev->le_adv_max_interval, cp.max_interval);
- cp.tx_power = HCI_ADV_TX_POWER_NO_PREFERENCE;
- }
-
- secondary_adv = (flags & MGMT_ADV_FLAG_SEC_MASK);
-
- if (connectable) {
- if (secondary_adv)
- cp.evt_properties = cpu_to_le16(LE_EXT_ADV_CONN_IND);
- else
- cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_IND);
- } else if (hci_adv_instance_is_scannable(hdev, instance) ||
- (flags & MGMT_ADV_PARAM_SCAN_RSP)) {
- if (secondary_adv)
- cp.evt_properties = cpu_to_le16(LE_EXT_ADV_SCAN_IND);
- else
- cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_SCAN_IND);
- } else {
- if (secondary_adv)
- cp.evt_properties = cpu_to_le16(LE_EXT_ADV_NON_CONN_IND);
- else
- cp.evt_properties = cpu_to_le16(LE_LEGACY_NONCONN_IND);
- }
-
- cp.own_addr_type = own_addr_type;
- cp.channel_map = hdev->le_adv_channel_map;
- cp.handle = instance;
-
- if (flags & MGMT_ADV_FLAG_SEC_2M) {
- cp.primary_phy = HCI_ADV_PHY_1M;
- cp.secondary_phy = HCI_ADV_PHY_2M;
- } else if (flags & MGMT_ADV_FLAG_SEC_CODED) {
- cp.primary_phy = HCI_ADV_PHY_CODED;
- cp.secondary_phy = HCI_ADV_PHY_CODED;
- } else {
- /* In all other cases use 1M */
- cp.primary_phy = HCI_ADV_PHY_1M;
- cp.secondary_phy = HCI_ADV_PHY_1M;
- }
-
- hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(cp), &cp);
-
- if ((own_addr_type == ADDR_LE_DEV_RANDOM ||
- own_addr_type == ADDR_LE_DEV_RANDOM_RESOLVED) &&
- bacmp(&random_addr, BDADDR_ANY)) {
- struct hci_cp_le_set_adv_set_rand_addr cp;
-
- /* Check if random address need to be updated */
- if (adv_instance) {
- if (!bacmp(&random_addr, &adv_instance->random_addr))
- return 0;
- } else {
- if (!bacmp(&random_addr, &hdev->random_addr))
- return 0;
- /* Instance 0x00 doesn't have an adv_info, instead it
- * uses hdev->random_addr to track its address so
- * whenever it needs to be updated this also set the
- * random address since hdev->random_addr is shared with
- * scan state machine.
- */
- set_random_addr(req, &random_addr);
- }
-
- memset(&cp, 0, sizeof(cp));
-
- cp.handle = instance;
- bacpy(&cp.bdaddr, &random_addr);
-
- hci_req_add(req,
- HCI_OP_LE_SET_ADV_SET_RAND_ADDR,
- sizeof(cp), &cp);
- }
-
- return 0;
-}
-
-int __hci_req_enable_ext_advertising(struct hci_request *req, u8 instance)
-{
- struct hci_dev *hdev = req->hdev;
- struct hci_cp_le_set_ext_adv_enable *cp;
- struct hci_cp_ext_adv_set *adv_set;
- u8 data[sizeof(*cp) + sizeof(*adv_set) * 1];
- struct adv_info *adv_instance;
-
- if (instance > 0) {
- adv_instance = hci_find_adv_instance(hdev, instance);
- if (!adv_instance)
- return -EINVAL;
- } else {
- adv_instance = NULL;
- }
-
- cp = (void *) data;
- adv_set = (void *) cp->data;
-
- memset(cp, 0, sizeof(*cp));
-
- cp->enable = 0x01;
- cp->num_of_sets = 0x01;
-
- memset(adv_set, 0, sizeof(*adv_set));
-
- adv_set->handle = instance;
-
- /* Set duration per instance since controller is responsible for
- * scheduling it.
- */
- if (adv_instance && adv_instance->duration) {
- u16 duration = adv_instance->timeout * MSEC_PER_SEC;
-
- /* Time = N * 10 ms */
- adv_set->duration = cpu_to_le16(duration / 10);
- }
-
- hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_ENABLE,
- sizeof(*cp) + sizeof(*adv_set) * cp->num_of_sets,
- data);
-
- return 0;
-}
-
-int __hci_req_disable_ext_adv_instance(struct hci_request *req, u8 instance)
-{
- struct hci_dev *hdev = req->hdev;
- struct hci_cp_le_set_ext_adv_enable *cp;
- struct hci_cp_ext_adv_set *adv_set;
- u8 data[sizeof(*cp) + sizeof(*adv_set) * 1];
- u8 req_size;
-
- /* If request specifies an instance that doesn't exist, fail */
- if (instance > 0 && !hci_find_adv_instance(hdev, instance))
- return -EINVAL;
-
- memset(data, 0, sizeof(data));
-
- cp = (void *)data;
- adv_set = (void *)cp->data;
-
- /* Instance 0x00 indicates all advertising instances will be disabled */
- cp->num_of_sets = !!instance;
- cp->enable = 0x00;
-
- adv_set->handle = instance;
-
- req_size = sizeof(*cp) + sizeof(*adv_set) * cp->num_of_sets;
- hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_ENABLE, req_size, data);
-
- return 0;
-}
-
-int __hci_req_remove_ext_adv_instance(struct hci_request *req, u8 instance)
-{
- struct hci_dev *hdev = req->hdev;
-
- /* If request specifies an instance that doesn't exist, fail */
- if (instance > 0 && !hci_find_adv_instance(hdev, instance))
- return -EINVAL;
-
- hci_req_add(req, HCI_OP_LE_REMOVE_ADV_SET, sizeof(instance), &instance);
-
- return 0;
-}
-
-int __hci_req_start_ext_adv(struct hci_request *req, u8 instance)
-{
- struct hci_dev *hdev = req->hdev;
- struct adv_info *adv_instance = hci_find_adv_instance(hdev, instance);
- int err;
-
- /* If instance isn't pending, the chip knows about it, and it's safe to
- * disable
- */
- if (adv_instance && !adv_instance->pending)
- __hci_req_disable_ext_adv_instance(req, instance);
-
- err = __hci_req_setup_ext_adv_instance(req, instance);
- if (err < 0)
- return err;
-
- __hci_req_update_scan_rsp_data(req, instance);
- __hci_req_enable_ext_advertising(req, instance);
-
- return 0;
-}
-
-int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance,
- bool force)
-{
- struct hci_dev *hdev = req->hdev;
- struct adv_info *adv_instance = NULL;
- u16 timeout;
-
- if (hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
- list_empty(&hdev->adv_instances))
- return -EPERM;
-
- if (hdev->adv_instance_timeout)
- return -EBUSY;
-
- adv_instance = hci_find_adv_instance(hdev, instance);
- if (!adv_instance)
- return -ENOENT;
-
- /* A zero timeout means unlimited advertising. As long as there is
- * only one instance, duration should be ignored. We still set a timeout
- * in case further instances are being added later on.
- *
- * If the remaining lifetime of the instance is more than the duration
- * then the timeout corresponds to the duration, otherwise it will be
- * reduced to the remaining instance lifetime.
- */
- if (adv_instance->timeout == 0 ||
- adv_instance->duration <= adv_instance->remaining_time)
- timeout = adv_instance->duration;
- else
- timeout = adv_instance->remaining_time;
-
- /* The remaining time is being reduced unless the instance is being
- * advertised without time limit.
- */
- if (adv_instance->timeout)
- adv_instance->remaining_time =
- adv_instance->remaining_time - timeout;
-
- /* Only use work for scheduling instances with legacy advertising */
- if (!ext_adv_capable(hdev)) {
- hdev->adv_instance_timeout = timeout;
- queue_delayed_work(hdev->req_workqueue,
- &hdev->adv_instance_expire,
- msecs_to_jiffies(timeout * 1000));
- }
-
- /* If we're just re-scheduling the same instance again then do not
- * execute any HCI commands. This happens when a single instance is
- * being advertised.
- */
- if (!force && hdev->cur_adv_instance == instance &&
- hci_dev_test_flag(hdev, HCI_LE_ADV))
- return 0;
-
- hdev->cur_adv_instance = instance;
- if (ext_adv_capable(hdev)) {
- __hci_req_start_ext_adv(req, instance);
- } else {
- __hci_req_update_adv_data(req, instance);
- __hci_req_update_scan_rsp_data(req, instance);
- __hci_req_enable_advertising(req);
- }
-
- return 0;
-}
-
-/* For a single instance:
- * - force == true: The instance will be removed even when its remaining
- * lifetime is not zero.
- * - force == false: the instance will be deactivated but kept stored unless
- * the remaining lifetime is zero.
- *
- * For instance == 0x00:
- * - force == true: All instances will be removed regardless of their timeout
- * setting.
- * - force == false: Only instances that have a timeout will be removed.
- */
-void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk,
- struct hci_request *req, u8 instance,
- bool force)
-{
- struct adv_info *adv_instance, *n, *next_instance = NULL;
- int err;
- u8 rem_inst;
-
- /* Cancel any timeout concerning the removed instance(s). */
- if (!instance || hdev->cur_adv_instance == instance)
- cancel_adv_timeout(hdev);
-
- /* Get the next instance to advertise BEFORE we remove
- * the current one. This can be the same instance again
- * if there is only one instance.
- */
- if (instance && hdev->cur_adv_instance == instance)
- next_instance = hci_get_next_instance(hdev, instance);
-
- if (instance == 0x00) {
- list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances,
- list) {
- if (!(force || adv_instance->timeout))
- continue;
-
- rem_inst = adv_instance->instance;
- err = hci_remove_adv_instance(hdev, rem_inst);
- if (!err)
- mgmt_advertising_removed(sk, hdev, rem_inst);
- }
- } else {
- adv_instance = hci_find_adv_instance(hdev, instance);
-
- if (force || (adv_instance && adv_instance->timeout &&
- !adv_instance->remaining_time)) {
- /* Don't advertise a removed instance. */
- if (next_instance &&
- next_instance->instance == instance)
- next_instance = NULL;
-
- err = hci_remove_adv_instance(hdev, instance);
- if (!err)
- mgmt_advertising_removed(sk, hdev, instance);
- }
- }
-
- if (!req || !hdev_is_powered(hdev) ||
- hci_dev_test_flag(hdev, HCI_ADVERTISING))
- return;
-
- if (next_instance && !ext_adv_capable(hdev))
- __hci_req_schedule_adv_instance(req, next_instance->instance,
- false);
-}
-
-int hci_update_random_address(struct hci_request *req, bool require_privacy,
- bool use_rpa, u8 *own_addr_type)
-{
- struct hci_dev *hdev = req->hdev;
- int err;
-
- /* If privacy is enabled use a resolvable private address. If the
- * current RPA has expired or something other than the current RPA
- * is in use, then generate a new one.
- */
- if (use_rpa) {
- /* If the controller supports LL Privacy, use own address
- * type 0x03
- */
- if (use_ll_privacy(hdev))
- *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED;
- else
- *own_addr_type = ADDR_LE_DEV_RANDOM;
-
- if (rpa_valid(hdev))
- return 0;
-
- err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa);
- if (err < 0) {
- bt_dev_err(hdev, "failed to generate new RPA");
- return err;
- }
-
- set_random_addr(req, &hdev->rpa);
-
- return 0;
- }
-
- /* In case of required privacy without resolvable private address,
- * use a non-resolvable private address. This is useful for active
- * scanning and non-connectable advertising.
- */
- if (require_privacy) {
- bdaddr_t nrpa;
-
- while (true) {
- /* The non-resolvable private address is generated
- * from random six bytes with the two most significant
- * bits cleared.
- */
- get_random_bytes(&nrpa, 6);
- nrpa.b[5] &= 0x3f;
-
- /* The non-resolvable private address shall not be
- * equal to the public address.
- */
- if (bacmp(&hdev->bdaddr, &nrpa))
- break;
- }
-
- *own_addr_type = ADDR_LE_DEV_RANDOM;
- set_random_addr(req, &nrpa);
- return 0;
- }
-
- /* If forcing static address is in use or there is no public
- * address, use the static address as the random address (but skip
- * the HCI command if the current random address is already the
- * static one).
- *
- * In case BR/EDR has been disabled on a dual-mode controller
- * and a static address has been configured, then use that
- * address instead of the public BR/EDR address.
- */
- if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) ||
- !bacmp(&hdev->bdaddr, BDADDR_ANY) ||
- (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) &&
- bacmp(&hdev->static_addr, BDADDR_ANY))) {
- *own_addr_type = ADDR_LE_DEV_RANDOM;
- if (bacmp(&hdev->static_addr, &hdev->random_addr))
- hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6,
- &hdev->static_addr);
- return 0;
- }
-
- /* Neither privacy nor static address is being used so use a
- * public address.
- */
- *own_addr_type = ADDR_LE_DEV_PUBLIC;
-
- return 0;
-}
-
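The non-resolvable private address rules encoded above (two most significant bits cleared, result must differ from the public address) can be exercised outside the kernel. A minimal userspace sketch, with rand_bytes() as a stand-in for get_random_bytes() and the little-endian byte order of bdaddr_t assumed:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* bdaddr stored little-endian as in the kernel's bdaddr_t: b[5] is the MSB */
struct bdaddr { uint8_t b[6]; };

/* Stand-in for get_random_bytes(); any entropy source works for the sketch */
static void rand_bytes(uint8_t *buf, size_t len)
{
	while (len--)
		*buf++ = rand() & 0xff;
}

/* Generate an NRPA: two most significant bits cleared, != public address */
static void gen_nrpa(const struct bdaddr *pub, struct bdaddr *nrpa)
{
	do {
		rand_bytes(nrpa->b, sizeof(nrpa->b));
		nrpa->b[5] &= 0x3f;	/* clear the two most significant bits */
	} while (!memcmp(pub->b, nrpa->b, sizeof(nrpa->b)));
}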
-static bool disconnected_accept_list_entries(struct hci_dev *hdev)
-{
- struct bdaddr_list *b;
-
- list_for_each_entry(b, &hdev->accept_list, list) {
- struct hci_conn *conn;
-
- conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &b->bdaddr);
- if (!conn)
- return true;
-
- if (conn->state != BT_CONNECTED && conn->state != BT_CONFIG)
- return true;
- }
-
- return false;
-}
-
-void __hci_req_update_scan(struct hci_request *req)
-{
- struct hci_dev *hdev = req->hdev;
- u8 scan;
-
- if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
- return;
-
- if (!hdev_is_powered(hdev))
- return;
-
- if (mgmt_powering_down(hdev))
- return;
-
- if (hdev->scanning_paused)
- return;
-
- if (hci_dev_test_flag(hdev, HCI_CONNECTABLE) ||
- disconnected_accept_list_entries(hdev))
- scan = SCAN_PAGE;
- else
- scan = SCAN_DISABLED;
-
- if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE))
- scan |= SCAN_INQUIRY;
-
- if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE) &&
- test_bit(HCI_ISCAN, &hdev->flags) == !!(scan & SCAN_INQUIRY))
- return;
-
- hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
-}
-
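The scan byte built above is the Write Scan Enable parameter, where inquiry scan and page scan are individual bits. A small sketch of the same decision, assuming the standard HCI values the kernel defines (SCAN_INQUIRY = 0x01, SCAN_PAGE = 0x02):

#include <stdbool.h>
#include <stdint.h>

/* Standard HCI Write Scan Enable bits, matching the kernel's definitions */
#define SCAN_DISABLED	0x00
#define SCAN_INQUIRY	0x01	/* bit 0: inquiry scan (discoverable) */
#define SCAN_PAGE	0x02	/* bit 1: page scan (connectable) */

static uint8_t scan_mode(bool connectable, bool discoverable)
{
	uint8_t scan = connectable ? SCAN_PAGE : SCAN_DISABLED;

	if (discoverable)
		scan |= SCAN_INQUIRY;

	return scan;
}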
-static int update_scan(struct hci_request *req, unsigned long opt)
-{
- hci_dev_lock(req->hdev);
- __hci_req_update_scan(req);
- hci_dev_unlock(req->hdev);
- return 0;
-}
-
-static void scan_update_work(struct work_struct *work)
-{
- struct hci_dev *hdev = container_of(work, struct hci_dev, scan_update);
-
- hci_req_sync(hdev, update_scan, 0, HCI_CMD_TIMEOUT, NULL);
-}
-
-static u8 get_service_classes(struct hci_dev *hdev)
-{
- struct bt_uuid *uuid;
- u8 val = 0;
-
- list_for_each_entry(uuid, &hdev->uuids, list)
- val |= uuid->svc_hint;
-
- return val;
-}
-
-void __hci_req_update_class(struct hci_request *req)
-{
- struct hci_dev *hdev = req->hdev;
- u8 cod[3];
-
- bt_dev_dbg(hdev, "");
-
- if (!hdev_is_powered(hdev))
- return;
-
- if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
- return;
-
- if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE))
- return;
-
- cod[0] = hdev->minor_class;
- cod[1] = hdev->major_class;
- cod[2] = get_service_classes(hdev);
-
- if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE))
- cod[1] |= 0x20;
-
- if (memcmp(cod, hdev->dev_class, 3) == 0)
- return;
-
- hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod);
-}
-
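The three cod bytes above form the 24-bit Class of Device: minor class, major class, then service-class hints, with 0x20 in the middle byte corresponding to CoD bit 13, the Limited Discoverable Mode flag. A standalone sketch of the packing:

#include <stdbool.h>
#include <stdint.h>

/* Pack the 24-bit Class of Device the way __hci_req_update_class() fills
 * cod[]: byte 0 = minor class, byte 1 = major class, byte 2 = service-class
 * hints.  0x20 in byte 1 is CoD bit 13, the Limited Discoverable Mode flag.
 */
static uint32_t pack_cod(uint8_t minor, uint8_t major, uint8_t services,
			 bool limited)
{
	uint8_t cod[3] = { minor, major, services };

	if (limited)
		cod[1] |= 0x20;

	return cod[0] | ((uint32_t)cod[1] << 8) | ((uint32_t)cod[2] << 16);
}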
-static void write_iac(struct hci_request *req)
-{
- struct hci_dev *hdev = req->hdev;
- struct hci_cp_write_current_iac_lap cp;
-
- if (!hci_dev_test_flag(hdev, HCI_DISCOVERABLE))
- return;
-
- if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) {
- /* Limited discoverable mode */
- cp.num_iac = min_t(u8, hdev->num_iac, 2);
- cp.iac_lap[0] = 0x00; /* LIAC */
- cp.iac_lap[1] = 0x8b;
- cp.iac_lap[2] = 0x9e;
- cp.iac_lap[3] = 0x33; /* GIAC */
- cp.iac_lap[4] = 0x8b;
- cp.iac_lap[5] = 0x9e;
- } else {
- /* General discoverable mode */
- cp.num_iac = 1;
- cp.iac_lap[0] = 0x33; /* GIAC */
- cp.iac_lap[1] = 0x8b;
- cp.iac_lap[2] = 0x9e;
- }
-
- hci_req_add(req, HCI_OP_WRITE_CURRENT_IAC_LAP,
- (cp.num_iac * 3) + 1, &cp);
-}
-
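The hard-coded bytes above are the 24-bit inquiry access code LAPs in little-endian order: GIAC = 0x9e8b33 and LIAC = 0x9e8b00. A small sketch of the byte layout, should the values need to be derived rather than spelled out:

#include <stdint.h>

#define GIAC	0x9e8b33	/* General Inquiry Access Code LAP */
#define LIAC	0x9e8b00	/* Limited Inquiry Access Code LAP */

/* Split a 24-bit LAP into the little-endian byte order used by
 * HCI_OP_WRITE_CURRENT_IAC_LAP, e.g. GIAC -> { 0x33, 0x8b, 0x9e }.
 */
static void lap_to_bytes(uint32_t lap, uint8_t out[3])
{
	out[0] = lap & 0xff;
	out[1] = (lap >> 8) & 0xff;
	out[2] = (lap >> 16) & 0xff;
}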
-static int discoverable_update(struct hci_request *req, unsigned long opt)
-{
- struct hci_dev *hdev = req->hdev;
-
- hci_dev_lock(hdev);
-
- if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
- write_iac(req);
- __hci_req_update_scan(req);
- __hci_req_update_class(req);
- }
-
- /* Advertising instances don't use the global discoverable setting, so
- * only update AD if advertising was enabled using Set Advertising.
- */
- if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) {
- __hci_req_update_adv_data(req, 0x00);
-
- /* Discoverable mode affects the local advertising
- * address in limited privacy mode.
- */
- if (hci_dev_test_flag(hdev, HCI_LIMITED_PRIVACY)) {
- if (ext_adv_capable(hdev))
- __hci_req_start_ext_adv(req, 0x00);
- else
- __hci_req_enable_advertising(req);
- }
- }
-
- hci_dev_unlock(hdev);
-
- return 0;
-}
-
-void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn,
- u8 reason)
-{
- switch (conn->state) {
- case BT_CONNECTED:
- case BT_CONFIG:
- if (conn->type == AMP_LINK) {
- struct hci_cp_disconn_phy_link cp;
-
- cp.phy_handle = HCI_PHY_HANDLE(conn->handle);
- cp.reason = reason;
- hci_req_add(req, HCI_OP_DISCONN_PHY_LINK, sizeof(cp),
- &cp);
- } else {
- struct hci_cp_disconnect dc;
-
- dc.handle = cpu_to_le16(conn->handle);
- dc.reason = reason;
- hci_req_add(req, HCI_OP_DISCONNECT, sizeof(dc), &dc);
- }
-
- conn->state = BT_DISCONN;
-
- break;
- case BT_CONNECT:
- if (conn->type == LE_LINK) {
- if (test_bit(HCI_CONN_SCANNING, &conn->flags))
- break;
- hci_req_add(req, HCI_OP_LE_CREATE_CONN_CANCEL,
- 0, NULL);
- } else if (conn->type == ACL_LINK) {
- if (req->hdev->hci_ver < BLUETOOTH_VER_1_2)
- break;
- hci_req_add(req, HCI_OP_CREATE_CONN_CANCEL,
- 6, &conn->dst);
- }
- break;
- case BT_CONNECT2:
- if (conn->type == ACL_LINK) {
- struct hci_cp_reject_conn_req rej;
-
- bacpy(&rej.bdaddr, &conn->dst);
- rej.reason = reason;
-
- hci_req_add(req, HCI_OP_REJECT_CONN_REQ,
- sizeof(rej), &rej);
- } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) {
- struct hci_cp_reject_sync_conn_req rej;
-
- bacpy(&rej.bdaddr, &conn->dst);
-
- /* SCO rejection has its own limited set of
- * allowed error values (0x0D-0x0F) which isn't
- * compatible with most values passed to this
- * function. To be safe, hard-code one of the
- * values that is suitable for SCO.
- */
- rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES;
-
- hci_req_add(req, HCI_OP_REJECT_SYNC_CONN_REQ,
- sizeof(rej), &rej);
- }
- break;
- default:
- conn->state = BT_CLOSED;
- break;
- }
-}
-
-static void abort_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode)
-{
- if (status)
- bt_dev_dbg(hdev, "Failed to abort connection: status 0x%2.2x", status);
-}
-
-int hci_abort_conn(struct hci_conn *conn, u8 reason)
-{
- struct hci_request req;
- int err;
-
- hci_req_init(&req, conn->hdev);
-
- __hci_abort_conn(&req, conn, reason);
-
- err = hci_req_run(&req, abort_conn_complete);
- if (err && err != -ENODATA) {
- bt_dev_err(conn->hdev, "failed to run HCI request: err %d", err);
- return err;
- }
-
- return 0;
-}
-
-static int le_scan_disable(struct hci_request *req, unsigned long opt)
-{
- hci_req_add_le_scan_disable(req, false);
- return 0;
-}
-
-static int bredr_inquiry(struct hci_request *req, unsigned long opt)
-{
- u8 length = opt;
- const u8 giac[3] = { 0x33, 0x8b, 0x9e };
- const u8 liac[3] = { 0x00, 0x8b, 0x9e };
- struct hci_cp_inquiry cp;
-
- if (test_bit(HCI_INQUIRY, &req->hdev->flags))
- return 0;
-
- bt_dev_dbg(req->hdev, "");
-
- hci_dev_lock(req->hdev);
- hci_inquiry_cache_flush(req->hdev);
- hci_dev_unlock(req->hdev);
-
- memset(&cp, 0, sizeof(cp));
-
- if (req->hdev->discovery.limited)
- memcpy(&cp.lap, liac, sizeof(cp.lap));
- else
- memcpy(&cp.lap, giac, sizeof(cp.lap));
-
- cp.length = length;
-
- hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp);
-
- return 0;
-}
-
-static void le_scan_disable_work(struct work_struct *work)
-{
- struct hci_dev *hdev = container_of(work, struct hci_dev,
- le_scan_disable.work);
- u8 status;
-
- bt_dev_dbg(hdev, "");
-
- if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
- return;
-
- cancel_delayed_work(&hdev->le_scan_restart);
-
- hci_req_sync(hdev, le_scan_disable, 0, HCI_CMD_TIMEOUT, &status);
- if (status) {
- bt_dev_err(hdev, "failed to disable LE scan: status 0x%02x",
- status);
- return;
- }
-
- hdev->discovery.scan_start = 0;
-
- /* If we were running LE only scan, change discovery state. If
- * we were running both LE and BR/EDR inquiry simultaneously,
- * and BR/EDR inquiry is already finished, stop discovery,
- * otherwise BR/EDR inquiry will stop discovery when finished.
- * If we are resolving a remote device name, do not
- * change the discovery state.
- */
-
- if (hdev->discovery.type == DISCOV_TYPE_LE)
- goto discov_stopped;
-
- if (hdev->discovery.type != DISCOV_TYPE_INTERLEAVED)
- return;
-
- if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) {
- if (!test_bit(HCI_INQUIRY, &hdev->flags) &&
- hdev->discovery.state != DISCOVERY_RESOLVING)
- goto discov_stopped;
-
- return;
- }
-
- hci_req_sync(hdev, bredr_inquiry, DISCOV_INTERLEAVED_INQUIRY_LEN,
- HCI_CMD_TIMEOUT, &status);
- if (status) {
- bt_dev_err(hdev, "inquiry failed: status 0x%02x", status);
- goto discov_stopped;
- }
-
- return;
-
-discov_stopped:
- hci_dev_lock(hdev);
- hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
- hci_dev_unlock(hdev);
-}
-
-static int le_scan_restart(struct hci_request *req, unsigned long opt)
-{
- struct hci_dev *hdev = req->hdev;
-
- /* If controller is not scanning we are done. */
- if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
- return 0;
-
- if (hdev->scanning_paused) {
- bt_dev_dbg(hdev, "Scanning is paused for suspend");
- return 0;
- }
-
- hci_req_add_le_scan_disable(req, false);
-
- if (use_ext_scan(hdev)) {
- struct hci_cp_le_set_ext_scan_enable ext_enable_cp;
-
- memset(&ext_enable_cp, 0, sizeof(ext_enable_cp));
- ext_enable_cp.enable = LE_SCAN_ENABLE;
- ext_enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
-
- hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE,
- sizeof(ext_enable_cp), &ext_enable_cp);
- } else {
- struct hci_cp_le_set_scan_enable cp;
-
- memset(&cp, 0, sizeof(cp));
- cp.enable = LE_SCAN_ENABLE;
- cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
- hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
- }
-
- return 0;
-}
-
-static void le_scan_restart_work(struct work_struct *work)
-{
- struct hci_dev *hdev = container_of(work, struct hci_dev,
- le_scan_restart.work);
- unsigned long timeout, duration, scan_start, now;
- u8 status;
-
- bt_dev_dbg(hdev, "");
-
- hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status);
- if (status) {
- bt_dev_err(hdev, "failed to restart LE scan: status %d",
- status);
- return;
- }
-
- hci_dev_lock(hdev);
-
- if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
- !hdev->discovery.scan_start)
- goto unlock;
-
- /* When the scan was started, hdev->le_scan_disable has been queued
- * after duration from scan_start. During scan restart this job
- * was canceled, and we need to queue it again after the proper
- * timeout, to make sure that the scan does not run indefinitely.
- */
- duration = hdev->discovery.scan_duration;
- scan_start = hdev->discovery.scan_start;
- now = jiffies;
- if (now - scan_start <= duration) {
- int elapsed;
-
- if (now >= scan_start)
- elapsed = now - scan_start;
- else
- elapsed = ULONG_MAX - scan_start + now;
-
- timeout = duration - elapsed;
- } else {
- timeout = 0;
- }
-
- queue_delayed_work(hdev->req_workqueue,
- &hdev->le_scan_disable, timeout);
-
-unlock:
- hci_dev_unlock(hdev);
-}
-
-static int active_scan(struct hci_request *req, unsigned long opt)
-{
- uint16_t interval = opt;
- struct hci_dev *hdev = req->hdev;
- u8 own_addr_type;
- /* Accept list is not used for discovery */
- u8 filter_policy = 0x00;
- /* Default is to enable duplicates filter */
- u8 filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
- /* Discovery doesn't require controller address resolution */
- bool addr_resolv = false;
- int err;
-
- bt_dev_dbg(hdev, "");
-
- /* If controller is scanning, it means the background scanning is
- * running. Thus, we should temporarily stop it in order to set the
- * discovery scanning parameters.
- */
- if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
- hci_req_add_le_scan_disable(req, false);
- cancel_interleave_scan(hdev);
- }
-
- /* All active scans will be done with either a resolvable private
- * address (when privacy feature has been enabled) or non-resolvable
- * private address.
- */
- err = hci_update_random_address(req, true, scan_use_rpa(hdev),
- &own_addr_type);
- if (err < 0)
- own_addr_type = ADDR_LE_DEV_PUBLIC;
-
- if (hci_is_adv_monitoring(hdev)) {
- /* The duplicate filter should be disabled when an advertisement
- * monitor is active, otherwise AdvMon can only receive one
- * advertisement per peer during active scanning, and might
- * falsely report loss for those peers.
- *
- * Note that different controllers have different meanings of
- * |duplicate|. Some of them consider packets with the same
- * address as duplicates, and others consider packets with the
- * same address and the same RSSI as duplicates. Although in the
- * latter case we would not need to disable the duplicate filter,
- * active scanning typically runs only for a short period of
- * time, so the power impact should be negligible.
- */
- filter_dup = LE_SCAN_FILTER_DUP_DISABLE;
- }
-
- hci_req_start_scan(req, LE_SCAN_ACTIVE, interval,
- hdev->le_scan_window_discovery, own_addr_type,
- filter_policy, filter_dup, addr_resolv);
- return 0;
-}
-
-static int interleaved_discov(struct hci_request *req, unsigned long opt)
-{
- int err;
-
- bt_dev_dbg(req->hdev, "");
-
- err = active_scan(req, opt);
- if (err)
- return err;
-
- return bredr_inquiry(req, DISCOV_BREDR_INQUIRY_LEN);
-}
-
-static void start_discovery(struct hci_dev *hdev, u8 *status)
-{
- unsigned long timeout;
-
- bt_dev_dbg(hdev, "type %u", hdev->discovery.type);
-
- switch (hdev->discovery.type) {
- case DISCOV_TYPE_BREDR:
- if (!hci_dev_test_flag(hdev, HCI_INQUIRY))
- hci_req_sync(hdev, bredr_inquiry,
- DISCOV_BREDR_INQUIRY_LEN, HCI_CMD_TIMEOUT,
- status);
- return;
- case DISCOV_TYPE_INTERLEAVED:
- /* When running simultaneous discovery, the LE scanning time
- * should occupy the whole discovery time since BR/EDR inquiry
- * and LE scanning are scheduled by the controller.
- *
- * For interleaving discovery in comparison, BR/EDR inquiry
- * and LE scanning are done sequentially with separate
- * timeouts.
- */
- if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY,
- &hdev->quirks)) {
- timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT);
- /* During simultaneous discovery, we double LE scan
- * interval. We must leave some time for the controller
- * to do BR/EDR inquiry.
- */
- hci_req_sync(hdev, interleaved_discov,
- hdev->le_scan_int_discovery * 2, HCI_CMD_TIMEOUT,
- status);
- break;
- }
-
- timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout);
- hci_req_sync(hdev, active_scan, hdev->le_scan_int_discovery,
- HCI_CMD_TIMEOUT, status);
- break;
- case DISCOV_TYPE_LE:
- timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT);
- hci_req_sync(hdev, active_scan, hdev->le_scan_int_discovery,
- HCI_CMD_TIMEOUT, status);
- break;
- default:
- *status = HCI_ERROR_UNSPECIFIED;
- return;
- }
-
- if (*status)
- return;
-
- bt_dev_dbg(hdev, "timeout %u ms", jiffies_to_msecs(timeout));
-
- /* When service discovery is used and the controller has a
- * strict duplicate filter, it is important to remember the
- * start and duration of the scan. This is required for
- * restarting scanning during the discovery phase.
- */
- if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) &&
- hdev->discovery.result_filtering) {
- hdev->discovery.scan_start = jiffies;
- hdev->discovery.scan_duration = timeout;
- }
-
- queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_disable,
- timeout);
-}
-
-bool hci_req_stop_discovery(struct hci_request *req)
-{
- struct hci_dev *hdev = req->hdev;
- struct discovery_state *d = &hdev->discovery;
- struct hci_cp_remote_name_req_cancel cp;
- struct inquiry_entry *e;
- bool ret = false;
-
- bt_dev_dbg(hdev, "state %u", hdev->discovery.state);
-
- if (d->state == DISCOVERY_FINDING || d->state == DISCOVERY_STOPPING) {
- if (test_bit(HCI_INQUIRY, &hdev->flags))
- hci_req_add(req, HCI_OP_INQUIRY_CANCEL, 0, NULL);
-
- if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
- cancel_delayed_work(&hdev->le_scan_disable);
- cancel_delayed_work(&hdev->le_scan_restart);
- hci_req_add_le_scan_disable(req, false);
- }
-
- ret = true;
- } else {
- /* Passive scanning */
- if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
- hci_req_add_le_scan_disable(req, false);
- ret = true;
- }
- }
-
- /* No further actions needed for LE-only discovery */
- if (d->type == DISCOV_TYPE_LE)
- return ret;
-
- if (d->state == DISCOVERY_RESOLVING || d->state == DISCOVERY_STOPPING) {
- e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY,
- NAME_PENDING);
- if (!e)
- return ret;
-
- bacpy(&cp.bdaddr, &e->data.bdaddr);
- hci_req_add(req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp),
- &cp);
- ret = true;
- }
-
- return ret;
-}
-
-static void config_data_path_complete(struct hci_dev *hdev, u8 status,
- u16 opcode)
-{
- bt_dev_dbg(hdev, "status %u", status);
-}
-
-int hci_req_configure_datapath(struct hci_dev *hdev, struct bt_codec *codec)
-{
- struct hci_request req;
- int err;
- __u8 vnd_len, *vnd_data = NULL;
- struct hci_op_configure_data_path *cmd = NULL;
-
- hci_req_init(&req, hdev);
-
- err = hdev->get_codec_config_data(hdev, ESCO_LINK, codec, &vnd_len,
- &vnd_data);
- if (err < 0)
- goto error;
-
- cmd = kzalloc(sizeof(*cmd) + vnd_len, GFP_KERNEL);
- if (!cmd) {
- err = -ENOMEM;
- goto error;
- }
-
- err = hdev->get_data_path_id(hdev, &cmd->data_path_id);
- if (err < 0)
- goto error;
-
- cmd->vnd_len = vnd_len;
- memcpy(cmd->vnd_data, vnd_data, vnd_len);
-
- cmd->direction = 0x00;
- hci_req_add(&req, HCI_CONFIGURE_DATA_PATH, sizeof(*cmd) + vnd_len, cmd);
-
- cmd->direction = 0x01;
- hci_req_add(&req, HCI_CONFIGURE_DATA_PATH, sizeof(*cmd) + vnd_len, cmd);
-
- err = hci_req_run(&req, config_data_path_complete);
-error:
-
- kfree(cmd);
- kfree(vnd_data);
- return err;
-}
-
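hci_req_configure_datapath() above sizes one command buffer for a fixed header plus vendor data and reuses it for both directions. A generic sketch of that flexible-array allocation pattern; the struct layout here is illustrative, not the exact HCI wire format of hci_op_configure_data_path:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct data_path_cmd {
	uint8_t direction;	/* 0x00 = input, 0x01 = output */
	uint8_t data_path_id;
	uint8_t vnd_len;
	uint8_t vnd_data[];	/* vendor-specific config follows the header */
};

static struct data_path_cmd *alloc_data_path_cmd(const uint8_t *vnd,
						 uint8_t vnd_len)
{
	/* One allocation covers header and payload, as with kzalloc() above */
	struct data_path_cmd *cmd = calloc(1, sizeof(*cmd) + vnd_len);

	if (!cmd)
		return NULL;

	cmd->vnd_len = vnd_len;
	memcpy(cmd->vnd_data, vnd, vnd_len);
	return cmd;
}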
-static int stop_discovery(struct hci_request *req, unsigned long opt)
-{
- hci_dev_lock(req->hdev);
- hci_req_stop_discovery(req);
- hci_dev_unlock(req->hdev);
-
- return 0;
-}
-
-static void discov_update(struct work_struct *work)
-{
- struct hci_dev *hdev = container_of(work, struct hci_dev,
- discov_update);
- u8 status = 0;
-
- switch (hdev->discovery.state) {
- case DISCOVERY_STARTING:
- start_discovery(hdev, &status);
- mgmt_start_discovery_complete(hdev, status);
- if (status)
- hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
- else
- hci_discovery_set_state(hdev, DISCOVERY_FINDING);
- break;
- case DISCOVERY_STOPPING:
- hci_req_sync(hdev, stop_discovery, 0, HCI_CMD_TIMEOUT, &status);
- mgmt_stop_discovery_complete(hdev, status);
- if (!status)
- hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
- break;
- case DISCOVERY_STOPPED:
- default:
- return;
- }
-}
-
-static void discov_off(struct work_struct *work)
-{
- struct hci_dev *hdev = container_of(work, struct hci_dev,
- discov_off.work);
-
- bt_dev_dbg(hdev, "");
-
- hci_dev_lock(hdev);
-
- /* When the discoverable timeout triggers, just make sure
- * the limited discoverable flag is cleared. Even in the case
- * of a timeout triggered from general discoverable, it is
- * safe to unconditionally clear the flag.
- */
- hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
- hci_dev_clear_flag(hdev, HCI_DISCOVERABLE);
- hdev->discov_timeout = 0;
-
- hci_dev_unlock(hdev);
-
- hci_req_sync(hdev, discoverable_update, 0, HCI_CMD_TIMEOUT, NULL);
- mgmt_new_settings(hdev);
-}
-
-static int powered_update_hci(struct hci_request *req, unsigned long opt)
-{
- struct hci_dev *hdev = req->hdev;
- u8 link_sec;
-
- hci_dev_lock(hdev);
-
- if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED) &&
- !lmp_host_ssp_capable(hdev)) {
- u8 mode = 0x01;
-
- hci_req_add(req, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode);
-
- if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) {
- u8 support = 0x01;
-
- hci_req_add(req, HCI_OP_WRITE_SC_SUPPORT,
- sizeof(support), &support);
- }
- }
-
- if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) &&
- lmp_bredr_capable(hdev)) {
- struct hci_cp_write_le_host_supported cp;
-
- cp.le = 0x01;
- cp.simul = 0x00;
-
- /* Check first if we already have the right
- * host state (host features set)
- */
- if (cp.le != lmp_host_le_capable(hdev) ||
- cp.simul != lmp_host_le_br_capable(hdev))
- hci_req_add(req, HCI_OP_WRITE_LE_HOST_SUPPORTED,
- sizeof(cp), &cp);
- }
-
- if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) {
- /* Make sure the controller has a good default for
- * advertising data. This also applies to the case
- * where BR/EDR was toggled during the AUTO_OFF phase.
- */
- if (hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
- list_empty(&hdev->adv_instances)) {
- int err;
-
- if (ext_adv_capable(hdev)) {
- err = __hci_req_setup_ext_adv_instance(req,
- 0x00);
- if (!err)
- __hci_req_update_scan_rsp_data(req,
- 0x00);
- } else {
- err = 0;
- __hci_req_update_adv_data(req, 0x00);
- __hci_req_update_scan_rsp_data(req, 0x00);
- }
-
- if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) {
- if (!ext_adv_capable(hdev))
- __hci_req_enable_advertising(req);
- else if (!err)
- __hci_req_enable_ext_advertising(req,
- 0x00);
- }
- } else if (!list_empty(&hdev->adv_instances)) {
- struct adv_info *adv_instance;
-
- adv_instance = list_first_entry(&hdev->adv_instances,
- struct adv_info, list);
- __hci_req_schedule_adv_instance(req,
- adv_instance->instance,
- true);
- }
- }
-
- link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY);
- if (link_sec != test_bit(HCI_AUTH, &hdev->flags))
- hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE,
- sizeof(link_sec), &link_sec);
-
- if (lmp_bredr_capable(hdev)) {
- if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE))
- __hci_req_write_fast_connectable(req, true);
- else
- __hci_req_write_fast_connectable(req, false);
- __hci_req_update_scan(req);
- __hci_req_update_class(req);
- __hci_req_update_name(req);
- __hci_req_update_eir(req);
- }
-
- hci_dev_unlock(hdev);
- return 0;
-}
-
-int __hci_req_hci_power_on(struct hci_dev *hdev)
-{
- /* Register the available SMP channels (BR/EDR and LE) only when
- * successfully powering on the controller. This late
- * registration is required so that LE SMP can clearly decide if
- * the public address or static address is used.
- */
- smp_register(hdev);
-
- return __hci_req_sync(hdev, powered_update_hci, 0, HCI_CMD_TIMEOUT,
- NULL);
-}
-
void hci_request_setup(struct hci_dev *hdev)
{
- INIT_WORK(&hdev->discov_update, discov_update);
- INIT_WORK(&hdev->scan_update, scan_update_work);
- INIT_DELAYED_WORK(&hdev->discov_off, discov_off);
- INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work);
- INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work);
- INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire);
INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work);
}
@@ -2644,16 +918,5 @@ void hci_request_cancel_all(struct hci_dev *hdev)
{
__hci_cmd_sync_cancel(hdev, ENODEV);
- cancel_work_sync(&hdev->discov_update);
- cancel_work_sync(&hdev->scan_update);
- cancel_delayed_work_sync(&hdev->discov_off);
- cancel_delayed_work_sync(&hdev->le_scan_disable);
- cancel_delayed_work_sync(&hdev->le_scan_restart);
-
- if (hdev->adv_instance_timeout) {
- cancel_delayed_work_sync(&hdev->adv_instance_expire);
- hdev->adv_instance_timeout = 0;
- }
-
cancel_interleave_scan(hdev);
}
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index 7f8df258e295..b9c5a9823837 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -68,61 +68,10 @@ int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req,
struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen,
const void *param);
-int __hci_req_hci_power_on(struct hci_dev *hdev);
-
-void __hci_req_write_fast_connectable(struct hci_request *req, bool enable);
-void __hci_req_update_name(struct hci_request *req);
-void __hci_req_update_eir(struct hci_request *req);
-
void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn);
void hci_req_add_le_passive_scan(struct hci_request *req);
void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next);
-void hci_req_disable_address_resolution(struct hci_dev *hdev);
-void hci_req_reenable_advertising(struct hci_dev *hdev);
-void __hci_req_enable_advertising(struct hci_request *req);
-void __hci_req_disable_advertising(struct hci_request *req);
-void __hci_req_update_adv_data(struct hci_request *req, u8 instance);
-int hci_req_update_adv_data(struct hci_dev *hdev, u8 instance);
-void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance);
-
-int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance,
- bool force);
-void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk,
- struct hci_request *req, u8 instance,
- bool force);
-
-int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance);
-int __hci_req_start_ext_adv(struct hci_request *req, u8 instance);
-int __hci_req_enable_ext_advertising(struct hci_request *req, u8 instance);
-int __hci_req_disable_ext_adv_instance(struct hci_request *req, u8 instance);
-int __hci_req_remove_ext_adv_instance(struct hci_request *req, u8 instance);
-void __hci_req_clear_ext_adv_sets(struct hci_request *req);
-int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
- bool use_rpa, struct adv_info *adv_instance,
- u8 *own_addr_type, bdaddr_t *rand_addr);
-
-void __hci_req_update_class(struct hci_request *req);
-
-/* Returns true if HCI commands were queued */
-bool hci_req_stop_discovery(struct hci_request *req);
-
-int hci_req_configure_datapath(struct hci_dev *hdev, struct bt_codec *codec);
-
-static inline void hci_req_update_scan(struct hci_dev *hdev)
-{
- queue_work(hdev->req_workqueue, &hdev->scan_update);
-}
-
-void __hci_req_update_scan(struct hci_request *req);
-
-int hci_update_random_address(struct hci_request *req, bool require_privacy,
- bool use_rpa, u8 *own_addr_type);
-
-int hci_abort_conn(struct hci_conn *conn, u8 reason);
-void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn,
- u8 reason);
-
void hci_request_setup(struct hci_dev *hdev);
void hci_request_cancel_all(struct hci_dev *hdev);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 33b3c0ffc339..06581223238c 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -869,7 +869,8 @@ static int hci_sock_release(struct socket *sock)
hdev = hci_pi(sk)->hdev;
if (hdev) {
- if (hci_pi(sk)->channel == HCI_CHANNEL_USER) {
+ if (hci_pi(sk)->channel == HCI_CHANNEL_USER &&
+ !hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
/* When releasing a user channel exclusive access,
* call hci_dev_do_close directly instead of calling
* hci_dev_close to ensure the exclusive access will
@@ -878,6 +879,11 @@ static int hci_sock_release(struct socket *sock)
* The checking of HCI_AUTO_OFF is not needed in this
* case since it will have been cleared already when
* opening the user channel.
+ *
+ * Make sure to also check that we haven't already
+ * unregistered since all the cleanup will have already
+ * been complete and hdev will get released when we put
+ * below.
*/
hci_dev_do_close(hdev);
hci_dev_clear_flag(hdev, HCI_USER_CHANNEL);
@@ -1453,7 +1459,6 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg,
static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
- int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
struct sk_buff *skb;
int copied, err;
@@ -1470,7 +1475,7 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
if (sk->sk_state == BT_CLOSED)
return 0;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
return err;
@@ -2057,6 +2062,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname,
static void hci_sock_destruct(struct sock *sk)
{
+ mgmt_cleanup(sk);
skb_queue_purge(&sk->sk_receive_queue);
skb_queue_purge(&sk->sk_write_queue);
}
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 0feb68f12545..76c3107c9f91 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -246,7 +246,7 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
skb = __hci_cmd_sync_sk(hdev, opcode, plen, param, event, timeout, sk);
if (IS_ERR(skb)) {
bt_dev_err(hdev, "Opcode 0x%4x failed: %ld", opcode,
- PTR_ERR(skb));
+ PTR_ERR(skb));
return PTR_ERR(skb);
}
@@ -276,40 +276,37 @@ EXPORT_SYMBOL(__hci_cmd_sync_status);
static void hci_cmd_sync_work(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_sync_work);
- struct hci_cmd_sync_work_entry *entry;
- hci_cmd_sync_work_func_t func;
- hci_cmd_sync_work_destroy_t destroy;
- void *data;
bt_dev_dbg(hdev, "");
- mutex_lock(&hdev->cmd_sync_work_lock);
- entry = list_first_entry(&hdev->cmd_sync_work_list,
- struct hci_cmd_sync_work_entry, list);
- if (entry) {
- list_del(&entry->list);
- func = entry->func;
- data = entry->data;
- destroy = entry->destroy;
- kfree(entry);
- } else {
- func = NULL;
- data = NULL;
- destroy = NULL;
- }
- mutex_unlock(&hdev->cmd_sync_work_lock);
+ /* Dequeue all entries and run them */
+ while (1) {
+ struct hci_cmd_sync_work_entry *entry;
- if (func) {
- int err;
+ mutex_lock(&hdev->cmd_sync_work_lock);
+ entry = list_first_entry_or_null(&hdev->cmd_sync_work_list,
+ struct hci_cmd_sync_work_entry,
+ list);
+ if (entry)
+ list_del(&entry->list);
+ mutex_unlock(&hdev->cmd_sync_work_lock);
+
+ if (!entry)
+ break;
- hci_req_sync_lock(hdev);
+ bt_dev_dbg(hdev, "entry %p", entry);
- err = func(hdev, data);
+ if (entry->func) {
+ int err;
- if (destroy)
- destroy(hdev, data, err);
+ hci_req_sync_lock(hdev);
+ err = entry->func(hdev, entry->data);
+ if (entry->destroy)
+ entry->destroy(hdev, entry->data, err);
+ hci_req_sync_unlock(hdev);
+ }
- hci_req_sync_unlock(hdev);
+ kfree(entry);
}
}
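The rewritten worker above drains the whole queue instead of handling one entry per invocation, holding cmd_sync_work_lock only while dequeuing. A generic userspace sketch of the same pattern, with a pthread mutex and a singly linked list standing in for the kernel primitives:

#include <pthread.h>
#include <stdlib.h>

/* Placeholders for hci_cmd_sync_work_entry and its func/data pair */
struct entry {
	struct entry *next;
	void (*run)(void *data);
	void *data;
};

static struct entry *queue_head;
static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

static void drain_queue(void)
{
	for (;;) {
		struct entry *e;

		/* Hold the lock only while dequeuing */
		pthread_mutex_lock(&queue_lock);
		e = queue_head;
		if (e)
			queue_head = e->next;
		pthread_mutex_unlock(&queue_lock);

		if (!e)
			break;

		if (e->run)
			e->run(e->data);	/* runs unlocked */

		free(e);
	}
}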
@@ -324,6 +321,307 @@ static void hci_cmd_sync_cancel_work(struct work_struct *work)
wake_up_interruptible(&hdev->req_wait_q);
}
+static int hci_scan_disable_sync(struct hci_dev *hdev);
+static int scan_disable_sync(struct hci_dev *hdev, void *data)
+{
+ return hci_scan_disable_sync(hdev);
+}
+
+static int hci_inquiry_sync(struct hci_dev *hdev, u8 length);
+static int interleaved_inquiry_sync(struct hci_dev *hdev, void *data)
+{
+ return hci_inquiry_sync(hdev, DISCOV_INTERLEAVED_INQUIRY_LEN);
+}
+
+static void le_scan_disable(struct work_struct *work)
+{
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ le_scan_disable.work);
+ int status;
+
+ bt_dev_dbg(hdev, "");
+ hci_dev_lock(hdev);
+
+ if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
+ goto _return;
+
+ cancel_delayed_work(&hdev->le_scan_restart);
+
+ status = hci_cmd_sync_queue(hdev, scan_disable_sync, NULL, NULL);
+ if (status) {
+ bt_dev_err(hdev, "failed to disable LE scan: %d", status);
+ goto _return;
+ }
+
+ hdev->discovery.scan_start = 0;
+
+ /* If we were running LE only scan, change discovery state. If
+ * we were running both LE and BR/EDR inquiry simultaneously,
+ * and BR/EDR inquiry is already finished, stop discovery,
+ * otherwise BR/EDR inquiry will stop discovery when finished.
+ * If we are resolving a remote device name, do not
+ * change the discovery state.
+ */
+
+ if (hdev->discovery.type == DISCOV_TYPE_LE)
+ goto discov_stopped;
+
+ if (hdev->discovery.type != DISCOV_TYPE_INTERLEAVED)
+ goto _return;
+
+ if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) {
+ if (!test_bit(HCI_INQUIRY, &hdev->flags) &&
+ hdev->discovery.state != DISCOVERY_RESOLVING)
+ goto discov_stopped;
+
+ goto _return;
+ }
+
+ status = hci_cmd_sync_queue(hdev, interleaved_inquiry_sync, NULL, NULL);
+ if (status) {
+ bt_dev_err(hdev, "inquiry failed: status %d", status);
+ goto discov_stopped;
+ }
+
+ goto _return;
+
+discov_stopped:
+ hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
+
+_return:
+ hci_dev_unlock(hdev);
+}
+
+static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val,
+ u8 filter_dup);
+static int hci_le_scan_restart_sync(struct hci_dev *hdev)
+{
+ /* If controller is not scanning we are done. */
+ if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
+ return 0;
+
+ if (hdev->scanning_paused) {
+ bt_dev_dbg(hdev, "Scanning is paused for suspend");
+ return 0;
+ }
+
+ hci_le_set_scan_enable_sync(hdev, LE_SCAN_DISABLE, 0x00);
+ return hci_le_set_scan_enable_sync(hdev, LE_SCAN_ENABLE,
+ LE_SCAN_FILTER_DUP_ENABLE);
+}
+
+static int le_scan_restart_sync(struct hci_dev *hdev, void *data)
+{
+ return hci_le_scan_restart_sync(hdev);
+}
+
+static void le_scan_restart(struct work_struct *work)
+{
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ le_scan_restart.work);
+ unsigned long timeout, duration, scan_start, now;
+ int status;
+
+ bt_dev_dbg(hdev, "");
+
+ hci_dev_lock(hdev);
+
+ status = hci_cmd_sync_queue(hdev, le_scan_restart_sync, NULL, NULL);
+ if (status) {
+ bt_dev_err(hdev, "failed to restart LE scan: status %d",
+ status);
+ goto unlock;
+ }
+
+ if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
+ !hdev->discovery.scan_start)
+ goto unlock;
+
+ /* When the scan was started, hdev->le_scan_disable has been queued
+ * after duration from scan_start. During scan restart this job
+ * was canceled, and we need to queue it again after the proper
+ * timeout, to make sure that the scan does not run indefinitely.
+ */
+ duration = hdev->discovery.scan_duration;
+ scan_start = hdev->discovery.scan_start;
+ now = jiffies;
+ if (now - scan_start <= duration) {
+ int elapsed;
+
+ if (now >= scan_start)
+ elapsed = now - scan_start;
+ else
+ elapsed = ULONG_MAX - scan_start + now;
+
+ timeout = duration - elapsed;
+ } else {
+ timeout = 0;
+ }
+
+ queue_delayed_work(hdev->req_workqueue,
+ &hdev->le_scan_disable, timeout);
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
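The timeout computation above is careful about jiffies wraparound: the unsigned subtraction now - scan_start yields the true elapsed time even across a wrap, and the explicit branch reconstructs it for the subtraction from duration. The same arithmetic as a standalone sketch, with unsigned long ticks standing in for jiffies:

#include <limits.h>

static unsigned long timeout_left(unsigned long scan_start,
				  unsigned long duration, unsigned long now)
{
	unsigned long elapsed;

	if (now - scan_start > duration)
		return 0;		/* the scan window has fully elapsed */

	if (now >= scan_start)
		elapsed = now - scan_start;
	else				/* the tick counter wrapped */
		elapsed = ULONG_MAX - scan_start + now;

	return duration - elapsed;
}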
+static int reenable_adv_sync(struct hci_dev *hdev, void *data)
+{
+ bt_dev_dbg(hdev, "");
+
+ if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) &&
+ list_empty(&hdev->adv_instances))
+ return 0;
+
+ if (hdev->cur_adv_instance) {
+ return hci_schedule_adv_instance_sync(hdev,
+ hdev->cur_adv_instance,
+ true);
+ } else {
+ if (ext_adv_capable(hdev)) {
+ hci_start_ext_adv_sync(hdev, 0x00);
+ } else {
+ hci_update_adv_data_sync(hdev, 0x00);
+ hci_update_scan_rsp_data_sync(hdev, 0x00);
+ hci_enable_advertising_sync(hdev);
+ }
+ }
+
+ return 0;
+}
+
+static void reenable_adv(struct work_struct *work)
+{
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ reenable_adv_work);
+ int status;
+
+ bt_dev_dbg(hdev, "");
+
+ hci_dev_lock(hdev);
+
+ status = hci_cmd_sync_queue(hdev, reenable_adv_sync, NULL, NULL);
+ if (status)
+ bt_dev_err(hdev, "failed to reenable ADV: %d", status);
+
+ hci_dev_unlock(hdev);
+}
+
+static void cancel_adv_timeout(struct hci_dev *hdev)
+{
+ if (hdev->adv_instance_timeout) {
+ hdev->adv_instance_timeout = 0;
+ cancel_delayed_work(&hdev->adv_instance_expire);
+ }
+}
+
+/* For a single instance:
+ * - force == true: The instance will be removed even when its remaining
+ * lifetime is not zero.
+ * - force == false: the instance will be deactivated but kept stored unless
+ * the remaining lifetime is zero.
+ *
+ * For instance == 0x00:
+ * - force == true: All instances will be removed regardless of their timeout
+ * setting.
+ * - force == false: Only instances that have a timeout will be removed.
+ */
+int hci_clear_adv_instance_sync(struct hci_dev *hdev, struct sock *sk,
+ u8 instance, bool force)
+{
+ struct adv_info *adv_instance, *n, *next_instance = NULL;
+ int err;
+ u8 rem_inst;
+
+ /* Cancel any timeout concerning the removed instance(s). */
+ if (!instance || hdev->cur_adv_instance == instance)
+ cancel_adv_timeout(hdev);
+
+ /* Get the next instance to advertise BEFORE we remove
+ * the current one. This can be the same instance again
+ * if there is only one instance.
+ */
+ if (instance && hdev->cur_adv_instance == instance)
+ next_instance = hci_get_next_instance(hdev, instance);
+
+ if (instance == 0x00) {
+ list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances,
+ list) {
+ if (!(force || adv_instance->timeout))
+ continue;
+
+ rem_inst = adv_instance->instance;
+ err = hci_remove_adv_instance(hdev, rem_inst);
+ if (!err)
+ mgmt_advertising_removed(sk, hdev, rem_inst);
+ }
+ } else {
+ adv_instance = hci_find_adv_instance(hdev, instance);
+
+ if (force || (adv_instance && adv_instance->timeout &&
+ !adv_instance->remaining_time)) {
+ /* Don't advertise a removed instance. */
+ if (next_instance &&
+ next_instance->instance == instance)
+ next_instance = NULL;
+
+ err = hci_remove_adv_instance(hdev, instance);
+ if (!err)
+ mgmt_advertising_removed(sk, hdev, instance);
+ }
+ }
+
+ if (!hdev_is_powered(hdev) || hci_dev_test_flag(hdev, HCI_ADVERTISING))
+ return 0;
+
+ if (next_instance && !ext_adv_capable(hdev))
+ return hci_schedule_adv_instance_sync(hdev,
+ next_instance->instance,
+ false);
+
+ return 0;
+}
+
+static int adv_timeout_expire_sync(struct hci_dev *hdev, void *data)
+{
+ u8 instance = *(u8 *)data;
+
+ kfree(data);
+
+ hci_clear_adv_instance_sync(hdev, NULL, instance, false);
+
+ if (list_empty(&hdev->adv_instances))
+ return hci_disable_advertising_sync(hdev);
+
+ return 0;
+}
+
+static void adv_timeout_expire(struct work_struct *work)
+{
+ u8 *inst_ptr;
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ adv_instance_expire.work);
+
+ bt_dev_dbg(hdev, "");
+
+ hci_dev_lock(hdev);
+
+ hdev->adv_instance_timeout = 0;
+
+ if (hdev->cur_adv_instance == 0x00)
+ goto unlock;
+
+ inst_ptr = kmalloc(1, GFP_KERNEL);
+ if (!inst_ptr)
+ goto unlock;
+
+ *inst_ptr = hdev->cur_adv_instance;
+ hci_cmd_sync_queue(hdev, adv_timeout_expire_sync, inst_ptr, NULL);
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
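adv_timeout_expire() above hands a heap-allocated instance number to the deferred sync callback, which consumes and frees it. A sketch of that ownership handoff, with queue_cb() as a hypothetical stand-in for hci_cmd_sync_queue(); note that if queueing fails the payload is never freed unless a destroy hook is also supplied:

#include <stdlib.h>

static int expire_sync(void *data)
{
	unsigned char instance = *(unsigned char *)data;

	free(data);		/* the callback owns the payload */
	return instance;	/* placeholder for the real expiry work */
}

static int queue_expire(unsigned char cur_instance,
			int (*queue_cb)(int (*)(void *), void *))
{
	unsigned char *inst = malloc(1);

	if (!inst)
		return -1;

	*inst = cur_instance;
	return queue_cb(expire_sync, inst);	/* ownership transfers here */
}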
void hci_cmd_sync_init(struct hci_dev *hdev)
{
INIT_WORK(&hdev->cmd_sync_work, hci_cmd_sync_work);
@@ -331,6 +629,10 @@ void hci_cmd_sync_init(struct hci_dev *hdev)
mutex_init(&hdev->cmd_sync_work_lock);
INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work);
+ INIT_WORK(&hdev->reenable_adv_work, reenable_adv);
+ INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable);
+ INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart);
+ INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire);
}
void hci_cmd_sync_clear(struct hci_dev *hdev)
@@ -338,6 +640,7 @@ void hci_cmd_sync_clear(struct hci_dev *hdev)
struct hci_cmd_sync_work_entry *entry, *tmp;
cancel_work_sync(&hdev->cmd_sync_work);
+ cancel_work_sync(&hdev->reenable_adv_work);
list_for_each_entry_safe(entry, tmp, &hdev->cmd_sync_work_list, list) {
if (entry->destroy)
@@ -382,6 +685,9 @@ int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
{
struct hci_cmd_sync_work_entry *entry;
+ if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+ return -ENODEV;
+
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return -ENOMEM;
@@ -849,26 +1155,38 @@ static int hci_set_ext_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance)
u8 data[HCI_MAX_EXT_AD_LENGTH];
} pdu;
u8 len;
+ struct adv_info *adv = NULL;
+ int err;
memset(&pdu, 0, sizeof(pdu));
- len = eir_create_scan_rsp(hdev, instance, pdu.data);
-
- if (hdev->scan_rsp_data_len == len &&
- !memcmp(pdu.data, hdev->scan_rsp_data, len))
- return 0;
+ if (instance) {
+ adv = hci_find_adv_instance(hdev, instance);
+ if (!adv || !adv->scan_rsp_changed)
+ return 0;
+ }
- memcpy(hdev->scan_rsp_data, pdu.data, len);
- hdev->scan_rsp_data_len = len;
+ len = eir_create_scan_rsp(hdev, instance, pdu.data);
pdu.cp.handle = instance;
pdu.cp.length = len;
pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
- return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_RSP_DATA,
- sizeof(pdu.cp) + len, &pdu.cp,
- HCI_CMD_TIMEOUT);
+ err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_RSP_DATA,
+ sizeof(pdu.cp) + len, &pdu.cp,
+ HCI_CMD_TIMEOUT);
+ if (err)
+ return err;
+
+ if (adv) {
+ adv->scan_rsp_changed = false;
+ } else {
+ memcpy(hdev->scan_rsp_data, pdu.data, len);
+ hdev->scan_rsp_data_len = len;
+ }
+
+ return 0;
}
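The rewritten helper above switches from a byte-compare to a per-instance dirty flag: the HCI command is skipped when nothing changed, and the flag (or the cached copy, for instance 0x00) is updated only after the command succeeds, so a failed write stays pending. Sketched generically, with issue_cmd() as a hypothetical stand-in for __hci_cmd_sync_status():

#include <stdbool.h>
#include <string.h>

struct cached_blob {
	unsigned char data[31];
	unsigned char len;
	bool changed;		/* set by writers when the data differs */
};

static int sync_blob(struct cached_blob *blob, const unsigned char *fresh,
		     unsigned char len,
		     int (*issue_cmd)(const unsigned char *, unsigned char))
{
	int err;

	if (!blob->changed)
		return 0;			/* nothing to write */

	err = issue_cmd(fresh, len);
	if (err)
		return err;			/* stay dirty, retry later */

	memcpy(blob->data, fresh, len);
	blob->len = len;
	blob->changed = false;			/* clean only after success */
	return 0;
}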
static int __hci_set_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance)
@@ -965,6 +1283,187 @@ int hci_start_ext_adv_sync(struct hci_dev *hdev, u8 instance)
return hci_enable_ext_advertising_sync(hdev, instance);
}
+static int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance)
+{
+ struct hci_cp_le_set_per_adv_enable cp;
+
+ /* If periodic advertising is already disabled there is nothing to do. */
+ if (!hci_dev_test_flag(hdev, HCI_LE_PER_ADV))
+ return 0;
+
+ memset(&cp, 0, sizeof(cp));
+
+ cp.enable = 0x00;
+ cp.handle = instance;
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_ENABLE,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+}
+
+static int hci_set_per_adv_params_sync(struct hci_dev *hdev, u8 instance,
+ u16 min_interval, u16 max_interval)
+{
+ struct hci_cp_le_set_per_adv_params cp;
+
+ memset(&cp, 0, sizeof(cp));
+
+ if (!min_interval)
+ min_interval = DISCOV_LE_PER_ADV_INT_MIN;
+
+ if (!max_interval)
+ max_interval = DISCOV_LE_PER_ADV_INT_MAX;
+
+ cp.handle = instance;
+ cp.min_interval = cpu_to_le16(min_interval);
+ cp.max_interval = cpu_to_le16(max_interval);
+ cp.periodic_properties = 0x0000;
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_PARAMS,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+}
+
+static int hci_set_per_adv_data_sync(struct hci_dev *hdev, u8 instance)
+{
+ struct {
+ struct hci_cp_le_set_per_adv_data cp;
+ u8 data[HCI_MAX_PER_AD_LENGTH];
+ } pdu;
+ u8 len;
+
+ memset(&pdu, 0, sizeof(pdu));
+
+ if (instance) {
+ struct adv_info *adv = hci_find_adv_instance(hdev, instance);
+
+ if (!adv || !adv->periodic)
+ return 0;
+ }
+
+ len = eir_create_per_adv_data(hdev, instance, pdu.data);
+
+ pdu.cp.length = len;
+ pdu.cp.handle = instance;
+ pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_DATA,
+ sizeof(pdu.cp) + len, &pdu,
+ HCI_CMD_TIMEOUT);
+}
+
+static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance)
+{
+ struct hci_cp_le_set_per_adv_enable cp;
+
+ /* If periodic advertising is already enabled there is nothing to do. */
+ if (hci_dev_test_flag(hdev, HCI_LE_PER_ADV))
+ return 0;
+
+ memset(&cp, 0, sizeof(cp));
+
+ cp.enable = 0x01;
+ cp.handle = instance;
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_ENABLE,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+}
+
+/* Checks if the periodic advertising data contains a Basic Announcement and,
+ * if it does, generates a Broadcast ID and adds a Broadcast Announcement.
+ */
+static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv)
+{
+ u8 bid[3];
+ u8 ad[4 + 3];
+
+ /* Skip if adv is NULL, as instance 0x00 is used for general purpose
+ * advertising and so cannot be used for the likes of a Broadcast
+ * Announcement, since it can be overwritten at any point.
+ */
+ if (!adv)
+ return 0;
+
+ /* If the PA data doesn't contain a Basic Audio Announcement then
+ * there is nothing to do.
+ */
+ if (!eir_get_service_data(adv->per_adv_data, adv->per_adv_data_len,
+ 0x1851, NULL))
+ return 0;
+
+ /* Check if advertising data already has a Broadcast Announcement since
+ * the process may want to control the Broadcast ID directly and in that
+ * case the kernel shall not interfere.
+ */
+ if (eir_get_service_data(adv->adv_data, adv->adv_data_len, 0x1852,
+ NULL))
+ return 0;
+
+ /* Generate Broadcast ID */
+ get_random_bytes(bid, sizeof(bid));
+ eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid));
+ hci_set_adv_instance_data(hdev, adv->instance, sizeof(ad), ad, 0, NULL);
+
+ return hci_update_adv_data_sync(hdev, adv->instance);
+}
+
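The ad[4 + 3] buffer above holds one EIR/AD structure: a length byte, the Service Data - 16-bit UUID type (0x16), the UUID in little-endian order, and the 3-byte Broadcast ID. A sketch of the layout that eir_append_service_data() is assumed to produce:

#include <stdint.h>
#include <string.h>

/* Returns the number of bytes written; for a 3-byte Broadcast ID this is 7,
 * matching u8 ad[4 + 3] above.
 */
static uint8_t eir_append_svc_data16(uint8_t *ad, uint16_t uuid,
				     const uint8_t *data, uint8_t data_len)
{
	ad[0] = 3 + data_len;	/* counts the type byte, UUID and payload */
	ad[1] = 0x16;		/* AD type: Service Data - 16-bit UUID */
	ad[2] = uuid & 0xff;	/* UUID little-endian */
	ad[3] = uuid >> 8;
	memcpy(&ad[4], data, data_len);

	return 4 + data_len;
}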
+int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len,
+ u8 *data, u32 flags, u16 min_interval,
+ u16 max_interval, u16 sync_interval)
+{
+ struct adv_info *adv = NULL;
+ int err;
+ bool added = false;
+
+ hci_disable_per_advertising_sync(hdev, instance);
+
+ if (instance) {
+ adv = hci_find_adv_instance(hdev, instance);
+ /* Create an instance if one could not be found */
+ if (!adv) {
+ adv = hci_add_per_instance(hdev, instance, flags,
+ data_len, data,
+ sync_interval,
+ sync_interval);
+ if (IS_ERR(adv))
+ return PTR_ERR(adv);
+ added = true;
+ }
+ }
+
+ /* Only start advertising for instance 0x00, or if a dedicated
+ * instance has just been added.
+ */
+ if (!adv || added) {
+ err = hci_start_ext_adv_sync(hdev, instance);
+ if (err < 0)
+ goto fail;
+
+ err = hci_adv_bcast_annoucement(hdev, adv);
+ if (err < 0)
+ goto fail;
+ }
+
+ err = hci_set_per_adv_params_sync(hdev, instance, min_interval,
+ max_interval);
+ if (err < 0)
+ goto fail;
+
+ err = hci_set_per_adv_data_sync(hdev, instance);
+ if (err < 0)
+ goto fail;
+
+ err = hci_enable_per_advertising_sync(hdev, instance);
+ if (err < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ if (added)
+ hci_remove_adv_instance(hdev, instance);
+
+ return err;
+}
+
static int hci_start_adv_sync(struct hci_dev *hdev, u8 instance)
{
int err;
@@ -1104,12 +1603,40 @@ int hci_remove_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance,
HCI_CMD_TIMEOUT, sk);
}
-static void cancel_adv_timeout(struct hci_dev *hdev)
+static int remove_ext_adv_sync(struct hci_dev *hdev, void *data)
{
- if (hdev->adv_instance_timeout) {
- hdev->adv_instance_timeout = 0;
- cancel_delayed_work(&hdev->adv_instance_expire);
+ struct adv_info *adv = data;
+ u8 instance = 0;
+
+ if (adv)
+ instance = adv->instance;
+
+ return hci_remove_ext_adv_instance_sync(hdev, instance, NULL);
+}
+
+int hci_remove_ext_adv_instance(struct hci_dev *hdev, u8 instance)
+{
+ struct adv_info *adv = NULL;
+
+ if (instance) {
+ adv = hci_find_adv_instance(hdev, instance);
+ if (!adv)
+ return -EINVAL;
}
+
+ return hci_cmd_sync_queue(hdev, remove_ext_adv_sync, adv, NULL);
+}
+
+int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason)
+{
+ struct hci_cp_le_term_big cp;
+
+ memset(&cp, 0, sizeof(cp));
+ cp.handle = handle;
+ cp.reason = reason;
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_TERM_BIG,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance)
@@ -1119,27 +1646,39 @@ static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance)
u8 data[HCI_MAX_EXT_AD_LENGTH];
} pdu;
u8 len;
+ struct adv_info *adv = NULL;
+ int err;
memset(&pdu, 0, sizeof(pdu));
- len = eir_create_adv_data(hdev, instance, pdu.data);
-
- /* There's nothing to do if the data hasn't changed */
- if (hdev->adv_data_len == len &&
- memcmp(pdu.data, hdev->adv_data, len) == 0)
- return 0;
+ if (instance) {
+ adv = hci_find_adv_instance(hdev, instance);
+ if (!adv || !adv->adv_data_changed)
+ return 0;
+ }
- memcpy(hdev->adv_data, pdu.data, len);
- hdev->adv_data_len = len;
+ len = eir_create_adv_data(hdev, instance, pdu.data);
pdu.cp.length = len;
pdu.cp.handle = instance;
pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
- return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA,
- sizeof(pdu.cp) + len, &pdu.cp,
- HCI_CMD_TIMEOUT);
+ err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA,
+ sizeof(pdu.cp) + len, &pdu.cp,
+ HCI_CMD_TIMEOUT);
+ if (err)
+ return err;
+
+ /* Update data if the command succeeded */
+ if (adv) {
+ adv->adv_data_changed = false;
+ } else {
+ memcpy(hdev->adv_data, pdu.data, len);
+ hdev->adv_data_len = len;
+ }
+
+ return 0;
}
static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance)
@@ -1251,10 +1790,13 @@ static int hci_clear_adv_sets_sync(struct hci_dev *hdev, struct sock *sk)
static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force)
{
struct adv_info *adv, *n;
+ int err = 0;
if (ext_adv_capable(hdev))
/* Remove all existing sets */
- return hci_clear_adv_sets_sync(hdev, sk);
+ err = hci_clear_adv_sets_sync(hdev, sk);
+ if (ext_adv_capable(hdev))
+ return err;
/* This is safe as long as there is no command sent while the lock is
* held.
@@ -1282,11 +1824,13 @@ static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force)
static int hci_remove_adv_sync(struct hci_dev *hdev, u8 instance,
struct sock *sk)
{
- int err;
+ int err = 0;
/* If we use extended advertising, instance has to be removed first. */
if (ext_adv_capable(hdev))
- return hci_remove_ext_adv_instance_sync(hdev, instance, sk);
+ err = hci_remove_ext_adv_instance_sync(hdev, instance, sk);
+ if (ext_adv_capable(hdev))
+ return err;
/* This is safe as long as there is no command sent while the lock is
* held.
@@ -1385,13 +1929,16 @@ int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type)
int hci_disable_advertising_sync(struct hci_dev *hdev)
{
u8 enable = 0x00;
+ int err = 0;
/* If controller is not advertising we are done. */
if (!hci_dev_test_flag(hdev, HCI_LE_ADV))
return 0;
if (ext_adv_capable(hdev))
- return hci_disable_ext_adv_instance_sync(hdev, 0x00);
+ err = hci_disable_ext_adv_instance_sync(hdev, 0x00);
+ if (ext_adv_capable(hdev))
+ return err;
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_ENABLE,
sizeof(enable), &enable, HCI_CMD_TIMEOUT);
@@ -1404,7 +1951,11 @@ static int hci_le_set_ext_scan_enable_sync(struct hci_dev *hdev, u8 val,
memset(&cp, 0, sizeof(cp));
cp.enable = val;
- cp.filter_dup = filter_dup;
+
+ if (hci_dev_test_flag(hdev, HCI_MESH))
+ cp.filter_dup = LE_SCAN_FILTER_DUP_DISABLE;
+ else
+ cp.filter_dup = filter_dup;
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_ENABLE,
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
@@ -1420,7 +1971,11 @@ static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val,
memset(&cp, 0, sizeof(cp));
cp.enable = val;
- cp.filter_dup = filter_dup;
+
+ if (val && hci_dev_test_flag(hdev, HCI_MESH))
+ cp.filter_dup = LE_SCAN_FILTER_DUP_DISABLE;
+ else
+ cp.filter_dup = filter_dup;
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_SCAN_ENABLE,
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
@@ -1612,6 +2167,9 @@ static int hci_le_add_resolve_list_sync(struct hci_dev *hdev,
bacpy(&cp.bdaddr, &params->addr);
memcpy(cp.peer_irk, irk->val, 16);
+ /* Default privacy mode is always Network */
+ params->privacy_mode = HCI_NETWORK_PRIVACY;
+
done:
if (hci_dev_test_flag(hdev, HCI_PRIVACY))
memcpy(cp.local_irk, hdev->irk, 16);
@@ -1637,7 +2195,7 @@ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
* indicates that LL Privacy has been enabled and
* HCI_OP_LE_SET_PRIVACY_MODE is supported.
*/
- if (!test_bit(HCI_CONN_FLAG_DEVICE_PRIVACY, params->flags))
+ if (!(params->flags & HCI_CONN_FLAG_DEVICE_PRIVACY))
return 0;
irk = hci_find_irk_by_addr(hdev, &params->addr, params->addr_type);
@@ -1664,20 +2222,19 @@ static int hci_le_add_accept_list_sync(struct hci_dev *hdev,
struct hci_cp_le_add_to_accept_list cp;
int err;
+ /* During suspend, only wakeable devices can be in acceptlist */
+ if (hdev->suspended &&
+ !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP))
+ return 0;
+
/* Select filter policy to accept all advertising */
if (*num_entries >= hdev->le_accept_list_size)
return -ENOSPC;
/* Accept list can not be used with RPAs */
if (!use_ll_privacy(hdev) &&
- hci_find_irk_by_addr(hdev, &params->addr, params->addr_type)) {
+ hci_find_irk_by_addr(hdev, &params->addr, params->addr_type))
return -EINVAL;
- }
-
- /* During suspend, only wakeable devices can be in acceptlist */
- if (hdev->suspended &&
- !test_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, params->flags))
- return 0;
/* Attempt to program the device in the resolving list first to avoid
* having to rollback in case it fails since the resolving list is
@@ -1841,6 +2398,7 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
struct bdaddr_list *b, *t;
u8 num_entries = 0;
bool pend_conn, pend_report;
+ u8 filter_policy;
int err;
/* Pause advertising if resolving list can be used as controllers are
@@ -1865,12 +2423,15 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
}
/* Go through the current accept list programmed into the
- * controller one by one and check if that address is still
- * in the list of pending connections or list of devices to
+ * controller one by one and check if that address is connected or is
+ * still in the list of pending connections or list of devices to
* report. If not present in either list, then remove it from
* the controller.
*/
list_for_each_entry_safe(b, t, &hdev->le_accept_list, list) {
+ if (hci_conn_hash_lookup_le(hdev, &b->bdaddr, b->bdaddr_type))
+ continue;
+
pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns,
&b->bdaddr,
b->bdaddr_type);
@@ -1927,6 +2488,8 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
err = -EINVAL;
done:
+ filter_policy = err ? 0x00 : 0x01;
+
/* Enable address resolution when LL Privacy is enabled. */
err = hci_le_set_addr_resolution_enable_sync(hdev, 0x01);
if (err)
@@ -1937,7 +2500,7 @@ done:
hci_resume_advertising_sync(hdev);
/* Select filter policy to use accept list */
- return err ? 0x00 : 0x01;
+ return filter_policy;
}
/* Returns true if an le connection is in the scanning state */
@@ -2051,6 +2614,7 @@ static int hci_passive_scan_sync(struct hci_dev *hdev)
u8 own_addr_type;
u8 filter_policy;
u16 window, interval;
+ u8 filter_dups = LE_SCAN_FILTER_DUP_ENABLE;
int err;
if (hdev->scanning_paused) {
@@ -2113,11 +2677,16 @@ static int hci_passive_scan_sync(struct hci_dev *hdev)
interval = hdev->le_scan_interval;
}
+ /* Disable all filtering for Mesh */
+ if (hci_dev_test_flag(hdev, HCI_MESH)) {
+ filter_policy = 0;
+ filter_dups = LE_SCAN_FILTER_DUP_DISABLE;
+ }
+
bt_dev_dbg(hdev, "LE passive scan with acceptlist = %d", filter_policy);
return hci_start_scan_sync(hdev, LE_SCAN_PASSIVE, interval, window,
- own_addr_type, filter_policy,
- LE_SCAN_FILTER_DUP_ENABLE);
+ own_addr_type, filter_policy, filter_dups);
}
/* This function controls the passive scanning based on hdev->pend_le_conns
@@ -2167,9 +2736,11 @@ int hci_update_passive_scan_sync(struct hci_dev *hdev)
bt_dev_dbg(hdev, "ADV monitoring is %s",
hci_is_adv_monitoring(hdev) ? "on" : "off");
- if (list_empty(&hdev->pend_le_conns) &&
+ if (!hci_dev_test_flag(hdev, HCI_MESH) &&
+ list_empty(&hdev->pend_le_conns) &&
list_empty(&hdev->pend_le_reports) &&
- !hci_is_adv_monitoring(hdev)) {
+ !hci_is_adv_monitoring(hdev) &&
+ !hci_dev_test_flag(hdev, HCI_PA_SYNC)) {
/* If there is no pending LE connections or devices
* to be scanned for or no ADV monitors, we should stop the
* background scanning.
@@ -2204,6 +2775,16 @@ int hci_update_passive_scan_sync(struct hci_dev *hdev)
return err;
}
+static int update_scan_sync(struct hci_dev *hdev, void *data)
+{
+ return hci_update_scan_sync(hdev);
+}
+
+int hci_update_scan(struct hci_dev *hdev)
+{
+ return hci_cmd_sync_queue(hdev, update_scan_sync, NULL, NULL);
+}
+
static int update_passive_scan_sync(struct hci_dev *hdev, void *data)
{
return hci_update_passive_scan_sync(hdev);
@@ -2806,6 +3387,9 @@ static int hci_set_event_filter_sync(struct hci_dev *hdev, u8 flt_type,
if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
return 0;
+ if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks))
+ return 0;
+
memset(&cp, 0, sizeof(cp));
cp.flt_type = flt_type;
@@ -2826,6 +3410,13 @@ static int hci_clear_event_filter_sync(struct hci_dev *hdev)
if (!hci_dev_test_flag(hdev, HCI_EVENT_FILTER_CONFIGURED))
return 0;
+ /* In theory the state machine should not reach here unless
+ * a hci_set_event_filter_sync() call succeeds, but we do
+ * the check both for parity and as a future reminder.
+ */
+ if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks))
+ return 0;
+
return hci_set_event_filter_sync(hdev, HCI_FLT_CLEAR_ALL, 0x00,
BDADDR_ANY, 0x00);
}
@@ -2961,6 +3552,12 @@ static const struct hci_init_stage hci_init2[] = {
/* Read LE Buffer Size */
static int hci_le_read_buffer_size_sync(struct hci_dev *hdev)
{
+ /* Use Read LE Buffer Size V2 if supported */
+ if (hdev->commands[41] & 0x20)
+ return __hci_cmd_sync_status(hdev,
+ HCI_OP_LE_READ_BUFFER_SIZE_V2,
+ 0, NULL, HCI_CMD_TIMEOUT);
+
return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_BUFFER_SIZE,
0, NULL, HCI_CMD_TIMEOUT);
}
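The commands[41] & 0x20 test above is an instance of the Read Local Supported Commands bitmask lookup (0x20 == BIT(5), i.e. octet 41 bit 5, which advertises LE Read Buffer Size v2). A generic helper, shown only to illustrate the indexing and assuming hdev->commands[] mirrors that bitmask:

	/* Illustration only: octet-indexed supported-commands lookup,
	 * bit 0 being the least significant bit of each octet.
	 */
	static inline bool example_cmd_advertised(struct hci_dev *hdev,
						  unsigned int octet,
						  unsigned int bit)
	{
		return hdev->commands[octet] & BIT(bit);
	}

	/* example_cmd_advertised(hdev, 41, 5) matches the 0x20 test above. */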
@@ -2999,6 +3596,10 @@ static int hci_init2_sync(struct hci_dev *hdev)
if (hdev->dev_type == HCI_AMP)
return hci_init_stage_sync(hdev, amp_init2);
+ err = hci_init_stage_sync(hdev, hci_init2);
+ if (err)
+ return err;
+
if (lmp_bredr_capable(hdev)) {
err = hci_init_stage_sync(hdev, br_init2);
if (err)
@@ -3016,7 +3617,7 @@ static int hci_init2_sync(struct hci_dev *hdev)
hci_dev_set_flag(hdev, HCI_LE_ENABLED);
}
- return hci_init_stage_sync(hdev, hci_init2);
+ return 0;
}
static int hci_set_event_mask_sync(struct hci_dev *hdev)
@@ -3179,7 +3780,7 @@ static int hci_read_page_scan_activity_sync(struct hci_dev *hdev)
static int hci_read_def_err_data_reporting_sync(struct hci_dev *hdev)
{
if (!(hdev->commands[18] & 0x04) ||
- test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks))
+ !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING))
return 0;
return __hci_cmd_sync_status(hdev, HCI_OP_READ_DEF_ERR_DATA_REPORTING,
@@ -3262,10 +3863,10 @@ static int hci_le_set_event_mask_sync(struct hci_dev *hdev)
if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT)
events[0] |= 0x40; /* LE Data Length Change */
- /* If the controller supports LL Privacy feature, enable
- * the corresponding event.
+ /* If the controller supports LL Privacy feature or LE Extended Adv,
+ * enable the corresponding event.
*/
- if (hdev->le_features[0] & HCI_LE_LL_PRIVACY)
+ if (use_enhanced_conn_complete(hdev))
events[1] |= 0x02; /* LE Enhanced Connection Complete */
/* If the controller supports Extended Scanner Filter
@@ -3337,6 +3938,19 @@ static int hci_le_set_event_mask_sync(struct hci_dev *hdev)
if (ext_adv_capable(hdev))
events[2] |= 0x02; /* LE Advertising Set Terminated */
+ if (cis_capable(hdev)) {
+ events[3] |= 0x01; /* LE CIS Established */
+ if (cis_peripheral_capable(hdev))
+ events[3] |= 0x02; /* LE CIS Request */
+ }
+
+ if (bis_capable(hdev)) {
+ events[3] |= 0x04; /* LE Create BIG Complete */
+ events[3] |= 0x08; /* LE Terminate BIG Complete */
+ events[3] |= 0x10; /* LE BIG Sync Established */
+ events[3] |= 0x20; /* LE BIG Sync Loss */
+ }
+
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EVENT_MASK,
sizeof(events), events, HCI_CMD_TIMEOUT);
}
@@ -3477,6 +4091,24 @@ static int hci_set_le_support_sync(struct hci_dev *hdev)
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
+/* LE Set Host Feature */
+static int hci_le_set_host_feature_sync(struct hci_dev *hdev)
+{
+ struct hci_cp_le_set_host_feature cp;
+
+ if (!iso_capable(hdev))
+ return 0;
+
+ memset(&cp, 0, sizeof(cp));
+
+ /* Isochronous Channels (Host Support) */
+ cp.bit_number = 32;
+ cp.bit_value = 1;
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_HOST_FEATURE,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+}
+
/* LE Controller init stage 3 command sequence */
static const struct hci_init_stage le_init3[] = {
/* HCI_OP_LE_SET_EVENT_MASK */
@@ -3503,6 +4135,8 @@ static const struct hci_init_stage le_init3[] = {
HCI_INIT(hci_le_read_num_support_adv_sets_sync),
/* HCI_OP_WRITE_LE_HOST_SUPPORTED */
HCI_INIT(hci_set_le_support_sync),
+ /* HCI_OP_LE_SET_HOST_FEATURE */
+ HCI_INIT(hci_le_set_host_feature_sync),
{}
};
@@ -3566,7 +4200,7 @@ static int hci_set_event_mask_page_2_sync(struct hci_dev *hdev)
if (lmp_cpb_central_capable(hdev)) {
events[1] |= 0x40; /* Triggered Clock Capture */
events[1] |= 0x80; /* Synchronization Train Complete */
- events[2] |= 0x10; /* Peripheral Page Response Timeout */
+ events[2] |= 0x08; /* Truncated Page Complete */
events[2] |= 0x20; /* CPB Channel Map Change */
changed = true;
}
@@ -3578,7 +4212,7 @@ static int hci_set_event_mask_page_2_sync(struct hci_dev *hdev)
events[2] |= 0x01; /* Synchronization Train Received */
events[2] |= 0x02; /* CPB Receive */
events[2] |= 0x04; /* CPB Timeout */
- events[2] |= 0x08; /* Truncated Page Complete */
+ events[2] |= 0x10; /* Peripheral Page Response Timeout */
changed = true;
}
@@ -3664,7 +4298,7 @@ static int hci_set_err_data_report_sync(struct hci_dev *hdev)
bool enabled = hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED);
if (!(hdev->commands[18] & 0x08) ||
- test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks))
+ !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING))
return 0;
if (enabled == hdev->err_data_reporting)
@@ -3812,83 +4446,61 @@ static int hci_init_sync(struct hci_dev *hdev)
return 0;
}
-int hci_dev_open_sync(struct hci_dev *hdev)
+#define HCI_QUIRK_BROKEN(_quirk, _desc) { HCI_QUIRK_BROKEN_##_quirk, _desc }
+
+static const struct {
+ unsigned long quirk;
+ const char *desc;
+} hci_broken_table[] = {
+ HCI_QUIRK_BROKEN(LOCAL_COMMANDS,
+ "HCI Read Local Supported Commands not supported"),
+ HCI_QUIRK_BROKEN(STORED_LINK_KEY,
+ "HCI Delete Stored Link Key command is advertised, "
+ "but not supported."),
+ HCI_QUIRK_BROKEN(READ_TRANSMIT_POWER,
+ "HCI Read Transmit Power Level command is advertised, "
+ "but not supported."),
+ HCI_QUIRK_BROKEN(FILTER_CLEAR_ALL,
+ "HCI Set Event Filter command not supported."),
+ HCI_QUIRK_BROKEN(ENHANCED_SETUP_SYNC_CONN,
+ "HCI Enhanced Setup Synchronous Connection command is "
+ "advertised, but not supported.")
+};
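For illustration, a transport driver that knows its firmware mishandles one of these commands would set the matching quirk before registration so the warning above fires during setup; a minimal sketch (the probe function is hypothetical):

	static int example_driver_probe(struct hci_dev *hdev)
	{
		/* Firmware advertises Delete Stored Link Key but breaks on it */
		set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks);

		return hci_register_dev(hdev);
	}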
+
+/* This function handles hdev setup stage:
+ *
+ * Calls hdev->setup
+ * Sets up the address if HCI_QUIRK_USE_BDADDR_PROPERTY is set.
+ */
+static int hci_dev_setup_sync(struct hci_dev *hdev)
{
int ret = 0;
-
- bt_dev_dbg(hdev, "");
-
- if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
- ret = -ENODEV;
- goto done;
- }
+ bool invalid_bdaddr;
+ size_t i;
if (!hci_dev_test_flag(hdev, HCI_SETUP) &&
- !hci_dev_test_flag(hdev, HCI_CONFIG)) {
- /* Check for rfkill but allow the HCI setup stage to
- * proceed (which in itself doesn't cause any RF activity).
- */
- if (hci_dev_test_flag(hdev, HCI_RFKILLED)) {
- ret = -ERFKILL;
- goto done;
- }
-
- /* Check for valid public address or a configured static
- * random address, but let the HCI setup proceed to
- * be able to determine if there is a public address
- * or not.
- *
- * In case of user channel usage, it is not important
- * if a public address or static random address is
- * available.
- *
- * This check is only valid for BR/EDR controllers
- * since AMP controllers do not have an address.
- */
- if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
- hdev->dev_type == HCI_PRIMARY &&
- !bacmp(&hdev->bdaddr, BDADDR_ANY) &&
- !bacmp(&hdev->static_addr, BDADDR_ANY)) {
- ret = -EADDRNOTAVAIL;
- goto done;
- }
- }
-
- if (test_bit(HCI_UP, &hdev->flags)) {
- ret = -EALREADY;
- goto done;
- }
-
- if (hdev->open(hdev)) {
- ret = -EIO;
- goto done;
- }
-
- set_bit(HCI_RUNNING, &hdev->flags);
- hci_sock_dev_event(hdev, HCI_DEV_OPEN);
-
- atomic_set(&hdev->cmd_cnt, 1);
- set_bit(HCI_INIT, &hdev->flags);
+ !test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks))
+ return 0;
- if (hci_dev_test_flag(hdev, HCI_SETUP) ||
- test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks)) {
- bool invalid_bdaddr;
+ bt_dev_dbg(hdev, "");
- hci_sock_dev_event(hdev, HCI_DEV_SETUP);
+ hci_sock_dev_event(hdev, HCI_DEV_SETUP);
- if (hdev->setup)
- ret = hdev->setup(hdev);
+ if (hdev->setup)
+ ret = hdev->setup(hdev);
- /* The transport driver can set the quirk to mark the
- * BD_ADDR invalid before creating the HCI device or in
- * its setup callback.
- */
- invalid_bdaddr = test_bit(HCI_QUIRK_INVALID_BDADDR,
- &hdev->quirks);
+ for (i = 0; i < ARRAY_SIZE(hci_broken_table); i++) {
+ if (test_bit(hci_broken_table[i].quirk, &hdev->quirks))
+ bt_dev_warn(hdev, "%s", hci_broken_table[i].desc);
+ }
- if (ret)
- goto setup_failed;
+ /* The transport driver can set the quirk to mark the
+ * BD_ADDR invalid before creating the HCI device or in
+ * its setup callback.
+ */
+ invalid_bdaddr = test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
+ if (!ret) {
if (test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) {
if (!bacmp(&hdev->public_addr, BDADDR_ANY))
hci_dev_get_bd_addr_from_property(hdev);
@@ -3907,33 +4519,51 @@ int hci_dev_open_sync(struct hci_dev *hdev)
invalid_bdaddr = false;
}
}
+ }
-setup_failed:
- /* The transport driver can set these quirks before
- * creating the HCI device or in its setup callback.
- *
- * For the invalid BD_ADDR quirk it is possible that
- * it becomes a valid address if the bootloader does
- * provide it (see above).
- *
- * In case any of them is set, the controller has to
- * start up as unconfigured.
- */
- if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) ||
- invalid_bdaddr)
- hci_dev_set_flag(hdev, HCI_UNCONFIGURED);
+ /* The transport driver can set these quirks before
+ * creating the HCI device or in its setup callback.
+ *
+ * For the invalid BD_ADDR quirk it is possible that
+ * it becomes a valid address if the bootloader does
+ * provide it (see above).
+ *
+ * In case any of them is set, the controller has to
+ * start up as unconfigured.
+ */
+ if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) ||
+ invalid_bdaddr)
+ hci_dev_set_flag(hdev, HCI_UNCONFIGURED);
- /* For an unconfigured controller it is required to
- * read at least the version information provided by
- * the Read Local Version Information command.
- *
- * If the set_bdaddr driver callback is provided, then
- * also the original Bluetooth public device address
- * will be read using the Read BD Address command.
- */
- if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
- ret = hci_unconf_init_sync(hdev);
- }
+ /* For an unconfigured controller it is required to
+ * read at least the version information provided by
+ * the Read Local Version Information command.
+ *
+ * If the set_bdaddr driver callback is provided, then
+ * also the original Bluetooth public device address
+ * will be read using the Read BD Address command.
+ */
+ if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
+ return hci_unconf_init_sync(hdev);
+
+ return ret;
+}
+
+/* This function handles hdev init stage:
+ *
+ * Calls hci_dev_setup_sync to perform the setup stage
+ * Calls hci_init_sync to perform the HCI command init sequence
+ */
+static int hci_dev_init_sync(struct hci_dev *hdev)
+{
+ int ret;
+
+ bt_dev_dbg(hdev, "");
+
+ atomic_set(&hdev->cmd_cnt, 1);
+ set_bit(HCI_INIT, &hdev->flags);
+
+ ret = hci_dev_setup_sync(hdev);
if (hci_dev_test_flag(hdev, HCI_CONFIG)) {
/* If public address change is configured, ensure that
@@ -3973,6 +4603,65 @@ setup_failed:
clear_bit(HCI_INIT, &hdev->flags);
+ return ret;
+}
+
+int hci_dev_open_sync(struct hci_dev *hdev)
+{
+ int ret;
+
+ bt_dev_dbg(hdev, "");
+
+ if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
+ ret = -ENODEV;
+ goto done;
+ }
+
+ if (!hci_dev_test_flag(hdev, HCI_SETUP) &&
+ !hci_dev_test_flag(hdev, HCI_CONFIG)) {
+ /* Check for rfkill but allow the HCI setup stage to
+ * proceed (which in itself doesn't cause any RF activity).
+ */
+ if (hci_dev_test_flag(hdev, HCI_RFKILLED)) {
+ ret = -ERFKILL;
+ goto done;
+ }
+
+ /* Check for valid public address or a configured static
+ * random address, but let the HCI setup proceed to
+ * be able to determine if there is a public address
+ * or not.
+ *
+ * In case of user channel usage, it is not important
+ * if a public address or static random address is
+ * available.
+ *
+ * This check is only valid for BR/EDR controllers
+ * since AMP controllers do not have an address.
+ */
+ if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
+ hdev->dev_type == HCI_PRIMARY &&
+ !bacmp(&hdev->bdaddr, BDADDR_ANY) &&
+ !bacmp(&hdev->static_addr, BDADDR_ANY)) {
+ ret = -EADDRNOTAVAIL;
+ goto done;
+ }
+ }
+
+ if (test_bit(HCI_UP, &hdev->flags)) {
+ ret = -EALREADY;
+ goto done;
+ }
+
+ if (hdev->open(hdev)) {
+ ret = -EIO;
+ goto done;
+ }
+
+ set_bit(HCI_RUNNING, &hdev->flags);
+ hci_sock_dev_event(hdev, HCI_DEV_OPEN);
+
+ ret = hci_dev_init_sync(hdev);
if (!ret) {
hci_dev_hold(hdev);
hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
@@ -3987,6 +4676,7 @@ setup_failed:
hci_dev_test_flag(hdev, HCI_MGMT) &&
hdev->dev_type == HCI_PRIMARY) {
ret = hci_powered_update_sync(hdev);
+ mgmt_power_on(hdev, ret);
}
} else {
/* Init failed, cleanup */
@@ -4038,6 +4728,31 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev)
BT_DBG("All LE pending actions cleared");
}
+static int hci_dev_shutdown(struct hci_dev *hdev)
+{
+ int err = 0;
+ /* Similar to how we first do setup and then set the exclusive access
+ * bit for userspace, we must first unset userchannel and then clean up.
+ * Otherwise, the kernel can't properly use the hci channel to clean up
+ * the controller (some shutdown routines require sending additional
+ * commands to the controller for example).
+ */
+ bool was_userchannel =
+ hci_dev_test_and_clear_flag(hdev, HCI_USER_CHANNEL);
+
+ if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) &&
+ test_bit(HCI_UP, &hdev->flags)) {
+ /* Execute vendor specific shutdown routine */
+ if (hdev->shutdown)
+ err = hdev->shutdown(hdev);
+ }
+
+ if (was_userchannel)
+ hci_dev_set_flag(hdev, HCI_USER_CHANNEL);
+
+ return err;
+}
+
int hci_dev_close_sync(struct hci_dev *hdev)
{
bool auto_off;
@@ -4047,17 +4762,18 @@ int hci_dev_close_sync(struct hci_dev *hdev)
cancel_delayed_work(&hdev->power_off);
cancel_delayed_work(&hdev->ncmd_timer);
+ cancel_delayed_work(&hdev->le_scan_disable);
+ cancel_delayed_work(&hdev->le_scan_restart);
hci_request_cancel_all(hdev);
- if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) &&
- !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
- test_bit(HCI_UP, &hdev->flags)) {
- /* Execute vendor specific shutdown routine */
- if (hdev->shutdown)
- err = hdev->shutdown(hdev);
+ if (hdev->adv_instance_timeout) {
+ cancel_delayed_work_sync(&hdev->adv_instance_expire);
+ hdev->adv_instance_timeout = 0;
}
+ err = hci_dev_shutdown(hdev);
+
if (!test_and_clear_bit(HCI_UP, &hdev->flags)) {
cancel_delayed_work_sync(&hdev->cmd_timer);
return err;
@@ -4106,9 +4822,9 @@ int hci_dev_close_sync(struct hci_dev *hdev)
hci_inquiry_cache_flush(hdev);
hci_pend_le_actions_clear(hdev);
hci_conn_hash_flush(hdev);
- hci_dev_unlock(hdev);
-
+ /* Prevent data races on hdev->smp_data or hdev->smp_bredr_data */
smp_unregister(hdev);
+ hci_dev_unlock(hdev);
hci_sock_dev_event(hdev, HCI_DEV_DOWN);
@@ -4392,15 +5108,25 @@ static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
-static int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
- u8 reason)
+int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason)
{
+ int err;
+
switch (conn->state) {
case BT_CONNECTED:
case BT_CONFIG:
return hci_disconnect_sync(hdev, conn, reason);
case BT_CONNECT:
- return hci_connect_cancel_sync(hdev, conn);
+ err = hci_connect_cancel_sync(hdev, conn);
+ /* Cleanup hci_conn object if it cannot be cancelled as it
+ * likely means the controller and host stack are out of sync.
+ */
+ if (err) {
+ hci_dev_lock(hdev);
+ hci_conn_failed(conn, err);
+ hci_dev_unlock(hdev);
+ }
+ return err;
case BT_CONNECT2:
return hci_reject_conn_sync(hdev, conn, reason);
default:
@@ -4422,7 +5148,7 @@ static int hci_disconnect_all_sync(struct hci_dev *hdev, u8 reason)
return err;
}
- return err;
+ return 0;
}
/* This function perform power off HCI command sequence as follows:
@@ -4645,7 +5371,7 @@ static int hci_active_scan_sync(struct hci_dev *hdev, uint16_t interval)
/* Pause advertising since active scanning disables address resolution
* which advertising depends on in order to generate its RPAs.
*/
- if (use_ll_privacy(hdev)) {
+ if (use_ll_privacy(hdev) && hci_dev_test_flag(hdev, HCI_PRIVACY)) {
err = hci_pause_advertising_sync(hdev);
if (err) {
bt_dev_err(hdev, "pause advertising failed: %d", err);
@@ -4825,11 +5551,17 @@ static int hci_update_event_filter_sync(struct hci_dev *hdev)
if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
return 0;
+ /* Some fake CSR controllers lock up after setting this type of
+ * filter, so avoid sending the request altogether.
+ */
+ if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks))
+ return 0;
+
/* Always clear event filter when starting */
hci_clear_event_filter_sync(hdev);
list_for_each_entry(b, &hdev->accept_list, list) {
- if (!test_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, b->flags))
+ if (!(b->flags & HCI_CONN_FLAG_REMOTE_WAKEUP))
continue;
bt_dev_dbg(hdev, "Adding event filters for %pMR", &b->bdaddr);
@@ -4853,10 +5585,28 @@ static int hci_update_event_filter_sync(struct hci_dev *hdev)
return 0;
}
+/* This function disables scanning (BR and LE) and marks it as paused */
+static int hci_pause_scan_sync(struct hci_dev *hdev)
+{
+ if (hdev->scanning_paused)
+ return 0;
+
+ /* Disable page scan if enabled */
+ if (test_bit(HCI_PSCAN, &hdev->flags))
+ hci_write_scan_enable_sync(hdev, SCAN_DISABLED);
+
+ hci_scan_disable_sync(hdev);
+
+ hdev->scanning_paused = true;
+
+ return 0;
+}
+
/* This function performs the HCI suspend procedures in the following order:
*
* Pause discovery (active scanning/inquiry)
* Pause Directed Advertising/Advertising
+ * Pause Scanning (passive scanning in case discovery was not active)
* Disconnect all connections
* Set suspend_status to BT_SUSPEND_DISCONNECT if hdev cannot wakeup
* otherwise:
@@ -4882,23 +5632,26 @@ int hci_suspend_sync(struct hci_dev *hdev)
/* Pause other advertisements */
hci_pause_advertising_sync(hdev);
- /* Disable page scan if enabled */
- if (test_bit(HCI_PSCAN, &hdev->flags))
- hci_write_scan_enable_sync(hdev, SCAN_DISABLED);
-
/* Suspend monitor filters */
hci_suspend_monitor_sync(hdev);
/* Prevent disconnects from causing scanning to be re-enabled */
- hdev->scanning_paused = true;
+ hci_pause_scan_sync(hdev);
- /* Soft disconnect everything (power off) */
- err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_POWER_OFF);
- if (err) {
- /* Set state to BT_RUNNING so resume doesn't notify */
- hdev->suspend_state = BT_RUNNING;
- hci_resume_sync(hdev);
- return err;
+ if (hci_conn_count(hdev)) {
+ /* Soft disconnect everything (power off) */
+ err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_POWER_OFF);
+ if (err) {
+ /* Set state to BT_RUNNING so resume doesn't notify */
+ hdev->suspend_state = BT_RUNNING;
+ hci_resume_sync(hdev);
+ return err;
+ }
+
+ /* Update the event mask so only the allowed events can wake up
+ * the host.
+ */
+ hci_set_event_mask_sync(hdev);
}
/* Only configure accept list if disconnect succeeded and wake
@@ -4912,9 +5665,6 @@ int hci_suspend_sync(struct hci_dev *hdev)
/* Unpause to take care of updating scanning params */
hdev->scanning_paused = false;
- /* Update event mask so only the allowed event can wakeup the host */
- hci_set_event_mask_sync(hdev);
-
/* Enable event filter for paired devices */
hci_update_event_filter_sync(hdev);
@@ -4961,6 +5711,22 @@ static void hci_resume_monitor_sync(struct hci_dev *hdev)
}
}
+/* This function resumes scanning and resets the paused flag */
+static int hci_resume_scan_sync(struct hci_dev *hdev)
+{
+ if (!hdev->scanning_paused)
+ return 0;
+
+ hdev->scanning_paused = false;
+
+ hci_update_scan_sync(hdev);
+
+ /* Reset passive scanning to normal */
+ hci_update_passive_scan_sync(hdev);
+
+ return 0;
+}
+
/* This function performs the HCI resume procedures in the following order:
*
* Restore event mask
@@ -4976,17 +5742,15 @@ int hci_resume_sync(struct hci_dev *hdev)
return 0;
hdev->suspended = false;
- hdev->scanning_paused = false;
/* Restore event mask */
hci_set_event_mask_sync(hdev);
/* Clear any event filters and restore scan state */
hci_clear_event_filter_sync(hdev);
- hci_update_scan_sync(hdev);
- /* Reset passive scanning to normal */
- hci_update_passive_scan_sync(hdev);
+ /* Resume scanning */
+ hci_resume_scan_sync(hdev);
/* Resume monitor filters */
hci_resume_monitor_sync(hdev);
@@ -5140,8 +5904,8 @@ static void set_ext_conn_params(struct hci_conn *conn,
p->max_ce_len = cpu_to_le16(0x0000);
}
-int hci_le_ext_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
- u8 own_addr_type)
+static int hci_le_ext_create_conn_sync(struct hci_dev *hdev,
+ struct hci_conn *conn, u8 own_addr_type)
{
struct hci_cp_le_ext_create_conn *cp;
struct hci_cp_le_ext_conn_param *p;
@@ -5185,7 +5949,7 @@ int hci_le_ext_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
return __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_EXT_CREATE_CONN,
plen, data,
HCI_EV_LE_ENHANCED_CONN_COMPLETE,
- HCI_CMD_TIMEOUT, NULL);
+ conn->conn_timeout, NULL);
}
int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn)
@@ -5270,12 +6034,147 @@ int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn)
cp.min_ce_len = cpu_to_le16(0x0000);
cp.max_ce_len = cpu_to_le16(0x0000);
+ /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 2261:
+ *
+ * If this event is unmasked and the HCI_LE_Connection_Complete event
+ * is unmasked, only the HCI_LE_Enhanced_Connection_Complete event is
+ * sent when a new connection has been created.
+ */
err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CREATE_CONN,
- sizeof(cp), &cp, HCI_EV_LE_CONN_COMPLETE,
- HCI_CMD_TIMEOUT, NULL);
+ sizeof(cp), &cp,
+ use_enhanced_conn_complete(hdev) ?
+ HCI_EV_LE_ENHANCED_CONN_COMPLETE :
+ HCI_EV_LE_CONN_COMPLETE,
+ conn->conn_timeout, NULL);
done:
/* Re-enable advertising after the connection attempt is finished. */
hci_resume_advertising_sync(hdev);
return err;
}
+
+int hci_le_remove_cig_sync(struct hci_dev *hdev, u8 handle)
+{
+ struct hci_cp_le_remove_cig cp;
+
+ memset(&cp, 0, sizeof(cp));
+ cp.cig_id = handle;
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_REMOVE_CIG, sizeof(cp),
+ &cp, HCI_CMD_TIMEOUT);
+}
+
+int hci_le_big_terminate_sync(struct hci_dev *hdev, u8 handle)
+{
+ struct hci_cp_le_big_term_sync cp;
+
+ memset(&cp, 0, sizeof(cp));
+ cp.handle = handle;
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_BIG_TERM_SYNC,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+}
+
+int hci_le_pa_terminate_sync(struct hci_dev *hdev, u16 handle)
+{
+ struct hci_cp_le_pa_term_sync cp;
+
+ memset(&cp, 0, sizeof(cp));
+ cp.handle = cpu_to_le16(handle);
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_TERM_SYNC,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+}
+
+int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
+ bool use_rpa, struct adv_info *adv_instance,
+ u8 *own_addr_type, bdaddr_t *rand_addr)
+{
+ int err;
+
+ bacpy(rand_addr, BDADDR_ANY);
+
+ /* If privacy is enabled use a resolvable private address. If
+ * current RPA has expired then generate a new one.
+ */
+ if (use_rpa) {
+ /* If the controller supports LL Privacy, use own address type
+ * 0x03 (ADDR_LE_DEV_RANDOM_RESOLVED).
+ */
+ if (use_ll_privacy(hdev))
+ *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED;
+ else
+ *own_addr_type = ADDR_LE_DEV_RANDOM;
+
+ if (adv_instance) {
+ if (adv_rpa_valid(adv_instance))
+ return 0;
+ } else {
+ if (rpa_valid(hdev))
+ return 0;
+ }
+
+ err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa);
+ if (err < 0) {
+ bt_dev_err(hdev, "failed to generate new RPA");
+ return err;
+ }
+
+ bacpy(rand_addr, &hdev->rpa);
+
+ return 0;
+ }
+
+ /* In case of required privacy without resolvable private address,
+ * use a non-resolvable private address. This is useful for
+ * non-connectable advertising.
+ */
+ if (require_privacy) {
+ bdaddr_t nrpa;
+
+ while (true) {
+ /* The non-resolvable private address is generated
+ * from six random bytes with the two most significant
+ * bits cleared.
+ */
+ get_random_bytes(&nrpa, 6);
+ nrpa.b[5] &= 0x3f;
+
+ /* The non-resolvable private address shall not be
+ * equal to the public address.
+ */
+ if (bacmp(&hdev->bdaddr, &nrpa))
+ break;
+ }
+
+ *own_addr_type = ADDR_LE_DEV_RANDOM;
+ bacpy(rand_addr, &nrpa);
+
+ return 0;
+ }
+
+ /* No privacy so use a public address. */
+ *own_addr_type = ADDR_LE_DEV_PUBLIC;
+
+ return 0;
+}
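As an aside, the NRPA loop above relies on the address sub-type living in the two most significant bits of the most significant byte; a hedged helper (assuming the kernel's bdaddr_t layout, where b[5] is the most significant byte) that checks the property being generated:

	/* Illustration only: an address is non-resolvable private when
	 * its two top bits are 0b00, which "nrpa.b[5] &= 0x3f" above
	 * guarantees.
	 */
	static inline bool example_addr_is_nrpa(const bdaddr_t *addr)
	{
		return (addr->b[5] & 0xc0) == 0x00;
	}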
+
+static int _update_adv_data_sync(struct hci_dev *hdev, void *data)
+{
+ u8 instance = *(u8 *)data;
+
+ kfree(data);
+
+ return hci_update_adv_data_sync(hdev, instance);
+}
+
+int hci_update_adv_data(struct hci_dev *hdev, u8 instance)
+{
+ u8 *inst_ptr = kmalloc(1, GFP_KERNEL);
+
+ if (!inst_ptr)
+ return -ENOMEM;
+
+ *inst_ptr = instance;
+ return hci_cmd_sync_queue(hdev, _update_adv_data_sync, inst_ptr, NULL);
+}
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 4e3e0451b08c..08542dfc2dc5 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -48,6 +48,9 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
BT_DBG("conn %p", conn);
+ if (device_is_registered(&conn->dev))
+ return;
+
dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle);
if (device_add(&conn->dev) < 0) {
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 80848dfc01db..cc20e706c639 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -83,14 +83,14 @@ static void hidp_copy_session(struct hidp_session *session, struct hidp_conninfo
ci->product = session->input->id.product;
ci->version = session->input->id.version;
if (session->input->name)
- strlcpy(ci->name, session->input->name, 128);
+ strscpy(ci->name, session->input->name, 128);
else
- strlcpy(ci->name, "HID Boot Device", 128);
+ strscpy(ci->name, "HID Boot Device", 128);
} else if (session->hid) {
ci->vendor = session->hid->vendor;
ci->product = session->hid->product;
ci->version = session->hid->version;
- strlcpy(ci->name, session->hid->name, 128);
+ strscpy(ci->name, session->hid->name, 128);
}
}
@@ -1305,7 +1305,7 @@ static int hidp_session_thread(void *arg)
l2cap_unregister_user(session->conn, &session->user);
hidp_session_put(session);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
return 0;
}
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
new file mode 100644
index 000000000000..f825857db6d0
--- /dev/null
+++ b/net/bluetooth/iso.c
@@ -0,0 +1,1860 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * BlueZ - Bluetooth protocol stack for Linux
+ *
+ * Copyright (C) 2022 Intel Corporation
+ */
+
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/sched/signal.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/iso.h>
+
+static const struct proto_ops iso_sock_ops;
+
+static struct bt_sock_list iso_sk_list = {
+ .lock = __RW_LOCK_UNLOCKED(iso_sk_list.lock)
+};
+
+/* ---- ISO connections ---- */
+struct iso_conn {
+ struct hci_conn *hcon;
+
+ /* @lock: spinlock protecting changes to iso_conn fields */
+ spinlock_t lock;
+ struct sock *sk;
+
+ struct delayed_work timeout_work;
+
+ struct sk_buff *rx_skb;
+ __u32 rx_len;
+ __u16 tx_sn;
+};
+
+#define iso_conn_lock(c) spin_lock(&(c)->lock)
+#define iso_conn_unlock(c) spin_unlock(&(c)->lock)
+
+static void iso_sock_close(struct sock *sk);
+static void iso_sock_kill(struct sock *sk);
+
+/* ----- ISO socket info ----- */
+#define iso_pi(sk) ((struct iso_pinfo *)sk)
+
+#define EIR_SERVICE_DATA_LENGTH 4
+#define BASE_MAX_LENGTH (HCI_MAX_PER_AD_LENGTH - EIR_SERVICE_DATA_LENGTH)
+
+struct iso_pinfo {
+ struct bt_sock bt;
+ bdaddr_t src;
+ __u8 src_type;
+ bdaddr_t dst;
+ __u8 dst_type;
+ __u8 bc_sid;
+ __u8 bc_num_bis;
+ __u8 bc_bis[ISO_MAX_NUM_BIS];
+ __u16 sync_handle;
+ __u32 flags;
+ struct bt_iso_qos qos;
+ __u8 base_len;
+ __u8 base[BASE_MAX_LENGTH];
+ struct iso_conn *conn;
+};
+
+/* ---- ISO timers ---- */
+#define ISO_CONN_TIMEOUT (HZ * 40)
+#define ISO_DISCONN_TIMEOUT (HZ * 2)
+
+static void iso_sock_timeout(struct work_struct *work)
+{
+ struct iso_conn *conn = container_of(work, struct iso_conn,
+ timeout_work.work);
+ struct sock *sk;
+
+ iso_conn_lock(conn);
+ sk = conn->sk;
+ if (sk)
+ sock_hold(sk);
+ iso_conn_unlock(conn);
+
+ if (!sk)
+ return;
+
+ BT_DBG("sock %p state %d", sk, sk->sk_state);
+
+ lock_sock(sk);
+ sk->sk_err = ETIMEDOUT;
+ sk->sk_state_change(sk);
+ release_sock(sk);
+ sock_put(sk);
+}
+
+static void iso_sock_set_timer(struct sock *sk, long timeout)
+{
+ if (!iso_pi(sk)->conn)
+ return;
+
+ BT_DBG("sock %p state %d timeout %ld", sk, sk->sk_state, timeout);
+ cancel_delayed_work(&iso_pi(sk)->conn->timeout_work);
+ schedule_delayed_work(&iso_pi(sk)->conn->timeout_work, timeout);
+}
+
+static void iso_sock_clear_timer(struct sock *sk)
+{
+ if (!iso_pi(sk)->conn)
+ return;
+
+ BT_DBG("sock %p state %d", sk, sk->sk_state);
+ cancel_delayed_work(&iso_pi(sk)->conn->timeout_work);
+}
+
+/* ---- ISO connections ---- */
+static struct iso_conn *iso_conn_add(struct hci_conn *hcon)
+{
+ struct iso_conn *conn = hcon->iso_data;
+
+ if (conn)
+ return conn;
+
+ conn = kzalloc(sizeof(*conn), GFP_KERNEL);
+ if (!conn)
+ return NULL;
+
+ spin_lock_init(&conn->lock);
+ INIT_DELAYED_WORK(&conn->timeout_work, iso_sock_timeout);
+
+ hcon->iso_data = conn;
+ conn->hcon = hcon;
+ conn->tx_sn = 0;
+
+ BT_DBG("hcon %p conn %p", hcon, conn);
+
+ return conn;
+}
+
+/* Delete channel. Must be called on the locked socket. */
+static void iso_chan_del(struct sock *sk, int err)
+{
+ struct iso_conn *conn;
+ struct sock *parent;
+
+ conn = iso_pi(sk)->conn;
+
+ BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
+
+ if (conn) {
+ iso_conn_lock(conn);
+ conn->sk = NULL;
+ iso_pi(sk)->conn = NULL;
+ iso_conn_unlock(conn);
+
+ if (conn->hcon)
+ hci_conn_drop(conn->hcon);
+ }
+
+ sk->sk_state = BT_CLOSED;
+ sk->sk_err = err;
+
+ parent = bt_sk(sk)->parent;
+ if (parent) {
+ bt_accept_unlink(sk);
+ parent->sk_data_ready(parent);
+ } else {
+ sk->sk_state_change(sk);
+ }
+
+ sock_set_flag(sk, SOCK_ZAPPED);
+}
+
+static void iso_conn_del(struct hci_conn *hcon, int err)
+{
+ struct iso_conn *conn = hcon->iso_data;
+ struct sock *sk;
+
+ if (!conn)
+ return;
+
+ BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
+
+ /* Kill socket */
+ iso_conn_lock(conn);
+ sk = conn->sk;
+ if (sk)
+ sock_hold(sk);
+ iso_conn_unlock(conn);
+
+ if (sk) {
+ lock_sock(sk);
+ iso_sock_clear_timer(sk);
+ iso_chan_del(sk, err);
+ release_sock(sk);
+ sock_put(sk);
+ }
+
+ /* Ensure no more work items will run before freeing conn. */
+ cancel_delayed_work_sync(&conn->timeout_work);
+
+ hcon->iso_data = NULL;
+ kfree(conn);
+}
+
+static int __iso_chan_add(struct iso_conn *conn, struct sock *sk,
+ struct sock *parent)
+{
+ BT_DBG("conn %p", conn);
+
+ if (iso_pi(sk)->conn == conn && conn->sk == sk)
+ return 0;
+
+ if (conn->sk) {
+ BT_ERR("conn->sk already set");
+ return -EBUSY;
+ }
+
+ iso_pi(sk)->conn = conn;
+ conn->sk = sk;
+
+ if (parent)
+ bt_accept_enqueue(parent, sk, true);
+
+ return 0;
+}
+
+static int iso_chan_add(struct iso_conn *conn, struct sock *sk,
+ struct sock *parent)
+{
+ int err;
+
+ iso_conn_lock(conn);
+ err = __iso_chan_add(conn, sk, parent);
+ iso_conn_unlock(conn);
+
+ return err;
+}
+
+static inline u8 le_addr_type(u8 bdaddr_type)
+{
+ if (bdaddr_type == BDADDR_LE_PUBLIC)
+ return ADDR_LE_DEV_PUBLIC;
+ else
+ return ADDR_LE_DEV_RANDOM;
+}
+
+static int iso_connect_bis(struct sock *sk)
+{
+ struct iso_conn *conn;
+ struct hci_conn *hcon;
+ struct hci_dev *hdev;
+ int err;
+
+ BT_DBG("%pMR", &iso_pi(sk)->src);
+
+ hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src,
+ iso_pi(sk)->src_type);
+ if (!hdev)
+ return -EHOSTUNREACH;
+
+ hci_dev_lock(hdev);
+
+ if (!bis_capable(hdev)) {
+ err = -EOPNOTSUPP;
+ goto done;
+ }
+
+ /* Fail if the output PHY is marked as disabled */
+ if (!iso_pi(sk)->qos.out.phy) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ hcon = hci_connect_bis(hdev, &iso_pi(sk)->dst, iso_pi(sk)->dst_type,
+ &iso_pi(sk)->qos, iso_pi(sk)->base_len,
+ iso_pi(sk)->base);
+ if (IS_ERR(hcon)) {
+ err = PTR_ERR(hcon);
+ goto done;
+ }
+
+ conn = iso_conn_add(hcon);
+ if (!conn) {
+ hci_conn_drop(hcon);
+ err = -ENOMEM;
+ goto done;
+ }
+
+ /* Update source addr of the socket */
+ bacpy(&iso_pi(sk)->src, &hcon->src);
+
+ err = iso_chan_add(conn, sk, NULL);
+ if (err)
+ goto done;
+
+ if (hcon->state == BT_CONNECTED) {
+ iso_sock_clear_timer(sk);
+ sk->sk_state = BT_CONNECTED;
+ } else {
+ sk->sk_state = BT_CONNECT;
+ iso_sock_set_timer(sk, sk->sk_sndtimeo);
+ }
+
+done:
+ hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
+ return err;
+}
+
+static int iso_connect_cis(struct sock *sk)
+{
+ struct iso_conn *conn;
+ struct hci_conn *hcon;
+ struct hci_dev *hdev;
+ int err;
+
+ BT_DBG("%pMR -> %pMR", &iso_pi(sk)->src, &iso_pi(sk)->dst);
+
+ hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src,
+ iso_pi(sk)->src_type);
+ if (!hdev)
+ return -EHOSTUNREACH;
+
+ hci_dev_lock(hdev);
+
+ if (!cis_central_capable(hdev)) {
+ err = -EOPNOTSUPP;
+ goto done;
+ }
+
+ /* Fail if both the input and output PHYs are marked as disabled */
+ if (!iso_pi(sk)->qos.in.phy && !iso_pi(sk)->qos.out.phy) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ /* Just bind if DEFER_SETUP has been set */
+ if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
+ hcon = hci_bind_cis(hdev, &iso_pi(sk)->dst,
+ le_addr_type(iso_pi(sk)->dst_type),
+ &iso_pi(sk)->qos);
+ if (IS_ERR(hcon)) {
+ err = PTR_ERR(hcon);
+ goto done;
+ }
+ } else {
+ hcon = hci_connect_cis(hdev, &iso_pi(sk)->dst,
+ le_addr_type(iso_pi(sk)->dst_type),
+ &iso_pi(sk)->qos);
+ if (IS_ERR(hcon)) {
+ err = PTR_ERR(hcon);
+ goto done;
+ }
+ }
+
+ conn = iso_conn_add(hcon);
+ if (!conn) {
+ hci_conn_drop(hcon);
+ err = -ENOMEM;
+ goto done;
+ }
+
+ /* Update source addr of the socket */
+ bacpy(&iso_pi(sk)->src, &hcon->src);
+
+ err = iso_chan_add(conn, sk, NULL);
+ if (err)
+ goto done;
+
+ if (hcon->state == BT_CONNECTED) {
+ iso_sock_clear_timer(sk);
+ sk->sk_state = BT_CONNECTED;
+ } else if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
+ iso_sock_clear_timer(sk);
+ sk->sk_state = BT_CONNECT;
+ } else {
+ sk->sk_state = BT_CONNECT;
+ iso_sock_set_timer(sk, sk->sk_sndtimeo);
+ }
+
+done:
+ hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
+ return err;
+}
+
+static struct bt_iso_qos *iso_sock_get_qos(struct sock *sk)
+{
+ if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONNECT2)
+ return &iso_pi(sk)->conn->hcon->iso_qos;
+
+ return &iso_pi(sk)->qos;
+}
+
+static int iso_send_frame(struct sock *sk, struct sk_buff *skb)
+{
+ struct iso_conn *conn = iso_pi(sk)->conn;
+ struct bt_iso_qos *qos = iso_sock_get_qos(sk);
+ struct hci_iso_data_hdr *hdr;
+ int len = 0;
+
+ BT_DBG("sk %p len %d", sk, skb->len);
+
+ if (skb->len > qos->out.sdu)
+ return -EMSGSIZE;
+
+ len = skb->len;
+
+ /* Push ISO data header */
+ hdr = skb_push(skb, HCI_ISO_DATA_HDR_SIZE);
+ hdr->sn = cpu_to_le16(conn->tx_sn++);
+ hdr->slen = cpu_to_le16(hci_iso_data_len_pack(len,
+ HCI_ISO_STATUS_VALID));
+
+ if (sk->sk_state == BT_CONNECTED)
+ hci_send_iso(conn->hcon, skb);
+ else
+ len = -ENOTCONN;
+
+ return len;
+}
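The slen field written above packs a 14-bit SDU length together with a 2-bit packet status flag; a small illustration using the pack/unpack helpers from hci.h (layout as I read those macros: length in bits 0-13, flags in bits 14-15):

	u16 slen = hci_iso_data_len_pack(40, HCI_ISO_STATUS_VALID);

	/* hci_iso_data_len(slen)   == 40 */
	/* hci_iso_data_flags(slen) == HCI_ISO_STATUS_VALID */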
+
+static void iso_recv_frame(struct iso_conn *conn, struct sk_buff *skb)
+{
+ struct sock *sk;
+
+ iso_conn_lock(conn);
+ sk = conn->sk;
+ iso_conn_unlock(conn);
+
+ if (!sk)
+ goto drop;
+
+ BT_DBG("sk %p len %d", sk, skb->len);
+
+ if (sk->sk_state != BT_CONNECTED)
+ goto drop;
+
+ if (!sock_queue_rcv_skb(sk, skb))
+ return;
+
+drop:
+ kfree_skb(skb);
+}
+
+/* -------- Socket interface ---------- */
+static struct sock *__iso_get_sock_listen_by_addr(bdaddr_t *ba)
+{
+ struct sock *sk;
+
+ sk_for_each(sk, &iso_sk_list.head) {
+ if (sk->sk_state != BT_LISTEN)
+ continue;
+
+ if (!bacmp(&iso_pi(sk)->src, ba))
+ return sk;
+ }
+
+ return NULL;
+}
+
+static struct sock *__iso_get_sock_listen_by_sid(bdaddr_t *ba, bdaddr_t *bc,
+ __u8 sid)
+{
+ struct sock *sk;
+
+ sk_for_each(sk, &iso_sk_list.head) {
+ if (sk->sk_state != BT_LISTEN)
+ continue;
+
+ if (bacmp(&iso_pi(sk)->src, ba))
+ continue;
+
+ if (bacmp(&iso_pi(sk)->dst, bc))
+ continue;
+
+ if (iso_pi(sk)->bc_sid == sid)
+ return sk;
+ }
+
+ return NULL;
+}
+
+typedef bool (*iso_sock_match_t)(struct sock *sk, void *data);
+
+/* Find socket listening:
+ * source bdaddr (Unicast)
+ * destination bdaddr (Broadcast only)
+ * match func - pass NULL to ignore
+ * match func data - ignored when match func is NULL
+ * Returns closest match.
+ */
+static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst,
+ iso_sock_match_t match, void *data)
+{
+ struct sock *sk = NULL, *sk1 = NULL;
+
+ read_lock(&iso_sk_list.lock);
+
+ sk_for_each(sk, &iso_sk_list.head) {
+ if (sk->sk_state != BT_LISTEN)
+ continue;
+
+ /* Match Broadcast destination */
+ if (bacmp(dst, BDADDR_ANY) && bacmp(&iso_pi(sk)->dst, dst))
+ continue;
+
+ /* Use Match function if provided */
+ if (match && !match(sk, data))
+ continue;
+
+ /* Exact match. */
+ if (!bacmp(&iso_pi(sk)->src, src))
+ break;
+
+ /* Closest match */
+ if (!bacmp(&iso_pi(sk)->src, BDADDR_ANY))
+ sk1 = sk;
+ }
+
+ read_unlock(&iso_sk_list.lock);
+
+ return sk ? sk : sk1;
+}
+
+static void iso_sock_destruct(struct sock *sk)
+{
+ BT_DBG("sk %p", sk);
+
+ skb_queue_purge(&sk->sk_receive_queue);
+ skb_queue_purge(&sk->sk_write_queue);
+}
+
+static void iso_sock_cleanup_listen(struct sock *parent)
+{
+ struct sock *sk;
+
+ BT_DBG("parent %p", parent);
+
+ /* Close not yet accepted channels */
+ while ((sk = bt_accept_dequeue(parent, NULL))) {
+ iso_sock_close(sk);
+ iso_sock_kill(sk);
+ }
+
+ parent->sk_state = BT_CLOSED;
+ sock_set_flag(parent, SOCK_ZAPPED);
+}
+
+/* Kill socket (only if zapped and orphan)
+ * Must be called on unlocked socket.
+ */
+static void iso_sock_kill(struct sock *sk)
+{
+ if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket ||
+ sock_flag(sk, SOCK_DEAD))
+ return;
+
+ BT_DBG("sk %p state %d", sk, sk->sk_state);
+
+ /* Kill poor orphan */
+ bt_sock_unlink(&iso_sk_list, sk);
+ sock_set_flag(sk, SOCK_DEAD);
+ sock_put(sk);
+}
+
+static void iso_conn_defer_reject(struct hci_conn *conn)
+{
+ struct hci_cp_le_reject_cis cp;
+
+ BT_DBG("conn %p", conn);
+
+ memset(&cp, 0, sizeof(cp));
+ cp.handle = cpu_to_le16(conn->handle);
+ cp.reason = HCI_ERROR_REJ_BAD_ADDR;
+ hci_send_cmd(conn->hdev, HCI_OP_LE_REJECT_CIS, sizeof(cp), &cp);
+}
+
+static void __iso_sock_close(struct sock *sk)
+{
+ BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket);
+
+ switch (sk->sk_state) {
+ case BT_LISTEN:
+ iso_sock_cleanup_listen(sk);
+ break;
+
+ case BT_CONNECTED:
+ case BT_CONFIG:
+ if (iso_pi(sk)->conn->hcon) {
+ sk->sk_state = BT_DISCONN;
+ iso_sock_set_timer(sk, ISO_DISCONN_TIMEOUT);
+ iso_conn_lock(iso_pi(sk)->conn);
+ hci_conn_drop(iso_pi(sk)->conn->hcon);
+ iso_pi(sk)->conn->hcon = NULL;
+ iso_conn_unlock(iso_pi(sk)->conn);
+ } else {
+ iso_chan_del(sk, ECONNRESET);
+ }
+ break;
+
+ case BT_CONNECT2:
+ if (iso_pi(sk)->conn->hcon)
+ iso_conn_defer_reject(iso_pi(sk)->conn->hcon);
+ iso_chan_del(sk, ECONNRESET);
+ break;
+ case BT_CONNECT:
+ /* In case of DEFER_SETUP the hcon would be bound to a CIG which
+ * needs to be removed, so just call hci_conn_del so the cleanup
+ * callback does what is needed.
+ */
+ if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) &&
+ iso_pi(sk)->conn->hcon) {
+ hci_conn_del(iso_pi(sk)->conn->hcon);
+ iso_pi(sk)->conn->hcon = NULL;
+ }
+
+ iso_chan_del(sk, ECONNRESET);
+ break;
+ case BT_DISCONN:
+ iso_chan_del(sk, ECONNRESET);
+ break;
+
+ default:
+ sock_set_flag(sk, SOCK_ZAPPED);
+ break;
+ }
+}
+
+/* Must be called on unlocked socket. */
+static void iso_sock_close(struct sock *sk)
+{
+ iso_sock_clear_timer(sk);
+ lock_sock(sk);
+ __iso_sock_close(sk);
+ release_sock(sk);
+ iso_sock_kill(sk);
+}
+
+static void iso_sock_init(struct sock *sk, struct sock *parent)
+{
+ BT_DBG("sk %p", sk);
+
+ if (parent) {
+ sk->sk_type = parent->sk_type;
+ bt_sk(sk)->flags = bt_sk(parent)->flags;
+ security_sk_clone(parent, sk);
+ }
+}
+
+static struct proto iso_proto = {
+ .name = "ISO",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct iso_pinfo)
+};
+
+#define DEFAULT_IO_QOS \
+{ \
+ .interval = 10000u, \
+ .latency = 10u, \
+ .sdu = 40u, \
+ .phy = BT_ISO_PHY_2M, \
+ .rtn = 2u, \
+}
+
+static struct bt_iso_qos default_qos = {
+ .cig = BT_ISO_QOS_CIG_UNSET,
+ .cis = BT_ISO_QOS_CIS_UNSET,
+ .sca = 0x00,
+ .packing = 0x00,
+ .framing = 0x00,
+ .in = DEFAULT_IO_QOS,
+ .out = DEFAULT_IO_QOS,
+};
+
+static struct sock *iso_sock_alloc(struct net *net, struct socket *sock,
+ int proto, gfp_t prio, int kern)
+{
+ struct sock *sk;
+
+ sk = sk_alloc(net, PF_BLUETOOTH, prio, &iso_proto, kern);
+ if (!sk)
+ return NULL;
+
+ sock_init_data(sock, sk);
+ INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
+
+ sk->sk_destruct = iso_sock_destruct;
+ sk->sk_sndtimeo = ISO_CONN_TIMEOUT;
+
+ sock_reset_flag(sk, SOCK_ZAPPED);
+
+ sk->sk_protocol = proto;
+ sk->sk_state = BT_OPEN;
+
+ /* Set the address type to public since the default src address is BDADDR_ANY */
+ iso_pi(sk)->src_type = BDADDR_LE_PUBLIC;
+
+ iso_pi(sk)->qos = default_qos;
+
+ bt_sock_link(&iso_sk_list, sk);
+ return sk;
+}
+
+static int iso_sock_create(struct net *net, struct socket *sock, int protocol,
+ int kern)
+{
+ struct sock *sk;
+
+ BT_DBG("sock %p", sock);
+
+ sock->state = SS_UNCONNECTED;
+
+ if (sock->type != SOCK_SEQPACKET)
+ return -ESOCKTNOSUPPORT;
+
+ sock->ops = &iso_sock_ops;
+
+ sk = iso_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern);
+ if (!sk)
+ return -ENOMEM;
+
+ iso_sock_init(sk, NULL);
+ return 0;
+}
+
+static int iso_sock_bind_bc(struct socket *sock, struct sockaddr *addr,
+ int addr_len)
+{
+ struct sockaddr_iso *sa = (struct sockaddr_iso *)addr;
+ struct sock *sk = sock->sk;
+ int i;
+
+ BT_DBG("sk %p bc_sid %u bc_num_bis %u", sk, sa->iso_bc->bc_sid,
+ sa->iso_bc->bc_num_bis);
+
+ if (addr_len > sizeof(*sa) + sizeof(*sa->iso_bc) ||
+ sa->iso_bc->bc_num_bis < 0x01 || sa->iso_bc->bc_num_bis > 0x1f)
+ return -EINVAL;
+
+ bacpy(&iso_pi(sk)->dst, &sa->iso_bc->bc_bdaddr);
+ iso_pi(sk)->dst_type = sa->iso_bc->bc_bdaddr_type;
+ iso_pi(sk)->sync_handle = -1;
+ iso_pi(sk)->bc_sid = sa->iso_bc->bc_sid;
+ iso_pi(sk)->bc_num_bis = sa->iso_bc->bc_num_bis;
+
+ for (i = 0; i < iso_pi(sk)->bc_num_bis; i++) {
+ if (sa->iso_bc->bc_bis[i] < 0x01 ||
+ sa->iso_bc->bc_bis[i] > 0x1f)
+ return -EINVAL;
+
+ memcpy(iso_pi(sk)->bc_bis, sa->iso_bc->bc_bis,
+ iso_pi(sk)->bc_num_bis);
+ }
+
+ return 0;
+}
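From userspace, the broadcast branch above is reached by passing an oversized address that appends the sockaddr_iso_bc block; a hypothetical sketch, assuming the sockaddr_iso/sockaddr_iso_bc layout this function consumes:

	struct {
		struct sockaddr_iso iso;
		struct sockaddr_iso_bc bc;
	} addr = { 0 };

	addr.iso.iso_family = AF_BLUETOOTH;
	addr.iso.iso_bdaddr_type = BDADDR_LE_PUBLIC;
	/* bacpy(&addr.iso.iso_bdaddr, &local_addr);  adapter address */
	/* bacpy(&addr.bc.bc_bdaddr, &broadcaster);   peer address */
	addr.bc.bc_bdaddr_type = BDADDR_LE_PUBLIC;
	addr.bc.bc_sid = 0x01;		/* SID to sync to */
	addr.bc.bc_num_bis = 1;
	addr.bc.bc_bis[0] = 0x01;	/* BIS indices are 0x01-0x1f */

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		perror("bind");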
+
+static int iso_sock_bind(struct socket *sock, struct sockaddr *addr,
+ int addr_len)
+{
+ struct sockaddr_iso *sa = (struct sockaddr_iso *)addr;
+ struct sock *sk = sock->sk;
+ int err = 0;
+
+ BT_DBG("sk %p %pMR type %u", sk, &sa->iso_bdaddr, sa->iso_bdaddr_type);
+
+ if (!addr || addr_len < sizeof(struct sockaddr_iso) ||
+ addr->sa_family != AF_BLUETOOTH)
+ return -EINVAL;
+
+ lock_sock(sk);
+
+ if (sk->sk_state != BT_OPEN) {
+ err = -EBADFD;
+ goto done;
+ }
+
+ if (sk->sk_type != SOCK_SEQPACKET) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ /* Check if the address type is of LE type */
+ if (!bdaddr_type_is_le(sa->iso_bdaddr_type)) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ bacpy(&iso_pi(sk)->src, &sa->iso_bdaddr);
+ iso_pi(sk)->src_type = sa->iso_bdaddr_type;
+
+ /* Check for Broadcast address */
+ if (addr_len > sizeof(*sa)) {
+ err = iso_sock_bind_bc(sock, addr, addr_len);
+ if (err)
+ goto done;
+ }
+
+ sk->sk_state = BT_BOUND;
+
+done:
+ release_sock(sk);
+ return err;
+}
+
+static int iso_sock_connect(struct socket *sock, struct sockaddr *addr,
+ int alen, int flags)
+{
+ struct sockaddr_iso *sa = (struct sockaddr_iso *)addr;
+ struct sock *sk = sock->sk;
+ int err;
+
+ BT_DBG("sk %p", sk);
+
+ if (alen < sizeof(struct sockaddr_iso) ||
+ addr->sa_family != AF_BLUETOOTH)
+ return -EINVAL;
+
+ if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND)
+ return -EBADFD;
+
+ if (sk->sk_type != SOCK_SEQPACKET)
+ return -EINVAL;
+
+ /* Check if the address type is of LE type */
+ if (!bdaddr_type_is_le(sa->iso_bdaddr_type))
+ return -EINVAL;
+
+ lock_sock(sk);
+
+ bacpy(&iso_pi(sk)->dst, &sa->iso_bdaddr);
+ iso_pi(sk)->dst_type = sa->iso_bdaddr_type;
+
+ if (bacmp(&iso_pi(sk)->dst, BDADDR_ANY))
+ err = iso_connect_cis(sk);
+ else
+ err = iso_connect_bis(sk);
+
+ if (err)
+ goto done;
+
+ if (!test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
+ err = bt_sock_wait_state(sk, BT_CONNECTED,
+ sock_sndtimeo(sk, flags & O_NONBLOCK));
+ }
+
+done:
+ release_sock(sk);
+ return err;
+}
+
+static int iso_listen_bis(struct sock *sk)
+{
+ struct hci_dev *hdev;
+ int err = 0;
+
+ BT_DBG("%pMR -> %pMR (SID 0x%2.2x)", &iso_pi(sk)->src,
+ &iso_pi(sk)->dst, iso_pi(sk)->bc_sid);
+
+ write_lock(&iso_sk_list.lock);
+
+ if (__iso_get_sock_listen_by_sid(&iso_pi(sk)->src, &iso_pi(sk)->dst,
+ iso_pi(sk)->bc_sid))
+ err = -EADDRINUSE;
+
+ write_unlock(&iso_sk_list.lock);
+
+ if (err)
+ return err;
+
+ hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src,
+ iso_pi(sk)->src_type);
+ if (!hdev)
+ return -EHOSTUNREACH;
+
+ hci_dev_lock(hdev);
+
+ err = hci_pa_create_sync(hdev, &iso_pi(sk)->dst, iso_pi(sk)->dst_type,
+ iso_pi(sk)->bc_sid);
+
+ hci_dev_unlock(hdev);
+
+ return err;
+}
+
+static int iso_listen_cis(struct sock *sk)
+{
+ int err = 0;
+
+ BT_DBG("%pMR", &iso_pi(sk)->src);
+
+ write_lock(&iso_sk_list.lock);
+
+ if (__iso_get_sock_listen_by_addr(&iso_pi(sk)->src))
+ err = -EADDRINUSE;
+
+ write_unlock(&iso_sk_list.lock);
+
+ return err;
+}
+
+static int iso_sock_listen(struct socket *sock, int backlog)
+{
+ struct sock *sk = sock->sk;
+ int err = 0;
+
+ BT_DBG("sk %p backlog %d", sk, backlog);
+
+ lock_sock(sk);
+
+ if (sk->sk_state != BT_BOUND) {
+ err = -EBADFD;
+ goto done;
+ }
+
+ if (sk->sk_type != SOCK_SEQPACKET) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ if (!bacmp(&iso_pi(sk)->dst, BDADDR_ANY))
+ err = iso_listen_cis(sk);
+ else
+ err = iso_listen_bis(sk);
+
+ if (err)
+ goto done;
+
+ sk->sk_max_ack_backlog = backlog;
+ sk->sk_ack_backlog = 0;
+
+ sk->sk_state = BT_LISTEN;
+
+done:
+ release_sock(sk);
+ return err;
+}
+
+static int iso_sock_accept(struct socket *sock, struct socket *newsock,
+ int flags, bool kern)
+{
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ struct sock *sk = sock->sk, *ch;
+ long timeo;
+ int err = 0;
+
+ lock_sock(sk);
+
+ timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+
+ BT_DBG("sk %p timeo %ld", sk, timeo);
+
+ /* Wait for an incoming connection. (wake-one). */
+ add_wait_queue_exclusive(sk_sleep(sk), &wait);
+ while (1) {
+ if (sk->sk_state != BT_LISTEN) {
+ err = -EBADFD;
+ break;
+ }
+
+ ch = bt_accept_dequeue(sk, newsock);
+ if (ch)
+ break;
+
+ if (!timeo) {
+ err = -EAGAIN;
+ break;
+ }
+
+ if (signal_pending(current)) {
+ err = sock_intr_errno(timeo);
+ break;
+ }
+
+ release_sock(sk);
+
+ timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
+ lock_sock(sk);
+ }
+ remove_wait_queue(sk_sleep(sk), &wait);
+
+ if (err)
+ goto done;
+
+ newsock->state = SS_CONNECTED;
+
+ BT_DBG("new socket %p", ch);
+
+done:
+ release_sock(sk);
+ return err;
+}
+
+static int iso_sock_getname(struct socket *sock, struct sockaddr *addr,
+ int peer)
+{
+ struct sockaddr_iso *sa = (struct sockaddr_iso *)addr;
+ struct sock *sk = sock->sk;
+
+ BT_DBG("sock %p, sk %p", sock, sk);
+
+ addr->sa_family = AF_BLUETOOTH;
+
+ if (peer) {
+ bacpy(&sa->iso_bdaddr, &iso_pi(sk)->dst);
+ sa->iso_bdaddr_type = iso_pi(sk)->dst_type;
+ } else {
+ bacpy(&sa->iso_bdaddr, &iso_pi(sk)->src);
+ sa->iso_bdaddr_type = iso_pi(sk)->src_type;
+ }
+
+ return sizeof(struct sockaddr_iso);
+}
+
+static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t len)
+{
+ struct sock *sk = sock->sk;
+ struct iso_conn *conn = iso_pi(sk)->conn;
+ struct sk_buff *skb, **frag;
+ int err;
+
+ BT_DBG("sock %p, sk %p", sock, sk);
+
+ err = sock_error(sk);
+ if (err)
+ return err;
+
+ if (msg->msg_flags & MSG_OOB)
+ return -EOPNOTSUPP;
+
+ if (sk->sk_state != BT_CONNECTED)
+ return -ENOTCONN;
+
+ skb = bt_skb_sendmsg(sk, msg, len, conn->hcon->hdev->iso_mtu,
+ HCI_ISO_DATA_HDR_SIZE, 0);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ len -= skb->len;
+
+ BT_DBG("skb %p len %d", sk, skb->len);
+
+ /* Continuation fragments */
+ frag = &skb_shinfo(skb)->frag_list;
+ while (len) {
+ struct sk_buff *tmp;
+
+ tmp = bt_skb_sendmsg(sk, msg, len, conn->hcon->hdev->iso_mtu,
+ 0, 0);
+ if (IS_ERR(tmp)) {
+ kfree_skb(skb);
+ return PTR_ERR(tmp);
+ }
+
+ *frag = tmp;
+
+ len -= tmp->len;
+
+ skb->len += tmp->len;
+ skb->data_len += tmp->len;
+
+ BT_DBG("frag %p len %d", *frag, tmp->len);
+
+ frag = &(*frag)->next;
+ }
+
+ lock_sock(sk);
+
+ if (sk->sk_state == BT_CONNECTED)
+ err = iso_send_frame(sk, skb);
+ else
+ err = -ENOTCONN;
+
+ release_sock(sk);
+
+ if (err < 0)
+ kfree_skb(skb);
+ return err;
+}
+
+static void iso_conn_defer_accept(struct hci_conn *conn)
+{
+ struct hci_cp_le_accept_cis cp;
+ struct hci_dev *hdev = conn->hdev;
+
+ BT_DBG("conn %p", conn);
+
+ conn->state = BT_CONFIG;
+
+ cp.handle = cpu_to_le16(conn->handle);
+
+ hci_send_cmd(hdev, HCI_OP_LE_ACCEPT_CIS, sizeof(cp), &cp);
+}
+
+static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t len, int flags)
+{
+ struct sock *sk = sock->sk;
+ struct iso_pinfo *pi = iso_pi(sk);
+ int err;
+
+ BT_DBG("sk %p", sk);
+
+ lock_sock(sk);
+
+ if (test_and_clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
+ switch (sk->sk_state) {
+ case BT_CONNECT2:
+ iso_conn_defer_accept(pi->conn->hcon);
+ sk->sk_state = BT_CONFIG;
+ release_sock(sk);
+ return 0;
+ case BT_CONNECT:
+ err = iso_connect_cis(sk);
+ release_sock(sk);
+ return err;
+ }
+ }
+
+ release_sock(sk);
+
+ return bt_sock_recvmsg(sock, msg, len, flags);
+}
+
+static bool check_io_qos(struct bt_iso_io_qos *qos)
+{
+ /* If no PHY is enabled, SDU must be 0 */
+ if (!qos->phy && qos->sdu)
+ return false;
+
+ if (qos->interval && (qos->interval < 0xff || qos->interval > 0xfffff))
+ return false;
+
+ if (qos->latency && (qos->latency < 0x05 || qos->latency > 0xfa0))
+ return false;
+
+ if (qos->phy > BT_ISO_PHY_ANY)
+ return false;
+
+ return true;
+}
+
+static bool check_qos(struct bt_iso_qos *qos)
+{
+ if (qos->sca > 0x07)
+ return false;
+
+ if (qos->packing > 0x01)
+ return false;
+
+ if (qos->framing > 0x01)
+ return false;
+
+ if (!check_io_qos(&qos->in))
+ return false;
+
+ if (!check_io_qos(&qos->out))
+ return false;
+
+ return true;
+}
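From userspace these bounds translate directly into what a BT_ISO_QOS setsockopt() must carry; a hypothetical usage sketch with values chosen to satisfy check_qos() (SDU interval 0xff-0xfffff us, latency 0x05-0xfa0 ms):

	struct bt_iso_qos qos = {
		.cig = BT_ISO_QOS_CIG_UNSET,
		.cis = BT_ISO_QOS_CIS_UNSET,
		.out = {
			.interval = 10000,	/* 10 ms SDU interval */
			.latency = 10,		/* 10 ms max transport latency */
			.sdu = 40,
			.phy = BT_ISO_PHY_2M,
			.rtn = 2,
		},
	};

	if (setsockopt(fd, SOL_BLUETOOTH, BT_ISO_QOS, &qos, sizeof(qos)) < 0)
		perror("BT_ISO_QOS");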
+
+static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
+{
+ struct sock *sk = sock->sk;
+ int len, err = 0;
+ struct bt_iso_qos qos;
+ u32 opt;
+
+ BT_DBG("sk %p", sk);
+
+ lock_sock(sk);
+
+ switch (optname) {
+ case BT_DEFER_SETUP:
+ if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
+ err = -EINVAL;
+ break;
+ }
+
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+ err = -EFAULT;
+ break;
+ }
+
+ if (opt)
+ set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
+ else
+ clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
+ break;
+
+ case BT_ISO_QOS:
+ if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND &&
+ sk->sk_state != BT_CONNECT2) {
+ err = -EINVAL;
+ break;
+ }
+
+ len = min_t(unsigned int, sizeof(qos), optlen);
+ if (len != sizeof(qos)) {
+ err = -EINVAL;
+ break;
+ }
+
+ memset(&qos, 0, sizeof(qos));
+
+ if (copy_from_sockptr(&qos, optval, len)) {
+ err = -EFAULT;
+ break;
+ }
+
+ if (!check_qos(&qos)) {
+ err = -EINVAL;
+ break;
+ }
+
+ iso_pi(sk)->qos = qos;
+
+ break;
+
+ case BT_ISO_BASE:
+ if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND &&
+ sk->sk_state != BT_CONNECT2) {
+ err = -EINVAL;
+ break;
+ }
+
+ if (optlen > sizeof(iso_pi(sk)->base)) {
+ err = -EOVERFLOW;
+ break;
+ }
+
+ len = min_t(unsigned int, sizeof(iso_pi(sk)->base), optlen);
+
+ if (copy_from_sockptr(iso_pi(sk)->base, optval, len)) {
+ err = -EFAULT;
+ break;
+ }
+
+ iso_pi(sk)->base_len = len;
+
+ break;
+
+ default:
+ err = -ENOPROTOOPT;
+ break;
+ }
+
+ release_sock(sk);
+ return err;
+}
+
+static int iso_sock_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ struct sock *sk = sock->sk;
+ int len, err = 0;
+ struct bt_iso_qos *qos;
+ u8 base_len;
+ u8 *base;
+
+ BT_DBG("sk %p", sk);
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ lock_sock(sk);
+
+ switch (optname) {
+ case BT_DEFER_SETUP:
+ if (sk->sk_state == BT_CONNECTED) {
+ err = -EINVAL;
+ break;
+ }
+
+ if (put_user(test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags),
+ (u32 __user *)optval))
+ err = -EFAULT;
+
+ break;
+
+ case BT_ISO_QOS:
+ qos = iso_sock_get_qos(sk);
+
+ len = min_t(unsigned int, len, sizeof(*qos));
+ if (copy_to_user(optval, qos, len))
+ err = -EFAULT;
+
+ break;
+
+ case BT_ISO_BASE:
+ if (sk->sk_state == BT_CONNECTED) {
+ base_len = iso_pi(sk)->conn->hcon->le_per_adv_data_len;
+ base = iso_pi(sk)->conn->hcon->le_per_adv_data;
+ } else {
+ base_len = iso_pi(sk)->base_len;
+ base = iso_pi(sk)->base;
+ }
+
+ len = min_t(unsigned int, len, base_len);
+ if (copy_to_user(optval, base, len))
+ err = -EFAULT;
+
+ break;
+
+ default:
+ err = -ENOPROTOOPT;
+ break;
+ }
+
+ release_sock(sk);
+ return err;
+}
+
+static int iso_sock_shutdown(struct socket *sock, int how)
+{
+ struct sock *sk = sock->sk;
+ int err = 0;
+
+ BT_DBG("sock %p, sk %p, how %d", sock, sk, how);
+
+ if (!sk)
+ return 0;
+
+ sock_hold(sk);
+ lock_sock(sk);
+
+ switch (how) {
+ case SHUT_RD:
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ goto unlock;
+ sk->sk_shutdown |= RCV_SHUTDOWN;
+ break;
+ case SHUT_WR:
+ if (sk->sk_shutdown & SEND_SHUTDOWN)
+ goto unlock;
+ sk->sk_shutdown |= SEND_SHUTDOWN;
+ break;
+ case SHUT_RDWR:
+ if (sk->sk_shutdown & SHUTDOWN_MASK)
+ goto unlock;
+ sk->sk_shutdown |= SHUTDOWN_MASK;
+ break;
+ }
+
+ iso_sock_clear_timer(sk);
+ __iso_sock_close(sk);
+
+ if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+ !(current->flags & PF_EXITING))
+ err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime);
+
+unlock:
+ release_sock(sk);
+ sock_put(sk);
+
+ return err;
+}
+
+static int iso_sock_release(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ int err = 0;
+
+ BT_DBG("sock %p, sk %p", sock, sk);
+
+ if (!sk)
+ return 0;
+
+ iso_sock_close(sk);
+
+ if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+ !(current->flags & PF_EXITING)) {
+ lock_sock(sk);
+ err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime);
+ release_sock(sk);
+ }
+
+ sock_orphan(sk);
+ iso_sock_kill(sk);
+ return err;
+}
+
+static void iso_sock_ready(struct sock *sk)
+{
+ BT_DBG("sk %p", sk);
+
+ if (!sk)
+ return;
+
+ lock_sock(sk);
+ iso_sock_clear_timer(sk);
+ sk->sk_state = BT_CONNECTED;
+ sk->sk_state_change(sk);
+ release_sock(sk);
+}
+
+struct iso_list_data {
+ struct hci_conn *hcon;
+ int count;
+};
+
+static bool iso_match_big(struct sock *sk, void *data)
+{
+ struct hci_evt_le_big_sync_estabilished *ev = data;
+
+ return ev->handle == iso_pi(sk)->qos.big;
+}
+
+static void iso_conn_ready(struct iso_conn *conn)
+{
+ struct sock *parent;
+ struct sock *sk = conn->sk;
+ struct hci_evt_le_big_sync_estabilished *ev;
+
+ BT_DBG("conn %p", conn);
+
+ if (sk) {
+ iso_sock_ready(conn->sk);
+ } else {
+ iso_conn_lock(conn);
+
+ if (!conn->hcon) {
+ iso_conn_unlock(conn);
+ return;
+ }
+
+ ev = hci_recv_event_data(conn->hcon->hdev,
+ HCI_EVT_LE_BIG_SYNC_ESTABILISHED);
+ if (ev)
+ parent = iso_get_sock_listen(&conn->hcon->src,
+ &conn->hcon->dst,
+ iso_match_big, ev);
+ else
+ parent = iso_get_sock_listen(&conn->hcon->src,
+ BDADDR_ANY, NULL, NULL);
+
+ if (!parent) {
+ iso_conn_unlock(conn);
+ return;
+ }
+
+ lock_sock(parent);
+
+ sk = iso_sock_alloc(sock_net(parent), NULL,
+ BTPROTO_ISO, GFP_ATOMIC, 0);
+ if (!sk) {
+ release_sock(parent);
+ iso_conn_unlock(conn);
+ return;
+ }
+
+ iso_sock_init(sk, parent);
+
+ bacpy(&iso_pi(sk)->src, &conn->hcon->src);
+ iso_pi(sk)->src_type = conn->hcon->src_type;
+
+ /* If hcon has no destination address (BDADDR_ANY) it means it
+ * was created by HCI_EV_LE_BIG_SYNC_ESTABILISHED so we need to
+ * initialize using the parent socket destination address.
+ */
+ if (!bacmp(&conn->hcon->dst, BDADDR_ANY)) {
+ bacpy(&conn->hcon->dst, &iso_pi(parent)->dst);
+ conn->hcon->dst_type = iso_pi(parent)->dst_type;
+ conn->hcon->sync_handle = iso_pi(parent)->sync_handle;
+ }
+
+ bacpy(&iso_pi(sk)->dst, &conn->hcon->dst);
+ iso_pi(sk)->dst_type = conn->hcon->dst_type;
+
+ hci_conn_hold(conn->hcon);
+ __iso_chan_add(conn, sk, parent);
+
+ if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags))
+ sk->sk_state = BT_CONNECT2;
+ else
+ sk->sk_state = BT_CONNECTED;
+
+ /* Wake up parent */
+ parent->sk_data_ready(parent);
+
+ release_sock(parent);
+
+ iso_conn_unlock(conn);
+ }
+}
+
+static bool iso_match_sid(struct sock *sk, void *data)
+{
+ struct hci_ev_le_pa_sync_established *ev = data;
+
+ return ev->sid == iso_pi(sk)->bc_sid;
+}
+
+static bool iso_match_sync_handle(struct sock *sk, void *data)
+{
+ struct hci_evt_le_big_info_adv_report *ev = data;
+
+ return le16_to_cpu(ev->sync_handle) == iso_pi(sk)->sync_handle;
+}
+
+/* ----- ISO interface with lower layer (HCI) ----- */
+
+int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
+{
+ struct hci_ev_le_pa_sync_established *ev1;
+ struct hci_evt_le_big_info_adv_report *ev2;
+ struct sock *sk;
+ int lm = 0;
+
+ bt_dev_dbg(hdev, "bdaddr %pMR", bdaddr);
+
+ /* Broadcast receiver requires handling of some events before it can
+ * proceed to establishing a BIG sync:
+ *
+ * 1. HCI_EV_LE_PA_SYNC_ESTABLISHED: The socket may specify a specific
+ * SID to listen to and once sync is established its handle needs to
+ * be stored in iso_pi(sk)->sync_handle so it can be matched once
+ * receiving the BIG Info.
+ * 2. HCI_EVT_LE_BIG_INFO_ADV_REPORT: When connect_ind is triggered by
+ * a BIG Info it checks whether there is any listening socket with the
+ * same sync_handle and, if so, attempts to create a BIG sync.
+ */
+ ev1 = hci_recv_event_data(hdev, HCI_EV_LE_PA_SYNC_ESTABLISHED);
+ if (ev1) {
+ sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, iso_match_sid,
+ ev1);
+ if (sk)
+ iso_pi(sk)->sync_handle = le16_to_cpu(ev1->handle);
+
+ goto done;
+ }
+
+ ev2 = hci_recv_event_data(hdev, HCI_EVT_LE_BIG_INFO_ADV_REPORT);
+ if (ev2) {
+ sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
+ iso_match_sync_handle, ev2);
+ if (sk) {
+ int err;
+
+ if (ev2->num_bis < iso_pi(sk)->bc_num_bis)
+ iso_pi(sk)->bc_num_bis = ev2->num_bis;
+
+ err = hci_le_big_create_sync(hdev,
+ &iso_pi(sk)->qos,
+ iso_pi(sk)->sync_handle,
+ iso_pi(sk)->bc_num_bis,
+ iso_pi(sk)->bc_bis);
+ if (err) {
+ bt_dev_err(hdev, "hci_le_big_create_sync: %d",
+ err);
+ sk = NULL;
+ }
+ }
+ } else {
+ sk = iso_get_sock_listen(&hdev->bdaddr, BDADDR_ANY, NULL, NULL);
+ }
+
+done:
+ if (!sk)
+ return lm;
+
+ lm |= HCI_LM_ACCEPT;
+
+ if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags))
+ *flags |= HCI_PROTO_DEFER;
+
+ return lm;
+}
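
The lookup pattern iso_get_sock_listen() performs above, modeled stand-alone (illustrative types, not kernel API): walk the listeners and return the first one the caller-supplied matcher accepts, where a NULL matcher accepts anything, like the BDADDR_ANY fallback.

	#include <stddef.h>

	struct listener {
		struct listener *next;
		unsigned char sid;		/* cf. iso_pi(sk)->bc_sid */
		unsigned short sync_handle;	/* cf. iso_pi(sk)->sync_handle */
	};

	typedef int (*listener_match_t)(struct listener *l, void *data);

	static struct listener *find_listener(struct listener *head,
					      listener_match_t match, void *data)
	{
		for (; head; head = head->next)
			if (!match || match(head, data))
				return head;
		return NULL;
	}

	static int match_sync_handle(struct listener *l, void *data)
	{
		return l->sync_handle == *(unsigned short *)data;
	}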
+
+static void iso_connect_cfm(struct hci_conn *hcon, __u8 status)
+{
+ if (hcon->type != ISO_LINK) {
+ if (hcon->type != LE_LINK)
+ return;
+
+ /* Check if LE link has failed */
+ if (status) {
+ if (hcon->link)
+ iso_conn_del(hcon->link, bt_to_errno(status));
+ return;
+ }
+
+ /* Create CIS if pending */
+ hci_le_create_cis(hcon);
+ return;
+ }
+
+ BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status);
+
+ if (!status) {
+ struct iso_conn *conn;
+
+ conn = iso_conn_add(hcon);
+ if (conn)
+ iso_conn_ready(conn);
+ } else {
+ iso_conn_del(hcon, bt_to_errno(status));
+ }
+}
+
+static void iso_disconn_cfm(struct hci_conn *hcon, __u8 reason)
+{
+ if (hcon->type != ISO_LINK)
+ return;
+
+ BT_DBG("hcon %p reason %d", hcon, reason);
+
+ iso_conn_del(hcon, bt_to_errno(reason));
+}
+
+void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
+{
+ struct iso_conn *conn = hcon->iso_data;
+ struct hci_iso_data_hdr *hdr;
+ __u16 pb, ts, len;
+
+ if (!conn)
+ goto drop;
+
+ pb = hci_iso_flags_pb(flags);
+ ts = hci_iso_flags_ts(flags);
+
+ BT_DBG("conn %p len %d pb 0x%x ts 0x%x", conn, skb->len, pb, ts);
+
+ switch (pb) {
+ case ISO_START:
+ case ISO_SINGLE:
+ if (conn->rx_len) {
+ BT_ERR("Unexpected start frame (len %d)", skb->len);
+ kfree_skb(conn->rx_skb);
+ conn->rx_skb = NULL;
+ conn->rx_len = 0;
+ }
+
+ if (ts) {
+ /* TODO: add timestamp to the packet? */
+ hdr = skb_pull_data(skb, HCI_ISO_TS_DATA_HDR_SIZE);
+ if (!hdr) {
+ BT_ERR("Frame is too short (len %d)", skb->len);
+ goto drop;
+ }
+
+ } else {
+ hdr = skb_pull_data(skb, HCI_ISO_DATA_HDR_SIZE);
+ if (!hdr) {
+ BT_ERR("Frame is too short (len %d)", skb->len);
+ goto drop;
+ }
+ }
+
+ len = __le16_to_cpu(hdr->slen);
+ flags = hci_iso_data_flags(len);
+ len = hci_iso_data_len(len);
+
+ BT_DBG("Start: total len %d, frag len %d flags 0x%4.4x", len,
+ skb->len, flags);
+
+ if (len == skb->len) {
+ /* Complete frame received */
+ iso_recv_frame(conn, skb);
+ return;
+ }
+
+ if (pb == ISO_SINGLE) {
+ BT_ERR("Frame malformed (len %d, expected len %d)",
+ skb->len, len);
+ goto drop;
+ }
+
+ if (skb->len > len) {
+ BT_ERR("Frame is too long (len %d, expected len %d)",
+ skb->len, len);
+ goto drop;
+ }
+
+ /* Allocate skb for the complete frame (with header) */
+ conn->rx_skb = bt_skb_alloc(len, GFP_KERNEL);
+ if (!conn->rx_skb)
+ goto drop;
+
+ skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
+ skb->len);
+ conn->rx_len = len - skb->len;
+ break;
+
+ case ISO_CONT:
+ BT_DBG("Cont: frag len %d (expecting %d)", skb->len,
+ conn->rx_len);
+
+ if (!conn->rx_len) {
+ BT_ERR("Unexpected continuation frame (len %d)",
+ skb->len);
+ goto drop;
+ }
+
+ if (skb->len > conn->rx_len) {
+ BT_ERR("Fragment is too long (len %d, expected %d)",
+ skb->len, conn->rx_len);
+ kfree_skb(conn->rx_skb);
+ conn->rx_skb = NULL;
+ conn->rx_len = 0;
+ goto drop;
+ }
+
+ skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
+ skb->len);
+ conn->rx_len -= skb->len;
+ return;
+
+ case ISO_END:
+ skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
+ skb->len);
+ conn->rx_len -= skb->len;
+
+ if (!conn->rx_len) {
+ struct sk_buff *rx_skb = conn->rx_skb;
+
+ /* Complete frame received. iso_recv_frame
+ * takes ownership of the skb so set the global
+ * rx_skb pointer to NULL first.
+ */
+ conn->rx_skb = NULL;
+ iso_recv_frame(conn, rx_skb);
+ }
+ break;
+ }
+
+drop:
+ kfree_skb(skb);
+}
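
Stripped of the skb plumbing, the START/CONT/END handling above reduces to the following stand-alone model (illustrative, error paths trimmed): the start fragment announces the total SDU length and later fragments are appended until nothing is missing.

	#include <stdlib.h>
	#include <string.h>

	struct reasm {
		unsigned char *buf;	/* accumulating SDU */
		size_t have;		/* bytes copied so far */
		size_t want;		/* bytes still missing */
	};

	static int reasm_start(struct reasm *r, size_t total,
			       const void *frag, size_t len)
	{
		if (len > total)
			return -1;	/* "Frame is too long" */

		r->buf = malloc(total);
		if (!r->buf)
			return -1;

		memcpy(r->buf, frag, len);
		r->have = len;
		r->want = total - len;
		return r->want == 0;	/* 1: complete in one fragment */
	}

	static int reasm_cont(struct reasm *r, const void *frag, size_t len)
	{
		if (!r->want || len > r->want)
			return -1;	/* unexpected or oversized fragment */

		memcpy(r->buf + r->have, frag, len);
		r->have += len;
		r->want -= len;
		return r->want == 0;	/* 1: SDU complete, deliver buf */
	}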
+
+static struct hci_cb iso_cb = {
+ .name = "ISO",
+ .connect_cfm = iso_connect_cfm,
+ .disconn_cfm = iso_disconn_cfm,
+};
+
+static int iso_debugfs_show(struct seq_file *f, void *p)
+{
+ struct sock *sk;
+
+ read_lock(&iso_sk_list.lock);
+
+ sk_for_each(sk, &iso_sk_list.head) {
+ seq_printf(f, "%pMR %pMR %d\n", &iso_pi(sk)->src,
+ &iso_pi(sk)->dst, sk->sk_state);
+ }
+
+ read_unlock(&iso_sk_list.lock);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(iso_debugfs);
+
+static struct dentry *iso_debugfs;
+
+static const struct proto_ops iso_sock_ops = {
+ .family = PF_BLUETOOTH,
+ .owner = THIS_MODULE,
+ .release = iso_sock_release,
+ .bind = iso_sock_bind,
+ .connect = iso_sock_connect,
+ .listen = iso_sock_listen,
+ .accept = iso_sock_accept,
+ .getname = iso_sock_getname,
+ .sendmsg = iso_sock_sendmsg,
+ .recvmsg = iso_sock_recvmsg,
+ .poll = bt_sock_poll,
+ .ioctl = bt_sock_ioctl,
+ .mmap = sock_no_mmap,
+ .socketpair = sock_no_socketpair,
+ .shutdown = iso_sock_shutdown,
+ .setsockopt = iso_sock_setsockopt,
+ .getsockopt = iso_sock_getsockopt
+};
+
+static const struct net_proto_family iso_sock_family_ops = {
+ .family = PF_BLUETOOTH,
+ .owner = THIS_MODULE,
+ .create = iso_sock_create,
+};
+
+static bool iso_inited;
+
+bool iso_enabled(void)
+{
+ return iso_inited;
+}
+
+int iso_init(void)
+{
+ int err;
+
+ BUILD_BUG_ON(sizeof(struct sockaddr_iso) > sizeof(struct sockaddr));
+
+ if (iso_inited)
+ return -EALREADY;
+
+ err = proto_register(&iso_proto, 0);
+ if (err < 0)
+ return err;
+
+ err = bt_sock_register(BTPROTO_ISO, &iso_sock_family_ops);
+ if (err < 0) {
+ BT_ERR("ISO socket registration failed");
+ goto error;
+ }
+
+ err = bt_procfs_init(&init_net, "iso", &iso_sk_list, NULL);
+ if (err < 0) {
+ BT_ERR("Failed to create ISO proc file");
+ bt_sock_unregister(BTPROTO_ISO);
+ goto error;
+ }
+
+ BT_INFO("ISO socket layer initialized");
+
+ hci_register_cb(&iso_cb);
+
+ if (IS_ERR_OR_NULL(bt_debugfs))
+ return 0;
+
+ if (!iso_debugfs) {
+ iso_debugfs = debugfs_create_file("iso", 0444, bt_debugfs,
+ NULL, &iso_debugfs_fops);
+ }
+
+ iso_inited = true;
+
+ return 0;
+
+error:
+ proto_unregister(&iso_proto);
+ return err;
+}
+
+int iso_exit(void)
+{
+ if (!iso_inited)
+ return -EALREADY;
+
+ bt_procfs_cleanup(&init_net, "iso");
+
+ debugfs_remove(iso_debugfs);
+ iso_debugfs = NULL;
+
+ hci_unregister_cb(&iso_cb);
+
+ bt_sock_unregister(BTPROTO_ISO);
+
+ proto_unregister(&iso_proto);
+
+ iso_inited = false;
+
+ return 0;
+}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index e817ff0607a0..9c24947aa41e 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -61,6 +61,9 @@ static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err);
static void l2cap_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
struct sk_buff_head *skbs, u8 event);
+static void l2cap_retrans_timeout(struct work_struct *work);
+static void l2cap_monitor_timeout(struct work_struct *work);
+static void l2cap_ack_timeout(struct work_struct *work);
static inline u8 bdaddr_type(u8 link_type, u8 bdaddr_type)
{
@@ -111,7 +114,8 @@ static struct l2cap_chan *__l2cap_get_chan_by_scid(struct l2cap_conn *conn,
}
/* Find channel with given SCID.
- * Returns locked channel. */
+ * Returns a locked channel with a reference held.
+ */
static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
u16 cid)
{
@@ -119,15 +123,19 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_scid(conn, cid);
- if (c)
- l2cap_chan_lock(c);
+ if (c) {
+ /* Only lock if chan reference is not 0 */
+ c = l2cap_chan_hold_unless_zero(c);
+ if (c)
+ l2cap_chan_lock(c);
+ }
mutex_unlock(&conn->chan_lock);
return c;
}
/* Find channel with given DCID.
- * Returns locked channel.
+ * Returns a locked channel with a reference held.
*/
static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
u16 cid)
@@ -136,8 +144,12 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_dcid(conn, cid);
- if (c)
- l2cap_chan_lock(c);
+ if (c) {
+ /* Only lock if chan reference is not 0 */
+ c = l2cap_chan_hold_unless_zero(c);
+ if (c)
+ l2cap_chan_lock(c);
+ }
mutex_unlock(&conn->chan_lock);
return c;
@@ -162,8 +174,12 @@ static struct l2cap_chan *l2cap_get_chan_by_ident(struct l2cap_conn *conn,
mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_ident(conn, ident);
- if (c)
- l2cap_chan_lock(c);
+ if (c) {
+ /* Only lock if chan reference is not 0 */
+ c = l2cap_chan_hold_unless_zero(c);
+ if (c)
+ l2cap_chan_lock(c);
+ }
mutex_unlock(&conn->chan_lock);
return c;
@@ -463,6 +479,9 @@ struct l2cap_chan *l2cap_chan_create(void)
write_unlock(&chan_list_lock);
INIT_DELAYED_WORK(&chan->chan_timer, l2cap_chan_timeout);
+ INIT_DELAYED_WORK(&chan->retrans_timer, l2cap_retrans_timeout);
+ INIT_DELAYED_WORK(&chan->monitor_timer, l2cap_monitor_timeout);
+ INIT_DELAYED_WORK(&chan->ack_timer, l2cap_ack_timeout);
chan->state = BT_OPEN;
@@ -497,6 +516,16 @@ void l2cap_chan_hold(struct l2cap_chan *c)
kref_get(&c->kref);
}
+struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c)
+{
+ BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
+
+ if (!kref_get_unless_zero(&c->kref))
+ return NULL;
+
+ return c;
+}
+
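
A stand-alone sketch of the idiom l2cap_chan_hold_unless_zero() wraps, using C11 atomics instead of the kernel's kref (illustrative only): a lookup can race with the final put, so the reference is taken only while the count is still non-zero.

	#include <stdatomic.h>
	#include <stdbool.h>

	static bool get_unless_zero(atomic_uint *refs)
	{
		unsigned int old = atomic_load(refs);

		do {
			if (old == 0)
				return false;	/* object already being torn down */
		} while (!atomic_compare_exchange_weak(refs, &old, old + 1));

		return true;
	}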
void l2cap_chan_put(struct l2cap_chan *c)
{
BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
@@ -1369,6 +1398,7 @@ static void l2cap_le_connect(struct l2cap_chan *chan)
l2cap_le_flowctl_init(chan, 0);
+ memset(&req, 0, sizeof(req));
req.psm = chan->psm;
req.scid = cpu_to_le16(chan->scid);
req.mtu = cpu_to_le16(chan->imtu);
@@ -1436,6 +1466,7 @@ static void l2cap_ecred_connect(struct l2cap_chan *chan)
l2cap_ecred_init(chan, 0);
+ memset(&data, 0, sizeof(data));
data.pdu.req.psm = chan->psm;
data.pdu.req.mtu = cpu_to_le16(chan->imtu);
data.pdu.req.mps = cpu_to_le16(chan->mps);
@@ -1443,7 +1474,6 @@ static void l2cap_ecred_connect(struct l2cap_chan *chan)
data.pdu.scid[0] = cpu_to_le16(chan->scid);
chan->ident = l2cap_get_ident(conn);
- data.pid = chan->ops->get_peer_pid(chan);
data.count = 1;
data.chan = chan;
@@ -1946,11 +1976,11 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
bdaddr_t *dst,
u8 link_type)
{
- struct l2cap_chan *c, *c1 = NULL;
+ struct l2cap_chan *c, *tmp, *c1 = NULL;
read_lock(&chan_list_lock);
- list_for_each_entry(c, &chan_list, global_l) {
+ list_for_each_entry_safe(c, tmp, &chan_list, global_l) {
if (state && c->state != state)
continue;
@@ -1960,7 +1990,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
if (link_type == LE_LINK && c->src_type == BDADDR_BREDR)
continue;
- if (c->psm == psm) {
+ if (c->chan_type != L2CAP_CHAN_FIXED && c->psm == psm) {
int src_match, dst_match;
int src_any, dst_any;
@@ -1968,7 +1998,9 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
src_match = !bacmp(&c->src, src);
dst_match = !bacmp(&c->dst, dst);
if (src_match && dst_match) {
- l2cap_chan_hold(c);
+ if (!l2cap_chan_hold_unless_zero(c))
+ continue;
+
read_unlock(&chan_list_lock);
return c;
}
@@ -1983,7 +2015,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
}
if (c1)
- l2cap_chan_hold(c1);
+ c1 = l2cap_chan_hold_unless_zero(c1);
read_unlock(&chan_list_lock);
@@ -3294,10 +3326,6 @@ int l2cap_ertm_init(struct l2cap_chan *chan)
chan->rx_state = L2CAP_RX_STATE_RECV;
chan->tx_state = L2CAP_TX_STATE_XMIT;
- INIT_DELAYED_WORK(&chan->retrans_timer, l2cap_retrans_timeout);
- INIT_DELAYED_WORK(&chan->monitor_timer, l2cap_monitor_timeout);
- INIT_DELAYED_WORK(&chan->ack_timer, l2cap_ack_timeout);
-
skb_queue_head_init(&chan->srej_q);
err = l2cap_seq_list_init(&chan->srej_list, chan->tx_win);
@@ -3736,7 +3764,8 @@ done:
l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
sizeof(rfc), (unsigned long) &rfc, endptr - ptr);
- if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) {
+ if (remote_efs &&
+ test_bit(FLAG_EFS_ENABLE, &chan->flags)) {
chan->remote_id = efs.id;
chan->remote_stype = efs.stype;
chan->remote_msdu = le16_to_cpu(efs.msdu);
@@ -4281,6 +4310,12 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
}
}
+ chan = l2cap_chan_hold_unless_zero(chan);
+ if (!chan) {
+ err = -EBADSLT;
+ goto unlock;
+ }
+
err = 0;
l2cap_chan_lock(chan);
@@ -4310,6 +4345,7 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
}
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
unlock:
mutex_unlock(&conn->chan_lock);
@@ -4463,6 +4499,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
unlock:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return err;
}
@@ -4577,6 +4614,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn,
done:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return err;
}
@@ -5304,6 +5342,7 @@ send_move_response:
l2cap_send_move_chan_rsp(chan, result);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -5396,6 +5435,7 @@ static void l2cap_move_continue(struct l2cap_conn *conn, u16 icid, u16 result)
}
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
}
static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
@@ -5425,6 +5465,7 @@ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
}
static int l2cap_move_channel_rsp(struct l2cap_conn *conn,
@@ -5488,6 +5529,7 @@ static int l2cap_move_channel_confirm(struct l2cap_conn *conn,
l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -5523,6 +5565,7 @@ static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn,
}
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -5771,6 +5814,19 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn,
BT_DBG("psm 0x%2.2x scid 0x%4.4x mtu %u mps %u", __le16_to_cpu(psm),
scid, mtu, mps);
+ /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A
+ * page 1059:
+ *
+ * Valid range: 0x0001-0x00ff
+ *
+ * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges
+ */
+ if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) {
+ result = L2CAP_CR_LE_BAD_PSM;
+ chan = NULL;
+ goto response;
+ }
+
/* Check if we have socket listening on psm */
pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src,
&conn->hcon->dst, LE_LINK);
@@ -5895,12 +5951,11 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
if (credits > max_credits) {
BT_ERR("LE credits overflow");
l2cap_send_disconn_req(chan, ECONNRESET);
- l2cap_chan_unlock(chan);
/* Return 0 so that we don't trigger an unnecessary
* command reject packet.
*/
- return 0;
+ goto unlock;
}
chan->tx_credits += credits;
@@ -5911,7 +5966,9 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
if (chan->tx_credits)
chan->ops->resume(chan);
+unlock:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -5958,6 +6015,18 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
psm = req->psm;
+ /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A
+ * page 1059:
+ *
+ * Valid range: 0x0001-0x00ff
+ *
+ * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges
+ */
+ if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) {
+ result = L2CAP_CR_LE_BAD_PSM;
+ goto response;
+ }
+
BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps);
memset(&pdu, 0, sizeof(pdu));
@@ -6842,6 +6911,7 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan,
struct l2cap_ctrl *control,
struct sk_buff *skb, u8 event)
{
+ struct l2cap_ctrl local_control;
int err = 0;
bool skb_in_use = false;
@@ -6866,15 +6936,32 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan,
chan->buffer_seq = chan->expected_tx_seq;
skb_in_use = true;
+ /* l2cap_reassemble_sdu may free skb, hence invalidate
+ * control, so make a copy in advance to use it after
+ * l2cap_reassemble_sdu returns and to avoid the race
+ * condition, for example:
+ *
+ * The current thread calls:
+ * l2cap_reassemble_sdu
+ * chan->ops->recv == l2cap_sock_recv_cb
+ * __sock_queue_rcv_skb
+ * Another thread calls:
+ * bt_sock_recvmsg
+ * skb_recv_datagram
+ * skb_free_datagram
+ * Then the current thread tries to access control, but
+ * it was freed by skb_free_datagram.
+ */
+ local_control = *control;
err = l2cap_reassemble_sdu(chan, skb, control);
if (err)
break;
- if (control->final) {
+ if (local_control.final) {
if (!test_and_clear_bit(CONN_REJ_ACT,
&chan->conn_state)) {
- control->final = 0;
- l2cap_retransmit_all(chan, control);
+ local_control.final = 0;
+ l2cap_retransmit_all(chan, &local_control);
l2cap_ertm_send(chan);
}
}
@@ -7254,11 +7341,27 @@ static int l2cap_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
struct sk_buff *skb)
{
+ /* l2cap_reassemble_sdu may free skb, hence invalidate control, so store
+ * the txseq field in advance to use it after l2cap_reassemble_sdu
+ * returns and to avoid the race condition, for example:
+ *
+ * The current thread calls:
+ * l2cap_reassemble_sdu
+ * chan->ops->recv == l2cap_sock_recv_cb
+ * __sock_queue_rcv_skb
+ * Another thread calls:
+ * bt_sock_recvmsg
+ * skb_recv_datagram
+ * skb_free_datagram
+ * Then the current thread tries to access control, but it was freed by
+ * skb_free_datagram.
+ */
+ u16 txseq = control->txseq;
+
BT_DBG("chan %p, control %p, skb %p, state %d", chan, control, skb,
chan->rx_state);
- if (l2cap_classify_txseq(chan, control->txseq) ==
- L2CAP_TXSEQ_EXPECTED) {
+ if (l2cap_classify_txseq(chan, txseq) == L2CAP_TXSEQ_EXPECTED) {
l2cap_pass_to_tx(chan, control);
BT_DBG("buffer_seq %u->%u", chan->buffer_seq,
@@ -7281,8 +7384,8 @@ static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
}
}
- chan->last_acked_seq = control->txseq;
- chan->expected_tx_seq = __next_seq(chan, control->txseq);
+ chan->last_acked_seq = txseq;
+ chan->expected_tx_seq = __next_seq(chan, txseq);
return 0;
}
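
The same defensive-copy shape, reduced to a stand-alone sketch (illustrative names, not kernel API): snapshot what you still need from a buffer before handing it to a consumer that may free it.

	#include <stdlib.h>

	struct ctrl {
		unsigned short txseq;
		unsigned char final;
	};

	/* Consumer that takes ownership, like l2cap_reassemble_sdu()
	 * passing the skb on to the socket layer.
	 */
	static void deliver(struct ctrl *c)
	{
		free(c);
	}

	static unsigned short rx_one(struct ctrl *c)
	{
		unsigned short txseq = c->txseq;	/* copy first ... */

		deliver(c);			/* ... c may be gone now */

		return txseq;			/* safe: read the copy */
	}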
@@ -7538,6 +7641,7 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid,
return;
}
+ l2cap_chan_hold(chan);
l2cap_chan_lock(chan);
} else {
BT_DBG("unknown cid 0x%4.4x", cid);
@@ -7597,6 +7701,7 @@ drop:
done:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
}
static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
@@ -8085,7 +8190,7 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c,
if (src_type != c->src_type)
continue;
- l2cap_chan_hold(c);
+ c = l2cap_chan_hold_unless_zero(c);
read_unlock(&chan_list_lock);
return c;
}
@@ -8382,9 +8487,8 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
* expected length.
*/
if (skb->len < L2CAP_LEN_SIZE) {
- if (l2cap_recv_frag(conn, skb, conn->mtu) < 0)
- goto drop;
- return;
+ l2cap_recv_frag(conn, skb, conn->mtu);
+ break;
}
len = get_unaligned_le16(skb->data) + L2CAP_HDR_SIZE;
@@ -8428,7 +8532,7 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
/* Header still could not be read just continue */
if (conn->rx_skb->len < L2CAP_LEN_SIZE)
- return;
+ break;
}
if (skb->len > conn->rx_len) {
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index 5326f41a58b7..469a0c95b6e8 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -135,6 +135,77 @@ int bt_to_errno(__u16 code)
}
EXPORT_SYMBOL(bt_to_errno);
+/* Unix errno to Bluetooth error codes mapping */
+__u8 bt_status(int err)
+{
+ /* Don't convert if already positive value */
+ if (err >= 0)
+ return err;
+
+ switch (err) {
+ case -EBADRQC:
+ return 0x01;
+
+ case -ENOTCONN:
+ return 0x02;
+
+ case -EIO:
+ return 0x03;
+
+ case -EHOSTDOWN:
+ return 0x04;
+
+ case -EACCES:
+ return 0x05;
+
+ case -EBADE:
+ return 0x06;
+
+ case -ENOMEM:
+ return 0x07;
+
+ case -ETIMEDOUT:
+ return 0x08;
+
+ case -EMLINK:
+ return 0x09;
+
+ case -EALREADY:
+ return 0x0b;
+
+ case -EBUSY:
+ return 0x0c;
+
+ case -ECONNREFUSED:
+ return 0x0d;
+
+ case -EOPNOTSUPP:
+ return 0x11;
+
+ case -EINVAL:
+ return 0x12;
+
+ case -ECONNRESET:
+ return 0x13;
+
+ case -ECONNABORTED:
+ return 0x16;
+
+ case -ELOOP:
+ return 0x17;
+
+ case -EPROTONOSUPPORT:
+ return 0x1a;
+
+ case -EPROTO:
+ return 0x19;
+
+ default:
+ return 0x1f;
+ }
+}
+EXPORT_SYMBOL(bt_status);
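
A hypothetical caller, just to show the direction of the mapping: the errno from a failed sync command becomes the status byte carried in an HCI event or mgmt response.

	/* -ETIMEDOUT maps to 0x08 (Connection Timeout); anything
	 * unmapped falls back to 0x1f.
	 */
	static __u8 example_event_status(int err)
	{
		return bt_status(err);
	}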
+
void bt_info(const char *format, ...)
{
struct va_format vaf;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 37087cf7dc5a..a92e7e485feb 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -42,7 +42,7 @@
#include "aosp.h"
#define MGMT_VERSION 1
-#define MGMT_REVISION 21
+#define MGMT_REVISION 22
static const u16 mgmt_commands[] = {
MGMT_OP_READ_INDEX_LIST,
@@ -129,6 +129,10 @@ static const u16 mgmt_commands[] = {
MGMT_OP_ADD_EXT_ADV_PARAMS,
MGMT_OP_ADD_EXT_ADV_DATA,
MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI,
+ MGMT_OP_SET_MESH_RECEIVER,
+ MGMT_OP_MESH_READ_FEATURES,
+ MGMT_OP_MESH_SEND,
+ MGMT_OP_MESH_SEND_CANCEL,
};
static const u16 mgmt_events[] = {
@@ -174,6 +178,8 @@ static const u16 mgmt_events[] = {
MGMT_EV_ADV_MONITOR_REMOVED,
MGMT_EV_CONTROLLER_SUSPEND,
MGMT_EV_CONTROLLER_RESUME,
+ MGMT_EV_ADV_MONITOR_DEVICE_FOUND,
+ MGMT_EV_ADV_MONITOR_DEVICE_LOST,
};
static const u16 mgmt_untrusted_commands[] = {
@@ -1021,13 +1027,99 @@ static void rpa_expired(struct work_struct *work)
hci_cmd_sync_queue(hdev, rpa_expired_sync, NULL, NULL);
}
+static void discov_off(struct work_struct *work)
+{
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ discov_off.work);
+
+ bt_dev_dbg(hdev, "");
+
+ hci_dev_lock(hdev);
+
+ /* When the discoverable timeout triggers, just make sure
+ * the limited discoverable flag is cleared. Even in the case
+ * of a timeout triggered from general discoverable, it is
+ * safe to unconditionally clear the flag.
+ */
+ hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
+ hci_dev_clear_flag(hdev, HCI_DISCOVERABLE);
+ hdev->discov_timeout = 0;
+
+ hci_update_discoverable(hdev);
+
+ mgmt_new_settings(hdev);
+
+ hci_dev_unlock(hdev);
+}
+
+static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev);
+
+static void mesh_send_complete(struct hci_dev *hdev,
+ struct mgmt_mesh_tx *mesh_tx, bool silent)
+{
+ u8 handle = mesh_tx->handle;
+
+ if (!silent)
+ mgmt_event(MGMT_EV_MESH_PACKET_CMPLT, hdev, &handle,
+ sizeof(handle), NULL);
+
+ mgmt_mesh_remove(mesh_tx);
+}
+
+static int mesh_send_done_sync(struct hci_dev *hdev, void *data)
+{
+ struct mgmt_mesh_tx *mesh_tx;
+
+ hci_dev_clear_flag(hdev, HCI_MESH_SENDING);
+ hci_disable_advertising_sync(hdev);
+ mesh_tx = mgmt_mesh_next(hdev, NULL);
+
+ if (mesh_tx)
+ mesh_send_complete(hdev, mesh_tx, false);
+
+ return 0;
+}
+
+static int mesh_send_sync(struct hci_dev *hdev, void *data);
+static void mesh_send_start_complete(struct hci_dev *hdev, void *data, int err);
+static void mesh_next(struct hci_dev *hdev, void *data, int err)
+{
+ struct mgmt_mesh_tx *mesh_tx = mgmt_mesh_next(hdev, NULL);
+
+ if (!mesh_tx)
+ return;
+
+ err = hci_cmd_sync_queue(hdev, mesh_send_sync, mesh_tx,
+ mesh_send_start_complete);
+
+ if (err < 0)
+ mesh_send_complete(hdev, mesh_tx, false);
+ else
+ hci_dev_set_flag(hdev, HCI_MESH_SENDING);
+}
+
+static void mesh_send_done(struct work_struct *work)
+{
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ mesh_send_done.work);
+
+ if (!hci_dev_test_flag(hdev, HCI_MESH_SENDING))
+ return;
+
+ hci_cmd_sync_queue(hdev, mesh_send_done_sync, NULL, mesh_next);
+}
+
static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev)
{
- if (hci_dev_test_and_set_flag(hdev, HCI_MGMT))
+ if (hci_dev_test_flag(hdev, HCI_MGMT))
return;
+ BT_INFO("MGMT ver %d.%d", MGMT_VERSION, MGMT_REVISION);
+
+ INIT_DELAYED_WORK(&hdev->discov_off, discov_off);
INIT_DELAYED_WORK(&hdev->service_cache, service_cache_off);
INIT_DELAYED_WORK(&hdev->rpa_expired, rpa_expired);
+ INIT_DELAYED_WORK(&hdev->mesh_send_done, mesh_send_done);
/* Non-mgmt controlled devices get this bit set
* implicitly so that pairing works for them, however
@@ -1035,6 +1127,8 @@ static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev)
* it
*/
hci_dev_clear_flag(hdev, HCI_BONDABLE);
+
+ hci_dev_set_flag(hdev, HCI_MGMT);
}
static int read_controller_info(struct sock *sk, struct hci_dev *hdev,
@@ -1080,11 +1174,11 @@ static u16 append_eir_data_to_buf(struct hci_dev *hdev, u8 *eir)
eir_len = eir_append_le16(eir, eir_len, EIR_APPEARANCE,
hdev->appearance);
- name_len = strlen(hdev->dev_name);
+ name_len = strnlen(hdev->dev_name, sizeof(hdev->dev_name));
eir_len = eir_append_data(eir, eir_len, EIR_NAME_COMPLETE,
hdev->dev_name, name_len);
- name_len = strlen(hdev->short_name);
+ name_len = strnlen(hdev->short_name, sizeof(hdev->short_name));
eir_len = eir_append_data(eir, eir_len, EIR_NAME_SHORT,
hdev->short_name, name_len);
@@ -1218,7 +1312,13 @@ static int new_settings(struct hci_dev *hdev, struct sock *skip)
static void mgmt_set_powered_complete(struct hci_dev *hdev, void *data, int err)
{
struct mgmt_pending_cmd *cmd = data;
- struct mgmt_mode *cp = cmd->param;
+ struct mgmt_mode *cp;
+
+ /* Make sure cmd is still outstanding. */
+ if (cmd != pending_find(MGMT_OP_SET_POWERED, hdev))
+ return;
+
+ cp = cmd->param;
bt_dev_dbg(hdev, "err %d", err);
@@ -1242,7 +1342,7 @@ static void mgmt_set_powered_complete(struct hci_dev *hdev, void *data, int err)
mgmt_status(err));
}
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
}
static int set_powered_sync(struct hci_dev *hdev, void *data)
@@ -1281,7 +1381,7 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_SET_POWERED, hdev, data, len);
+ cmd = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto failed;
@@ -1290,6 +1390,9 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data,
err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd,
mgmt_set_powered_complete);
+ if (err < 0)
+ mgmt_pending_remove(cmd);
+
failed:
hci_dev_unlock(hdev);
return err;
@@ -1383,6 +1486,10 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "err %d", err);
+ /* Make sure cmd is still outstanding. */
+ if (cmd != pending_find(MGMT_OP_SET_DISCOVERABLE, hdev))
+ return;
+
hci_dev_lock(hdev);
if (err) {
@@ -1402,7 +1509,7 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data,
new_settings(hdev, cmd->sk);
done:
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
hci_dev_unlock(hdev);
}
@@ -1511,7 +1618,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_SET_DISCOVERABLE, hdev, data, len);
+ cmd = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto failed;
@@ -1538,6 +1645,9 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
err = hci_cmd_sync_queue(hdev, set_discoverable_sync, cmd,
mgmt_set_discoverable_complete);
+ if (err < 0)
+ mgmt_pending_remove(cmd);
+
failed:
hci_dev_unlock(hdev);
return err;
@@ -1550,6 +1660,10 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "err %d", err);
+ /* Make sure cmd is still outstanding. */
+ if (cmd != pending_find(MGMT_OP_SET_CONNECTABLE, hdev))
+ return;
+
hci_dev_lock(hdev);
if (err) {
@@ -1562,7 +1676,9 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data,
new_settings(hdev, cmd->sk);
done:
- mgmt_pending_free(cmd);
+ if (cmd)
+ mgmt_pending_remove(cmd);
+
hci_dev_unlock(hdev);
}
@@ -1587,7 +1703,7 @@ static int set_connectable_update_settings(struct hci_dev *hdev,
return err;
if (changed) {
- hci_req_update_scan(hdev);
+ hci_update_scan(hdev);
hci_update_passive_scan(hdev);
return new_settings(hdev, sk);
}
@@ -1634,7 +1750,7 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_SET_CONNECTABLE, hdev, data, len);
+ cmd = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto failed;
@@ -1654,6 +1770,9 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
err = hci_cmd_sync_queue(hdev, set_connectable_sync, cmd,
mgmt_set_connectable_complete);
+ if (err < 0)
+ mgmt_pending_remove(cmd);
+
failed:
hci_dev_unlock(hdev);
return err;
@@ -1774,6 +1893,10 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err)
u8 enable = cp->val;
bool changed;
+ /* Make sure cmd is still outstanding. */
+ if (cmd != pending_find(MGMT_OP_SET_SSP, hdev))
+ return;
+
if (err) {
u8 mgmt_err = mgmt_status(err);
@@ -1999,6 +2122,8 @@ static int set_le_sync(struct hci_dev *hdev, void *data)
int err;
if (!val) {
+ hci_clear_adv_instance_sync(hdev, NULL, 0x00, true);
+
if (hci_dev_test_flag(hdev, HCI_LE_ADV))
hci_disable_advertising_sync(hdev);
@@ -2033,6 +2158,317 @@ static int set_le_sync(struct hci_dev *hdev, void *data)
return err;
}
+static void set_mesh_complete(struct hci_dev *hdev, void *data, int err)
+{
+ struct mgmt_pending_cmd *cmd = data;
+ u8 status = mgmt_status(err);
+ struct sock *sk = cmd->sk;
+
+ if (status) {
+ mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev,
+ cmd_status_rsp, &status);
+ return;
+ }
+
+ mgmt_pending_remove(cmd);
+ mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, 0, NULL, 0);
+}
+
+static int set_mesh_sync(struct hci_dev *hdev, void *data)
+{
+ struct mgmt_pending_cmd *cmd = data;
+ struct mgmt_cp_set_mesh *cp = cmd->param;
+ size_t len = cmd->param_len;
+
+ memset(hdev->mesh_ad_types, 0, sizeof(hdev->mesh_ad_types));
+
+ if (cp->enable)
+ hci_dev_set_flag(hdev, HCI_MESH);
+ else
+ hci_dev_clear_flag(hdev, HCI_MESH);
+
+ len -= sizeof(*cp);
+
+ /* If filters don't fit, forward all adv pkts */
+ if (len <= sizeof(hdev->mesh_ad_types))
+ memcpy(hdev->mesh_ad_types, cp->ad_types, len);
+
+ hci_update_passive_scan_sync(hdev);
+ return 0;
+}
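
A stand-alone model of the filter copy above (illustrative names and sizes): a one-byte header is followed by a variable list of AD types, the list is copied only when it fits, and a zeroed filter set means "forward all adv packets".

	#include <string.h>

	struct mesh_params {
		unsigned char enable;
		unsigned char ad_types[];	/* flexible trailing array */
	};

	static void load_mesh_filters(unsigned char *dst, size_t dst_len,
				      const struct mesh_params *cp,
				      size_t param_len)
	{
		size_t payload;

		memset(dst, 0, dst_len);

		if (param_len < sizeof(*cp))
			return;

		payload = param_len - sizeof(*cp);
		if (payload <= dst_len)
			memcpy(dst, cp->ad_types, payload);
	}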
+
+static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
+{
+ struct mgmt_cp_set_mesh *cp = data;
+ struct mgmt_pending_cmd *cmd;
+ int err = 0;
+
+ bt_dev_dbg(hdev, "sock %p", sk);
+
+ if (!lmp_le_capable(hdev) ||
+ !hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL))
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER,
+ MGMT_STATUS_NOT_SUPPORTED);
+
+ if (cp->enable != 0x00 && cp->enable != 0x01)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ hci_dev_lock(hdev);
+
+ cmd = mgmt_pending_add(sk, MGMT_OP_SET_MESH_RECEIVER, hdev, data, len);
+ if (!cmd)
+ err = -ENOMEM;
+ else
+ err = hci_cmd_sync_queue(hdev, set_mesh_sync, cmd,
+ set_mesh_complete);
+
+ if (err < 0) {
+ err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER,
+ MGMT_STATUS_FAILED);
+
+ if (cmd)
+ mgmt_pending_remove(cmd);
+ }
+
+ hci_dev_unlock(hdev);
+ return err;
+}
+
+static void mesh_send_start_complete(struct hci_dev *hdev, void *data, int err)
+{
+ struct mgmt_mesh_tx *mesh_tx = data;
+ struct mgmt_cp_mesh_send *send = (void *)mesh_tx->param;
+ unsigned long mesh_send_interval;
+ u8 mgmt_err = mgmt_status(err);
+
+ /* Report any errors here, but don't report completion */
+
+ if (mgmt_err) {
+ hci_dev_clear_flag(hdev, HCI_MESH_SENDING);
+ /* Send Complete Error Code for handle */
+ mesh_send_complete(hdev, mesh_tx, false);
+ return;
+ }
+
+ mesh_send_interval = msecs_to_jiffies((send->cnt) * 25);
+ queue_delayed_work(hdev->req_workqueue, &hdev->mesh_send_done,
+ mesh_send_interval);
+}
+
+static int mesh_send_sync(struct hci_dev *hdev, void *data)
+{
+ struct mgmt_mesh_tx *mesh_tx = data;
+ struct mgmt_cp_mesh_send *send = (void *)mesh_tx->param;
+ struct adv_info *adv, *next_instance;
+ u8 instance = hdev->le_num_of_adv_sets + 1;
+ u16 timeout, duration;
+ int err = 0;
+
+ if (hdev->le_num_of_adv_sets <= hdev->adv_instance_cnt)
+ return MGMT_STATUS_BUSY;
+
+ timeout = 1000;
+ duration = send->cnt * INTERVAL_TO_MS(hdev->le_adv_max_interval);
+ adv = hci_add_adv_instance(hdev, instance, 0,
+ send->adv_data_len, send->adv_data,
+ 0, NULL,
+ timeout, duration,
+ HCI_ADV_TX_POWER_NO_PREFERENCE,
+ hdev->le_adv_min_interval,
+ hdev->le_adv_max_interval,
+ mesh_tx->handle);
+
+ if (!IS_ERR(adv))
+ mesh_tx->instance = instance;
+ else
+ err = PTR_ERR(adv);
+
+ if (hdev->cur_adv_instance == instance) {
+ /* If the currently advertised instance is being changed then
+ * cancel the current advertising and schedule the next
+ * instance. If there is only one instance then the overridden
+ * advertising data will be visible right away.
+ */
+ cancel_adv_timeout(hdev);
+
+ next_instance = hci_get_next_instance(hdev, instance);
+ if (next_instance)
+ instance = next_instance->instance;
+ else
+ instance = 0;
+ } else if (hdev->adv_instance_timeout) {
+ /* Immediately advertise the new instance if no other, or
+ * let it go naturally from queue if ADV is already happening
+ */
+ instance = 0;
+ }
+
+ if (instance)
+ return hci_schedule_adv_instance_sync(hdev, instance, true);
+
+ return err;
+}
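
A worked version of the duration arithmetic, assuming INTERVAL_TO_MS() converts the usual 0.625 ms LE interval units (EX_INTERVAL_TO_MS below is a local stand-in, not the kernel macro): with le_adv_max_interval = 0x0800 (1280 ms) and cnt = 3, the instance advertises for 3840 ms, while mesh_send_start_complete() above re-arms mesh_send_done after only cnt * 25 ms.

	#define EX_INTERVAL_TO_MS(x)	(((x) * 625) / 1000)

	static unsigned int mesh_adv_duration_ms(unsigned int cnt,
						 unsigned int max_interval)
	{
		/* 3 * ((0x0800 * 625) / 1000) == 3 * 1280 == 3840 ms */
		return cnt * EX_INTERVAL_TO_MS(max_interval);
	}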
+
+static void send_count(struct mgmt_mesh_tx *mesh_tx, void *data)
+{
+ struct mgmt_rp_mesh_read_features *rp = data;
+
+ if (rp->used_handles >= rp->max_handles)
+ return;
+
+ rp->handles[rp->used_handles++] = mesh_tx->handle;
+}
+
+static int mesh_features(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 len)
+{
+ struct mgmt_rp_mesh_read_features rp;
+
+ if (!lmp_le_capable(hdev) ||
+ !hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL))
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_READ_FEATURES,
+ MGMT_STATUS_NOT_SUPPORTED);
+
+ memset(&rp, 0, sizeof(rp));
+ rp.index = cpu_to_le16(hdev->id);
+ if (hci_dev_test_flag(hdev, HCI_LE_ENABLED))
+ rp.max_handles = MESH_HANDLES_MAX;
+
+ hci_dev_lock(hdev);
+
+ if (rp.max_handles)
+ mgmt_mesh_foreach(hdev, send_count, &rp, sk);
+
+ mgmt_cmd_complete(sk, hdev->id, MGMT_OP_MESH_READ_FEATURES, 0, &rp,
+ rp.used_handles + sizeof(rp) - MESH_HANDLES_MAX);
+
+ hci_dev_unlock(hdev);
+ return 0;
+}
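
To make the reply-length expression above concrete, a model layout, assuming MESH_HANDLES_MAX is 3 and a packed struct (both assumptions, not taken from this diff):

	struct rp_model {
		unsigned short index;
		unsigned char max_handles;
		unsigned char used_handles;
		unsigned char handles[3];	/* MESH_HANDLES_MAX, assumed 3 */
	} __attribute__((packed));

	/* With one handle queued:
	 *	used_handles + sizeof(rp) - MESH_HANDLES_MAX
	 *	== 1 + 7 - 3 == 5 bytes: the fixed header plus one handle.
	 */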
+
+static int send_cancel(struct hci_dev *hdev, void *data)
+{
+ struct mgmt_pending_cmd *cmd = data;
+ struct mgmt_cp_mesh_send_cancel *cancel = (void *)cmd->param;
+ struct mgmt_mesh_tx *mesh_tx;
+
+ if (!cancel->handle) {
+ do {
+ mesh_tx = mgmt_mesh_next(hdev, cmd->sk);
+
+ if (mesh_tx)
+ mesh_send_complete(hdev, mesh_tx, false);
+ } while (mesh_tx);
+ } else {
+ mesh_tx = mgmt_mesh_find(hdev, cancel->handle);
+
+ if (mesh_tx && mesh_tx->sk == cmd->sk)
+ mesh_send_complete(hdev, mesh_tx, false);
+ }
+
+ mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_MESH_SEND_CANCEL,
+ 0, NULL, 0);
+ mgmt_pending_free(cmd);
+
+ return 0;
+}
+
+static int mesh_send_cancel(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 len)
+{
+ struct mgmt_pending_cmd *cmd;
+ int err;
+
+ if (!lmp_le_capable(hdev) ||
+ !hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL))
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND_CANCEL,
+ MGMT_STATUS_NOT_SUPPORTED);
+
+ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND_CANCEL,
+ MGMT_STATUS_REJECTED);
+
+ hci_dev_lock(hdev);
+ cmd = mgmt_pending_new(sk, MGMT_OP_MESH_SEND_CANCEL, hdev, data, len);
+ if (!cmd)
+ err = -ENOMEM;
+ else
+ err = hci_cmd_sync_queue(hdev, send_cancel, cmd, NULL);
+
+ if (err < 0) {
+ err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND_CANCEL,
+ MGMT_STATUS_FAILED);
+
+ if (cmd)
+ mgmt_pending_free(cmd);
+ }
+
+ hci_dev_unlock(hdev);
+ return err;
+}
+
+static int mesh_send(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
+{
+ struct mgmt_mesh_tx *mesh_tx;
+ struct mgmt_cp_mesh_send *send = data;
+ struct mgmt_rp_mesh_read_features rp;
+ bool sending;
+ int err = 0;
+
+ if (!lmp_le_capable(hdev) ||
+ !hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL))
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND,
+ MGMT_STATUS_NOT_SUPPORTED);
+ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) ||
+ len <= MGMT_MESH_SEND_SIZE ||
+ len > (MGMT_MESH_SEND_SIZE + 31))
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND,
+ MGMT_STATUS_REJECTED);
+
+ hci_dev_lock(hdev);
+
+ memset(&rp, 0, sizeof(rp));
+ rp.max_handles = MESH_HANDLES_MAX;
+
+ mgmt_mesh_foreach(hdev, send_count, &rp, sk);
+
+ if (rp.max_handles <= rp.used_handles) {
+ err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND,
+ MGMT_STATUS_BUSY);
+ goto done;
+ }
+
+ sending = hci_dev_test_flag(hdev, HCI_MESH_SENDING);
+ mesh_tx = mgmt_mesh_add(sk, hdev, send, len);
+
+ if (!mesh_tx)
+ err = -ENOMEM;
+ else if (!sending)
+ err = hci_cmd_sync_queue(hdev, mesh_send_sync, mesh_tx,
+ mesh_send_start_complete);
+
+ if (err < 0) {
+ bt_dev_err(hdev, "Send Mesh Failed %d", err);
+ err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND,
+ MGMT_STATUS_FAILED);
+
+ if (mesh_tx && sending)
+ mgmt_mesh_remove(mesh_tx);
+ } else {
+ hci_dev_set_flag(hdev, HCI_MESH_SENDING);
+
+ mgmt_cmd_complete(sk, hdev->id, MGMT_OP_MESH_SEND, 0,
+ &mesh_tx->handle, 1);
+ }
+
+done:
+ hci_dev_unlock(hdev);
+ return err;
+}
+
static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
{
struct mgmt_mode *cp = data;
@@ -2072,9 +2508,6 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
val = !!cp->val;
enabled = lmp_host_le_capable(hdev);
- if (!val)
- hci_req_clear_adv_instance(hdev, NULL, NULL, 0x00, true);
-
if (!hdev_is_powered(hdev) || val == enabled) {
bool changed = false;
@@ -2267,7 +2700,9 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,
struct mgmt_cp_remove_uuid *cp = data;
struct mgmt_pending_cmd *cmd;
struct bt_uuid *match, *tmp;
- u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ static const u8 bt_uuid_any[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
int err, found;
bt_dev_dbg(hdev, "sock %p", sk);
@@ -2495,6 +2930,37 @@ static int device_unpaired(struct hci_dev *hdev, bdaddr_t *bdaddr,
skip_sk);
}
+static void unpair_device_complete(struct hci_dev *hdev, void *data, int err)
+{
+ struct mgmt_pending_cmd *cmd = data;
+ struct mgmt_cp_unpair_device *cp = cmd->param;
+
+ if (!err)
+ device_unpaired(hdev, &cp->addr.bdaddr, cp->addr.type, cmd->sk);
+
+ cmd->cmd_complete(cmd, err);
+ mgmt_pending_free(cmd);
+}
+
+static int unpair_device_sync(struct hci_dev *hdev, void *data)
+{
+ struct mgmt_pending_cmd *cmd = data;
+ struct mgmt_cp_unpair_device *cp = cmd->param;
+ struct hci_conn *conn;
+
+ if (cp->addr.type == BDADDR_BREDR)
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK,
+ &cp->addr.bdaddr);
+ else
+ conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr,
+ le_addr_type(cp->addr.type));
+
+ if (!conn)
+ return 0;
+
+ return hci_abort_conn_sync(hdev, conn, HCI_ERROR_REMOTE_USER_TERM);
+}
+
static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
@@ -2605,7 +3071,7 @@ done:
goto unlock;
}
- cmd = mgmt_pending_add(sk, MGMT_OP_UNPAIR_DEVICE, hdev, cp,
+ cmd = mgmt_pending_new(sk, MGMT_OP_UNPAIR_DEVICE, hdev, cp,
sizeof(*cp));
if (!cmd) {
err = -ENOMEM;
@@ -2614,9 +3080,10 @@ done:
cmd->cmd_complete = addr_cmd_complete;
- err = hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM);
+ err = hci_cmd_sync_queue(hdev, unpair_device_sync, cmd,
+ unpair_device_complete);
if (err < 0)
- mgmt_pending_remove(cmd);
+ mgmt_pending_free(cmd);
unlock:
hci_dev_unlock(hdev);
@@ -3093,6 +3560,18 @@ unlock:
return err;
}
+static int abort_conn_sync(struct hci_dev *hdev, void *data)
+{
+ struct hci_conn *conn;
+ u16 handle = PTR_ERR(data);
+
+ conn = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!conn)
+ return 0;
+
+ return hci_abort_conn_sync(hdev, conn, HCI_ERROR_REMOTE_USER_TERM);
+}
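
Reduced to a stand-alone sketch, the cookie pattern above looks like this (illustrative names; the ERR_PTR()/PTR_ERR() casts play the role of the helpers here): a small integer rides in the void *data argument of a deferred callback, so no context structure has to be allocated or kept alive.

	#include <stdint.h>
	#include <stdio.h>

	static void *handle_to_cookie(uint16_t handle)
	{
		return (void *)(uintptr_t)handle;
	}

	static uint16_t cookie_to_handle(void *cookie)
	{
		return (uint16_t)(uintptr_t)cookie;
	}

	int main(void)
	{
		void *cookie = handle_to_cookie(42);

		printf("handle %u\n", cookie_to_handle(cookie));	/* 42 */
		return 0;
	}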
+
static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
@@ -3143,7 +3622,8 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
le_addr_type(addr->type));
if (conn->conn_reason == CONN_REASON_PAIR_DEVICE)
- hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM);
+ hci_cmd_sync_queue(hdev, abort_conn_sync, ERR_PTR(conn->handle),
+ NULL);
unlock:
hci_dev_unlock(hdev);
@@ -3321,6 +3801,9 @@ static void set_name_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
+ if (cmd != pending_find(MGMT_OP_SET_LOCAL_NAME, hdev))
+ return;
+
if (status) {
mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME,
status);
@@ -3493,6 +3976,9 @@ static void set_default_phy_complete(struct hci_dev *hdev, void *data, int err)
struct sk_buff *skb = cmd->skb;
u8 status = mgmt_status(err);
+ if (cmd != pending_find(MGMT_OP_SET_PHY_CONFIGURATION, hdev))
+ return;
+
if (!status) {
if (!skb)
status = MGMT_STATUS_FAILED;
@@ -3720,7 +4206,7 @@ static int set_blocked_keys(struct sock *sk, struct hci_dev *hdev, void *data,
hci_blocked_keys_clear(hdev);
- for (i = 0; i < keys->key_count; ++i) {
+ for (i = 0; i < key_count; ++i) {
struct blocked_key *b = kzalloc(sizeof(*b), GFP_KERNEL);
if (!b) {
@@ -3759,13 +4245,6 @@ static int set_wideband_speech(struct sock *sk, struct hci_dev *hdev,
hci_dev_lock(hdev);
- if (pending_find(MGMT_OP_SET_WIDEBAND_SPEECH, hdev)) {
- err = mgmt_cmd_status(sk, hdev->id,
- MGMT_OP_SET_WIDEBAND_SPEECH,
- MGMT_STATUS_BUSY);
- goto unlock;
- }
-
if (hdev_is_powered(hdev) &&
!!cp->val != hci_dev_test_flag(hdev,
HCI_WIDEBAND_SPEECH_ENABLED)) {
@@ -3893,17 +4372,34 @@ static const u8 rpa_resolution_uuid[16] = {
0xea, 0x11, 0x73, 0xc2, 0x48, 0xa1, 0xc0, 0x15,
};
+/* 6fbaf188-05e0-496a-9885-d6ddfdb4e03e */
+static const u8 iso_socket_uuid[16] = {
+ 0x3e, 0xe0, 0xb4, 0xfd, 0xdd, 0xd6, 0x85, 0x98,
+ 0x6a, 0x49, 0xe0, 0x05, 0x88, 0xf1, 0xba, 0x6f,
+};
+
+/* 2ce463d7-7a03-4d8d-bf05-5f24e8f36e76 */
+static const u8 mgmt_mesh_uuid[16] = {
+ 0x76, 0x6e, 0xf3, 0xe8, 0x24, 0x5f, 0x05, 0xbf,
+ 0x8d, 0x4d, 0x03, 0x7a, 0xd7, 0x63, 0xe4, 0x2c,
+};
+
static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev,
void *data, u16 data_len)
{
- char buf[102]; /* Enough space for 5 features: 2 + 20 * 5 */
- struct mgmt_rp_read_exp_features_info *rp = (void *)buf;
+ struct mgmt_rp_read_exp_features_info *rp;
+ size_t len;
u16 idx = 0;
u32 flags;
+ int status;
bt_dev_dbg(hdev, "sock %p", sk);
- memset(&buf, 0, sizeof(buf));
+ /* Enough space for 7 features */
+ len = sizeof(*rp) + (sizeof(rp->features[0]) * 7);
+ rp = kzalloc(len, GFP_KERNEL);
+ if (!rp)
+ return -ENOMEM;
#ifdef CONFIG_BT_FEATURE_DEBUG
if (!hdev) {
@@ -3960,6 +4456,24 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev,
idx++;
}
+ if (IS_ENABLED(CONFIG_BT_LE)) {
+ flags = iso_enabled() ? BIT(0) : 0;
+ memcpy(rp->features[idx].uuid, iso_socket_uuid, 16);
+ rp->features[idx].flags = cpu_to_le32(flags);
+ idx++;
+ }
+
+ if (hdev && lmp_le_capable(hdev)) {
+ if (hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL))
+ flags = BIT(0);
+ else
+ flags = 0;
+
+ memcpy(rp->features[idx].uuid, mgmt_mesh_uuid, 16);
+ rp->features[idx].flags = cpu_to_le32(flags);
+ idx++;
+ }
+
rp->feature_count = cpu_to_le16(idx);
/* After reading the experimental features information, enable
@@ -3967,9 +4481,12 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev,
*/
hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
- return mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE,
- MGMT_OP_READ_EXP_FEATURES_INFO,
- 0, rp, sizeof(*rp) + (20 * idx));
+ status = mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE,
+ MGMT_OP_READ_EXP_FEATURES_INFO,
+ 0, rp, sizeof(*rp) + (20 * idx));
+
+ kfree(rp);
+ return status;
}
static int exp_ll_privacy_feature_changed(bool enabled, struct hci_dev *hdev,
@@ -3981,10 +4498,11 @@ static int exp_ll_privacy_feature_changed(bool enabled, struct hci_dev *hdev,
memcpy(ev.uuid, rpa_resolution_uuid, 16);
ev.flags = cpu_to_le32((enabled ? BIT(0) : 0) | BIT(1));
if (enabled && privacy_mode_capable(hdev))
- set_bit(HCI_CONN_FLAG_DEVICE_PRIVACY, hdev->conn_flags);
+ hdev->conn_flags |= HCI_CONN_FLAG_DEVICE_PRIVACY;
else
- clear_bit(HCI_CONN_FLAG_DEVICE_PRIVACY, hdev->conn_flags);
+ hdev->conn_flags &= ~HCI_CONN_FLAG_DEVICE_PRIVACY;
return mgmt_limited_event(MGMT_EV_EXP_FEATURE_CHANGED, hdev,
&ev, sizeof(ev),
@@ -4096,6 +4614,63 @@ static int set_debug_func(struct sock *sk, struct hci_dev *hdev,
}
#endif
+static int set_mgmt_mesh_func(struct sock *sk, struct hci_dev *hdev,
+ struct mgmt_cp_set_exp_feature *cp, u16 data_len)
+{
+ struct mgmt_rp_set_exp_feature rp;
+ bool val, changed;
+ int err;
+
+ /* Command requires to use the controller index */
+ if (!hdev)
+ return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_INDEX);
+
+ /* Changes can only be made when controller is powered down */
+ if (hdev_is_powered(hdev))
+ return mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_REJECTED);
+
+ /* Parameters are limited to a single octet */
+ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1)
+ return mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ /* Only boolean on/off is supported */
+ if (cp->param[0] != 0x00 && cp->param[0] != 0x01)
+ return mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ val = !!cp->param[0];
+
+ if (val) {
+ changed = !hci_dev_test_and_set_flag(hdev,
+ HCI_MESH_EXPERIMENTAL);
+ } else {
+ hci_dev_clear_flag(hdev, HCI_MESH);
+ changed = hci_dev_test_and_clear_flag(hdev,
+ HCI_MESH_EXPERIMENTAL);
+ }
+
+ memcpy(rp.uuid, mgmt_mesh_uuid, 16);
+ rp.flags = cpu_to_le32(val ? BIT(0) : 0);
+
+ hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
+
+ err = mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE, 0,
+ &rp, sizeof(rp));
+
+ if (changed)
+ exp_feature_changed(hdev, mgmt_mesh_uuid, val, sk);
+
+ return err;
+}
+
static int set_rpa_resolution_func(struct sock *sk, struct hci_dev *hdev,
struct mgmt_cp_set_exp_feature *cp,
u16 data_len)
@@ -4351,6 +4926,57 @@ static int set_le_simultaneous_roles_func(struct sock *sk, struct hci_dev *hdev,
return err;
}
+#ifdef CONFIG_BT_LE
+static int set_iso_socket_func(struct sock *sk, struct hci_dev *hdev,
+ struct mgmt_cp_set_exp_feature *cp, u16 data_len)
+{
+ struct mgmt_rp_set_exp_feature rp;
+ bool val, changed = false;
+ int err;
+
+ /* Command requires to use the non-controller index */
+ if (hdev)
+ return mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_INDEX);
+
+ /* Parameters are limited to a single octet */
+ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1)
+ return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ /* Only boolean on/off is supported */
+ if (cp->param[0] != 0x00 && cp->param[0] != 0x01)
+ return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ val = !!cp->param[0];
+ if (val)
+ err = iso_init();
+ else
+ err = iso_exit();
+
+ if (!err)
+ changed = true;
+
+ memcpy(rp.uuid, iso_socket_uuid, 16);
+ rp.flags = cpu_to_le32(val ? BIT(0) : 0);
+
+ hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
+
+ err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE,
+ MGMT_OP_SET_EXP_FEATURE, 0,
+ &rp, sizeof(rp));
+
+ if (changed)
+ exp_feature_changed(hdev, iso_socket_uuid, val, sk);
+
+ return err;
+}
+#endif
+
static const struct mgmt_exp_feature {
const u8 *uuid;
int (*set_func)(struct sock *sk, struct hci_dev *hdev,
@@ -4360,10 +4986,14 @@ static const struct mgmt_exp_feature {
#ifdef CONFIG_BT_FEATURE_DEBUG
EXP_FEAT(debug_uuid, set_debug_func),
#endif
+ EXP_FEAT(mgmt_mesh_uuid, set_mgmt_mesh_func),
EXP_FEAT(rpa_resolution_uuid, set_rpa_resolution_func),
EXP_FEAT(quality_report_uuid, set_quality_report_func),
EXP_FEAT(offload_codecs_uuid, set_offload_codec_func),
EXP_FEAT(le_simultaneous_roles_uuid, set_le_simultaneous_roles_func),
+#ifdef CONFIG_BT_LE
+ EXP_FEAT(iso_socket_uuid, set_iso_socket_func),
+#endif
/* end with a null feature */
EXP_FEAT(NULL, NULL)
@@ -4387,6 +5017,22 @@ static int set_exp_feature(struct sock *sk, struct hci_dev *hdev,
MGMT_STATUS_NOT_SUPPORTED);
}
+static u32 get_params_flags(struct hci_dev *hdev,
+ struct hci_conn_params *params)
+{
+ u32 flags = hdev->conn_flags;
+
+ /* Devices using RPAs can only be programmed in the acceptlist if
+ * LL Privacy has been enabled, otherwise they cannot mark
+ * HCI_CONN_FLAG_REMOTE_WAKEUP.
+ */
+ if ((flags & HCI_CONN_FLAG_REMOTE_WAKEUP) && !use_ll_privacy(hdev) &&
+ hci_find_irk_by_addr(hdev, &params->addr, params->addr_type))
+ flags &= ~HCI_CONN_FLAG_REMOTE_WAKEUP;
+
+ return flags;
+}
+
static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
u16 data_len)
{
@@ -4403,8 +5049,7 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
- bitmap_to_arr32(&supported_flags, hdev->conn_flags,
- __HCI_CONN_NUM_FLAGS);
+ supported_flags = hdev->conn_flags;
memset(&rp, 0, sizeof(rp));
@@ -4415,17 +5060,15 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
if (!br_params)
goto done;
- bitmap_to_arr32(&current_flags, br_params->flags,
- __HCI_CONN_NUM_FLAGS);
+ current_flags = br_params->flags;
} else {
params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
le_addr_type(cp->addr.type));
-
if (!params)
goto done;
- bitmap_to_arr32(&current_flags, params->flags,
- __HCI_CONN_NUM_FLAGS);
+ supported_flags = get_params_flags(hdev, params);
+ current_flags = params->flags;
}
bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr);
@@ -4467,11 +5110,10 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
u32 current_flags = __le32_to_cpu(cp->current_flags);
bt_dev_dbg(hdev, "Set device flags %pMR (type 0x%x) = 0x%x",
- &cp->addr.bdaddr, cp->addr.type,
- __le32_to_cpu(current_flags));
+ &cp->addr.bdaddr, cp->addr.type, current_flags);
- bitmap_to_arr32(&supported_flags, hdev->conn_flags,
- __HCI_CONN_NUM_FLAGS);
+ supported_flags = hdev->conn_flags;
if ((supported_flags | current_flags) != supported_flags) {
bt_dev_warn(hdev, "Bad flag given (0x%x) vs supported (0x%0x)",
@@ -4487,35 +5129,45 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
cp->addr.type);
if (br_params) {
- bitmap_from_u64(br_params->flags, current_flags);
+ br_params->flags = current_flags;
status = MGMT_STATUS_SUCCESS;
} else {
bt_dev_warn(hdev, "No such BR/EDR device %pMR (0x%x)",
&cp->addr.bdaddr, cp->addr.type);
}
- } else {
- params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
- le_addr_type(cp->addr.type));
- if (params) {
- bitmap_from_u64(params->flags, current_flags);
- status = MGMT_STATUS_SUCCESS;
- /* Update passive scan if HCI_CONN_FLAG_DEVICE_PRIVACY
- * has been set.
- */
- if (test_bit(HCI_CONN_FLAG_DEVICE_PRIVACY,
- params->flags))
- hci_update_passive_scan(hdev);
- } else {
- bt_dev_warn(hdev, "No such LE device %pMR (0x%x)",
- &cp->addr.bdaddr,
- le_addr_type(cp->addr.type));
- }
+ goto unlock;
}
-done:
+ params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
+ le_addr_type(cp->addr.type));
+ if (!params) {
+ bt_dev_warn(hdev, "No such LE device %pMR (0x%x)",
+ &cp->addr.bdaddr, le_addr_type(cp->addr.type));
+ goto unlock;
+ }
+
+ supported_flags = get_params_flags(hdev, params);
+
+ if ((supported_flags | current_flags) != supported_flags) {
+ bt_dev_warn(hdev, "Bad flag given (0x%x) vs supported (0x%0x)",
+ current_flags, supported_flags);
+ goto unlock;
+ }
+
+ params->flags = current_flags;
+ status = MGMT_STATUS_SUCCESS;
+
+ /* Update passive scan if HCI_CONN_FLAG_DEVICE_PRIVACY
+ * has been set.
+ */
+ if (params->flags & HCI_CONN_FLAG_DEVICE_PRIVACY)
+ hci_update_passive_scan(hdev);
+
+unlock:
hci_dev_unlock(hdev);
+done:
if (status == MGMT_STATUS_SUCCESS)
device_flags_changed(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
supported_flags, current_flags);
@@ -4603,23 +5255,15 @@ static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev,
return err;
}
-int mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status)
+static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev,
+ void *data, int status)
{
struct mgmt_rp_add_adv_patterns_monitor rp;
- struct mgmt_pending_cmd *cmd;
- struct adv_monitor *monitor;
- int err = 0;
+ struct mgmt_pending_cmd *cmd = data;
+ struct adv_monitor *monitor = cmd->user_data;
hci_dev_lock(hdev);
- cmd = pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev);
- if (!cmd) {
- cmd = pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev);
- if (!cmd)
- goto done;
- }
-
- monitor = cmd->user_data;
rp.monitor_handle = cpu_to_le16(monitor->handle);
if (!status) {
@@ -4630,26 +5274,29 @@ int mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status)
hci_update_passive_scan(hdev);
}
- err = mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
- mgmt_status(status), &rp, sizeof(rp));
+ mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_status(status), &rp, sizeof(rp));
mgmt_pending_remove(cmd);
-done:
hci_dev_unlock(hdev);
- bt_dev_dbg(hdev, "add monitor %d complete, status %u",
+ bt_dev_dbg(hdev, "add monitor %d complete, status %d",
rp.monitor_handle, status);
+}
- return err;
+static int mgmt_add_adv_patterns_monitor_sync(struct hci_dev *hdev, void *data)
+{
+ struct mgmt_pending_cmd *cmd = data;
+ struct adv_monitor *monitor = cmd->user_data;
+
+ return hci_add_adv_monitor(hdev, monitor);
}
static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev,
struct adv_monitor *m, u8 status,
void *data, u16 len, u16 op)
{
- struct mgmt_rp_add_adv_patterns_monitor rp;
struct mgmt_pending_cmd *cmd;
int err;
- bool pending;
hci_dev_lock(hdev);
@@ -4671,31 +5318,17 @@ static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev,
}
cmd->user_data = m;
- pending = hci_add_adv_monitor(hdev, m, &err);
+ err = hci_cmd_sync_queue(hdev, mgmt_add_adv_patterns_monitor_sync, cmd,
+ mgmt_add_adv_patterns_monitor_complete);
if (err) {
- if (err == -ENOSPC || err == -ENOMEM)
+ if (err == -ENOMEM)
status = MGMT_STATUS_NO_RESOURCES;
- else if (err == -EINVAL)
- status = MGMT_STATUS_INVALID_PARAMS;
else
status = MGMT_STATUS_FAILED;
- mgmt_pending_remove(cmd);
goto unlock;
}
- if (!pending) {
- mgmt_pending_remove(cmd);
- rp.monitor_handle = cpu_to_le16(m->handle);
- mgmt_adv_monitor_added(sk, hdev, m->handle);
- m->state = ADV_MONITOR_STATE_REGISTERED;
- hdev->adv_monitors_cnt++;
-
- hci_dev_unlock(hdev);
- return mgmt_cmd_complete(sk, hdev->id, op, MGMT_STATUS_SUCCESS,
- &rp, sizeof(rp));
- }
-
hci_dev_unlock(hdev);
return 0;
@@ -4836,49 +5469,46 @@ done:
MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI);
}
-int mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status)
+static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev,
+ void *data, int status)
{
struct mgmt_rp_remove_adv_monitor rp;
- struct mgmt_cp_remove_adv_monitor *cp;
- struct mgmt_pending_cmd *cmd;
- int err = 0;
+ struct mgmt_pending_cmd *cmd = data;
+ struct mgmt_cp_remove_adv_monitor *cp = cmd->param;
hci_dev_lock(hdev);
- cmd = pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev);
- if (!cmd)
- goto done;
-
- cp = cmd->param;
rp.monitor_handle = cp->monitor_handle;
if (!status)
hci_update_passive_scan(hdev);
- err = mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
- mgmt_status(status), &rp, sizeof(rp));
+ mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_status(status), &rp, sizeof(rp));
mgmt_pending_remove(cmd);
-done:
hci_dev_unlock(hdev);
- bt_dev_dbg(hdev, "remove monitor %d complete, status %u",
+ bt_dev_dbg(hdev, "remove monitor %d complete, status %d",
rp.monitor_handle, status);
+}
- return err;
+static int mgmt_remove_adv_monitor_sync(struct hci_dev *hdev, void *data)
+{
+ struct mgmt_pending_cmd *cmd = data;
+ struct mgmt_cp_remove_adv_monitor *cp = cmd->param;
+ u16 handle = __le16_to_cpu(cp->monitor_handle);
+
+ if (!handle)
+ return hci_remove_all_adv_monitor(hdev);
+
+ return hci_remove_single_adv_monitor(hdev, handle);
}
static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev,
void *data, u16 len)
{
- struct mgmt_cp_remove_adv_monitor *cp = data;
- struct mgmt_rp_remove_adv_monitor rp;
struct mgmt_pending_cmd *cmd;
- u16 handle = __le16_to_cpu(cp->monitor_handle);
int err, status;
- bool pending;
-
- BT_DBG("request for %s", hdev->name);
- rp.monitor_handle = cp->monitor_handle;
hci_dev_lock(hdev);
@@ -4896,34 +5526,22 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- if (handle)
- pending = hci_remove_single_adv_monitor(hdev, handle, &err);
- else
- pending = hci_remove_all_adv_monitor(hdev, &err);
+ err = hci_cmd_sync_queue(hdev, mgmt_remove_adv_monitor_sync, cmd,
+ mgmt_remove_adv_monitor_complete);
if (err) {
mgmt_pending_remove(cmd);
- if (err == -ENOENT)
- status = MGMT_STATUS_INVALID_INDEX;
+ if (err == -ENOMEM)
+ status = MGMT_STATUS_NO_RESOURCES;
else
status = MGMT_STATUS_FAILED;
goto unlock;
}
- /* monitor can be removed without forwarding request to controller */
- if (!pending) {
- mgmt_pending_remove(cmd);
- hci_dev_unlock(hdev);
-
- return mgmt_cmd_complete(sk, hdev->id,
- MGMT_OP_REMOVE_ADV_MONITOR,
- MGMT_STATUS_SUCCESS,
- &rp, sizeof(rp));
- }
-
hci_dev_unlock(hdev);
+
return 0;
unlock:
@@ -5036,12 +5654,6 @@ static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- if (pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev)) {
- err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA,
- MGMT_STATUS_BUSY);
- goto unlock;
- }
-
cmd = mgmt_pending_new(sk, MGMT_OP_READ_LOCAL_OOB_DATA, hdev, NULL, 0);
if (!cmd)
err = -ENOMEM;
@@ -5261,11 +5873,16 @@ static void start_discovery_complete(struct hci_dev *hdev, void *data, int err)
{
struct mgmt_pending_cmd *cmd = data;
+ if (cmd != pending_find(MGMT_OP_START_DISCOVERY, hdev) &&
+ cmd != pending_find(MGMT_OP_START_LIMITED_DISCOVERY, hdev) &&
+ cmd != pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev))
+ return;
+
bt_dev_dbg(hdev, "err %d", err);
mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err),
cmd->param, 1);
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
hci_discovery_set_state(hdev, err ? DISCOVERY_STOPPED:
DISCOVERY_FINDING);
@@ -5327,7 +5944,7 @@ static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev,
else
hdev->discovery.limited = false;
- cmd = mgmt_pending_new(sk, op, hdev, data, len);
+ cmd = mgmt_pending_add(sk, op, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto failed;
@@ -5336,7 +5953,7 @@ static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev,
err = hci_cmd_sync_queue(hdev, start_discovery_sync, cmd,
start_discovery_complete);
if (err < 0) {
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
goto failed;
}
@@ -5430,7 +6047,7 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
goto failed;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_START_SERVICE_DISCOVERY,
+ cmd = mgmt_pending_add(sk, MGMT_OP_START_SERVICE_DISCOVERY,
hdev, data, len);
if (!cmd) {
err = -ENOMEM;
@@ -5463,7 +6080,7 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
err = hci_cmd_sync_queue(hdev, start_discovery_sync, cmd,
start_discovery_complete);
if (err < 0) {
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
goto failed;
}
@@ -5495,11 +6112,14 @@ static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err)
{
struct mgmt_pending_cmd *cmd = data;
+ if (cmd != pending_find(MGMT_OP_STOP_DISCOVERY, hdev))
+ return;
+
bt_dev_dbg(hdev, "err %d", err);
mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err),
cmd->param, 1);
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
if (!err)
hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
@@ -5535,7 +6155,7 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
goto unlock;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_STOP_DISCOVERY, hdev, data, len);
+ cmd = mgmt_pending_add(sk, MGMT_OP_STOP_DISCOVERY, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto unlock;
@@ -5544,7 +6164,7 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
err = hci_cmd_sync_queue(hdev, stop_discovery_sync, cmd,
stop_discovery_complete);
if (err < 0) {
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
goto unlock;
}
@@ -5831,6 +6451,7 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
if (!hdev_is_powered(hdev) ||
(val == hci_dev_test_flag(hdev, HCI_ADVERTISING) &&
(cp->val == 0x02) == hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) ||
+ hci_dev_test_flag(hdev, HCI_MESH) ||
hci_conn_num(hdev, LE_LINK) > 0 ||
(hci_dev_test_flag(hdev, HCI_LE_SCAN) &&
hdev->le_scan_type == LE_SCAN_ACTIVE)) {
@@ -6666,11 +7287,6 @@ static void get_conn_info_complete(struct hci_dev *hdev, void *data, int err)
mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status,
&rp, sizeof(rp));
- if (conn) {
- hci_conn_drop(conn);
- hci_conn_put(conn);
- }
-
mgmt_pending_free(cmd);
}
@@ -6689,15 +7305,10 @@ static int get_conn_info_sync(struct hci_dev *hdev, void *data)
else
conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr);
- if (!conn || conn != cmd->user_data || conn->state != BT_CONNECTED) {
- if (cmd->user_data) {
- hci_conn_drop(cmd->user_data);
- hci_conn_put(cmd->user_data);
- cmd->user_data = NULL;
- }
+ if (!conn || conn->state != BT_CONNECTED)
return MGMT_STATUS_NOT_CONNECTED;
- }
+ cmd->user_data = conn;
handle = cpu_to_le16(conn->handle);
/* Refresh RSSI each time */
@@ -6776,11 +7387,12 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data,
cmd = mgmt_pending_new(sk, MGMT_OP_GET_CONN_INFO, hdev, data,
len);
- if (!cmd)
+ if (!cmd) {
err = -ENOMEM;
- else
+ } else {
err = hci_cmd_sync_queue(hdev, get_conn_info_sync,
cmd, get_conn_info_complete);
+ }
if (err < 0) {
mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO,
@@ -6792,9 +7404,6 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data,
goto unlock;
}
- hci_conn_hold(conn);
- cmd->user_data = hci_conn_get(conn);
-
conn->conn_info_timestamp = jiffies;
} else {
/* Cache is valid, just reply with values cached in hci_conn */
@@ -6833,8 +7442,6 @@ static void get_clock_info_complete(struct hci_dev *hdev, void *data, int err)
if (conn) {
rp.piconet_clock = cpu_to_le32(conn->clock);
rp.accuracy = cpu_to_le16(conn->clock_accuracy);
- hci_conn_drop(conn);
- hci_conn_put(conn);
}
complete:
@@ -6849,30 +7456,21 @@ static int get_clock_info_sync(struct hci_dev *hdev, void *data)
struct mgmt_pending_cmd *cmd = data;
struct mgmt_cp_get_clock_info *cp = cmd->param;
struct hci_cp_read_clock hci_cp;
- struct hci_conn *conn = cmd->user_data;
- int err;
+ struct hci_conn *conn;
memset(&hci_cp, 0, sizeof(hci_cp));
- err = hci_read_clock_sync(hdev, &hci_cp);
+ hci_read_clock_sync(hdev, &hci_cp);
- if (conn) {
- /* Make sure connection still exists */
- conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK,
- &cp->addr.bdaddr);
+ /* Make sure connection still exists */
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->addr.bdaddr);
+ if (!conn || conn->state != BT_CONNECTED)
+ return MGMT_STATUS_NOT_CONNECTED;
- if (conn && conn == cmd->user_data &&
- conn->state == BT_CONNECTED) {
- hci_cp.handle = cpu_to_le16(conn->handle);
- hci_cp.which = 0x01; /* Piconet clock */
- err = hci_read_clock_sync(hdev, &hci_cp);
- } else if (cmd->user_data) {
- hci_conn_drop(cmd->user_data);
- hci_conn_put(cmd->user_data);
- cmd->user_data = NULL;
- }
- }
+ cmd->user_data = conn;
+ hci_cp.handle = cpu_to_le16(conn->handle);
+ hci_cp.which = 0x01; /* Piconet clock */
- return err;
+ return hci_read_clock_sync(hdev, &hci_cp);
}
static int get_clock_info(struct sock *sk, struct hci_dev *hdev, void *data,
@@ -6931,10 +7529,6 @@ static int get_clock_info(struct sock *sk, struct hci_dev *hdev, void *data,
if (cmd)
mgmt_pending_free(cmd);
-
- } else if (conn) {
- hci_conn_hold(conn);
- cmd->user_data = hci_conn_get(conn);
}
@@ -7063,7 +7657,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
if (err)
goto unlock;
- hci_req_update_scan(hdev);
+ hci_update_scan(hdev);
goto added;
}
@@ -7102,8 +7696,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
addr_type);
if (params)
- bitmap_to_arr32(&current_flags, params->flags,
- __HCI_CONN_NUM_FLAGS);
+ current_flags = params->flags;
}
err = hci_cmd_sync_queue(hdev, add_device_sync, NULL, NULL);
@@ -7112,8 +7705,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
added:
device_added(sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action);
- bitmap_to_arr32(&supported_flags, hdev->conn_flags,
- __HCI_CONN_NUM_FLAGS);
+ supported_flags = hdev->conn_flags;
device_flags_changed(NULL, hdev, &cp->addr.bdaddr, cp->addr.type,
supported_flags, current_flags);
@@ -7177,7 +7769,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- hci_req_update_scan(hdev);
+ hci_update_scan(hdev);
device_removed(sk, hdev, &cp->addr.bdaddr,
cp->addr.type);
@@ -7241,7 +7833,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
kfree(b);
}
- hci_req_update_scan(hdev);
+ hci_update_scan(hdev);
list_for_each_entry_safe(p, tmp, &hdev->le_conn_params, list) {
if (p->auto_connect == HCI_AUTO_CONN_DISABLED)
@@ -7474,6 +8066,9 @@ static void read_local_oob_ext_data_complete(struct hci_dev *hdev, void *data,
u8 status = mgmt_status(err);
u16 eir_len;
+ if (cmd != pending_find(MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev))
+ return;
+
if (!status) {
if (!skb)
status = MGMT_STATUS_FAILED;
@@ -7785,8 +8380,7 @@ static u32 get_supported_adv_flags(struct hci_dev *hdev)
/* In extended adv TX_POWER returned from Set Adv Param
* will be always valid.
*/
- if ((hdev->adv_tx_power != HCI_TX_POWER_INVALID) ||
- ext_adv_capable(hdev))
+ if (hdev->adv_tx_power != HCI_TX_POWER_INVALID || ext_adv_capable(hdev))
flags |= MGMT_ADV_FLAG_TX_POWER;
if (ext_adv_capable(hdev)) {
@@ -7839,8 +8433,14 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
instance = rp->instance;
list_for_each_entry(adv_instance, &hdev->adv_instances, list) {
- *instance = adv_instance->instance;
- instance++;
+ /* Only instances 1-le_num_of_adv_sets are externally visible */
+ if (adv_instance->instance <= hdev->adv_instance_cnt) {
+ *instance = adv_instance->instance;
+ instance++;
+ } else {
+ rp->num_instances--;
+ rp_len--;
+ }
}
hci_dev_unlock(hdev);
@@ -7918,7 +8518,7 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data,
return false;
/* Make sure that the data is correctly formatted. */
- for (i = 0, cur_len = 0; i < len; i += (cur_len + 1)) {
+ for (i = 0; i < len; i += (cur_len + 1)) {
cur_len = data[i];
if (!cur_len)
@@ -7969,11 +8569,7 @@ static bool requested_adv_flags_are_valid(struct hci_dev *hdev, u32 adv_flags)
static bool adv_busy(struct hci_dev *hdev)
{
- return (pending_find(MGMT_OP_ADD_ADVERTISING, hdev) ||
- pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev) ||
- pending_find(MGMT_OP_SET_LE, hdev) ||
- pending_find(MGMT_OP_ADD_EXT_ADV_PARAMS, hdev) ||
- pending_find(MGMT_OP_ADD_EXT_ADV_DATA, hdev));
+ return pending_find(MGMT_OP_SET_LE, hdev);
}
static void add_adv_complete(struct hci_dev *hdev, struct sock *sk, u8 instance,
@@ -8046,9 +8642,9 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
u32 flags;
u8 status;
u16 timeout, duration;
- unsigned int prev_instance_cnt = hdev->adv_instance_cnt;
+ unsigned int prev_instance_cnt;
u8 schedule_instance = 0;
- struct adv_info *next_instance;
+ struct adv_info *adv, *next_instance;
int err;
struct mgmt_pending_cmd *cmd;
@@ -8097,15 +8693,17 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- err = hci_add_adv_instance(hdev, cp->instance, flags,
+ prev_instance_cnt = hdev->adv_instance_cnt;
+
+ adv = hci_add_adv_instance(hdev, cp->instance, flags,
cp->adv_data_len, cp->data,
cp->scan_rsp_len,
cp->data + cp->adv_data_len,
timeout, duration,
HCI_ADV_TX_POWER_NO_PREFERENCE,
hdev->le_adv_min_interval,
- hdev->le_adv_max_interval);
- if (err < 0) {
+ hdev->le_adv_max_interval, 0);
+ if (IS_ERR(adv)) {
err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
MGMT_STATUS_FAILED);
goto unlock;
@@ -8236,6 +8834,7 @@ static int add_ext_adv_params(struct sock *sk, struct hci_dev *hdev,
struct mgmt_cp_add_ext_adv_params *cp = data;
struct mgmt_rp_add_ext_adv_params rp;
struct mgmt_pending_cmd *cmd = NULL;
+ struct adv_info *adv;
u32 flags, min_interval, max_interval;
u16 timeout, duration;
u8 status;
@@ -8305,11 +8904,11 @@ static int add_ext_adv_params(struct sock *sk, struct hci_dev *hdev,
HCI_ADV_TX_POWER_NO_PREFERENCE;
/* Create advertising instance with no advertising or response data */
- err = hci_add_adv_instance(hdev, cp->instance, flags,
- 0, NULL, 0, NULL, timeout, duration,
- tx_power, min_interval, max_interval);
+ adv = hci_add_adv_instance(hdev, cp->instance, flags, 0, NULL, 0, NULL,
+ timeout, duration, tx_power, min_interval,
+ max_interval, 0);
- if (err < 0) {
+ if (IS_ERR(adv)) {
err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS,
MGMT_STATUS_FAILED);
goto unlock;
@@ -8563,9 +9162,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- if (pending_find(MGMT_OP_ADD_ADVERTISING, hdev) ||
- pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev) ||
- pending_find(MGMT_OP_SET_LE, hdev)) {
+ if (pending_find(MGMT_OP_SET_LE, hdev)) {
err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING,
MGMT_STATUS_BUSY);
goto unlock;
@@ -8601,7 +9198,6 @@ static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev,
struct mgmt_cp_get_adv_size_info *cp = data;
struct mgmt_rp_get_adv_size_info rp;
u32 flags, supported_flags;
- int err;
bt_dev_dbg(hdev, "sock %p", sk);
@@ -8628,10 +9224,8 @@ static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev,
rp.max_adv_data_len = tlv_data_max_len(hdev, flags, true);
rp.max_scan_rsp_len = tlv_data_max_len(hdev, flags, false);
- err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO,
- MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
-
- return err;
+ return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO,
+ MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
}
static const struct hci_mgmt_handler mgmt_handlers[] = {
@@ -8758,8 +9352,13 @@ static const struct hci_mgmt_handler mgmt_handlers[] = {
{ add_ext_adv_data, MGMT_ADD_EXT_ADV_DATA_SIZE,
HCI_MGMT_VAR_LEN },
{ add_adv_patterns_monitor_rssi,
- MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE,
+ MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE },
+ { set_mesh, MGMT_SET_MESH_RECEIVER_SIZE,
+ HCI_MGMT_VAR_LEN },
+ { mesh_features, MGMT_MESH_READ_FEATURES_SIZE },
+ { mesh_send, MGMT_MESH_SEND_SIZE,
HCI_MGMT_VAR_LEN },
+ { mesh_send_cancel, MGMT_MESH_SEND_CANCEL_SIZE },
};
void mgmt_index_added(struct hci_dev *hdev)
@@ -8827,6 +9426,13 @@ void mgmt_index_removed(struct hci_dev *hdev)
mgmt_index_event(MGMT_EV_EXT_INDEX_REMOVED, hdev, &ev, sizeof(ev),
HCI_MGMT_EXT_INDEX_EVENTS);
+
+ /* Cancel any remaining timed work */
+ if (!hci_dev_test_flag(hdev, HCI_MGMT))
+ return;
+ cancel_delayed_work_sync(&hdev->discov_off);
+ cancel_delayed_work_sync(&hdev->service_cache);
+ cancel_delayed_work_sync(&hdev->rpa_expired);
}
void mgmt_power_on(struct hci_dev *hdev, int err)
@@ -9059,12 +9665,14 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn,
u16 eir_len = 0;
u32 flags = 0;
+ /* allocate buffer for LE or BR/EDR adv */
if (conn->le_adv_data_len > 0)
skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_CONNECTED,
- conn->le_adv_data_len);
+ sizeof(*ev) + conn->le_adv_data_len);
else
skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_CONNECTED,
- 2 + name_len + 5);
+ sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0) +
+ eir_precalc_len(sizeof(conn->dev_class)));
ev = skb_put(skb, sizeof(*ev));
bacpy(&ev->addr.bdaddr, &conn->dst);
@@ -9083,18 +9691,12 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn,
skb_put_data(skb, conn->le_adv_data, conn->le_adv_data_len);
eir_len = conn->le_adv_data_len;
} else {
- if (name_len > 0) {
- eir_len = eir_append_data(ev->eir, 0, EIR_NAME_COMPLETE,
- name, name_len);
- skb_put(skb, eir_len);
- }
+ if (name)
+ eir_len += eir_skb_put_data(skb, EIR_NAME_COMPLETE, name, name_len);
- if (memcmp(conn->dev_class, "\0\0\0", 3) != 0) {
- eir_len = eir_append_data(ev->eir, eir_len,
- EIR_CLASS_OF_DEV,
- conn->dev_class, 3);
- skb_put(skb, 5);
- }
+ if (memcmp(conn->dev_class, "\0\0\0", sizeof(conn->dev_class)))
+ eir_len += eir_skb_put_data(skb, EIR_CLASS_OF_DEV,
+ conn->dev_class, sizeof(conn->dev_class));
}
ev->eir_len = cpu_to_le16(eir_len);
@@ -9589,12 +10191,192 @@ static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir,
return true;
}
+void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle,
+ bdaddr_t *bdaddr, u8 addr_type)
+{
+ struct mgmt_ev_adv_monitor_device_lost ev;
+
+ ev.monitor_handle = cpu_to_le16(handle);
+ bacpy(&ev.addr.bdaddr, bdaddr);
+ ev.addr.type = addr_type;
+
+ mgmt_event(MGMT_EV_ADV_MONITOR_DEVICE_LOST, hdev, &ev, sizeof(ev),
+ NULL);
+}
+
+static void mgmt_send_adv_monitor_device_found(struct hci_dev *hdev,
+ struct sk_buff *skb,
+ struct sock *skip_sk,
+ u16 handle)
+{
+ struct sk_buff *advmon_skb;
+ size_t advmon_skb_len;
+ __le16 *monitor_handle;
+
+ if (!skb)
+ return;
+
+ advmon_skb_len = (sizeof(struct mgmt_ev_adv_monitor_device_found) -
+ sizeof(struct mgmt_ev_device_found)) + skb->len;
+ advmon_skb = mgmt_alloc_skb(hdev, MGMT_EV_ADV_MONITOR_DEVICE_FOUND,
+ advmon_skb_len);
+ if (!advmon_skb)
+ return;
+
+ /* ADV_MONITOR_DEVICE_FOUND is similar to DEVICE_FOUND event except
+ * that it also has 'monitor_handle'. Make a copy of DEVICE_FOUND and
+ * store monitor_handle of the matched monitor.
+ */
+ monitor_handle = skb_put(advmon_skb, sizeof(*monitor_handle));
+ *monitor_handle = cpu_to_le16(handle);
+ skb_put_data(advmon_skb, skb->data, skb->len);
+
+ mgmt_event_skb(advmon_skb, skip_sk);
+}
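
The copy above amounts to prepending a little-endian monitor handle to the unchanged DEVICE_FOUND payload. A stand-alone illustration of that byte layout (user-space C with invented sample bytes, not kernel code):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint8_t device_found[] = { 0xaa, 0xbb, 0xcc }; /* stand-in payload */
	uint16_t handle = 0x0102;
	uint8_t advmon[2 + sizeof(device_found)];

	/* cpu_to_le16() by hand: low byte first */
	advmon[0] = handle & 0xff;
	advmon[1] = handle >> 8;
	memcpy(advmon + 2, device_found, sizeof(device_found));

	printf("%02x %02x %02x ...\n", (unsigned)advmon[0],
	       (unsigned)advmon[1], (unsigned)advmon[2]);
	return 0;
}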
+
+static void mgmt_adv_monitor_device_found(struct hci_dev *hdev,
+ bdaddr_t *bdaddr, bool report_device,
+ struct sk_buff *skb,
+ struct sock *skip_sk)
+{
+ struct monitored_device *dev, *tmp;
+ bool matched = false;
+ bool notified = false;
+
+ /* We have received the Advertisement Report because:
+ * 1. the kernel has initiated active discovery
+ * 2. if not, we have pend_le_reports > 0 in which case we are doing
+ * passive scanning
+ * 3. if none of the above is true, we have one or more active
+ * Advertisement Monitors
+ *
+ * For case 1 and 2, report all advertisements via MGMT_EV_DEVICE_FOUND
+ * and report ONLY one advertisement per device for the matched Monitor
+ * via MGMT_EV_ADV_MONITOR_DEVICE_FOUND event.
+ *
+ * For case 3, since we are not actively scanning and all advertisements
+ * received are due to a matched Advertisement Monitor, report all
+ * advertisements ONLY via MGMT_EV_ADV_MONITOR_DEVICE_FOUND event.
+ */
+ if (report_device && !hdev->advmon_pend_notify) {
+ mgmt_event_skb(skb, skip_sk);
+ return;
+ }
+
+ hdev->advmon_pend_notify = false;
+
+ list_for_each_entry_safe(dev, tmp, &hdev->monitored_devices, list) {
+ if (!bacmp(&dev->bdaddr, bdaddr)) {
+ matched = true;
+
+ if (!dev->notified) {
+ mgmt_send_adv_monitor_device_found(hdev, skb,
+ skip_sk,
+ dev->handle);
+ notified = true;
+ dev->notified = true;
+ }
+ }
+
+ if (!dev->notified)
+ hdev->advmon_pend_notify = true;
+ }
+
+ if (!report_device &&
+ ((matched && !notified) || !msft_monitor_supported(hdev))) {
+ /* Handle 0 indicates that we are not actively scanning and this
+ * is a subsequent advertisement report for an already matched
+ * Advertisement Monitor or the controller offloading support
+ * is not available.
+ */
+ mgmt_send_adv_monitor_device_found(hdev, skb, skip_sk, 0);
+ }
+
+ if (report_device)
+ mgmt_event_skb(skb, skip_sk);
+ else
+ kfree_skb(skb);
+}
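
The two boolean gates in the function above decide which event stream a report enters. A simplified stand-alone model of just those conditions (plain C, all names invented for illustration; not kernel code):

#include <stdbool.h>
#include <stdio.h>

static void classify(bool report_device, bool pend_notify,
		     bool matched, bool notified, bool offload)
{
	/* Gate 1: discovery traffic with no pending monitor
	 * notifications takes the plain DEVICE_FOUND fast path.
	 */
	if (report_device && !pend_notify) {
		puts("DEVICE_FOUND only");
		return;
	}

	/* Gate 2: monitor-only reception falls back to handle 0 for a
	 * repeat match or when controller offloading is unavailable.
	 */
	if (!report_device && ((matched && !notified) || !offload))
		puts("ADV_MONITOR_DEVICE_FOUND, handle 0");

	if (report_device)
		puts("DEVICE_FOUND");
}

int main(void)
{
	classify(false, true, true, true, true);   /* repeat match */
	classify(true, false, false, false, true); /* discovery fast path */
	return 0;
}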
+
+static void mesh_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr,
+ u8 addr_type, s8 rssi, u32 flags, u8 *eir,
+ u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len,
+ u64 instant)
+{
+ struct sk_buff *skb;
+ struct mgmt_ev_mesh_device_found *ev;
+ int i, j;
+
+ if (!hdev->mesh_ad_types[0])
+ goto accepted;
+
+ /* Scan for requested AD types */
+ if (eir_len > 0) {
+ for (i = 0; i + 1 < eir_len; i += eir[i] + 1) {
+ for (j = 0; j < sizeof(hdev->mesh_ad_types); j++) {
+ if (!hdev->mesh_ad_types[j])
+ break;
+
+ if (hdev->mesh_ad_types[j] == eir[i + 1])
+ goto accepted;
+ }
+ }
+ }
+
+ if (scan_rsp_len > 0) {
+ for (i = 0; i + 1 < scan_rsp_len; i += scan_rsp[i] + 1) {
+ for (j = 0; j < sizeof(hdev->mesh_ad_types); j++) {
+ if (!hdev->mesh_ad_types[j])
+ break;
+
+ if (hdev->mesh_ad_types[j] == scan_rsp[i + 1])
+ goto accepted;
+ }
+ }
+ }
+
+ return;
+
+accepted:
+ skb = mgmt_alloc_skb(hdev, MGMT_EV_MESH_DEVICE_FOUND,
+ sizeof(*ev) + eir_len + scan_rsp_len);
+ if (!skb)
+ return;
+
+ ev = skb_put(skb, sizeof(*ev));
+
+ bacpy(&ev->addr.bdaddr, bdaddr);
+ ev->addr.type = link_to_bdaddr(LE_LINK, addr_type);
+ ev->rssi = rssi;
+ ev->flags = cpu_to_le32(flags);
+ ev->instant = cpu_to_le64(instant);
+
+ if (eir_len > 0)
+ /* Copy EIR or advertising data into event */
+ skb_put_data(skb, eir, eir_len);
+
+ if (scan_rsp_len > 0)
+ /* Append scan response data to event */
+ skb_put_data(skb, scan_rsp, scan_rsp_len);
+
+ ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len);
+
+ mgmt_event_skb(skb, NULL);
+}
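
The AD-type scan in mesh_device_found() steps through standard BLE advertising structures, each laid out as [length][type][value...] and advancing by length + 1. A self-contained sketch of the same walk (the sample bytes and the truncation guard are illustrative, not kernel code):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static void walk_ad(const uint8_t *eir, size_t eir_len)
{
	for (size_t i = 0; i + 1 < eir_len; i += eir[i] + 1) {
		uint8_t len = eir[i];

		if (!len || i + len >= eir_len)	/* truncated structure */
			break;
		printf("AD type 0x%02x, %d value bytes\n",
		       (unsigned)eir[i + 1], len - 1);
	}
}

int main(void)
{
	/* Flags = 0x06, then a complete list of 16-bit service UUIDs */
	const uint8_t ad[] = { 0x02, 0x01, 0x06, 0x03, 0x03, 0x27, 0x18 };

	walk_ad(ad, sizeof(ad));
	return 0;
}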
+
void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
u8 addr_type, u8 *dev_class, s8 rssi, u32 flags,
- u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len)
+ u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len,
+ u64 instant)
{
struct sk_buff *skb;
struct mgmt_ev_device_found *ev;
+ bool report_device = hci_discovery_active(hdev);
+
+ if (hci_dev_test_flag(hdev, HCI_MESH) && link_type == LE_LINK)
+ mesh_device_found(hdev, bdaddr, addr_type, rssi, flags,
+ eir, eir_len, scan_rsp, scan_rsp_len,
+ instant);
/* Don't send events for a non-kernel initiated discovery. With
* LE one exception is if we have pend_le_reports > 0 in which
@@ -9603,11 +10385,10 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
if (!hci_discovery_active(hdev)) {
if (link_type == ACL_LINK)
return;
- if (link_type == LE_LINK &&
- list_empty(&hdev->pend_le_reports) &&
- !hci_is_adv_monitoring(hdev)) {
+ if (link_type == LE_LINK && !list_empty(&hdev->pend_le_reports))
+ report_device = true;
+ else if (!hci_is_adv_monitoring(hdev))
return;
- }
}
if (hdev->discovery.result_filtering) {
@@ -9672,7 +10453,7 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len);
- mgmt_event_skb(skb, NULL);
+ mgmt_adv_monitor_device_found(hdev, bdaddr, report_device, skb, NULL);
}
void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
@@ -9680,28 +10461,21 @@ void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
{
struct sk_buff *skb;
struct mgmt_ev_device_found *ev;
- u16 eir_len;
- u32 flags;
+ u16 eir_len = 0;
+ u32 flags = 0;
- if (name_len)
- skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, 2 + name_len);
- else
- skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, 0);
+ skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND,
+ sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0));
ev = skb_put(skb, sizeof(*ev));
bacpy(&ev->addr.bdaddr, bdaddr);
ev->addr.type = link_to_bdaddr(link_type, addr_type);
ev->rssi = rssi;
- if (name) {
- eir_len = eir_append_data(ev->eir, 0, EIR_NAME_COMPLETE, name,
- name_len);
- flags = 0;
- skb_put(skb, eir_len);
- } else {
- eir_len = 0;
+ if (name)
+ eir_len += eir_skb_put_data(skb, EIR_NAME_COMPLETE, name, name_len);
+ else
flags = MGMT_DEV_FOUND_NAME_REQUEST_FAILED;
- }
ev->eir_len = cpu_to_le16(eir_len);
ev->flags = cpu_to_le32(flags);
@@ -9762,3 +10536,22 @@ void mgmt_exit(void)
{
hci_mgmt_chan_unregister(&chan);
}
+
+void mgmt_cleanup(struct sock *sk)
+{
+ struct mgmt_mesh_tx *mesh_tx;
+ struct hci_dev *hdev;
+
+ read_lock(&hci_dev_list_lock);
+
+ list_for_each_entry(hdev, &hci_dev_list, list) {
+ do {
+ mesh_tx = mgmt_mesh_next(hdev, sk);
+
+ if (mesh_tx)
+ mesh_send_complete(hdev, mesh_tx, true);
+ } while (mesh_tx);
+ }
+
+ read_unlock(&hci_dev_list_lock);
+}
diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c
index edee60bbc7b4..0115f783bde8 100644
--- a/net/bluetooth/mgmt_util.c
+++ b/net/bluetooth/mgmt_util.c
@@ -77,11 +77,12 @@ int mgmt_send_event_skb(unsigned short channel, struct sk_buff *skb, int flag,
{
struct hci_dev *hdev;
struct mgmt_hdr *hdr;
- int len = skb->len;
+ int len;
if (!skb)
return -EINVAL;
+ len = skb->len;
hdev = bt_cb(skb)->mgmt.hdev;
/* Time stamp */
@@ -296,7 +297,7 @@ struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode,
if (!cmd)
return NULL;
- list_add(&cmd->list, &hdev->mgmt_pending);
+ list_add_tail(&cmd->list, &hdev->mgmt_pending);
return cmd;
}
@@ -313,3 +314,77 @@ void mgmt_pending_remove(struct mgmt_pending_cmd *cmd)
list_del(&cmd->list);
mgmt_pending_free(cmd);
}
+
+void mgmt_mesh_foreach(struct hci_dev *hdev,
+ void (*cb)(struct mgmt_mesh_tx *mesh_tx, void *data),
+ void *data, struct sock *sk)
+{
+ struct mgmt_mesh_tx *mesh_tx, *tmp;
+
+ list_for_each_entry_safe(mesh_tx, tmp, &hdev->mesh_pending, list) {
+ if (!sk || mesh_tx->sk == sk)
+ cb(mesh_tx, data);
+ }
+}
+
+struct mgmt_mesh_tx *mgmt_mesh_next(struct hci_dev *hdev, struct sock *sk)
+{
+ struct mgmt_mesh_tx *mesh_tx;
+
+ if (list_empty(&hdev->mesh_pending))
+ return NULL;
+
+ list_for_each_entry(mesh_tx, &hdev->mesh_pending, list) {
+ if (!sk || mesh_tx->sk == sk)
+ return mesh_tx;
+ }
+
+ return NULL;
+}
+
+struct mgmt_mesh_tx *mgmt_mesh_find(struct hci_dev *hdev, u8 handle)
+{
+ struct mgmt_mesh_tx *mesh_tx;
+
+ if (list_empty(&hdev->mesh_pending))
+ return NULL;
+
+ list_for_each_entry(mesh_tx, &hdev->mesh_pending, list) {
+ if (mesh_tx->handle == handle)
+ return mesh_tx;
+ }
+
+ return NULL;
+}
+
+struct mgmt_mesh_tx *mgmt_mesh_add(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 len)
+{
+ struct mgmt_mesh_tx *mesh_tx;
+
+ mesh_tx = kzalloc(sizeof(*mesh_tx), GFP_KERNEL);
+ if (!mesh_tx)
+ return NULL;
+
+ hdev->mesh_send_ref++;
+ if (!hdev->mesh_send_ref)
+ hdev->mesh_send_ref++;
+
+ mesh_tx->handle = hdev->mesh_send_ref;
+ mesh_tx->index = hdev->id;
+ memcpy(mesh_tx->param, data, len);
+ mesh_tx->param_len = len;
+ mesh_tx->sk = sk;
+ sock_hold(sk);
+
+ list_add_tail(&mesh_tx->list, &hdev->mesh_pending);
+
+ return mesh_tx;
+}
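
The handle assignment in mgmt_mesh_add() is a wrapping counter that never yields 0, so 0 can stay reserved as an invalid handle. The allocator in isolation (8-bit to match the u8 handle field; a model, not kernel code):

#include <stdint.h>
#include <stdio.h>

static uint8_t next_handle(uint8_t *ref)
{
	(*ref)++;
	if (!*ref)	/* wrapped past 0xff: skip the reserved 0 */
		(*ref)++;
	return *ref;
}

int main(void)
{
	uint8_t ref = 0xfe;

	printf("%d\n", next_handle(&ref));	/* 255 */
	printf("%d\n", next_handle(&ref));	/* 1: zero is never issued */
	return 0;
}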
+
+void mgmt_mesh_remove(struct mgmt_mesh_tx *mesh_tx)
+{
+ list_del(&mesh_tx->list);
+ sock_put(mesh_tx->sk);
+ kfree(mesh_tx);
+}
diff --git a/net/bluetooth/mgmt_util.h b/net/bluetooth/mgmt_util.h
index 98e40395a383..6a8b7e84293d 100644
--- a/net/bluetooth/mgmt_util.h
+++ b/net/bluetooth/mgmt_util.h
@@ -20,6 +20,16 @@
SOFTWARE IS DISCLAIMED.
*/
+struct mgmt_mesh_tx {
+ struct list_head list;
+ int index;
+ size_t param_len;
+ struct sock *sk;
+ u8 handle;
+ u8 instance;
+ u8 param[sizeof(struct mgmt_cp_mesh_send) + 29];
+};
+
struct mgmt_pending_cmd {
struct list_head list;
u16 opcode;
@@ -59,3 +69,11 @@ struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode,
void *data, u16 len);
void mgmt_pending_free(struct mgmt_pending_cmd *cmd);
void mgmt_pending_remove(struct mgmt_pending_cmd *cmd);
+void mgmt_mesh_foreach(struct hci_dev *hdev,
+ void (*cb)(struct mgmt_mesh_tx *mesh_tx, void *data),
+ void *data, struct sock *sk);
+struct mgmt_mesh_tx *mgmt_mesh_find(struct hci_dev *hdev, u8 handle);
+struct mgmt_mesh_tx *mgmt_mesh_next(struct hci_dev *hdev, struct sock *sk);
+struct mgmt_mesh_tx *mgmt_mesh_add(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 len);
+void mgmt_mesh_remove(struct mgmt_mesh_tx *mesh_tx);
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index 6a943634b31a..bee6a4c656be 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -80,6 +80,14 @@ struct msft_rp_le_set_advertisement_filter_enable {
__u8 sub_opcode;
} __packed;
+#define MSFT_EV_LE_MONITOR_DEVICE 0x02
+struct msft_ev_le_monitor_device {
+ __u8 addr_type;
+ bdaddr_t bdaddr;
+ __u8 monitor_handle;
+ __u8 monitor_state;
+} __packed;
+
struct msft_monitor_advertisement_handle_data {
__u8 msft_handle;
__u16 mgmt_handle;
@@ -91,18 +99,11 @@ struct msft_data {
__u8 evt_prefix_len;
__u8 *evt_prefix;
struct list_head handle_map;
- __u16 pending_add_handle;
- __u16 pending_remove_handle;
__u8 resuming;
__u8 suspending;
__u8 filter_enabled;
};
-static int __msft_add_monitor_pattern(struct hci_dev *hdev,
- struct adv_monitor *monitor);
-static int __msft_remove_monitor(struct hci_dev *hdev,
- struct adv_monitor *monitor, u16 handle);
-
bool msft_monitor_supported(struct hci_dev *hdev)
{
return !!(msft_get_features(hdev) & MSFT_FEATURE_MASK_LE_ADV_MONITOR);
@@ -119,7 +120,10 @@ static bool read_supported_features(struct hci_dev *hdev,
skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp,
HCI_CMD_TIMEOUT);
- if (IS_ERR(skb)) {
+ if (IS_ERR_OR_NULL(skb)) {
+ if (!skb)
+ skb = ERR_PTR(-EIO);
+
bt_dev_err(hdev, "Failed to read MSFT supported features (%ld)",
PTR_ERR(skb));
return false;
@@ -156,34 +160,6 @@ failed:
return false;
}
-static void reregister_monitor(struct hci_dev *hdev, int handle)
-{
- struct adv_monitor *monitor;
- struct msft_data *msft = hdev->msft_data;
- int err;
-
- while (1) {
- monitor = idr_get_next(&hdev->adv_monitors_idr, &handle);
- if (!monitor) {
- /* All monitors have been resumed */
- msft->resuming = false;
- hci_update_passive_scan(hdev);
- return;
- }
-
- msft->pending_add_handle = (u16)handle;
- err = __msft_add_monitor_pattern(hdev, monitor);
-
- /* If success, we return and wait for monitor added callback */
- if (!err)
- return;
-
- /* Otherwise remove the monitor and keep registering */
- hci_free_adv_monitor(hdev, monitor);
- handle++;
- }
-}
-
/* is_mgmt = true matches the handle exposed to userspace via mgmt.
* is_mgmt = false matches the handle used by the msft controller.
* This function requires the caller holds hdev->lock
@@ -204,34 +180,58 @@ static struct msft_monitor_advertisement_handle_data *msft_find_handle_data
return NULL;
}
-static void msft_le_monitor_advertisement_cb(struct hci_dev *hdev,
- u8 status, u16 opcode,
- struct sk_buff *skb)
+/* This function requires the caller holds hdev->lock */
+static int msft_monitor_device_del(struct hci_dev *hdev, __u16 mgmt_handle,
+ bdaddr_t *bdaddr, __u8 addr_type,
+ bool notify)
+{
+ struct monitored_device *dev, *tmp;
+ int count = 0;
+
+ list_for_each_entry_safe(dev, tmp, &hdev->monitored_devices, list) {
+ /* mgmt_handle == 0 indicates remove all devices, whereas
+ * bdaddr == NULL indicates remove all devices matching the
+ * mgmt_handle.
+ */
+ if ((!mgmt_handle || dev->handle == mgmt_handle) &&
+ (!bdaddr || (!bacmp(bdaddr, &dev->bdaddr) &&
+ addr_type == dev->addr_type))) {
+ if (notify && dev->notified) {
+ mgmt_adv_monitor_device_lost(hdev, dev->handle,
+ &dev->bdaddr,
+ dev->addr_type);
+ }
+
+ list_del(&dev->list);
+ kfree(dev);
+ count++;
+ }
+ }
+
+ return count;
+}
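
The two wildcard rules in the comment above compose into one predicate: handle 0 matches every monitor, and a NULL address matches every device. A small user-space model (string addresses stand in for the kernel's bdaddr_t plus addr_type comparison):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool should_remove(unsigned int want_handle, unsigned int dev_handle,
			  const char *want_addr, const char *dev_addr)
{
	return (!want_handle || dev_handle == want_handle) &&
	       (!want_addr || strcmp(want_addr, dev_addr) == 0);
}

int main(void)
{
	printf("%d\n", should_remove(0, 7, NULL, "AA:BB"));    /* 1: flush all */
	printf("%d\n", should_remove(7, 7, "AA:BB", "AA:BB")); /* 1 */
	printf("%d\n", should_remove(7, 8, NULL, "AA:BB"));    /* 0 */
	return 0;
}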
+
+static int msft_le_monitor_advertisement_cb(struct hci_dev *hdev, u16 opcode,
+ struct adv_monitor *monitor,
+ struct sk_buff *skb)
{
struct msft_rp_le_monitor_advertisement *rp;
- struct adv_monitor *monitor;
struct msft_monitor_advertisement_handle_data *handle_data;
struct msft_data *msft = hdev->msft_data;
+ int status = 0;
hci_dev_lock(hdev);
- monitor = idr_find(&hdev->adv_monitors_idr, msft->pending_add_handle);
- if (!monitor) {
- bt_dev_err(hdev, "msft add advmon: monitor %u is not found!",
- msft->pending_add_handle);
+ rp = (struct msft_rp_le_monitor_advertisement *)skb->data;
+ if (skb->len < sizeof(*rp)) {
status = HCI_ERROR_UNSPECIFIED;
goto unlock;
}
+ status = rp->status;
if (status)
goto unlock;
- rp = (struct msft_rp_le_monitor_advertisement *)skb->data;
- if (skb->len < sizeof(*rp)) {
- status = HCI_ERROR_UNSPECIFIED;
- goto unlock;
- }
-
handle_data = kmalloc(sizeof(*handle_data), GFP_KERNEL);
if (!handle_data) {
status = HCI_ERROR_UNSPECIFIED;
@@ -246,29 +246,23 @@ static void msft_le_monitor_advertisement_cb(struct hci_dev *hdev,
monitor->state = ADV_MONITOR_STATE_OFFLOADED;
unlock:
- if (status && monitor)
+ if (status)
hci_free_adv_monitor(hdev, monitor);
hci_dev_unlock(hdev);
- if (!msft->resuming)
- hci_add_adv_patterns_monitor_complete(hdev, status);
+ return status;
}
-static void msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev,
- u8 status, u16 opcode,
- struct sk_buff *skb)
+static int msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev,
+ u16 opcode,
+ struct adv_monitor *monitor,
+ struct sk_buff *skb)
{
- struct msft_cp_le_cancel_monitor_advertisement *cp;
struct msft_rp_le_cancel_monitor_advertisement *rp;
- struct adv_monitor *monitor;
struct msft_monitor_advertisement_handle_data *handle_data;
struct msft_data *msft = hdev->msft_data;
- int err;
- bool pending;
-
- if (status)
- goto done;
+ int status = 0;
rp = (struct msft_rp_le_cancel_monitor_advertisement *)skb->data;
if (skb->len < sizeof(*rp)) {
@@ -276,57 +270,46 @@ static void msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev,
goto done;
}
+ status = rp->status;
+ if (status)
+ goto done;
+
hci_dev_lock(hdev);
- cp = hci_sent_cmd_data(hdev, hdev->msft_opcode);
- handle_data = msft_find_handle_data(hdev, cp->handle, false);
+ handle_data = msft_find_handle_data(hdev, monitor->handle, true);
if (handle_data) {
- monitor = idr_find(&hdev->adv_monitors_idr,
- handle_data->mgmt_handle);
-
- if (monitor && monitor->state == ADV_MONITOR_STATE_OFFLOADED)
+ if (monitor->state == ADV_MONITOR_STATE_OFFLOADED)
monitor->state = ADV_MONITOR_STATE_REGISTERED;
/* Do not free the monitor if it is being removed due to
* suspend. It will be re-monitored on resume.
*/
- if (monitor && !msft->suspending)
+ if (!msft->suspending) {
hci_free_adv_monitor(hdev, monitor);
- list_del(&handle_data->list);
- kfree(handle_data);
- }
-
- /* If remove all monitors is required, we need to continue the process
- * here because the earlier it was paused when waiting for the
- * response from controller.
- */
- if (msft->pending_remove_handle == 0) {
- pending = hci_remove_all_adv_monitor(hdev, &err);
- if (pending) {
- hci_dev_unlock(hdev);
- return;
+ /* Clear any monitored devices by this Adv Monitor */
+ msft_monitor_device_del(hdev, handle_data->mgmt_handle,
+ NULL, 0, false);
}
- if (err)
- status = HCI_ERROR_UNSPECIFIED;
+ list_del(&handle_data->list);
+ kfree(handle_data);
}
hci_dev_unlock(hdev);
done:
- if (!msft->suspending)
- hci_remove_adv_monitor_complete(hdev, status);
+ return status;
}
+/* This function requires the caller holds hci_req_sync_lock */
static int msft_remove_monitor_sync(struct hci_dev *hdev,
struct adv_monitor *monitor)
{
struct msft_cp_le_cancel_monitor_advertisement cp;
struct msft_monitor_advertisement_handle_data *handle_data;
struct sk_buff *skb;
- u8 status;
handle_data = msft_find_handle_data(hdev, monitor->handle, true);
@@ -339,16 +322,14 @@ static int msft_remove_monitor_sync(struct hci_dev *hdev,
skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp,
HCI_CMD_TIMEOUT);
- if (IS_ERR(skb))
+ if (IS_ERR_OR_NULL(skb)) {
+ if (!skb)
+ return -EIO;
return PTR_ERR(skb);
+ }
- status = skb->data[0];
- skb_pull(skb, 1);
-
- msft_le_cancel_monitor_advertisement_cb(hdev, status, hdev->msft_opcode,
- skb);
-
- return status;
+ return msft_le_cancel_monitor_advertisement_cb(hdev, hdev->msft_opcode,
+ monitor, skb);
}
/* This function requires the caller holds hci_req_sync_lock */
@@ -419,7 +400,6 @@ static int msft_add_monitor_sync(struct hci_dev *hdev,
ptrdiff_t offset = 0;
u8 pattern_count = 0;
struct sk_buff *skb;
- u8 status;
if (!msft_monitor_pattern_valid(monitor))
return -EINVAL;
@@ -458,26 +438,25 @@ static int msft_add_monitor_sync(struct hci_dev *hdev,
HCI_CMD_TIMEOUT);
kfree(cp);
- if (IS_ERR(skb))
+ if (IS_ERR_OR_NULL(skb)) {
+ if (!skb)
+ return -EIO;
return PTR_ERR(skb);
+ }
- status = skb->data[0];
- skb_pull(skb, 1);
-
- msft_le_monitor_advertisement_cb(hdev, status, hdev->msft_opcode, skb);
-
- return status;
+ return msft_le_monitor_advertisement_cb(hdev, hdev->msft_opcode,
+ monitor, skb);
}
/* This function requires the caller holds hci_req_sync_lock */
-int msft_resume_sync(struct hci_dev *hdev)
+static void reregister_monitor(struct hci_dev *hdev)
{
- struct msft_data *msft = hdev->msft_data;
struct adv_monitor *monitor;
+ struct msft_data *msft = hdev->msft_data;
int handle = 0;
- if (!msft || !msft_monitor_supported(hdev))
- return 0;
+ if (!msft)
+ return;
msft->resuming = true;
@@ -491,12 +470,34 @@ int msft_resume_sync(struct hci_dev *hdev)
handle++;
}
- /* All monitors have been resumed */
+ /* All monitors have been reregistered */
msft->resuming = false;
+}
+
+/* This function requires the caller holds hci_req_sync_lock */
+int msft_resume_sync(struct hci_dev *hdev)
+{
+ struct msft_data *msft = hdev->msft_data;
+
+ if (!msft || !msft_monitor_supported(hdev))
+ return 0;
+
+ hci_dev_lock(hdev);
+
+ /* Clear already tracked devices on resume. Once the monitors are
+ * reregistered, devices in range will be found again after resume.
+ */
+ hdev->advmon_pend_notify = false;
+ msft_monitor_device_del(hdev, 0, NULL, 0, true);
+
+ hci_dev_unlock(hdev);
+
+ reregister_monitor(hdev);
return 0;
}
+/* This function requires the caller holds hci_req_sync_lock */
void msft_do_open(struct hci_dev *hdev)
{
struct msft_data *msft = hdev->msft_data;
@@ -529,7 +530,7 @@ void msft_do_open(struct hci_dev *hdev)
/* Monitors get removed on power off, so we need to explicitly
* tell the controller to re-monitor.
*/
- reregister_monitor(hdev, 0);
+ reregister_monitor(hdev);
}
}
@@ -557,6 +558,14 @@ void msft_do_close(struct hci_dev *hdev)
list_del(&handle_data->list);
kfree(handle_data);
}
+
+ hci_dev_lock(hdev);
+
+ /* Clear any devices that are being monitored and notify device lost */
+ hdev->advmon_pend_notify = false;
+ msft_monitor_device_del(hdev, 0, NULL, 0, true);
+
+ hci_dev_unlock(hdev);
}
void msft_register(struct hci_dev *hdev)
@@ -590,10 +599,101 @@ void msft_unregister(struct hci_dev *hdev)
kfree(msft);
}
+/* This function requires the caller holds hdev->lock */
+static void msft_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr,
+ __u8 addr_type, __u16 mgmt_handle)
+{
+ struct monitored_device *dev;
+
+ dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev) {
+ bt_dev_err(hdev, "MSFT vendor event %u: no memory",
+ MSFT_EV_LE_MONITOR_DEVICE);
+ return;
+ }
+
+ bacpy(&dev->bdaddr, bdaddr);
+ dev->addr_type = addr_type;
+ dev->handle = mgmt_handle;
+ dev->notified = false;
+
+ INIT_LIST_HEAD(&dev->list);
+ list_add(&dev->list, &hdev->monitored_devices);
+ hdev->advmon_pend_notify = true;
+}
+
+/* This function requires the caller holds hdev->lock */
+static void msft_device_lost(struct hci_dev *hdev, bdaddr_t *bdaddr,
+ __u8 addr_type, __u16 mgmt_handle)
+{
+ if (!msft_monitor_device_del(hdev, mgmt_handle, bdaddr, addr_type,
+ true)) {
+ bt_dev_err(hdev, "MSFT vendor event %u: dev %pMR not in list",
+ MSFT_EV_LE_MONITOR_DEVICE, bdaddr);
+ }
+}
+
+static void *msft_skb_pull(struct hci_dev *hdev, struct sk_buff *skb,
+ u8 ev, size_t len)
+{
+ void *data;
+
+ data = skb_pull_data(skb, len);
+ if (!data)
+ bt_dev_err(hdev, "Malformed MSFT vendor event: 0x%02x", ev);
+
+ return data;
+}
+
+/* This function requires the caller holds hdev->lock */
+static void msft_monitor_device_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ struct msft_ev_le_monitor_device *ev;
+ struct msft_monitor_advertisement_handle_data *handle_data;
+ u8 addr_type;
+
+ ev = msft_skb_pull(hdev, skb, MSFT_EV_LE_MONITOR_DEVICE, sizeof(*ev));
+ if (!ev)
+ return;
+
+ bt_dev_dbg(hdev,
+ "MSFT vendor event 0x%02x: handle 0x%04x state %d addr %pMR",
+ MSFT_EV_LE_MONITOR_DEVICE, ev->monitor_handle,
+ ev->monitor_state, &ev->bdaddr);
+
+ handle_data = msft_find_handle_data(hdev, ev->monitor_handle, false);
+ if (!handle_data)
+ return;
+
+ switch (ev->addr_type) {
+ case ADDR_LE_DEV_PUBLIC:
+ addr_type = BDADDR_LE_PUBLIC;
+ break;
+
+ case ADDR_LE_DEV_RANDOM:
+ addr_type = BDADDR_LE_RANDOM;
+ break;
+
+ default:
+ bt_dev_err(hdev,
+ "MSFT vendor event 0x%02x: unknown addr type 0x%02x",
+ MSFT_EV_LE_MONITOR_DEVICE, ev->addr_type);
+ return;
+ }
+
+ if (ev->monitor_state)
+ msft_device_found(hdev, &ev->bdaddr, addr_type,
+ handle_data->mgmt_handle);
+ else
+ msft_device_lost(hdev, &ev->bdaddr, addr_type,
+ handle_data->mgmt_handle);
+}
+
void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb)
{
struct msft_data *msft = hdev->msft_data;
- u8 event;
+ u8 *evt_prefix;
+ u8 *evt;
if (!msft)
return;
@@ -602,13 +702,12 @@ void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb)
* matches, and otherwise just return.
*/
if (msft->evt_prefix_len > 0) {
- if (skb->len < msft->evt_prefix_len)
+ evt_prefix = msft_skb_pull(hdev, skb, 0, msft->evt_prefix_len);
+ if (!evt_prefix)
return;
- if (memcmp(skb->data, msft->evt_prefix, msft->evt_prefix_len))
+ if (memcmp(evt_prefix, msft->evt_prefix, msft->evt_prefix_len))
return;
-
- skb_pull(skb, msft->evt_prefix_len);
}
/* Every event starts at least with an event code and the rest of
@@ -617,10 +716,23 @@ void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb)
if (skb->len < 1)
return;
- event = *skb->data;
- skb_pull(skb, 1);
+ evt = msft_skb_pull(hdev, skb, 0, sizeof(*evt));
+ if (!evt)
+ return;
+
+ hci_dev_lock(hdev);
+
+ switch (*evt) {
+ case MSFT_EV_LE_MONITOR_DEVICE:
+ msft_monitor_device_evt(hdev, skb);
+ break;
- bt_dev_dbg(hdev, "MSFT vendor event %u", event);
+ default:
+ bt_dev_dbg(hdev, "MSFT vendor event 0x%02x", *evt);
+ break;
+ }
+
+ hci_dev_unlock(hdev);
}
__u64 msft_get_features(struct hci_dev *hdev)
@@ -664,66 +776,7 @@ static void msft_le_set_advertisement_filter_enable_cb(struct hci_dev *hdev,
hci_dev_unlock(hdev);
}
-/* This function requires the caller holds hdev->lock */
-static int __msft_add_monitor_pattern(struct hci_dev *hdev,
- struct adv_monitor *monitor)
-{
- struct msft_cp_le_monitor_advertisement *cp;
- struct msft_le_monitor_advertisement_pattern_data *pattern_data;
- struct msft_le_monitor_advertisement_pattern *pattern;
- struct adv_pattern *entry;
- struct hci_request req;
- struct msft_data *msft = hdev->msft_data;
- size_t total_size = sizeof(*cp) + sizeof(*pattern_data);
- ptrdiff_t offset = 0;
- u8 pattern_count = 0;
- int err = 0;
-
- if (!msft_monitor_pattern_valid(monitor))
- return -EINVAL;
-
- list_for_each_entry(entry, &monitor->patterns, list) {
- pattern_count++;
- total_size += sizeof(*pattern) + entry->length;
- }
-
- cp = kmalloc(total_size, GFP_KERNEL);
- if (!cp)
- return -ENOMEM;
-
- cp->sub_opcode = MSFT_OP_LE_MONITOR_ADVERTISEMENT;
- cp->rssi_high = monitor->rssi.high_threshold;
- cp->rssi_low = monitor->rssi.low_threshold;
- cp->rssi_low_interval = (u8)monitor->rssi.low_threshold_timeout;
- cp->rssi_sampling_period = monitor->rssi.sampling_period;
-
- cp->cond_type = MSFT_MONITOR_ADVERTISEMENT_TYPE_PATTERN;
-
- pattern_data = (void *)cp->data;
- pattern_data->count = pattern_count;
-
- list_for_each_entry(entry, &monitor->patterns, list) {
- pattern = (void *)(pattern_data->data + offset);
- /* the length also includes data_type and offset */
- pattern->length = entry->length + 2;
- pattern->data_type = entry->ad_type;
- pattern->start_byte = entry->offset;
- memcpy(pattern->pattern, entry->value, entry->length);
- offset += sizeof(*pattern) + entry->length;
- }
-
- hci_req_init(&req, hdev);
- hci_req_add(&req, hdev->msft_opcode, total_size, cp);
- err = hci_req_run_skb(&req, msft_le_monitor_advertisement_cb);
- kfree(cp);
-
- if (!err)
- msft->pending_add_handle = monitor->handle;
-
- return err;
-}
-
-/* This function requires the caller holds hdev->lock */
+/* This function requires the caller holds hci_req_sync_lock */
int msft_add_monitor_pattern(struct hci_dev *hdev, struct adv_monitor *monitor)
{
struct msft_data *msft = hdev->msft_data;
@@ -734,41 +787,11 @@ int msft_add_monitor_pattern(struct hci_dev *hdev, struct adv_monitor *monitor)
if (msft->resuming || msft->suspending)
return -EBUSY;
- return __msft_add_monitor_pattern(hdev, monitor);
-}
-
-/* This function requires the caller holds hdev->lock */
-static int __msft_remove_monitor(struct hci_dev *hdev,
- struct adv_monitor *monitor, u16 handle)
-{
- struct msft_cp_le_cancel_monitor_advertisement cp;
- struct msft_monitor_advertisement_handle_data *handle_data;
- struct hci_request req;
- struct msft_data *msft = hdev->msft_data;
- int err = 0;
-
- handle_data = msft_find_handle_data(hdev, monitor->handle, true);
-
- /* If no matched handle, just remove without telling controller */
- if (!handle_data)
- return -ENOENT;
-
- cp.sub_opcode = MSFT_OP_LE_CANCEL_MONITOR_ADVERTISEMENT;
- cp.handle = handle_data->msft_handle;
-
- hci_req_init(&req, hdev);
- hci_req_add(&req, hdev->msft_opcode, sizeof(cp), &cp);
- err = hci_req_run_skb(&req, msft_le_cancel_monitor_advertisement_cb);
-
- if (!err)
- msft->pending_remove_handle = handle;
-
- return err;
+ return msft_add_monitor_sync(hdev, monitor);
}
-/* This function requires the caller holds hdev->lock */
-int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor,
- u16 handle)
+/* This function requires the caller holds hci_req_sync_lock */
+int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor)
{
struct msft_data *msft = hdev->msft_data;
@@ -778,7 +801,7 @@ int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor,
if (msft->resuming || msft->suspending)
return -EBUSY;
- return __msft_remove_monitor(hdev, monitor, handle);
+ return msft_remove_monitor_sync(hdev, monitor);
}
void msft_req_add_set_filter_enable(struct hci_request *req, bool enable)
diff --git a/net/bluetooth/msft.h b/net/bluetooth/msft.h
index afcaf7d3b1cb..2a63205b377b 100644
--- a/net/bluetooth/msft.h
+++ b/net/bluetooth/msft.h
@@ -20,8 +20,7 @@ void msft_do_close(struct hci_dev *hdev);
void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb);
__u64 msft_get_features(struct hci_dev *hdev);
int msft_add_monitor_pattern(struct hci_dev *hdev, struct adv_monitor *monitor);
-int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor,
- u16 handle);
+int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor);
void msft_req_add_set_filter_enable(struct hci_request *req, bool enable);
int msft_set_filter_enable(struct hci_dev *hdev, bool enable);
int msft_suspend_sync(struct hci_dev *hdev);
@@ -49,8 +48,7 @@ static inline int msft_add_monitor_pattern(struct hci_dev *hdev,
}
static inline int msft_remove_monitor(struct hci_dev *hdev,
- struct adv_monitor *monitor,
- u16 handle)
+ struct adv_monitor *monitor)
{
return -EOPNOTSUPP;
}
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 4bf4ea6cbb5e..21e24da4847f 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -902,7 +902,10 @@ static int rfcomm_sock_shutdown(struct socket *sock, int how)
lock_sock(sk);
if (!sk->sk_shutdown) {
sk->sk_shutdown = SHUTDOWN_MASK;
+
+ release_sock(sk);
__rfcomm_sock_close(sk);
+ lock_sock(sk);
if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
!(current->flags & PF_EXITING))
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index ebd78fdbd6e8..8009e0e93216 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -35,7 +35,6 @@
#include <net/bluetooth/hci_core.h>
#include <net/bluetooth/rfcomm.h>
-#define RFCOMM_TTY_MAGIC 0x6d02 /* magic number for rfcomm struct */
#define RFCOMM_TTY_PORTS RFCOMM_MAX_DEV /* whole lotta rfcomm devices */
#define RFCOMM_TTY_MAJOR 216 /* device node major id of the usb/bluetooth.c driver */
#define RFCOMM_TTY_MINOR 0
@@ -855,7 +854,8 @@ static int rfcomm_tty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned l
return -ENOIOCTLCMD;
}
-static void rfcomm_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
+static void rfcomm_tty_set_termios(struct tty_struct *tty,
+ const struct ktermios *old)
{
struct ktermios *new = &tty->termios;
int old_baud_rate = tty_termios_baud_rate(old);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 8eabf41b2993..1111da4e2f2b 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -574,19 +574,24 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
- if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND)
- return -EBADFD;
+ lock_sock(sk);
+ if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) {
+ err = -EBADFD;
+ goto done;
+ }
- if (sk->sk_type != SOCK_SEQPACKET)
- return -EINVAL;
+ if (sk->sk_type != SOCK_SEQPACKET) {
+ err = -EINVAL;
+ goto done;
+ }
hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR);
- if (!hdev)
- return -EHOSTUNREACH;
+ if (!hdev) {
+ err = -EHOSTUNREACH;
+ goto done;
+ }
hci_dev_lock(hdev);
- lock_sock(sk);
-
/* Set destination address and psm */
bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr);
@@ -885,7 +890,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
err = -EBADFD;
break;
}
- if (enhanced_sco_capable(hdev) &&
+ if (enhanced_sync_conn_capable(hdev) &&
voice.setting == BT_VOICE_TRANSPARENT)
sco_pi(sk)->codec.id = BT_CODEC_TRANSPARENT;
hci_dev_put(hdev);
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index fbc896323bec..e78dadfc5829 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -72,13 +72,16 @@ static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args)
args->args[3], args->args[4]);
}
+extern const struct bpf_link_ops bpf_struct_ops_link_lops;
+
int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
const struct bpf_struct_ops *st_ops = &bpf_bpf_dummy_ops;
const struct btf_type *func_proto;
struct bpf_dummy_ops_test_args *args;
- struct bpf_tramp_progs *tprogs;
+ struct bpf_tramp_links *tlinks;
+ struct bpf_tramp_link *link = NULL;
void *image = NULL;
unsigned int op_idx;
int prog_ret;
@@ -92,8 +95,8 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
if (IS_ERR(args))
return PTR_ERR(args);
- tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
- if (!tprogs) {
+ tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
+ if (!tlinks) {
err = -ENOMEM;
goto out;
}
@@ -105,8 +108,17 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
}
set_vm_flush_reset_perms(image);
+ link = kzalloc(sizeof(*link), GFP_USER);
+ if (!link) {
+ err = -ENOMEM;
+ goto out;
+ }
+ /* prog doesn't take ownership of the reference from the caller */
+ bpf_prog_inc(prog);
+ bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_link_lops, prog);
+
op_idx = prog->expected_attach_type;
- err = bpf_struct_ops_prepare_trampoline(tprogs, prog,
+ err = bpf_struct_ops_prepare_trampoline(tlinks, link,
&st_ops->func_models[op_idx],
image, image + PAGE_SIZE);
if (err < 0)
@@ -124,7 +136,9 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
out:
kfree(args);
bpf_jit_free_exec(image);
- kfree(tprogs);
+ if (link)
+ bpf_link_put(&link->link);
+ kfree(tlinks);
return err;
}
@@ -145,7 +159,8 @@ static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log,
const struct btf *btf,
const struct btf_type *t, int off,
int size, enum bpf_access_type atype,
- u32 *next_btf_id)
+ u32 *next_btf_id,
+ enum bpf_type_flag *flag)
{
const struct btf_type *state;
s32 type_id;
@@ -162,7 +177,8 @@ static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log,
return -EACCES;
}
- err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id);
+ err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
+ flag);
if (err < 0)
return err;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 46dd95755967..13d578ce2a09 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -5,6 +5,7 @@
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/slab.h>
+#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
@@ -14,6 +15,7 @@
#include <net/sock.h>
#include <net/tcp.h>
#include <net/net_namespace.h>
+#include <net/page_pool.h>
#include <linux/error-injection.h>
#include <linux/smp.h>
#include <linux/sock_diag.h>
@@ -52,10 +54,11 @@ static void bpf_test_timer_leave(struct bpf_test_timer *t)
rcu_read_unlock();
}
-static bool bpf_test_timer_continue(struct bpf_test_timer *t, u32 repeat, int *err, u32 *duration)
+static bool bpf_test_timer_continue(struct bpf_test_timer *t, int iterations,
+ u32 repeat, int *err, u32 *duration)
__must_hold(rcu)
{
- t->i++;
+ t->i += iterations;
if (t->i >= repeat) {
/* We're done. */
t->time_spent += ktime_get_ns() - t->time_start;
@@ -87,6 +90,285 @@ reset:
return false;
}
+/* We put this struct at the head of each page with a context and frame
+ * initialised when the page is allocated, so we don't have to do this on each
+ * repetition of the test run.
+ */
+struct xdp_page_head {
+ struct xdp_buff orig_ctx;
+ struct xdp_buff ctx;
+ struct xdp_frame frm;
+ u8 data[];
+};
+
+struct xdp_test_data {
+ struct xdp_buff *orig_ctx;
+ struct xdp_rxq_info rxq;
+ struct net_device *dev;
+ struct page_pool *pp;
+ struct xdp_frame **frames;
+ struct sk_buff **skbs;
+ struct xdp_mem_info mem;
+ u32 batch_size;
+ u32 frame_cnt;
+};
+
+#define TEST_XDP_FRAME_SIZE (PAGE_SIZE - sizeof(struct xdp_page_head))
+#define TEST_XDP_MAX_BATCH 256
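
A rough size check for the page layout described above; the head-struct size below is a stand-in (the real sizeof(struct xdp_page_head) depends on kernel configuration), so treat the numbers as illustrative only:

#include <stddef.h>
#include <stdio.h>

#define PAGE_SZ		4096
#define HEAD_SZ		256	/* stand-in for sizeof(struct xdp_page_head) */
#define HEADROOM	256	/* XDP_PACKET_HEADROOM */

int main(void)
{
	size_t frame_size = PAGE_SZ - HEAD_SZ;	/* TEST_XDP_FRAME_SIZE */

	printf("per-page data area: %zu bytes\n", frame_size);
	printf("max frame after headroom: %zu bytes\n", frame_size - HEADROOM);
	return 0;
}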
+
+static void xdp_test_run_init_page(struct page *page, void *arg)
+{
+ struct xdp_page_head *head = phys_to_virt(page_to_phys(page));
+ struct xdp_buff *new_ctx, *orig_ctx;
+ u32 headroom = XDP_PACKET_HEADROOM;
+ struct xdp_test_data *xdp = arg;
+ size_t frm_len, meta_len;
+ struct xdp_frame *frm;
+ void *data;
+
+ orig_ctx = xdp->orig_ctx;
+ frm_len = orig_ctx->data_end - orig_ctx->data_meta;
+ meta_len = orig_ctx->data - orig_ctx->data_meta;
+ headroom -= meta_len;
+
+ new_ctx = &head->ctx;
+ frm = &head->frm;
+ data = &head->data;
+ memcpy(data + headroom, orig_ctx->data_meta, frm_len);
+
+ xdp_init_buff(new_ctx, TEST_XDP_FRAME_SIZE, &xdp->rxq);
+ xdp_prepare_buff(new_ctx, data, headroom, frm_len, true);
+ new_ctx->data = new_ctx->data_meta + meta_len;
+
+ xdp_update_frame_from_buff(new_ctx, frm);
+ frm->mem = new_ctx->rxq->mem;
+
+ memcpy(&head->orig_ctx, new_ctx, sizeof(head->orig_ctx));
+}
+
+static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_ctx)
+{
+ struct page_pool *pp;
+ int err = -ENOMEM;
+ struct page_pool_params pp_params = {
+ .order = 0,
+ .flags = 0,
+ .pool_size = xdp->batch_size,
+ .nid = NUMA_NO_NODE,
+ .init_callback = xdp_test_run_init_page,
+ .init_arg = xdp,
+ };
+
+ xdp->frames = kvmalloc_array(xdp->batch_size, sizeof(void *), GFP_KERNEL);
+ if (!xdp->frames)
+ return -ENOMEM;
+
+ xdp->skbs = kvmalloc_array(xdp->batch_size, sizeof(void *), GFP_KERNEL);
+ if (!xdp->skbs)
+ goto err_skbs;
+
+ pp = page_pool_create(&pp_params);
+ if (IS_ERR(pp)) {
+ err = PTR_ERR(pp);
+ goto err_pp;
+ }
+
+ /* will copy 'mem.id' into pp->xdp_mem_id */
+ err = xdp_reg_mem_model(&xdp->mem, MEM_TYPE_PAGE_POOL, pp);
+ if (err)
+ goto err_mmodel;
+
+ xdp->pp = pp;
+
+ /* We create a 'fake' RXQ referencing the original dev, but with an
+ * xdp_mem_info pointing to our page_pool
+ */
+ xdp_rxq_info_reg(&xdp->rxq, orig_ctx->rxq->dev, 0, 0);
+ xdp->rxq.mem.type = MEM_TYPE_PAGE_POOL;
+ xdp->rxq.mem.id = pp->xdp_mem_id;
+ xdp->dev = orig_ctx->rxq->dev;
+ xdp->orig_ctx = orig_ctx;
+
+ return 0;
+
+err_mmodel:
+ page_pool_destroy(pp);
+err_pp:
+ kvfree(xdp->skbs);
+err_skbs:
+ kvfree(xdp->frames);
+ return err;
+}
+
+static void xdp_test_run_teardown(struct xdp_test_data *xdp)
+{
+ xdp_unreg_mem_model(&xdp->mem);
+ page_pool_destroy(xdp->pp);
+ kvfree(xdp->frames);	/* allocated with kvmalloc_array() */
+ kvfree(xdp->skbs);
+}
+
+static bool ctx_was_changed(struct xdp_page_head *head)
+{
+ return head->orig_ctx.data != head->ctx.data ||
+ head->orig_ctx.data_meta != head->ctx.data_meta ||
+ head->orig_ctx.data_end != head->ctx.data_end;
+}
+
+static void reset_ctx(struct xdp_page_head *head)
+{
+ if (likely(!ctx_was_changed(head)))
+ return;
+
+ head->ctx.data = head->orig_ctx.data;
+ head->ctx.data_meta = head->orig_ctx.data_meta;
+ head->ctx.data_end = head->orig_ctx.data_end;
+ xdp_update_frame_from_buff(&head->ctx, &head->frm);
+}
+
+static int xdp_recv_frames(struct xdp_frame **frames, int nframes,
+ struct sk_buff **skbs,
+ struct net_device *dev)
+{
+ gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
+ int i, n;
+ LIST_HEAD(list);
+
+ n = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, (void **)skbs);
+ if (unlikely(n == 0)) {
+ for (i = 0; i < nframes; i++)
+ xdp_return_frame(frames[i]);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < nframes; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ struct sk_buff *skb = skbs[i];
+
+ skb = __xdp_build_skb_from_frame(xdpf, skb, dev);
+ if (!skb) {
+ xdp_return_frame(xdpf);
+ continue;
+ }
+
+ list_add_tail(&skb->list, &list);
+ }
+ netif_receive_skb_list(&list);
+
+ return 0;
+}
+
+static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog,
+ u32 repeat)
+{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ int err = 0, act, ret, i, nframes = 0, batch_sz;
+ struct xdp_frame **frames = xdp->frames;
+ struct xdp_page_head *head;
+ struct xdp_frame *frm;
+ bool redirect = false;
+ struct xdp_buff *ctx;
+ struct page *page;
+
+ batch_sz = min_t(u32, repeat, xdp->batch_size);
+
+ local_bh_disable();
+ xdp_set_return_frame_no_direct();
+
+ for (i = 0; i < batch_sz; i++) {
+ page = page_pool_dev_alloc_pages(xdp->pp);
+ if (!page) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ head = phys_to_virt(page_to_phys(page));
+ reset_ctx(head);
+ ctx = &head->ctx;
+ frm = &head->frm;
+ xdp->frame_cnt++;
+
+ act = bpf_prog_run_xdp(prog, ctx);
+
+ /* if program changed pkt bounds we need to update the xdp_frame */
+ if (unlikely(ctx_was_changed(head))) {
+ ret = xdp_update_frame_from_buff(ctx, frm);
+ if (ret) {
+ xdp_return_buff(ctx);
+ continue;
+ }
+ }
+
+ switch (act) {
+ case XDP_TX:
+ /* we can't do a real XDP_TX since we're not in the
+ * driver, so turn it into a REDIRECT back to the same
+ * index
+ */
+ ri->tgt_index = xdp->dev->ifindex;
+ ri->map_id = INT_MAX;
+ ri->map_type = BPF_MAP_TYPE_UNSPEC;
+ fallthrough;
+ case XDP_REDIRECT:
+ redirect = true;
+ ret = xdp_do_redirect_frame(xdp->dev, ctx, frm, prog);
+ if (ret)
+ xdp_return_buff(ctx);
+ break;
+ case XDP_PASS:
+ frames[nframes++] = frm;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(NULL, prog, act);
+ fallthrough;
+ case XDP_DROP:
+ xdp_return_buff(ctx);
+ break;
+ }
+ }
+
+out:
+ if (redirect)
+ xdp_do_flush();
+ if (nframes) {
+ ret = xdp_recv_frames(frames, nframes, xdp->skbs, xdp->dev);
+ if (ret)
+ err = ret;
+ }
+
+ xdp_clear_return_frame_no_direct();
+ local_bh_enable();
+ return err;
+}
+
+static int bpf_test_run_xdp_live(struct bpf_prog *prog, struct xdp_buff *ctx,
+ u32 repeat, u32 batch_size, u32 *time)
+{
+ struct xdp_test_data xdp = { .batch_size = batch_size };
+ struct bpf_test_timer t = { .mode = NO_MIGRATE };
+ int ret;
+
+ if (!repeat)
+ repeat = 1;
+
+ ret = xdp_test_run_setup(&xdp, ctx);
+ if (ret)
+ return ret;
+
+ bpf_test_timer_enter(&t);
+ do {
+ xdp.frame_cnt = 0;
+ ret = xdp_test_run_batch(&xdp, prog, repeat - t.i);
+ if (unlikely(ret < 0))
+ break;
+ } while (bpf_test_timer_continue(&t, xdp.frame_cnt, repeat, &ret, time));
+ bpf_test_timer_leave(&t);
+
+ xdp_test_run_teardown(&xdp);
+ return ret;
+}
+
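
For illustration, a minimal userspace sketch of driving this live-frames path through libbpf (a sketch under assumptions: prog_fd is a loaded XDP program, and the zeroed 64-byte packet stands in for a real test frame). In live mode the kernel rejects data_out/ctx_out, and batch_size falls back to NAPI_POLL_WEIGHT when left at zero:

#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <linux/bpf.h>

int run_xdp_live(int prog_fd)
{
	char pkt[64] = {0};	/* illustrative; real tests build a proper Ethernet frame */
	LIBBPF_OPTS(bpf_test_run_opts, opts,
		.data_in = pkt,
		.data_size_in = sizeof(pkt),
		.repeat = 1 << 20,	/* total number of frames to run */
		.batch_size = 64,	/* must be <= TEST_XDP_MAX_BATCH (256) */
		.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
	);

	/* XDP_TX/XDP_REDIRECT frames are actually transmitted;
	 * XDP_PASS frames are built into skbs and fed to the stack.
	 */
	return bpf_prog_test_run_opts(prog_fd, &opts);
}
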
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
u32 *retval, u32 *time, bool xdp)
{
@@ -118,7 +400,7 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
*retval = bpf_prog_run_xdp(prog, ctx);
else
*retval = bpf_prog_run(prog, ctx);
- } while (bpf_test_timer_continue(&t, repeat, &ret, time));
+ } while (bpf_test_timer_continue(&t, 1, repeat, &ret, time));
bpf_reset_run_ctx(old_ctx);
bpf_test_timer_leave(&t);
@@ -130,7 +412,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
static int bpf_test_finish(const union bpf_attr *kattr,
union bpf_attr __user *uattr, const void *data,
- u32 size, u32 retval, u32 duration)
+ struct skb_shared_info *sinfo, u32 size,
+ u32 retval, u32 duration)
{
void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
int err = -EFAULT;
@@ -145,8 +428,42 @@ static int bpf_test_finish(const union bpf_attr *kattr,
err = -ENOSPC;
}
- if (data_out && copy_to_user(data_out, data, copy_size))
- goto out;
+ if (data_out) {
+ int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size;
+
+ if (len < 0) {
+ err = -ENOSPC;
+ goto out;
+ }
+
+ if (copy_to_user(data_out, data, len))
+ goto out;
+
+ if (sinfo) {
+ int i, offset = len;
+ u32 data_len;
+
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+
+ if (offset >= copy_size) {
+ err = -ENOSPC;
+ break;
+ }
+
+ data_len = min_t(u32, copy_size - offset,
+ skb_frag_size(frag));
+
+ if (copy_to_user(data_out + offset,
+ skb_frag_address(frag),
+ data_len))
+ goto out;
+
+ offset += data_len;
+ }
+ }
+ }
+
if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
goto out;
if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
@@ -165,12 +482,14 @@ out:
* future.
*/
__diag_push();
-__diag_ignore(GCC, 8, "-Wmissing-prototypes",
- "Global functions as their definitions will be in vmlinux BTF");
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in vmlinux BTF");
int noinline bpf_fentry_test1(int a)
{
return a + 1;
}
+EXPORT_SYMBOL_GPL(bpf_fentry_test1);
+ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO);
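
With the export and error-injection annotation in place, an fmod_ret program can attach here and override the return value. A minimal BPF-side sketch (assumes vmlinux.h and libbpf's tracing helpers; a nonzero return from the program is used as bpf_fentry_test1()'s return value and the original body is skipped):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

SEC("fmod_ret/bpf_fentry_test1")
int BPF_PROG(override_test1, int a, int ret)
{
	return -22;	/* inject -EINVAL as the return value */
}

char _license[] SEC("license") = "GPL";
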
int noinline bpf_fentry_test2(int a, u64 b)
{
@@ -232,28 +551,221 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
return sk;
}
-__diag_pop();
+struct prog_test_member1 {
+ int a;
+};
-ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
+struct prog_test_member {
+ struct prog_test_member1 m;
+ int c;
+};
+
+struct prog_test_ref_kfunc {
+ int a;
+ int b;
+ struct prog_test_member memb;
+ struct prog_test_ref_kfunc *next;
+ refcount_t cnt;
+};
+
+static struct prog_test_ref_kfunc prog_test_struct = {
+ .a = 42,
+ .b = 108,
+ .next = &prog_test_struct,
+ .cnt = REFCOUNT_INIT(1),
+};
+
+noinline struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
+{
+ refcount_inc(&prog_test_struct.cnt);
+ return &prog_test_struct;
+}
+
+noinline struct prog_test_member *
+bpf_kfunc_call_memb_acquire(void)
+{
+ WARN_ON_ONCE(1);
+ return NULL;
+}
+
+noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
+{
+ if (!p)
+ return;
+
+ refcount_dec(&p->cnt);
+}
+
+noinline void bpf_kfunc_call_memb_release(struct prog_test_member *p)
+{
+}
+
+noinline void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p)
+{
+ WARN_ON_ONCE(1);
+}
+
+static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size)
+{
+ if (size > 2 * sizeof(int))
+ return NULL;
+
+ return (int *)p;
+}
+
+noinline int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size)
+{
+ return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size);
+}
+
+noinline int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size)
+{
+ return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
+}
+
+/* The next two can't really be used for testing, except to ensure that
+ * the verifier rejects the call. Acquire functions must return struct
+ * pointers, so these fail.
+ */
+noinline int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size)
+{
+ return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
+}
+
+noinline void bpf_kfunc_call_int_mem_release(int *p)
+{
+}
+
+noinline struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **pp, int a, int b)
+{
+ struct prog_test_ref_kfunc *p = READ_ONCE(*pp);
+
+ if (!p)
+ return NULL;
+ refcount_inc(&p->cnt);
+ return p;
+}
+
+struct prog_test_pass1 {
+ int x0;
+ struct {
+ int x1;
+ struct {
+ int x2;
+ struct {
+ int x3;
+ };
+ };
+ };
+};
+
+struct prog_test_pass2 {
+ int len;
+ short arr1[4];
+ struct {
+ char arr2[4];
+ unsigned long arr3[8];
+ } x;
+};
+
+struct prog_test_fail1 {
+ void *p;
+ int x;
+};
+
+struct prog_test_fail2 {
+ int x8;
+ struct prog_test_pass1 x;
+};
+
+struct prog_test_fail3 {
+ int len;
+ char arr1[2];
+ char arr2[];
+};
+
+noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
+{
+}
-BTF_SET_START(test_sk_kfunc_ids)
-BTF_ID(func, bpf_kfunc_call_test1)
-BTF_ID(func, bpf_kfunc_call_test2)
-BTF_ID(func, bpf_kfunc_call_test3)
-BTF_SET_END(test_sk_kfunc_ids)
+noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
+{
+}
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner)
+noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
{
- if (btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id))
- return true;
- return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner);
}
-static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
- u32 headroom, u32 tailroom)
+noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
+{
+}
+
+noinline void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_destructive(void)
+{
+}
+
+__diag_pop();
+
+ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
+
+BTF_SET8_START(test_sk_check_kfunc_ids)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test3)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_kptr_get, KF_ACQUIRE | KF_RET_NULL | KF_KPTR_GET)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
+BTF_SET8_END(test_sk_check_kfunc_ids)
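
A minimal BPF-side sketch of calling the acquire/release pair registered above (assumes a vmlinux.h generated from a kernel carrying these test kfuncs; the verifier enforces that the KF_ACQUIRE result is NULL-checked and released):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

extern struct prog_test_ref_kfunc *
bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym;
extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;

SEC("tc")
int kfunc_acq_rel(struct __sk_buff *skb)
{
	unsigned long sp = 0;
	struct prog_test_ref_kfunc *p;

	p = bpf_kfunc_call_test_acquire(&sp);	/* KF_ACQUIRE | KF_RET_NULL */
	if (!p)
		return 0;
	bpf_kfunc_call_test_release(p);		/* KF_RELEASE: must be paired */
	return 0;
}

char _license[] SEC("license") = "GPL";
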
+
+static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
+ u32 size, u32 headroom, u32 tailroom)
{
void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
- u32 user_size = kattr->test.data_size_in;
void *data;
if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
@@ -283,7 +795,7 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
int b = 2, err = -EFAULT;
u32 retval = 0;
- if (kattr->test.flags || kattr->test.cpu)
+ if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size)
return -EINVAL;
switch (prog->expected_attach_type) {
@@ -347,7 +859,7 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
/* doesn't support data_in/out, ctx_out, duration, or repeat */
if (kattr->test.data_in || kattr->test.data_out ||
kattr->test.ctx_out || kattr->test.duration ||
- kattr->test.repeat)
+ kattr->test.repeat || kattr->test.batch_size)
return -EINVAL;
if (ctx_size_in < prog->aux->max_ctx_offset ||
@@ -467,6 +979,9 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
{
struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
+ if (!skb->len)
+ return -EINVAL;
+
if (!__skb)
return 0;
@@ -524,7 +1039,7 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
cb->pkt_len = skb->len;
} else {
if (__skb->wire_len < skb->len ||
- __skb->wire_len > GSO_MAX_SIZE)
+ __skb->wire_len > GSO_LEGACY_MAX_SIZE)
return -EINVAL;
cb->pkt_len = __skb->wire_len;
}
@@ -578,10 +1093,11 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
void *data;
int ret;
- if (kattr->test.flags || kattr->test.cpu)
+ if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size)
return -EINVAL;
- data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
+ data = bpf_test_init(kattr, kattr->test.data_size_in,
+ size, NET_SKB_PAD + NET_IP_ALIGN,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
if (IS_ERR(data))
return PTR_ERR(data);
@@ -683,7 +1199,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
/* bpf program can never convert linear skb to non-linear */
if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
size = skb_headlen(skb);
- ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration);
+ ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval,
+ duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, ctx,
sizeof(struct __sk_buff));
@@ -757,22 +1274,38 @@ static void xdp_convert_buff_to_md(struct xdp_buff *xdp, struct xdp_md *xdp_md)
int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
+ bool do_live = (kattr->test.flags & BPF_F_TEST_XDP_LIVE_FRAMES);
u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- u32 headroom = XDP_PACKET_HEADROOM;
+ u32 batch_size = kattr->test.batch_size;
+ u32 retval = 0, duration, max_data_sz;
u32 size = kattr->test.data_size_in;
+ u32 headroom = XDP_PACKET_HEADROOM;
u32 repeat = kattr->test.repeat;
struct netdev_rx_queue *rxqueue;
+ struct skb_shared_info *sinfo;
struct xdp_buff xdp = {};
- u32 retval, duration;
+ int i, ret = -EINVAL;
struct xdp_md *ctx;
- u32 max_data_sz;
void *data;
- int ret = -EINVAL;
if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
prog->expected_attach_type == BPF_XDP_CPUMAP)
return -EINVAL;
+ if (kattr->test.flags & ~BPF_F_TEST_XDP_LIVE_FRAMES)
+ return -EINVAL;
+
+ if (do_live) {
+ if (!batch_size)
+ batch_size = NAPI_POLL_WEIGHT;
+ else if (batch_size > TEST_XDP_MAX_BATCH)
+ return -E2BIG;
+
+ headroom += sizeof(struct xdp_page_head);
+ } else if (batch_size) {
+ return -EINVAL;
+ }
+
ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md));
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -781,33 +1314,81 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
/* There can't be user provided data before the meta data */
if (ctx->data_meta || ctx->data_end != size ||
ctx->data > ctx->data_end ||
- unlikely(xdp_metalen_invalid(ctx->data)))
+ unlikely(xdp_metalen_invalid(ctx->data)) ||
+ (do_live && (kattr->test.data_out || kattr->test.ctx_out)))
goto free_ctx;
/* Meta data is allocated from the headroom */
headroom -= ctx->data;
}
- /* XDP have extra tailroom as (most) drivers use full page */
max_data_sz = 4096 - headroom - tailroom;
+ if (size > max_data_sz) {
+ /* disallow live data mode for jumbo frames */
+ if (do_live)
+ goto free_ctx;
+ size = max_data_sz;
+ }
- data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
+ data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom);
if (IS_ERR(data)) {
ret = PTR_ERR(data);
goto free_ctx;
}
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
- xdp_init_buff(&xdp, headroom + max_data_sz + tailroom,
- &rxqueue->xdp_rxq);
+ rxqueue->xdp_rxq.frag_size = headroom + max_data_sz + tailroom;
+ xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq);
xdp_prepare_buff(&xdp, data, headroom, size, true);
+ sinfo = xdp_get_shared_info_from_buff(&xdp);
ret = xdp_convert_md_to_buff(ctx, &xdp);
if (ret)
goto free_data;
+ if (unlikely(kattr->test.data_size_in > size)) {
+ void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
+
+ while (size < kattr->test.data_size_in) {
+ struct page *page;
+ skb_frag_t *frag;
+ u32 data_len;
+
+ if (sinfo->nr_frags == MAX_SKB_FRAGS) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ frag = &sinfo->frags[sinfo->nr_frags++];
+ __skb_frag_set_page(frag, page);
+
+ data_len = min_t(u32, kattr->test.data_size_in - size,
+ PAGE_SIZE);
+ skb_frag_size_set(frag, data_len);
+
+ if (copy_from_user(page_address(page), data_in + size,
+ data_len)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ sinfo->xdp_frags_size += data_len;
+ size += data_len;
+ }
+ xdp_buff_set_frags_flag(&xdp);
+ }
+
if (repeat > 1)
bpf_prog_change_xdp(NULL, prog);
- ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
+
+ if (do_live)
+ ret = bpf_test_run_xdp_live(prog, &xdp, repeat, batch_size, &duration);
+ else
+ ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
/* We convert the xdp_buff back to an xdp_md before checking the return
* code so the reference count of any held netdevice will be decremented
* even if the test run failed.
@@ -816,12 +1397,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
if (ret)
goto out;
- if (xdp.data_meta != data + headroom ||
- xdp.data_end != xdp.data_meta + size)
- size = xdp.data_end - xdp.data_meta;
-
- ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval,
- duration);
+ size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size;
+ ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size,
+ retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, ctx,
sizeof(struct xdp_md));
@@ -830,6 +1408,8 @@ out:
if (repeat > 1)
bpf_prog_change_xdp(prog, NULL);
free_data:
+ for (i = 0; i < sinfo->nr_frags; i++)
+ __free_page(skb_frag_page(&sinfo->frags[i]));
kfree(data);
free_ctx:
kfree(ctx);
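
A minimal userspace sketch of the multi-buffer path added above (assumptions: prog_fd is an XDP program loaded with BPF_F_XDP_HAS_FRAGS where the kernel requires it, and 9000 bytes is an arbitrary jumbo size). data_in beyond the linear area is copied into page fragments, and on completion the linear part and the frags are copied back-to-back into data_out:

#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <string.h>

int run_xdp_mb(int prog_fd)
{
	static char in[9000], out[9000];

	memset(in, 0xab, sizeof(in));

	LIBBPF_OPTS(bpf_test_run_opts, opts,
		.data_in = in,
		.data_size_in = sizeof(in),	/* > one page: kernel builds frags */
		.data_out = out,
		.data_size_out = sizeof(out),
		.repeat = 1,
	);

	return bpf_prog_test_run_opts(prog_fd, &opts);
}
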
@@ -867,16 +1447,13 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
void *data;
int ret;
- if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
- return -EINVAL;
-
- if (kattr->test.flags || kattr->test.cpu)
+ if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size)
return -EINVAL;
if (size < ETH_HLEN)
return -EINVAL;
- data = bpf_test_init(kattr, size, 0, 0);
+ data = bpf_test_init(kattr, kattr->test.data_size_in, size, 0, 0);
if (IS_ERR(data))
return PTR_ERR(data);
@@ -905,14 +1482,14 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
do {
retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
size, flags);
- } while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
+ } while (bpf_test_timer_continue(&t, 1, repeat, &ret, &duration));
bpf_test_timer_leave(&t);
if (ret < 0)
goto out;
- ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
- retval, duration);
+ ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL,
+ sizeof(flow_keys), retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, user_ctx,
sizeof(struct bpf_flow_keys));
@@ -934,10 +1511,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
u32 retval, duration;
int ret = -EINVAL;
- if (prog->type != BPF_PROG_TYPE_SK_LOOKUP)
- return -EINVAL;
-
- if (kattr->test.flags || kattr->test.cpu)
+ if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size)
return -EINVAL;
if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out ||
@@ -960,7 +1534,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
goto out;
- if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) {
+ if (user_ctx->local_port > U16_MAX) {
ret = -ERANGE;
goto out;
}
@@ -968,7 +1542,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
ctx.family = (u16)user_ctx->family;
ctx.protocol = (u16)user_ctx->protocol;
ctx.dport = (u16)user_ctx->local_port;
- ctx.sport = (__force __be16)user_ctx->remote_port;
+ ctx.sport = user_ctx->remote_port;
switch (ctx.family) {
case AF_INET:
@@ -1000,7 +1574,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
do {
ctx.selected_sk = NULL;
retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, bpf_prog_run);
- } while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
+ } while (bpf_test_timer_continue(&t, 1, repeat, &ret, &duration));
bpf_test_timer_leave(&t);
if (ret < 0)
@@ -1016,7 +1590,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
}
- ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
+ ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
@@ -1039,7 +1613,8 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
/* doesn't support data_in/out, ctx_out, duration, or repeat or flags */
if (kattr->test.data_in || kattr->test.data_out ||
kattr->test.ctx_out || kattr->test.duration ||
- kattr->test.repeat || kattr->test.flags)
+ kattr->test.repeat || kattr->test.flags ||
+ kattr->test.batch_size)
return -EINVAL;
if (ctx_size_in < prog->aux->max_ctx_offset ||
@@ -1067,3 +1642,37 @@ out:
kfree(ctx);
return err;
}
+
+static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &test_sk_check_kfunc_ids,
+};
+
+BTF_ID_LIST(bpf_prog_test_dtor_kfunc_ids)
+BTF_ID(struct, prog_test_ref_kfunc)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_ID(struct, prog_test_member)
+BTF_ID(func, bpf_kfunc_call_memb_release)
+
+static int __init bpf_prog_test_run_init(void)
+{
+ const struct btf_id_dtor_kfunc bpf_prog_test_dtor_kfunc[] = {
+ {
+ .btf_id = bpf_prog_test_dtor_kfunc_ids[0],
+ .kfunc_btf_id = bpf_prog_test_dtor_kfunc_ids[1]
+ },
+ {
+ .btf_id = bpf_prog_test_dtor_kfunc_ids[2],
+ .kfunc_btf_id = bpf_prog_test_dtor_kfunc_ids[3],
+ },
+ };
+ int ret;
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_prog_test_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_prog_test_kfunc_set);
+ return ret ?: register_btf_id_dtor_kfuncs(bpf_prog_test_dtor_kfunc,
+ ARRAY_SIZE(bpf_prog_test_dtor_kfunc),
+ THIS_MODULE);
+}
+late_initcall(bpf_prog_test_run_init);
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index 51a941b56ec3..422ec6e7ccff 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -70,7 +70,7 @@ static int bpfilter_process_sockopt(struct sock *sk, int optname,
.addr = (uintptr_t)optval.user,
.len = optlen,
};
- if (uaccess_kernel() || sockptr_is_kernel(optval)) {
+ if (sockptr_is_kernel(optval)) {
pr_err("kernel access not supported\n");
return -EFAULT;
}
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index 7fb9a021873b..24bd1c0a9a5a 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -20,7 +20,7 @@ obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o br_multicast_eht.o
-bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o br_vlan_tunnel.o br_vlan_options.o
+bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o br_vlan_tunnel.o br_vlan_options.o br_mst.o
bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 1fac72cc617f..96e91d69a9a8 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -265,6 +265,9 @@ int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on,
case BR_BOOLOPT_MCAST_VLAN_SNOOPING:
err = br_multicast_toggle_vlan_snooping(br, on, extack);
break;
+ case BR_BOOLOPT_MST_ENABLE:
+ err = br_mst_set_enabled(br, on, extack);
+ break;
default:
/* shouldn't be called with unsupported options */
WARN_ON(1);
@@ -281,6 +284,8 @@ int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt)
return br_opt_get(br, BROPT_NO_LL_LEARN);
case BR_BOOLOPT_MCAST_VLAN_SNOOPING:
return br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED);
+ case BR_BOOLOPT_MST_ENABLE:
+ return br_opt_get(br, BROPT_MST_ENABLED);
default:
/* shouldn't be called with unsupported options */
WARN_ON(1);
@@ -342,23 +347,26 @@ void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on)
clear_bit(opt, &br->options);
}
-static void __net_exit br_net_exit(struct net *net)
+static void __net_exit br_net_exit_batch(struct list_head *net_list)
{
struct net_device *dev;
+ struct net *net;
LIST_HEAD(list);
rtnl_lock();
- for_each_netdev(net, dev)
- if (netif_is_bridge_master(dev))
- br_dev_delete(dev, &list);
+
+ list_for_each_entry(net, net_list, exit_list)
+ for_each_netdev(net, dev)
+ if (netif_is_bridge_master(dev))
+ br_dev_delete(dev, &list);
unregister_netdevice_many(&list);
- rtnl_unlock();
+ rtnl_unlock();
}
static struct pernet_operations br_net_ops = {
- .exit = br_net_exit,
+ .exit_batch = br_net_exit_batch,
};
static const struct stp_proto br_stp_proto = {
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
index 3db1def4437b..e5e48c6e35d7 100644
--- a/net/bridge/br_arp_nd_proxy.c
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -84,7 +84,7 @@ static void br_arp_send(struct net_bridge *br, struct net_bridge_port *p,
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->pkt_type = PACKET_HOST;
- netif_rx_ni(skb);
+ netif_rx(skb);
}
}
@@ -364,7 +364,7 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p,
reply->ip_summed = CHECKSUM_UNNECESSARY;
reply->pkt_type = PACKET_HOST;
- netif_rx_ni(reply);
+ netif_rx(reply);
}
}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 8d6bab244c4a..b82906fc999a 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -251,10 +251,10 @@ static int br_set_mac_address(struct net_device *dev, void *p)
static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
- strlcpy(info->driver, "bridge", sizeof(info->driver));
- strlcpy(info->version, BR_VERSION, sizeof(info->version));
- strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
- strlcpy(info->bus_info, "N/A", sizeof(info->bus_info));
+ strscpy(info->driver, "bridge", sizeof(info->driver));
+ strscpy(info->version, BR_VERSION, sizeof(info->version));
+ strscpy(info->fw_version, "N/A", sizeof(info->fw_version));
+ strscpy(info->bus_info, "N/A", sizeof(info->bus_info));
}
static int br_get_link_ksettings(struct net_device *dev,
@@ -465,6 +465,7 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_fix_features = br_fix_features,
.ndo_fdb_add = br_fdb_add,
.ndo_fdb_del = br_fdb_delete,
+ .ndo_fdb_del_bulk = br_fdb_delete_bulk,
.ndo_fdb_dump = br_fdb_dump,
.ndo_fdb_get = br_fdb_get,
.ndo_bridge_getlink = br_getlink,
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 6ccda68bd473..e7f4fccb6adb 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -558,18 +558,161 @@ void br_fdb_cleanup(struct work_struct *work)
mod_delayed_work(system_long_wq, &br->gc_work, work_delay);
}
-/* Completely flush all dynamic entries in forwarding database.*/
-void br_fdb_flush(struct net_bridge *br)
+static bool __fdb_flush_matches(const struct net_bridge *br,
+ const struct net_bridge_fdb_entry *f,
+ const struct net_bridge_fdb_flush_desc *desc)
+{
+ const struct net_bridge_port *dst = READ_ONCE(f->dst);
+ int port_ifidx = dst ? dst->dev->ifindex : br->dev->ifindex;
+
+ if (desc->vlan_id && desc->vlan_id != f->key.vlan_id)
+ return false;
+ if (desc->port_ifindex && desc->port_ifindex != port_ifidx)
+ return false;
+ if (desc->flags_mask && (f->flags & desc->flags_mask) != desc->flags)
+ return false;
+
+ return true;
+}
+
+/* Flush forwarding database entries matching the description */
+void br_fdb_flush(struct net_bridge *br,
+ const struct net_bridge_fdb_flush_desc *desc)
{
struct net_bridge_fdb_entry *f;
- struct hlist_node *tmp;
- spin_lock_bh(&br->hash_lock);
- hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) {
- if (!test_bit(BR_FDB_STATIC, &f->flags))
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
+ if (!__fdb_flush_matches(br, f, desc))
+ continue;
+
+ spin_lock_bh(&br->hash_lock);
+ if (!hlist_unhashed(&f->fdb_node))
fdb_delete(br, f, true);
+ spin_unlock_bh(&br->hash_lock);
}
- spin_unlock_bh(&br->hash_lock);
+ rcu_read_unlock();
+}
+
+static unsigned long __ndm_state_to_fdb_flags(u16 ndm_state)
+{
+ unsigned long flags = 0;
+
+ if (ndm_state & NUD_PERMANENT)
+ __set_bit(BR_FDB_LOCAL, &flags);
+ if (ndm_state & NUD_NOARP)
+ __set_bit(BR_FDB_STATIC, &flags);
+
+ return flags;
+}
+
+static unsigned long __ndm_flags_to_fdb_flags(u8 ndm_flags)
+{
+ unsigned long flags = 0;
+
+ if (ndm_flags & NTF_USE)
+ __set_bit(BR_FDB_ADDED_BY_USER, &flags);
+ if (ndm_flags & NTF_EXT_LEARNED)
+ __set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &flags);
+ if (ndm_flags & NTF_OFFLOADED)
+ __set_bit(BR_FDB_OFFLOADED, &flags);
+ if (ndm_flags & NTF_STICKY)
+ __set_bit(BR_FDB_STICKY, &flags);
+
+ return flags;
+}
+
+static int __fdb_flush_validate_ifindex(const struct net_bridge *br,
+ int ifindex,
+ struct netlink_ext_ack *extack)
+{
+ const struct net_device *dev;
+
+ dev = __dev_get_by_index(dev_net(br->dev), ifindex);
+ if (!dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Unknown flush device ifindex");
+ return -ENODEV;
+ }
+ if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Flush device is not a bridge or bridge port");
+ return -EINVAL;
+ }
+ if (netif_is_bridge_master(dev) && dev != br->dev) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Flush bridge device does not match target bridge device");
+ return -EINVAL;
+ }
+ if (netif_is_bridge_port(dev)) {
+ struct net_bridge_port *p = br_port_get_rtnl(dev);
+
+ if (p->br != br) {
+ NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev, u16 vid,
+ struct netlink_ext_ack *extack)
+{
+ u8 ndm_flags = ndm->ndm_flags & ~FDB_FLUSH_IGNORED_NDM_FLAGS;
+ struct net_bridge_fdb_flush_desc desc = { .vlan_id = vid };
+ struct net_bridge_port *p = NULL;
+ struct net_bridge *br;
+
+ if (netif_is_bridge_master(dev)) {
+ br = netdev_priv(dev);
+ } else {
+ p = br_port_get_rtnl(dev);
+ if (!p) {
+ NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge port");
+ return -EINVAL;
+ }
+ br = p->br;
+ }
+
+ if (ndm_flags & ~FDB_FLUSH_ALLOWED_NDM_FLAGS) {
+ NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm flag bits set");
+ return -EINVAL;
+ }
+ if (ndm->ndm_state & ~FDB_FLUSH_ALLOWED_NDM_STATES) {
+ NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm state bits set");
+ return -EINVAL;
+ }
+
+ desc.flags |= __ndm_state_to_fdb_flags(ndm->ndm_state);
+ desc.flags |= __ndm_flags_to_fdb_flags(ndm_flags);
+ if (tb[NDA_NDM_STATE_MASK]) {
+ u16 ndm_state_mask = nla_get_u16(tb[NDA_NDM_STATE_MASK]);
+
+ desc.flags_mask |= __ndm_state_to_fdb_flags(ndm_state_mask);
+ }
+ if (tb[NDA_NDM_FLAGS_MASK]) {
+ u8 ndm_flags_mask = nla_get_u8(tb[NDA_NDM_FLAGS_MASK]);
+
+ desc.flags_mask |= __ndm_flags_to_fdb_flags(ndm_flags_mask);
+ }
+ if (tb[NDA_IFINDEX]) {
+ int err, ifidx = nla_get_s32(tb[NDA_IFINDEX]);
+
+ err = __fdb_flush_validate_ifindex(br, ifidx, extack);
+ if (err)
+ return err;
+ desc.port_ifindex = ifidx;
+ } else if (p) {
+ /* flush was invoked with port device and NTF_MASTER */
+ desc.port_ifindex = p->dev->ifindex;
+ }
+
+ br_debug(br, "flushing port ifindex: %d vlan id: %u flags: 0x%lx flags mask: 0x%lx\n",
+ desc.port_ifindex, desc.vlan_id, desc.flags, desc.flags_mask);
+
+ br_fdb_flush(br, &desc);
+
+ return 0;
}
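
For reference, a raw-netlink sketch of exercising this handler (an assumed message layout, needing uapi headers that define NLM_F_BULK; error and ACK handling omitted): an RTM_DELNEIGH request with NLM_F_BULK and no address, scoped to one VLAN via NDA_VLAN:

#include <linux/netlink.h>
#include <linux/neighbour.h>
#include <linux/rtnetlink.h>
#include <net/if.h>
#include <sys/socket.h>
#include <unistd.h>

int fdb_flush_vlan(const char *brname, unsigned short vid)
{
	struct {
		struct nlmsghdr nlh;
		struct ndmsg ndm;
		char attrs[64];
	} req = {0};
	struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
	struct rtattr *rta;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	req.nlh.nlmsg_type = RTM_DELNEIGH;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_BULK | NLM_F_ACK;
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ndm));
	req.ndm.ndm_family = PF_BRIDGE;
	req.ndm.ndm_ifindex = if_nametoindex(brname);

	/* NDA_VLAN narrows the flush to one VLAN */
	rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
	rta->rta_type = NDA_VLAN;
	rta->rta_len = RTA_LENGTH(sizeof(vid));
	*(unsigned short *)RTA_DATA(rta) = vid;
	req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + rta->rta_len;

	sendto(fd, &req, req.nlh.nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa));
	close(fd);
	return 0;
}
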
/* Flush all entries referring to a specific port.
@@ -1110,7 +1253,8 @@ static int __br_fdb_delete(struct net_bridge *br,
/* Remove neighbor entry with RTM_DELNEIGH */
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
- const unsigned char *addr, u16 vid)
+ const unsigned char *addr, u16 vid,
+ struct netlink_ext_ack *extack)
{
struct net_bridge_vlan_group *vg;
struct net_bridge_port *p = NULL;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index ec646656dbf1..02bb620d3b8d 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -62,7 +62,7 @@ EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit);
int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING,
net, sk, skb, NULL, skb->dev,
br_dev_queue_push_xmit);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index a52ad81596b7..228fd5b20f10 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -40,12 +40,21 @@ static int port_cost(struct net_device *dev)
switch (ecmd.base.speed) {
case SPEED_10000:
return 2;
- case SPEED_1000:
+ case SPEED_5000:
+ return 3;
+ case SPEED_2500:
return 4;
+ case SPEED_1000:
+ return 5;
case SPEED_100:
return 19;
case SPEED_10:
return 100;
+ case SPEED_UNKNOWN:
+ return 100;
+ default:
+ if (ecmd.base.speed > SPEED_10000)
+ return 1;
}
}
@@ -274,7 +283,7 @@ static void destroy_nbp(struct net_bridge_port *p)
p->br = NULL;
p->dev = NULL;
- dev_put_track(dev, &p->dev_tracker);
+ netdev_put(dev, &p->dev_tracker);
kobject_put(&p->kobj);
}
@@ -423,7 +432,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
return ERR_PTR(-ENOMEM);
p->br = br;
- dev_hold_track(dev, &p->dev_tracker, GFP_KERNEL);
+ netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
p->dev = dev;
p->path_cost = port_cost(dev);
p->priority = 0x8000 >> BR_PORT_BITS;
@@ -434,7 +443,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
br_stp_port_timer_init(p);
err = br_multicast_add_port(p);
if (err) {
- dev_put_track(dev, &p->dev_tracker);
+ netdev_put(dev, &p->dev_tracker);
kfree(p);
p = ERR_PTR(err);
}
@@ -517,16 +526,16 @@ void br_mtu_auto_adjust(struct net_bridge *br)
static void br_set_gso_limits(struct net_bridge *br)
{
- unsigned int gso_max_size = GSO_MAX_SIZE;
- u16 gso_max_segs = GSO_MAX_SEGS;
+ unsigned int tso_max_size = TSO_MAX_SIZE;
const struct net_bridge_port *p;
+ u16 tso_max_segs = TSO_MAX_SEGS;
list_for_each_entry(p, &br->port_list, list) {
- gso_max_size = min(gso_max_size, p->dev->gso_max_size);
- gso_max_segs = min(gso_max_segs, p->dev->gso_max_segs);
+ tso_max_size = min(tso_max_size, p->dev->tso_max_size);
+ tso_max_segs = min(tso_max_segs, p->dev->tso_max_segs);
}
- netif_set_gso_max_size(br->dev, gso_max_size);
- netif_set_gso_max_segs(br->dev, gso_max_segs);
+ netif_set_tso_max_size(br->dev, tso_max_size);
+ netif_set_tso_max_segs(br->dev, tso_max_segs);
}
/*
@@ -568,26 +577,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
!is_valid_ether_addr(dev->dev_addr))
return -EINVAL;
- /* Also don't allow bridging of net devices that are DSA masters, since
- * the bridge layer rx_handler prevents the DSA fake ethertype handler
- * to be invoked, so we don't get the chance to strip off and parse the
- * DSA switch tag protocol header (the bridge layer just returns
- * RX_HANDLER_CONSUMED, stopping RX processing for these frames).
- * The only case where that would not be an issue is when bridging can
- * already be offloaded, such as when the DSA master is itself a DSA
- * or plain switchdev port, and is bridged only with other ports from
- * the same hardware device.
- */
- if (netdev_uses_dsa(dev)) {
- list_for_each_entry(p, &br->port_list, list) {
- if (!netdev_port_same_parent_id(dev, p->dev)) {
- NL_SET_ERR_MSG(extack,
- "Cannot do software bridging with a DSA master");
- return -EINVAL;
- }
- }
- }
-
/* No bridging of bridges */
if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) {
NL_SET_ERR_MSG(extack,
@@ -615,6 +604,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
err = dev_set_allmulti(dev, 1);
if (err) {
br_multicast_del_port(p);
+ netdev_put(dev, &p->dev_tracker);
kfree(p); /* kobject not yet init'd, manually free */
goto err1;
}
@@ -724,10 +714,10 @@ err3:
sysfs_remove_link(br->ifobj, p->dev->name);
err2:
br_multicast_del_port(p);
+ netdev_put(dev, &p->dev_tracker);
kobject_put(&p->kobj);
dev_set_allmulti(dev, -1);
err1:
- dev_put(dev);
return err;
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index b50382f957c1..68b3e850bcb9 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -39,6 +39,13 @@ static int br_pass_frame_up(struct sk_buff *skb)
dev_sw_netstats_rx_add(brdev, skb->len);
vg = br_vlan_group_rcu(br);
+
+ /* Reset the offload_fwd_mark because there could be a stacked
+ * bridge above, and it should not think this bridge is doing
+ * that bridge's work forwarding out its ports.
+ */
+ br_switchdev_frame_unmark(skb);
+
/* Bridge is just like any other port. Make sure the
* packet is allowed except in promisc mode when someone
* may be running packet capture.
@@ -78,20 +85,38 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
u16 vid = 0;
u8 state;
- if (!p || p->state == BR_STATE_DISABLED)
+ if (!p)
goto drop;
+ br = p->br;
+
+ if (br_mst_is_enabled(br)) {
+ state = BR_STATE_FORWARDING;
+ } else {
+ if (p->state == BR_STATE_DISABLED)
+ goto drop;
+
+ state = p->state;
+ }
+
brmctx = &p->br->multicast_ctx;
pmctx = &p->multicast_ctx;
- state = p->state;
if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid,
&state, &vlan))
goto out;
+ if (p->flags & BR_PORT_LOCKED) {
+ struct net_bridge_fdb_entry *fdb_src =
+ br_fdb_find_rcu(br, eth_hdr(skb)->h_source, vid);
+
+ if (!fdb_src || READ_ONCE(fdb_src->dst) != p ||
+ test_bit(BR_FDB_LOCAL, &fdb_src->flags))
+ goto drop;
+ }
+
nbp_switchdev_frame_mark(p, skb);
/* insert into forwarding database after filtering to avoid spoofing */
- br = p->br;
if (p->flags & BR_LEARNING)
br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0);
@@ -361,9 +386,13 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
return RX_HANDLER_PASS;
forward:
+ if (br_mst_is_enabled(p->br))
+ goto defer_stp_filtering;
+
switch (p->state) {
case BR_STATE_FORWARDING:
case BR_STATE_LEARNING:
+defer_stp_filtering:
if (ether_addr_equal(p->br->dev->dev_addr, dest))
skb->pkt_type = PACKET_HOST;
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 4556d913955b..589ff497d50c 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -251,14 +251,16 @@ static int __mdb_fill_info(struct sk_buff *skb,
__mdb_entry_fill_flags(&e, flags);
e.ifindex = ifindex;
e.vid = mp->addr.vid;
- if (mp->addr.proto == htons(ETH_P_IP))
+ if (mp->addr.proto == htons(ETH_P_IP)) {
e.addr.u.ip4 = mp->addr.dst.ip4;
#if IS_ENABLED(CONFIG_IPV6)
- else if (mp->addr.proto == htons(ETH_P_IPV6))
+ } else if (mp->addr.proto == htons(ETH_P_IPV6)) {
e.addr.u.ip6 = mp->addr.dst.ip6;
#endif
- else
+ } else {
ether_addr_copy(e.addr.u.mac_addr, mp->addr.dst.mac_addr);
+ e.state = MDB_PG_FLAGS_PERMANENT;
+ }
e.addr.proto = mp->addr.proto;
nest_ent = nla_nest_start_noflag(skb,
MDBA_MDB_ENTRY_INFO);
@@ -873,8 +875,8 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
return -EINVAL;
/* host join errors which can happen before creating the group */
- if (!port) {
- /* don't allow any flags for host-joined groups */
+ if (!port && !br_group_is_l2(&group)) {
+ /* don't allow any flags for host-joined IP groups */
if (entry->state) {
NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups");
return -EINVAL;
@@ -1023,8 +1025,8 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device");
return -EINVAL;
}
- if (p->state == BR_STATE_DISABLED) {
- NL_SET_ERR_MSG_MOD(extack, "Port is in disabled state");
+ if (p->state == BR_STATE_DISABLED && entry->state != MDB_PERMANENT) {
+ NL_SET_ERR_MSG_MOD(extack, "Port is in disabled state and entry is not permanent");
return -EINVAL;
}
vg = nbp_vlan_group(p);
@@ -1084,9 +1086,6 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry,
if (!p->key.port || p->key.port->dev->ifindex != entry->ifindex)
continue;
- if (p->key.port->state == BR_STATE_DISABLED)
- goto unlock;
-
br_multicast_del_pg(mp, p, pp);
err = 0;
break;
@@ -1122,8 +1121,14 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
return -ENODEV;
p = br_port_get_rtnl(pdev);
- if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+ if (!p) {
+ NL_SET_ERR_MSG_MOD(extack, "Net device is not a bridge port");
return -EINVAL;
+ }
+ if (p->br != br) {
+ NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device");
+ return -EINVAL;
+ }
vg = nbp_vlan_group(p);
} else {
vg = br_vlan_group(br);
diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c
new file mode 100644
index 000000000000..ee680adcee17
--- /dev/null
+++ b/net/bridge/br_mst.c
@@ -0,0 +1,357 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Bridge Multiple Spanning Tree Support
+ *
+ * Authors:
+ * Tobias Waldekranz <tobias@waldekranz.com>
+ */
+
+#include <linux/kernel.h>
+#include <net/switchdev.h>
+
+#include "br_private.h"
+
+DEFINE_STATIC_KEY_FALSE(br_mst_used);
+
+bool br_mst_enabled(const struct net_device *dev)
+{
+ if (!netif_is_bridge_master(dev))
+ return false;
+
+ return br_opt_get(netdev_priv(dev), BROPT_MST_ENABLED);
+}
+EXPORT_SYMBOL_GPL(br_mst_enabled);
+
+int br_mst_get_info(const struct net_device *dev, u16 msti, unsigned long *vids)
+{
+ const struct net_bridge_vlan_group *vg;
+ const struct net_bridge_vlan *v;
+ const struct net_bridge *br;
+
+ ASSERT_RTNL();
+
+ if (!netif_is_bridge_master(dev))
+ return -EINVAL;
+
+ br = netdev_priv(dev);
+ if (!br_opt_get(br, BROPT_MST_ENABLED))
+ return -EINVAL;
+
+ vg = br_vlan_group(br);
+
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ if (v->msti == msti)
+ __set_bit(v->vid, vids);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(br_mst_get_info);
+
+int br_mst_get_state(const struct net_device *dev, u16 msti, u8 *state)
+{
+ const struct net_bridge_port *p = NULL;
+ const struct net_bridge_vlan_group *vg;
+ const struct net_bridge_vlan *v;
+
+ ASSERT_RTNL();
+
+ p = br_port_get_check_rtnl(dev);
+ if (!p || !br_opt_get(p->br, BROPT_MST_ENABLED))
+ return -EINVAL;
+
+ vg = nbp_vlan_group(p);
+
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ if (v->brvlan->msti == msti) {
+ *state = v->state;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+EXPORT_SYMBOL_GPL(br_mst_get_state);
+
+static void br_mst_vlan_set_state(struct net_bridge_port *p, struct net_bridge_vlan *v,
+ u8 state)
+{
+ struct net_bridge_vlan_group *vg = nbp_vlan_group(p);
+
+ if (v->state == state)
+ return;
+
+ br_vlan_set_state(v, state);
+
+ if (v->vid == vg->pvid)
+ br_vlan_set_pvid_state(vg, state);
+}
+
+int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state,
+ struct netlink_ext_ack *extack)
+{
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_ID_PORT_MST_STATE,
+ .orig_dev = p->dev,
+ .u.mst_state = {
+ .msti = msti,
+ .state = state,
+ },
+ };
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *v;
+ int err;
+
+ vg = nbp_vlan_group(p);
+ if (!vg)
+ return 0;
+
+ /* MSTI 0 (CST) state changes are notified via the regular
+ * SWITCHDEV_ATTR_ID_PORT_STP_STATE.
+ */
+ if (msti) {
+ err = switchdev_port_attr_set(p->dev, &attr, extack);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+ }
+
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ if (v->brvlan->msti != msti)
+ continue;
+
+ br_mst_vlan_set_state(p, v, state);
+ }
+
+ return 0;
+}
+
+static void br_mst_vlan_sync_state(struct net_bridge_vlan *pv, u16 msti)
+{
+ struct net_bridge_vlan_group *vg = nbp_vlan_group(pv->port);
+ struct net_bridge_vlan *v;
+
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ /* If this port already has a defined state in this
+ * MSTI (through some other VLAN membership), inherit
+ * it.
+ */
+ if (v != pv && v->brvlan->msti == msti) {
+ br_mst_vlan_set_state(pv->port, pv, v->state);
+ return;
+ }
+ }
+
+ /* Otherwise, start out in a new MSTI with all ports disabled. */
+ return br_mst_vlan_set_state(pv->port, pv, BR_STATE_DISABLED);
+}
+
+int br_mst_vlan_set_msti(struct net_bridge_vlan *mv, u16 msti)
+{
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_ID_VLAN_MSTI,
+ .orig_dev = mv->br->dev,
+ .u.vlan_msti = {
+ .vid = mv->vid,
+ .msti = msti,
+ },
+ };
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *pv;
+ struct net_bridge_port *p;
+ int err;
+
+ if (mv->msti == msti)
+ return 0;
+
+ err = switchdev_port_attr_set(mv->br->dev, &attr, NULL);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ mv->msti = msti;
+
+ list_for_each_entry(p, &mv->br->port_list, list) {
+ vg = nbp_vlan_group(p);
+
+ pv = br_vlan_find(vg, mv->vid);
+ if (pv)
+ br_mst_vlan_sync_state(pv, msti);
+ }
+
+ return 0;
+}
+
+void br_mst_vlan_init_state(struct net_bridge_vlan *v)
+{
+ /* VLANs always start out in MSTI 0 (CST) */
+ v->msti = 0;
+
+ if (br_vlan_is_master(v))
+ v->state = BR_STATE_FORWARDING;
+ else
+ v->state = v->port->state;
+}
+
+int br_mst_set_enabled(struct net_bridge *br, bool on,
+ struct netlink_ext_ack *extack)
+{
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_ID_BRIDGE_MST,
+ .orig_dev = br->dev,
+ .u.mst = on,
+ };
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_port *p;
+ int err;
+
+ list_for_each_entry(p, &br->port_list, list) {
+ vg = nbp_vlan_group(p);
+
+ if (!vg->num_vlans)
+ continue;
+
+ NL_SET_ERR_MSG(extack,
+ "MST mode can't be changed while VLANs exist");
+ return -EBUSY;
+ }
+
+ if (br_opt_get(br, BROPT_MST_ENABLED) == on)
+ return 0;
+
+ err = switchdev_port_attr_set(br->dev, &attr, extack);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ if (on)
+ static_branch_enable(&br_mst_used);
+ else
+ static_branch_disable(&br_mst_used);
+
+ br_opt_toggle(br, BROPT_MST_ENABLED, on);
+ return 0;
+}
+
+size_t br_mst_info_size(const struct net_bridge_vlan_group *vg)
+{
+ DECLARE_BITMAP(seen, VLAN_N_VID) = { 0 };
+ const struct net_bridge_vlan *v;
+ size_t sz;
+
+ /* IFLA_BRIDGE_MST */
+ sz = nla_total_size(0);
+
+ list_for_each_entry_rcu(v, &vg->vlan_list, vlist) {
+ if (test_bit(v->brvlan->msti, seen))
+ continue;
+
+ /* IFLA_BRIDGE_MST_ENTRY */
+ sz += nla_total_size(0) +
+ /* IFLA_BRIDGE_MST_ENTRY_MSTI */
+ nla_total_size(sizeof(u16)) +
+ /* IFLA_BRIDGE_MST_ENTRY_STATE */
+ nla_total_size(sizeof(u8));
+
+ __set_bit(v->brvlan->msti, seen);
+ }
+
+ return sz;
+}
+
+int br_mst_fill_info(struct sk_buff *skb,
+ const struct net_bridge_vlan_group *vg)
+{
+ DECLARE_BITMAP(seen, VLAN_N_VID) = { 0 };
+ const struct net_bridge_vlan *v;
+ struct nlattr *nest;
+ int err = 0;
+
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ if (test_bit(v->brvlan->msti, seen))
+ continue;
+
+ nest = nla_nest_start_noflag(skb, IFLA_BRIDGE_MST_ENTRY);
+ if (!nest ||
+ nla_put_u16(skb, IFLA_BRIDGE_MST_ENTRY_MSTI, v->brvlan->msti) ||
+ nla_put_u8(skb, IFLA_BRIDGE_MST_ENTRY_STATE, v->state)) {
+ err = -EMSGSIZE;
+ break;
+ }
+ nla_nest_end(skb, nest);
+
+ __set_bit(v->brvlan->msti, seen);
+ }
+
+ return err;
+}
+
+static const struct nla_policy br_mst_nl_policy[IFLA_BRIDGE_MST_ENTRY_MAX + 1] = {
+ [IFLA_BRIDGE_MST_ENTRY_MSTI] = NLA_POLICY_RANGE(NLA_U16,
+ 1, /* 0 reserved for CST */
+ VLAN_N_VID - 1),
+ [IFLA_BRIDGE_MST_ENTRY_STATE] = NLA_POLICY_RANGE(NLA_U8,
+ BR_STATE_DISABLED,
+ BR_STATE_BLOCKING),
+};
+
+static int br_mst_process_one(struct net_bridge_port *p,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_BRIDGE_MST_ENTRY_MAX + 1];
+ u16 msti;
+ u8 state;
+ int err;
+
+ err = nla_parse_nested(tb, IFLA_BRIDGE_MST_ENTRY_MAX, attr,
+ br_mst_nl_policy, extack);
+ if (err)
+ return err;
+
+ if (!tb[IFLA_BRIDGE_MST_ENTRY_MSTI]) {
+ NL_SET_ERR_MSG_MOD(extack, "MSTI not specified");
+ return -EINVAL;
+ }
+
+ if (!tb[IFLA_BRIDGE_MST_ENTRY_STATE]) {
+ NL_SET_ERR_MSG_MOD(extack, "State not specified");
+ return -EINVAL;
+ }
+
+ msti = nla_get_u16(tb[IFLA_BRIDGE_MST_ENTRY_MSTI]);
+ state = nla_get_u8(tb[IFLA_BRIDGE_MST_ENTRY_STATE]);
+
+ return br_mst_set_state(p, msti, state, extack);
+}
+
+int br_mst_process(struct net_bridge_port *p, const struct nlattr *mst_attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *attr;
+ int err, msts = 0;
+ int rem;
+
+ if (!br_opt_get(p->br, BROPT_MST_ENABLED)) {
+ NL_SET_ERR_MSG_MOD(extack, "Can't modify MST state when MST is disabled");
+ return -EBUSY;
+ }
+
+ nla_for_each_nested(attr, mst_attr, rem) {
+ switch (nla_type(attr)) {
+ case IFLA_BRIDGE_MST_ENTRY:
+ err = br_mst_process_one(p, attr, extack);
+ break;
+ default:
+ continue;
+ }
+
+ msts++;
+ if (err)
+ break;
+ }
+
+ if (!msts) {
+ NL_SET_ERR_MSG_MOD(extack, "Found no MST entries to process");
+ err = -EINVAL;
+ }
+
+ return err;
+}
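
The net effect of this file can be summarised by a small userspace model (purely illustrative, not the kernel's data structures): the bridge maps each VLAN to an MSTI, and each port holds one STP state per MSTI, shared by all of that MSTI's member VLANs:

#include <stdint.h>
#include <stdio.h>

#define N_VIDS	4096
#define N_MSTIS	64	/* illustrative bound; MSTIs are u16 in the kernel */

static uint16_t vid_to_msti[N_VIDS];	  /* bridge-global, set per VLAN */
static uint8_t port_msti_state[N_MSTIS];  /* conceptually one table per port */

static uint8_t port_state_for_vid(uint16_t vid)
{
	return port_msti_state[vid_to_msti[vid]];
}

int main(void)
{
	vid_to_msti[100] = 7;	/* VLAN 100 joins MSTI 7 */
	port_msti_state[7] = 3;	/* 3 == BR_STATE_FORWARDING */
	printf("state for VLAN 100: %u\n", port_state_for_vid(100));
	return 0;
}
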
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index de2409889489..db4f2641d1cd 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -82,6 +82,9 @@ static void br_multicast_find_del_pg(struct net_bridge *br,
struct net_bridge_port_group *pg);
static void __br_multicast_stop(struct net_bridge_mcast *brmctx);
+static int br_mc_disabled_update(struct net_device *dev, bool value,
+ struct netlink_ext_ack *extack);
+
static struct net_bridge_port_group *
br_sg_port_find(struct net_bridge *br,
struct net_bridge_port_group_sg_key *sg_p)
@@ -1156,6 +1159,7 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
return mp;
if (atomic_read(&br->mdb_hash_tbl.nelems) >= br->hash_max) {
+ br_mc_disabled_update(br->dev, false, NULL);
br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
return ERR_PTR(-E2BIG);
}
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 4fd882686b04..f20f4373ff40 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -384,6 +384,7 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
/* - Bridged-and-DNAT'ed traffic doesn't
* require ip_forwarding. */
if (rt->dst.dev == dev) {
+ skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
goto bridged_dnat;
}
@@ -413,6 +414,7 @@ bridged_dnat:
kfree_skb(skb);
return 0;
}
+ skb_dst_drop(skb);
skb_dst_set_noref(skb, &rt->dst);
}
@@ -1012,9 +1014,24 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
return okfn(net, sk, skb);
ops = nf_hook_entries_get_hook_ops(e);
- for (i = 0; i < e->num_hook_entries &&
- ops[i]->priority <= NF_BR_PRI_BRNF; i++)
- ;
+ for (i = 0; i < e->num_hook_entries; i++) {
+ /* These hooks have already been called */
+ if (ops[i]->priority < NF_BR_PRI_BRNF)
+ continue;
+
+ /* These hooks have not been called yet, run them. */
+ if (ops[i]->priority > NF_BR_PRI_BRNF)
+ break;
+
+ /* take a closer look at NF_BR_PRI_BRNF. */
+ if (ops[i]->hook == br_nf_pre_routing) {
+ /* This hook diverted the skb to this function,
+ * hooks after this have not been run yet.
+ */
+ i++;
+ break;
+ }
+ }
nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
sk, net, okfn);
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index e4e0c836c3f5..6b07f30675bb 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -197,6 +197,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc
kfree_skb(skb);
return 0;
}
+ skb_dst_drop(skb);
skb_dst_set_noref(skb, &rt->dst);
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 2ff83d84230d..d087fd4c784a 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -119,6 +119,9 @@ static size_t br_get_link_af_size_filtered(const struct net_device *dev,
/* Each VLAN is returned in bridge_vlan_info along with flags */
vinfo_sz += num_vlan_infos * nla_total_size(sizeof(struct bridge_vlan_info));
+ if (p && vg && (filter_mask & RTEXT_FILTER_MST))
+ vinfo_sz += br_mst_info_size(vg);
+
if (!(filter_mask & RTEXT_FILTER_CFM_STATUS))
return vinfo_sz;
@@ -184,6 +187,7 @@ static inline size_t br_port_info_size(void)
+ nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */
+ nla_total_size(1) /* IFLA_BRPORT_NEIGH_SUPPRESS */
+ nla_total_size(1) /* IFLA_BRPORT_ISOLATED */
+ + nla_total_size(1) /* IFLA_BRPORT_LOCKED */
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */
+ nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */
@@ -269,7 +273,8 @@ static int br_port_fill_attrs(struct sk_buff *skb,
BR_MRP_LOST_CONT)) ||
nla_put_u8(skb, IFLA_BRPORT_MRP_IN_OPEN,
!!(p->flags & BR_MRP_LOST_IN_CONT)) ||
- nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)))
+ nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)) ||
+ nla_put_u8(skb, IFLA_BRPORT_LOCKED, !!(p->flags & BR_PORT_LOCKED)))
return -EMSGSIZE;
timerval = br_timer_value(&p->message_age_timer);
@@ -483,7 +488,8 @@ static int br_fill_ifinfo(struct sk_buff *skb,
RTEXT_FILTER_BRVLAN_COMPRESSED |
RTEXT_FILTER_MRP |
RTEXT_FILTER_CFM_CONFIG |
- RTEXT_FILTER_CFM_STATUS)) {
+ RTEXT_FILTER_CFM_STATUS |
+ RTEXT_FILTER_MST)) {
af = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
if (!af)
goto nla_put_failure;
@@ -562,9 +568,34 @@ static int br_fill_ifinfo(struct sk_buff *skb,
nla_nest_end(skb, cfm_nest);
}
+ if ((filter_mask & RTEXT_FILTER_MST) &&
+ br_opt_get(br, BROPT_MST_ENABLED) && port) {
+ const struct net_bridge_vlan_group *vg = nbp_vlan_group(port);
+ struct nlattr *mst_nest;
+ int err;
+
+ if (!vg || !vg->num_vlans)
+ goto done;
+
+ mst_nest = nla_nest_start(skb, IFLA_BRIDGE_MST);
+ if (!mst_nest)
+ goto nla_put_failure;
+
+ err = br_mst_fill_info(skb, vg);
+ if (err)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, mst_nest);
+ }
+
done:
- if (af)
- nla_nest_end(skb, af);
+ if (af) {
+ if (nlmsg_get_pos(skb) - (void *)af > nla_attr_size(0))
+ nla_nest_end(skb, af);
+ else
+ nla_nest_cancel(skb, af);
+ }
+
nlmsg_end(skb, nlh);
return 0;
@@ -801,6 +832,23 @@ static int br_afspec(struct net_bridge *br,
if (err)
return err;
break;
+ case IFLA_BRIDGE_MST:
+ if (!p) {
+ NL_SET_ERR_MSG(extack,
+ "MST states can only be set on bridge ports");
+ return -EINVAL;
+ }
+
+ if (cmd != RTM_SETLINK) {
+ NL_SET_ERR_MSG(extack,
+ "MST states can only be set through RTM_SETLINK");
+ return -EINVAL;
+ }
+
+ err = br_mst_process(p, attr, extack);
+ if (err)
+ return err;
+ break;
}
}
@@ -827,6 +875,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 },
[IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 },
[IFLA_BRPORT_ISOLATED] = { .type = NLA_U8 },
+ [IFLA_BRPORT_LOCKED] = { .type = NLA_U8 },
[IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 },
[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = { .type = NLA_U32 },
};
@@ -893,6 +942,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
br_set_port_flag(p, tb, IFLA_BRPORT_VLAN_TUNNEL, BR_VLAN_TUNNEL);
br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS, BR_NEIGH_SUPPRESS);
br_set_port_flag(p, tb, IFLA_BRPORT_ISOLATED, BR_ISOLATED);
+ br_set_port_flag(p, tb, IFLA_BRPORT_LOCKED, BR_PORT_LOCKED);
changed_mask = old_flags ^ p->flags;
@@ -1280,8 +1330,13 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
br_recalculate_fwd_mask(br);
}
- if (data[IFLA_BR_FDB_FLUSH])
- br_fdb_flush(br);
+ if (data[IFLA_BR_FDB_FLUSH]) {
+ struct net_bridge_fdb_flush_desc desc = {
+ .flags_mask = BIT(BR_FDB_STATIC)
+ };
+
+ br_fdb_flush(br, &desc);
+ }
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
if (data[IFLA_BR_MCAST_ROUTER]) {
@@ -1719,10 +1774,10 @@ static int br_fill_linkxstats(struct sk_buff *skb,
if (v->vid == pvid)
vxi.flags |= BRIDGE_VLAN_INFO_PVID;
br_vlan_get_stats(v, &stats);
- vxi.rx_bytes = stats.rx_bytes;
- vxi.rx_packets = stats.rx_packets;
- vxi.tx_bytes = stats.tx_bytes;
- vxi.tx_packets = stats.tx_packets;
+ vxi.rx_bytes = u64_stats_read(&stats.rx_bytes);
+ vxi.rx_packets = u64_stats_read(&stats.rx_packets);
+ vxi.tx_bytes = u64_stats_read(&stats.tx_bytes);
+ vxi.tx_packets = u64_stats_read(&stats.tx_packets);
if (nla_put(skb, BRIDGE_XSTATS_VLAN, sizeof(vxi), &vxi))
goto nla_put_failure;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 2661dda1a92b..06e5f6faa431 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -178,6 +178,7 @@ enum {
* @br_mcast_ctx: if MASTER flag set, this is the global vlan multicast context
* @port_mcast_ctx: if MASTER flag unset, this is the per-port/vlan multicast
* context
+ * @msti: if MASTER flag set, this holds the VLAN's MST instance
* @vlist: sorted list of VLAN entries
* @rcu: used for entry destruction
*
@@ -210,6 +211,8 @@ struct net_bridge_vlan {
struct net_bridge_mcast_port port_mcast_ctx;
};
+ u16 msti;
+
struct list_head vlist;
struct rcu_head rcu;
@@ -271,6 +274,13 @@ struct net_bridge_fdb_entry {
struct rcu_head rcu;
};
+struct net_bridge_fdb_flush_desc {
+ unsigned long flags;
+ unsigned long flags_mask;
+ int port_ifindex;
+ u16 vlan_id;
+};
+
#define MDB_PG_FLAGS_PERMANENT BIT(0)
#define MDB_PG_FLAGS_OFFLOAD BIT(1)
#define MDB_PG_FLAGS_FAST_LEAVE BIT(2)
@@ -445,6 +455,7 @@ enum net_bridge_opts {
BROPT_NO_LL_LEARN,
BROPT_VLAN_BRIDGE_BINDING,
BROPT_MCAST_VLAN_SNOOPING_ENABLED,
+ BROPT_MST_ENABLED,
};
struct net_bridge {
@@ -751,11 +762,17 @@ static inline void br_netpoll_disable(struct net_bridge_port *p)
#endif
/* br_fdb.c */
+#define FDB_FLUSH_IGNORED_NDM_FLAGS (NTF_MASTER | NTF_SELF)
+#define FDB_FLUSH_ALLOWED_NDM_STATES (NUD_PERMANENT | NUD_NOARP)
+#define FDB_FLUSH_ALLOWED_NDM_FLAGS (NTF_USE | NTF_EXT_LEARNED | \
+ NTF_STICKY | NTF_OFFLOADED)
+
int br_fdb_init(void);
void br_fdb_fini(void);
int br_fdb_hash_init(struct net_bridge *br);
void br_fdb_hash_fini(struct net_bridge *br);
-void br_fdb_flush(struct net_bridge *br);
+void br_fdb_flush(struct net_bridge *br,
+ const struct net_bridge_fdb_flush_desc *desc);
void br_fdb_find_delete_local(struct net_bridge *br,
const struct net_bridge_port *p,
const unsigned char *addr, u16 vid);
@@ -776,7 +793,11 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid, unsigned long flags);
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev, const unsigned char *addr, u16 vid);
+ struct net_device *dev, const unsigned char *addr, u16 vid,
+ struct netlink_ext_ack *extack);
+int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev, u16 vid,
+ struct netlink_ext_ack *extack);
int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
const unsigned char *addr, u16 vid, u16 nlh_flags,
struct netlink_ext_ack *extack);
@@ -1765,6 +1786,63 @@ static inline bool br_vlan_state_allowed(u8 state, bool learn_allow)
}
#endif
+/* br_mst.c */
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+DECLARE_STATIC_KEY_FALSE(br_mst_used);
+static inline bool br_mst_is_enabled(struct net_bridge *br)
+{
+ return static_branch_unlikely(&br_mst_used) &&
+ br_opt_get(br, BROPT_MST_ENABLED);
+}
+
+int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state,
+ struct netlink_ext_ack *extack);
+int br_mst_vlan_set_msti(struct net_bridge_vlan *v, u16 msti);
+void br_mst_vlan_init_state(struct net_bridge_vlan *v);
+int br_mst_set_enabled(struct net_bridge *br, bool on,
+ struct netlink_ext_ack *extack);
+size_t br_mst_info_size(const struct net_bridge_vlan_group *vg);
+int br_mst_fill_info(struct sk_buff *skb,
+ const struct net_bridge_vlan_group *vg);
+int br_mst_process(struct net_bridge_port *p, const struct nlattr *mst_attr,
+ struct netlink_ext_ack *extack);
+#else
+static inline bool br_mst_is_enabled(struct net_bridge *br)
+{
+ return false;
+}
+
+static inline int br_mst_set_state(struct net_bridge_port *p, u16 msti,
+ u8 state, struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int br_mst_set_enabled(struct net_bridge *br, bool on,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline size_t br_mst_info_size(const struct net_bridge_vlan_group *vg)
+{
+ return 0;
+}
+
+static inline int br_mst_fill_info(struct sk_buff *skb,
+ const struct net_bridge_vlan_group *vg)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int br_mst_process(struct net_bridge_port *p,
+ const struct nlattr *mst_attr,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
struct nf_br_ops {
int (*br_dev_xmit_hook)(struct sk_buff *skb);
};
@@ -1985,7 +2063,7 @@ void br_switchdev_mdb_notify(struct net_device *dev,
struct net_bridge_port_group *pg,
int type);
int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
- struct netlink_ext_ack *extack);
+ bool changed, struct netlink_ext_ack *extack);
int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid);
void br_switchdev_init(struct net_bridge *br);
@@ -2052,8 +2130,8 @@ static inline int br_switchdev_set_port_flag(struct net_bridge_port *p,
return 0;
}
-static inline int br_switchdev_port_vlan_add(struct net_device *dev,
- u16 vid, u16 flags,
+static inline int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid,
+ u16 flags, bool changed,
struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
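
When CONFIG_BRIDGE_VLAN_FILTERING is off, the br_mst_*() declarations above collapse into inline stubs returning -EOPNOTSUPP (or false/0), so callers stay free of #ifdefs. A sketch of a caller under that assumption; handle_mst_attr() is a hypothetical helper:

static int handle_mst_attr(struct net_bridge_port *p,
			   const struct nlattr *attr,
			   struct netlink_ext_ack *extack)
{
	int err = br_mst_process(p, attr, extack);

	/* the stub returns -EOPNOTSUPP when MST is compiled out */
	if (err == -EOPNOTSUPP)
		NL_SET_ERR_MSG_MOD(extack, "MST not supported in this build");
	return err;
}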
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 1d80f34a139c..7d27b2e6038f 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -43,6 +43,12 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
return;
p->state = state;
+ if (br_opt_get(p->br, BROPT_MST_ENABLED)) {
+ err = br_mst_set_state(p, 0, state, NULL);
+ if (err)
+ br_warn(p->br, "error setting MST state on port %u(%s)\n",
+ p->port_no, netdev_name(p->dev));
+ }
err = switchdev_port_attr_set(p->dev, &attr, NULL);
if (err && err != -EOPNOTSUPP)
br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index f8fbaaa7c501..8f3d76c751dd 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -72,7 +72,8 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
/* Flags that can be offloaded to hardware */
#define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \
- BR_MCAST_FLOOD | BR_BCAST_FLOOD)
+ BR_MCAST_FLOOD | BR_BCAST_FLOOD | BR_PORT_LOCKED | \
+ BR_HAIRPIN_MODE | BR_ISOLATED | BR_MULTICAST_TO_UNICAST)
int br_switchdev_set_port_flag(struct net_bridge_port *p,
unsigned long flags,
@@ -160,13 +161,14 @@ br_switchdev_fdb_notify(struct net_bridge *br,
}
int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
- struct netlink_ext_ack *extack)
+ bool changed, struct netlink_ext_ack *extack)
{
struct switchdev_obj_port_vlan v = {
.obj.orig_dev = dev,
.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
.flags = flags,
.vid = vid,
+ .changed = changed,
};
return switchdev_port_obj_add(dev, &v.obj, extack);
@@ -330,6 +332,48 @@ br_switchdev_fdb_replay(const struct net_device *br_dev, const void *ctx,
return err;
}
+static int br_switchdev_vlan_attr_replay(struct net_device *br_dev,
+ const void *ctx,
+ struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
+{
+ struct switchdev_notifier_port_attr_info attr_info = {
+ .info = {
+ .dev = br_dev,
+ .extack = extack,
+ .ctx = ctx,
+ },
+ };
+ struct net_bridge *br = netdev_priv(br_dev);
+ struct net_bridge_vlan_group *vg;
+ struct switchdev_attr attr;
+ struct net_bridge_vlan *v;
+ int err;
+
+ attr_info.attr = &attr;
+ attr.orig_dev = br_dev;
+
+ vg = br_vlan_group(br);
+ if (!vg)
+ return 0;
+
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ if (v->msti) {
+ attr.id = SWITCHDEV_ATTR_ID_VLAN_MSTI;
+ attr.u.vlan_msti.vid = v->vid;
+ attr.u.vlan_msti.msti = v->msti;
+
+ err = nb->notifier_call(nb, SWITCHDEV_PORT_ATTR_SET,
+ &attr_info);
+ err = notifier_to_errno(err);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
static int
br_switchdev_vlan_replay_one(struct notifier_block *nb,
struct net_device *dev,
@@ -351,19 +395,50 @@ br_switchdev_vlan_replay_one(struct notifier_block *nb,
return notifier_to_errno(err);
}
+static int br_switchdev_vlan_replay_group(struct notifier_block *nb,
+ struct net_device *dev,
+ struct net_bridge_vlan_group *vg,
+ const void *ctx, unsigned long action,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_vlan *v;
+ int err = 0;
+ u16 pvid;
+
+ if (!vg)
+ return 0;
+
+ pvid = br_get_pvid(vg);
+
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ struct switchdev_obj_port_vlan vlan = {
+ .obj.orig_dev = dev,
+ .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+ .flags = br_vlan_flags(v, pvid),
+ .vid = v->vid,
+ };
+
+ if (!br_vlan_should_use(v))
+ continue;
+
+ err = br_switchdev_vlan_replay_one(nb, dev, &vlan, ctx,
+ action, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int br_switchdev_vlan_replay(struct net_device *br_dev,
- struct net_device *dev,
const void *ctx, bool adding,
struct notifier_block *nb,
struct netlink_ext_ack *extack)
{
- struct net_bridge_vlan_group *vg;
- struct net_bridge_vlan *v;
+ struct net_bridge *br = netdev_priv(br_dev);
struct net_bridge_port *p;
- struct net_bridge *br;
unsigned long action;
- int err = 0;
- u16 pvid;
+ int err;
ASSERT_RTNL();
@@ -373,49 +448,33 @@ static int br_switchdev_vlan_replay(struct net_device *br_dev,
if (!netif_is_bridge_master(br_dev))
return -EINVAL;
- if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev))
- return -EINVAL;
-
- if (netif_is_bridge_master(dev)) {
- br = netdev_priv(dev);
- vg = br_vlan_group(br);
- p = NULL;
- } else {
- p = br_port_get_rtnl(dev);
- if (WARN_ON(!p))
- return -EINVAL;
- vg = nbp_vlan_group(p);
- br = p->br;
- }
-
- if (!vg)
- return 0;
-
if (adding)
action = SWITCHDEV_PORT_OBJ_ADD;
else
action = SWITCHDEV_PORT_OBJ_DEL;
- pvid = br_get_pvid(vg);
+ err = br_switchdev_vlan_replay_group(nb, br_dev, br_vlan_group(br),
+ ctx, action, extack);
+ if (err)
+ return err;
- list_for_each_entry(v, &vg->vlan_list, vlist) {
- struct switchdev_obj_port_vlan vlan = {
- .obj.orig_dev = dev,
- .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
- .flags = br_vlan_flags(v, pvid),
- .vid = v->vid,
- };
+ list_for_each_entry(p, &br->port_list, list) {
+ struct net_device *dev = p->dev;
- if (!br_vlan_should_use(v))
- continue;
+ err = br_switchdev_vlan_replay_group(nb, dev,
+ nbp_vlan_group(p),
+ ctx, action, extack);
+ if (err)
+ return err;
+ }
- err = br_switchdev_vlan_replay_one(nb, dev, &vlan, ctx,
- action, extack);
+ if (adding) {
+ err = br_switchdev_vlan_attr_replay(br_dev, ctx, nb, extack);
if (err)
return err;
}
- return err;
+ return 0;
}
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
@@ -681,8 +740,7 @@ static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
struct net_device *dev = p->dev;
int err;
- err = br_switchdev_vlan_replay(br_dev, dev, ctx, true, blocking_nb,
- extack);
+ err = br_switchdev_vlan_replay(br_dev, ctx, true, blocking_nb, extack);
if (err && err != -EOPNOTSUPP)
return err;
@@ -706,11 +764,11 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
struct net_device *br_dev = p->br->dev;
struct net_device *dev = p->dev;
- br_switchdev_vlan_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
+ br_switchdev_fdb_replay(br_dev, ctx, false, atomic_nb);
br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
- br_switchdev_fdb_replay(br_dev, ctx, false, atomic_nb);
+ br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL);
}
/* Let the bridge know that this port is offloaded, so that it can assign a
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 3f7ca88c2aa3..ea733542244c 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -344,7 +344,11 @@ static DEVICE_ATTR_RW(group_addr);
static int set_flush(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br_fdb_flush(br);
+ struct net_bridge_fdb_flush_desc desc = {
+ .flags_mask = BIT(BR_FDB_STATIC)
+ };
+
+ br_fdb_flush(br, &desc);
return 0;
}
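
br_fdb_flush() now takes a descriptor rather than unconditionally wiping the table. Both the sysfs path above and the netlink path earlier build one with flags = 0 and flags_mask = BIT(BR_FDB_STATIC), which selects exactly the entries whose static bit is clear, i.e. the learned ones. A sketch of a matching test, assuming the flush loop compares (entry->flags & flags_mask) == flags; fdb_flush_matches() is a hypothetical helper:

static bool fdb_flush_matches(const struct net_bridge_fdb_entry *f,
			      const struct net_bridge_fdb_flush_desc *desc)
{
	/* masked flag compare: desc->flags = 0 together with
	 * desc->flags_mask = BIT(BR_FDB_STATIC) keeps static entries
	 */
	if ((f->flags & desc->flags_mask) != desc->flags)
		return false;
	if (desc->vlan_id && f->key.vlan_id != desc->vlan_id)
		return false;
	return true;
}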
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 07fa76080512..74fdd8105dca 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -384,7 +384,7 @@ int br_sysfs_addif(struct net_bridge_port *p)
return err;
}
- strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ);
+ strscpy(p->sysfs_name, p->dev->name, IFNAMSIZ);
return sysfs_create_link(br->ifobj, &p->kobj, p->sysfs_name);
}
@@ -406,7 +406,7 @@ int br_sysfs_renameif(struct net_bridge_port *p)
netdev_notice(br->dev, "unable to rename link %s to %s",
p->sysfs_name, p->dev->name);
else
- strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ);
+ strscpy(p->sysfs_name, p->dev->name, IFNAMSIZ);
return err;
}
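
The strlcpy() to strscpy() conversions here keep behavior for these callers; the practical difference is the return value. strlcpy() returns the length of the source string, which can exceed the destination size, while strscpy() returns the number of characters copied or -E2BIG on truncation, so overflow is detectable without a second strlen(). A minimal sketch; copy_port_name() is hypothetical:

static void copy_port_name(struct net_bridge_port *p)
{
	ssize_t n = strscpy(p->sysfs_name, p->dev->name, IFNAMSIZ);

	if (n == -E2BIG)
		pr_warn("sysfs name for %s truncated\n", p->dev->name);
}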
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 84ba456a78cc..6e53dc991409 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -34,53 +34,70 @@ static struct net_bridge_vlan *br_vlan_lookup(struct rhashtable *tbl, u16 vid)
return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params);
}
-static bool __vlan_add_pvid(struct net_bridge_vlan_group *vg,
+static void __vlan_add_pvid(struct net_bridge_vlan_group *vg,
const struct net_bridge_vlan *v)
{
if (vg->pvid == v->vid)
- return false;
+ return;
smp_wmb();
br_vlan_set_pvid_state(vg, v->state);
vg->pvid = v->vid;
-
- return true;
}
-static bool __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid)
+static void __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid)
{
if (vg->pvid != vid)
- return false;
+ return;
smp_wmb();
vg->pvid = 0;
-
- return true;
}
-/* return true if anything changed, false otherwise */
-static bool __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
+/* Update the BRIDGE_VLAN_INFO_PVID and BRIDGE_VLAN_INFO_UNTAGGED flags of @v.
+ * If @commit is false, return just whether the BRIDGE_VLAN_INFO_PVID and
+ * BRIDGE_VLAN_INFO_UNTAGGED bits of @flags would produce any change onto @v.
+ */
+static bool __vlan_flags_update(struct net_bridge_vlan *v, u16 flags,
+ bool commit)
{
struct net_bridge_vlan_group *vg;
- u16 old_flags = v->flags;
- bool ret;
+ bool change;
if (br_vlan_is_master(v))
vg = br_vlan_group(v->br);
else
vg = nbp_vlan_group(v->port);
+ /* check if anything would be changed on commit */
+ change = !!(flags & BRIDGE_VLAN_INFO_PVID) == !!(vg->pvid != v->vid) ||
+ ((flags ^ v->flags) & BRIDGE_VLAN_INFO_UNTAGGED);
+
+ if (!commit)
+ goto out;
+
if (flags & BRIDGE_VLAN_INFO_PVID)
- ret = __vlan_add_pvid(vg, v);
+ __vlan_add_pvid(vg, v);
else
- ret = __vlan_delete_pvid(vg, v->vid);
+ __vlan_delete_pvid(vg, v->vid);
if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
v->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
else
v->flags &= ~BRIDGE_VLAN_INFO_UNTAGGED;
- return ret || !!(old_flags ^ v->flags);
+out:
+ return change;
+}
+
+static bool __vlan_flags_would_change(struct net_bridge_vlan *v, u16 flags)
+{
+ return __vlan_flags_update(v, flags, false);
+}
+
+static void __vlan_flags_commit(struct net_bridge_vlan *v, u16 flags)
+{
+ __vlan_flags_update(v, flags, true);
}
static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
@@ -92,7 +109,7 @@ static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
/* Try switchdev op first. In case it is not supported, fallback to
* 8021q add.
*/
- err = br_switchdev_port_vlan_add(dev, v->vid, flags, extack);
+ err = br_switchdev_port_vlan_add(dev, v->vid, flags, false, extack);
if (err == -EOPNOTSUPP)
return vlan_vid_add(dev, br->vlan_proto, v->vid);
v->priv_flags |= BR_VLFLAG_ADDED_BY_SWITCHDEV;
@@ -209,6 +226,24 @@ static void nbp_vlan_rcu_free(struct rcu_head *rcu)
kfree(v);
}
+static void br_vlan_init_state(struct net_bridge_vlan *v)
+{
+ struct net_bridge *br;
+
+ if (br_vlan_is_master(v))
+ br = v->br;
+ else
+ br = v->port->br;
+
+ if (br_opt_get(br, BROPT_MST_ENABLED)) {
+ br_mst_vlan_init_state(v);
+ return;
+ }
+
+ v->state = BR_STATE_FORWARDING;
+ v->msti = 0;
+}
+
/* This is the shared VLAN add function which works for both ports and bridge
* devices. There are four possible calls to this function in terms of the
* vlan entry type:
@@ -284,9 +319,12 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
}
br_multicast_port_ctx_init(p, v, &v->port_mcast_ctx);
} else {
- err = br_switchdev_port_vlan_add(dev, v->vid, flags, extack);
- if (err && err != -EOPNOTSUPP)
- goto out;
+ if (br_vlan_should_use(v)) {
+ err = br_switchdev_port_vlan_add(dev, v->vid, flags,
+ false, extack);
+ if (err && err != -EOPNOTSUPP)
+ goto out;
+ }
br_multicast_ctx_init(br, v, &v->br_mcast_ctx);
v->priv_flags |= BR_VLFLAG_GLOBAL_MCAST_ENABLED;
}
@@ -302,7 +340,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
}
/* set the state before publishing */
- v->state = BR_STATE_FORWARDING;
+ br_vlan_init_state(v);
err = rhashtable_lookup_insert_fast(&vg->vlan_hash, &v->vnode,
br_vlan_rht_params);
@@ -310,7 +348,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
goto out_fdb_insert;
__vlan_add_list(v);
- __vlan_add_flags(v, flags);
+ __vlan_flags_commit(v, flags);
br_multicast_toggle_one_vlan(v, true);
if (p)
@@ -404,6 +442,7 @@ static void __vlan_flush(const struct net_bridge *br,
{
struct net_bridge_vlan *vlan, *tmp;
u16 v_start = 0, v_end = 0;
+ int err;
__vlan_delete_pvid(vg, vg->pvid);
list_for_each_entry_safe(vlan, tmp, &vg->vlan_list, vlist) {
@@ -417,7 +456,13 @@ static void __vlan_flush(const struct net_bridge *br,
}
v_end = vlan->vid;
- __vlan_del(vlan);
+ err = __vlan_del(vlan);
+ if (err) {
+ br_err(br,
+ "port %u(%s) failed to delete vlan %d: %pe\n",
+ (unsigned int) p->port_no, p->dev->name,
+ vlan->vid, ERR_PTR(err));
+ }
}
/* notify about the last/whole vlan range */
@@ -460,8 +505,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
if (br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
stats = this_cpu_ptr(v->stats);
u64_stats_update_begin(&stats->syncp);
- stats->tx_bytes += skb->len;
- stats->tx_packets++;
+ u64_stats_add(&stats->tx_bytes, skb->len);
+ u64_stats_inc(&stats->tx_packets);
u64_stats_update_end(&stats->syncp);
}
@@ -560,10 +605,10 @@ static bool __allowed_ingress(const struct net_bridge *br,
!br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
if (*state == BR_STATE_FORWARDING) {
*state = br_vlan_get_pvid_state(vg);
- return br_vlan_state_allowed(*state, true);
- } else {
- return true;
+ if (!br_vlan_state_allowed(*state, true))
+ goto drop;
}
+ return true;
}
}
v = br_vlan_find(vg, *vid);
@@ -579,8 +624,8 @@ static bool __allowed_ingress(const struct net_bridge *br,
if (br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
stats = this_cpu_ptr(v->stats);
u64_stats_update_begin(&stats->syncp);
- stats->rx_bytes += skb->len;
- stats->rx_packets++;
+ u64_stats_add(&stats->rx_bytes, skb->len);
+ u64_stats_inc(&stats->rx_packets);
u64_stats_update_end(&stats->syncp);
}
@@ -670,18 +715,29 @@ static int br_vlan_add_existing(struct net_bridge *br,
u16 flags, bool *changed,
struct netlink_ext_ack *extack)
{
+ bool would_change = __vlan_flags_would_change(vlan, flags);
+ bool becomes_brentry = false;
int err;
- err = br_switchdev_port_vlan_add(br->dev, vlan->vid, flags, extack);
- if (err && err != -EOPNOTSUPP)
- return err;
-
if (!br_vlan_is_brentry(vlan)) {
/* Trying to change flags of non-existent bridge vlan */
- if (!(flags & BRIDGE_VLAN_INFO_BRENTRY)) {
- err = -EINVAL;
- goto err_flags;
- }
+ if (!(flags & BRIDGE_VLAN_INFO_BRENTRY))
+ return -EINVAL;
+
+ becomes_brentry = true;
+ }
+
+ /* Master VLANs that aren't brentries weren't notified before,
+ * time to notify them now.
+ */
+ if (becomes_brentry || would_change) {
+ err = br_switchdev_port_vlan_add(br->dev, vlan->vid, flags,
+ would_change, extack);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+ }
+
+ if (becomes_brentry) {
/* It was only kept for port vlans, now make it real */
err = br_fdb_add_local(br, NULL, br->dev->dev_addr, vlan->vid);
if (err) {
@@ -696,13 +752,13 @@ static int br_vlan_add_existing(struct net_bridge *br,
br_multicast_toggle_one_vlan(vlan, true);
}
- if (__vlan_add_flags(vlan, flags))
+ __vlan_flags_commit(vlan, flags);
+ if (would_change)
*changed = true;
return 0;
err_fdb_insert:
-err_flags:
br_switchdev_port_vlan_del(br->dev, vlan->vid);
return err;
}
@@ -1247,11 +1303,18 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
*changed = false;
vlan = br_vlan_find(nbp_vlan_group(port), vid);
if (vlan) {
- /* Pass the flags to the hardware bridge */
- ret = br_switchdev_port_vlan_add(port->dev, vid, flags, extack);
- if (ret && ret != -EOPNOTSUPP)
- return ret;
- *changed = __vlan_add_flags(vlan, flags);
+ bool would_change = __vlan_flags_would_change(vlan, flags);
+
+ if (would_change) {
+ /* Pass the flags to the hardware bridge */
+ ret = br_switchdev_port_vlan_add(port->dev, vid, flags,
+ true, extack);
+ if (ret && ret != -EOPNOTSUPP)
+ return ret;
+ }
+
+ __vlan_flags_commit(vlan, flags);
+ *changed = would_change;
return 0;
}
@@ -1316,16 +1379,16 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v,
cpu_stats = per_cpu_ptr(v->stats, i);
do {
start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
- rxpackets = cpu_stats->rx_packets;
- rxbytes = cpu_stats->rx_bytes;
- txbytes = cpu_stats->tx_bytes;
- txpackets = cpu_stats->tx_packets;
+ rxpackets = u64_stats_read(&cpu_stats->rx_packets);
+ rxbytes = u64_stats_read(&cpu_stats->rx_bytes);
+ txbytes = u64_stats_read(&cpu_stats->tx_bytes);
+ txpackets = u64_stats_read(&cpu_stats->tx_packets);
} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
- stats->rx_packets += rxpackets;
- stats->rx_bytes += rxbytes;
- stats->tx_bytes += txbytes;
- stats->tx_packets += txpackets;
+ u64_stats_add(&stats->rx_packets, rxpackets);
+ u64_stats_add(&stats->rx_bytes, rxbytes);
+ u64_stats_add(&stats->tx_bytes, txbytes);
+ u64_stats_add(&stats->tx_packets, txpackets);
}
}
@@ -1716,14 +1779,18 @@ static bool br_vlan_stats_fill(struct sk_buff *skb,
return false;
br_vlan_get_stats(v, &stats);
- if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_BYTES, stats.rx_bytes,
+ if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_BYTES,
+ u64_stats_read(&stats.rx_bytes),
BRIDGE_VLANDB_STATS_PAD) ||
nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_RX_PACKETS,
- stats.rx_packets, BRIDGE_VLANDB_STATS_PAD) ||
- nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_BYTES, stats.tx_bytes,
+ u64_stats_read(&stats.rx_packets),
+ BRIDGE_VLANDB_STATS_PAD) ||
+ nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_BYTES,
+ u64_stats_read(&stats.tx_bytes),
BRIDGE_VLANDB_STATS_PAD) ||
nla_put_u64_64bit(skb, BRIDGE_VLANDB_STATS_TX_PACKETS,
- stats.tx_packets, BRIDGE_VLANDB_STATS_PAD))
+ u64_stats_read(&stats.tx_packets),
+ BRIDGE_VLANDB_STATS_PAD))
goto out_err;
nla_nest_end(skb, nest);
@@ -2020,7 +2087,8 @@ static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb)
goto out_err;
}
err = br_vlan_dump_dev(dev, skb, cb, dump_flags);
- if (err && err != -EMSGSIZE)
+ /* if the dump completed without an error we return 0 here */
+ if (err != -EMSGSIZE)
goto out_err;
} else {
for_each_netdev_rcu(net, dev) {
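
The u64_stats conversions in this file follow the standard split: writers bracket updates with u64_stats_update_begin()/u64_stats_update_end() and use u64_stats_add()/u64_stats_inc(), while readers retry on the u64_stats_fetch_begin_irq()/u64_stats_fetch_retry_irq() pair; on 64-bit these compile away, on 32-bit they provide tear-free 64-bit counters. A self-contained sketch of the reader side, with a hypothetical per-CPU stats struct mirroring the fields used above:

struct my_pcpu_stats {
	u64_stats_t rx_bytes;
	struct u64_stats_sync syncp;
};

static u64 read_rx_bytes(struct my_pcpu_stats __percpu *stats)
{
	u64 total = 0;
	int i;

	for_each_possible_cpu(i) {
		const struct my_pcpu_stats *s = per_cpu_ptr(stats, i);
		unsigned int start;
		u64 bytes;

		do {
			start = u64_stats_fetch_begin_irq(&s->syncp);
			bytes = u64_stats_read(&s->rx_bytes);
		} while (u64_stats_fetch_retry_irq(&s->syncp, start));

		total += bytes;
	}
	return total;
}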
diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c
index a6382973b3e7..a2724d03278c 100644
--- a/net/bridge/br_vlan_options.c
+++ b/net/bridge/br_vlan_options.c
@@ -99,6 +99,11 @@ static int br_vlan_modify_state(struct net_bridge_vlan_group *vg,
return -EBUSY;
}
+ if (br_opt_get(br, BROPT_MST_ENABLED)) {
+ NL_SET_ERR_MSG_MOD(extack, "Can't modify vlan state directly when MST is enabled");
+ return -EBUSY;
+ }
+
if (v->state == state)
return 0;
@@ -291,6 +296,7 @@ bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr,
const struct net_bridge_vlan *r_end)
{
return v_curr->vid - r_end->vid == 1 &&
+ v_curr->msti == r_end->msti &&
((v_curr->priv_flags ^ r_end->priv_flags) &
BR_VLFLAG_GLOBAL_MCAST_ENABLED) == 0 &&
br_multicast_ctx_options_equal(&v_curr->br_mcast_ctx,
@@ -379,6 +385,9 @@ bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range,
#endif
#endif
+ if (nla_put_u16(skb, BRIDGE_VLANDB_GOPTS_MSTI, v_opts->msti))
+ goto out_err;
+
nla_nest_end(skb, nest);
return true;
@@ -410,6 +419,7 @@ static size_t rtnl_vlan_global_opts_nlmsg_size(const struct net_bridge_vlan *v)
+ nla_total_size(0) /* BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS */
+ br_rports_size(&v->br_mcast_ctx) /* BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS */
#endif
+ + nla_total_size(sizeof(u16)) /* BRIDGE_VLANDB_GOPTS_MSTI */
+ nla_total_size(sizeof(u16)); /* BRIDGE_VLANDB_GOPTS_RANGE */
}
@@ -559,6 +569,15 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br,
}
#endif
#endif
+ if (tb[BRIDGE_VLANDB_GOPTS_MSTI]) {
+ u16 msti;
+
+ msti = nla_get_u16(tb[BRIDGE_VLANDB_GOPTS_MSTI]);
+ err = br_mst_vlan_set_msti(v, msti);
+ if (err)
+ return err;
+ *changed = true;
+ }
return 0;
}
@@ -578,6 +597,7 @@ static const struct nla_policy br_vlan_db_gpol[BRIDGE_VLANDB_GOPTS_MAX + 1] = {
[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL] = { .type = NLA_U64 },
[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL] = { .type = NLA_U64 },
[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL] = { .type = NLA_U64 },
+ [BRIDGE_VLANDB_GOPTS_MSTI] = NLA_POLICY_MAX(NLA_U16, VLAN_N_VID - 1),
};
int br_vlan_rtm_process_global_options(struct net_device *dev,
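
The new BRIDGE_VLANDB_GOPTS_MSTI policy entry uses NLA_POLICY_MAX, so MSTI values at or above VLAN_N_VID are rejected by the netlink core before br_mst_vlan_set_msti() runs. A minimal sketch of the same validation style; the MY_* attributes are hypothetical:

static const struct nla_policy my_policy[MY_ATTR_MAX + 1] = {
	[MY_ATTR_MSTI]  = NLA_POLICY_MAX(NLA_U16, VLAN_N_VID - 1),
	[MY_ATTR_STATE] = NLA_POLICY_RANGE(NLA_U8, BR_STATE_DISABLED,
					   BR_STATE_BLOCKING),
};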
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 1a11064f9990..8f19253024b0 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -36,18 +36,10 @@ static struct ebt_replace_kernel initial_table = {
.entries = (char *)&initial_chain,
};
-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
-{
- if (valid_hooks & ~(1 << NF_BR_BROUTING))
- return -EINVAL;
- return 0;
-}
-
static const struct ebt_table broute_table = {
.name = "broute",
.table = &initial_table,
.valid_hooks = 1 << NF_BR_BROUTING,
- .check = check,
.me = THIS_MODULE,
};
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index cb949436bc0e..278f324e6752 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -43,18 +43,10 @@ static struct ebt_replace_kernel initial_table = {
.entries = (char *)initial_chains,
};
-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
-{
- if (valid_hooks & ~FILTER_VALID_HOOKS)
- return -EINVAL;
- return 0;
-}
-
static const struct ebt_table frame_filter = {
.name = "filter",
.table = &initial_table,
.valid_hooks = FILTER_VALID_HOOKS,
- .check = check,
.me = THIS_MODULE,
};
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 5ee0531ae506..9066f7f376d5 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -43,18 +43,10 @@ static struct ebt_replace_kernel initial_table = {
.entries = (char *)initial_chains,
};
-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
-{
- if (valid_hooks & ~NAT_VALID_HOOKS)
- return -EINVAL;
- return 0;
-}
-
static const struct ebt_table frame_nat = {
.name = "nat",
.table = &initial_table,
.valid_hooks = NAT_VALID_HOOKS,
- .check = check,
.me = THIS_MODULE,
};
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index f2dbefb61ce8..ce5dfa3babd2 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1040,9 +1040,10 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
goto free_iterate;
}
- /* the table doesn't like it */
- if (t->check && (ret = t->check(newinfo, repl->valid_hooks)))
+ if (repl->valid_hooks != t->valid_hooks) {
+ ret = -EINVAL;
goto free_unlock;
+ }
if (repl->num_counters && repl->num_counters != t->private->nentries) {
ret = -EINVAL;
@@ -1231,11 +1232,6 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
if (ret != 0)
goto free_chainstack;
- if (table->check && table->check(newinfo, table->valid_hooks)) {
- ret = -EINVAL;
- goto free_chainstack;
- }
-
table->private = newinfo;
rwlock_init(&table->lock);
mutex_lock(&ebt_mutex);
@@ -1446,7 +1442,7 @@ static inline int ebt_obj_to_user(char __user *um, const char *_name,
/* ebtables expects 31 bytes long names but xt_match names are 29 bytes
* long. Copy 29 bytes and fill remaining bytes with zeroes.
*/
- strlcpy(name, _name, sizeof(name));
+ strscpy(name, _name, sizeof(name));
if (copy_to_user(um, name, EBT_EXTENSION_MAXNAMELEN) ||
put_user(revision, (u8 __user *)(um + EBT_EXTENSION_MAXNAMELEN)) ||
put_user(datasize, (int __user *)(um + EBT_EXTENSION_MAXNAMELEN + 1)) ||
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index fdbed3158555..73242962be5d 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -32,6 +32,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
struct sk_buff *))
{
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
+ bool mono_delivery_time = skb->mono_delivery_time;
unsigned int hlen, ll_rs, mtu;
ktime_t tstamp = skb->tstamp;
struct ip_frag_state state;
@@ -81,7 +82,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
if (iter.frag)
ip_fraglist_prepare(skb, &iter);
- skb->tstamp = tstamp;
+ skb_set_delivery_time(skb, tstamp, mono_delivery_time);
err = output(net, sk, data, skb);
if (err || !iter.frag)
break;
@@ -112,7 +113,7 @@ slow_path:
goto blackhole;
}
- skb2->tstamp = tstamp;
+ skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
err = output(net, sk, data, skb2);
if (err)
goto blackhole;
@@ -380,7 +381,7 @@ static unsigned int nf_ct_bridge_confirm(struct sk_buff *skb)
protoff = skb_network_offset(skb) + ip_hdrlen(skb);
break;
case htons(ETH_P_IPV6): {
- unsigned char pnum = ipv6_hdr(skb)->nexthdr;
+ unsigned char pnum = ipv6_hdr(skb)->nexthdr;
__be16 frag_off;
protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
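
The skb_set_delivery_time() changes above exist because skb->tstamp can carry either a wall-clock receive time or a monotonic EDT departure time, disambiguated by skb->mono_delivery_time; the fragmentation path clears the timestamp, so both values are saved up front and restored on every fragment. A condensed sketch of the pattern, where send_fragment() and the output callback signature are stand-ins:

static int send_fragment(struct net *net, struct sock *sk, void *arg,
			 struct sk_buff *frag, ktime_t tstamp, bool mono,
			 int (*output)(struct net *, struct sock *, void *,
				       struct sk_buff *))
{
	/* reapply the saved (possibly monotonic) delivery time */
	skb_set_delivery_time(frag, tstamp, mono);
	return output(net, sk, arg, frag);
}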
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index c1ef9cc89b78..c3ecd77e25cb 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -53,7 +53,7 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
goto err;
br_vlan_get_proto(br_dev, &p_proto);
- nft_reg_store16(dest, htons(p_proto));
+ nft_reg_store_be16(dest, htons(p_proto));
return;
}
default:
@@ -87,6 +87,7 @@ static int nft_meta_bridge_get_init(const struct nft_ctx *ctx,
return nft_meta_get_init(ctx, expr, tb);
}
+ priv->len = len;
return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg,
NULL, NFT_DATA_VALUE, len);
}
@@ -98,6 +99,7 @@ static const struct nft_expr_ops nft_meta_bridge_get_ops = {
.eval = nft_meta_bridge_get_eval,
.init = nft_meta_bridge_get_init,
.dump = nft_meta_get_dump,
+ .reduce = nft_meta_get_reduce,
};
static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track,
@@ -112,8 +114,7 @@ static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track,
if (track->regs[i].selector->ops != &nft_meta_bridge_get_ops)
continue;
- track->regs[i].selector = NULL;
- track->regs[i].bitwise = NULL;
+ __nft_reg_track_cancel(track, i);
}
return false;
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index eba0efe64d05..71b54fed7263 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -49,7 +49,7 @@ static void nft_reject_br_send_v4_tcp_reset(struct net *net,
{
struct sk_buff *nskb;
- nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, dev, hook);
+ nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, NULL, hook);
if (!nskb)
return;
@@ -65,7 +65,7 @@ static void nft_reject_br_send_v4_unreach(struct net *net,
{
struct sk_buff *nskb;
- nskb = nf_reject_skb_v4_unreach(net, oldskb, dev, hook, code);
+ nskb = nf_reject_skb_v4_unreach(net, oldskb, NULL, hook, code);
if (!nskb)
return;
@@ -81,7 +81,7 @@ static void nft_reject_br_send_v6_tcp_reset(struct net *net,
{
struct sk_buff *nskb;
- nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, dev, hook);
+ nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, NULL, hook);
if (!nskb)
return;
@@ -98,7 +98,7 @@ static void nft_reject_br_send_v6_unreach(struct net *net,
{
struct sk_buff *nskb;
- nskb = nf_reject_skb_v6_unreach(net, oldskb, dev, hook, code);
+ nskb = nf_reject_skb_v6_unreach(net, oldskb, NULL, hook, code);
if (!nskb)
return;
@@ -185,6 +185,7 @@ static const struct nft_expr_ops nft_reject_bridge_ops = {
.init = nft_reject_init,
.dump = nft_reject_dump,
.validate = nft_reject_bridge_validate,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_reject_bridge_type __read_mostly = {
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 440139706130..6a0cba4fc29f 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -268,7 +268,7 @@ static int receive(struct sk_buff *skb, struct net_device *dev,
err = caifd->layer.up->receive(caifd->layer.up, pkt);
- /* For -EILSEQ the packet is not freed so so it now */
+ /* For -EILSEQ the packet is not freed so free it now */
if (err == -EILSEQ)
cfpkt_destroy(pkt);
@@ -342,7 +342,7 @@ int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
mutex_lock(&caifdevs->lock);
list_add_rcu(&caifd->list, &caifdevs->list);
- strlcpy(caifd->layer.name, dev->name,
+ strscpy(caifd->layer.name, dev->name,
sizeof(caifd->layer.name));
caifd->layer.transmit = transmit;
res = cfcnfg_add_phy_layer(cfg,
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 2b8892d502f7..748be7253248 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -47,7 +47,7 @@ enum caif_states {
struct caifsock {
struct sock sk; /* must be first member */
struct cflayer layer;
- u32 flow_state;
+ unsigned long flow_state;
struct caif_connect_request conn_req;
struct mutex readlock;
struct dentry *debugfs_socket_dir;
@@ -56,38 +56,32 @@ struct caifsock {
static int rx_flow_is_on(struct caifsock *cf_sk)
{
- return test_bit(RX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ return test_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static int tx_flow_is_on(struct caifsock *cf_sk)
{
- return test_bit(TX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ return test_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void set_rx_flow_off(struct caifsock *cf_sk)
{
- clear_bit(RX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ clear_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void set_rx_flow_on(struct caifsock *cf_sk)
{
- set_bit(RX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ set_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void set_tx_flow_off(struct caifsock *cf_sk)
{
- clear_bit(TX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ clear_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void set_tx_flow_on(struct caifsock *cf_sk)
{
- set_bit(TX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ set_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void caif_read_lock(struct sock *sk)
@@ -282,7 +276,7 @@ static int caif_seqpkt_recvmsg(struct socket *sock, struct msghdr *m,
if (flags & MSG_OOB)
goto read_error;
- skb = skb_recv_datagram(sk, flags, 0 , &ret);
+ skb = skb_recv_datagram(sk, flags, &ret);
if (!skb)
goto read_error;
copylen = skb->len;
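
flow_state becomes unsigned long because the atomic bitops (test_bit(), set_bit(), clear_bit()) are defined on unsigned long words: aliasing a u32 through (void *) as the old code did can touch adjacent memory on 64-bit and computes wrong bit positions on big-endian. A minimal sketch with a hypothetical flag word:

struct conn_state {
	unsigned long flags; /* correct word type for bitops */
};

#define CONN_TX_ON_BIT 1

static bool tx_is_on(struct conn_state *c)
{
	return test_bit(CONN_TX_ON_BIT, &c->flags);
}

static void set_tx_on(struct conn_state *c)
{
	set_bit(CONN_TX_ON_BIT, &c->flags);
}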
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index 4be6b04879a1..ebc202ffdd8d 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -184,7 +184,7 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
dev_add_pack(&caif_usb_type);
pack_added = true;
- strlcpy(layer->name, dev->name, sizeof(layer->name));
+ strscpy(layer->name, dev->name, sizeof(layer->name));
return 0;
err:
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 23267c8db7c4..52509e185960 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -268,14 +268,14 @@ static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,
case CAIFPROTO_RFM:
l->linktype = CFCTRL_SRV_RFM;
l->u.datagram.connid = s->sockaddr.u.rfm.connection_id;
- strlcpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
+ strscpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
sizeof(l->u.rfm.volume));
break;
case CAIFPROTO_UTIL:
l->linktype = CFCTRL_SRV_UTIL;
l->endpoint = 0x00;
l->chtype = 0x00;
- strlcpy(l->u.utility.name, s->sockaddr.u.util.service,
+ strscpy(l->u.utility.name, s->sockaddr.u.util.service,
sizeof(l->u.utility.name));
caif_assert(sizeof(l->u.utility.name) > 10);
l->u.utility.paramlen = s->param.size;
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 2809cbd6b7f7..cc405d8c7c30 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -258,7 +258,7 @@ int cfctrl_linkup_request(struct cflayer *layer,
tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs);
cfpkt_add_body(pkt, &tmp16, 2);
memset(utility_name, 0, sizeof(utility_name));
- strlcpy(utility_name, param->u.utility.name,
+ strscpy(utility_name, param->u.utility.name,
UTILITY_NAME_LENGTH);
cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH);
tmp8 = param->u.utility.paramlen;
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 414dc5671c45..4d63ef13a1fd 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -99,7 +99,7 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
else
skb->ip_summed = CHECKSUM_NONE;
- netif_rx_any_context(skb);
+ netif_rx(skb);
/* Update statistics. */
priv->netdev->stats.rx_packets++;
diff --git a/net/can/Kconfig b/net/can/Kconfig
index a9ac5ffab286..cb56be8e3862 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -15,7 +15,8 @@ menuconfig CAN
PF_CAN is contained in <Documentation/networking/can.rst>.
If you want CAN support you should say Y here and also to the
- specific driver for your controller(s) below.
+ specific driver for your controller(s) under the Network device
+ support section.
if CAN
@@ -69,6 +70,4 @@ config CAN_ISOTP
If you want to perform automotive vehicle diagnostic services (UDS),
say 'y'.
-source "drivers/net/can/Kconfig"
-
endif
diff --git a/net/can/af_can.c b/net/can/af_can.c
index cce2af10eb3e..27dcdcc0b808 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -199,27 +199,26 @@ static int can_create(struct net *net, struct socket *sock, int protocol,
int can_send(struct sk_buff *skb, int loop)
{
struct sk_buff *newskb = NULL;
- struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
struct can_pkg_stats *pkg_stats = dev_net(skb->dev)->can.pkg_stats;
int err = -EINVAL;
- if (skb->len == CAN_MTU) {
+ if (can_is_canxl_skb(skb)) {
+ skb->protocol = htons(ETH_P_CANXL);
+ } else if (can_is_can_skb(skb)) {
skb->protocol = htons(ETH_P_CAN);
- if (unlikely(cfd->len > CAN_MAX_DLEN))
- goto inval_skb;
- } else if (skb->len == CANFD_MTU) {
+ } else if (can_is_canfd_skb(skb)) {
+ struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
+
skb->protocol = htons(ETH_P_CANFD);
- if (unlikely(cfd->len > CANFD_MAX_DLEN))
- goto inval_skb;
+
+ /* set CAN FD flag for CAN FD frames by default */
+ cfd->flags |= CANFD_FDF;
} else {
goto inval_skb;
}
- /* Make sure the CAN frame can pass the selected CAN netdevice.
- * As structs can_frame and canfd_frame are similar, we can provide
- * CAN FD frames to legacy CAN drivers as long as the length is <= 8
- */
- if (unlikely(skb->len > skb->dev->mtu && cfd->len > CAN_MAX_DLEN)) {
+ /* Make sure the CAN frame can pass the selected CAN netdevice. */
+ if (unlikely(skb->len > skb->dev->mtu)) {
err = -EMSGSIZE;
goto inval_skb;
}
@@ -284,7 +283,7 @@ int can_send(struct sk_buff *skb, int loop)
}
if (newskb)
- netif_rx_ni(newskb);
+ netif_rx(newskb);
/* update statistics */
pkg_stats->tx_frames++;
@@ -451,7 +450,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
/* insert new receiver (dev,canid,mask) -> (func,data) */
- if (dev && dev->type != ARPHRD_CAN)
+ if (dev && (dev->type != ARPHRD_CAN || !can_get_ml_priv(dev)))
return -ENODEV;
if (dev && !net_eq(net, dev_net(dev)))
@@ -678,53 +677,46 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
static int can_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
- struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
-
- if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU)) {
+ if (unlikely(dev->type != ARPHRD_CAN || (!can_is_can_skb(skb)))) {
pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n",
dev->type, skb->len);
- goto free_skb;
- }
- /* This check is made separately since cfd->len would be uninitialized if skb->len = 0. */
- if (unlikely(cfd->len > CAN_MAX_DLEN)) {
- pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d, datalen %d\n",
- dev->type, skb->len, cfd->len);
- goto free_skb;
+ kfree_skb(skb);
+ return NET_RX_DROP;
}
can_receive(skb, dev);
return NET_RX_SUCCESS;
-
-free_skb:
- kfree_skb(skb);
- return NET_RX_DROP;
}
static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
- struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
-
- if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU)) {
+ if (unlikely(dev->type != ARPHRD_CAN || (!can_is_canfd_skb(skb)))) {
pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n",
dev->type, skb->len);
- goto free_skb;
- }
- /* This check is made separately since cfd->len would be uninitialized if skb->len = 0. */
- if (unlikely(cfd->len > CANFD_MAX_DLEN)) {
- pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d, datalen %d\n",
- dev->type, skb->len, cfd->len);
- goto free_skb;
+ kfree_skb(skb);
+ return NET_RX_DROP;
}
can_receive(skb, dev);
return NET_RX_SUCCESS;
+}
-free_skb:
- kfree_skb(skb);
- return NET_RX_DROP;
+static int canxl_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev)
+{
+ if (unlikely(dev->type != ARPHRD_CAN || (!can_is_canxl_skb(skb)))) {
+ pr_warn_once("PF_CAN: dropped non conform CAN XL skbuff: dev type %d, len %d\n",
+ dev->type, skb->len);
+
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+
+ can_receive(skb, dev);
+ return NET_RX_SUCCESS;
}
/* af_can protocol functions */
@@ -851,6 +843,11 @@ static struct packet_type canfd_packet __read_mostly = {
.func = canfd_rcv,
};
+static struct packet_type canxl_packet __read_mostly = {
+ .type = cpu_to_be16(ETH_P_CANXL),
+ .func = canxl_rcv,
+};
+
static const struct net_proto_family can_family_ops = {
.family = PF_CAN,
.create = can_create,
@@ -890,6 +887,7 @@ static __init int can_init(void)
dev_add_pack(&can_packet);
dev_add_pack(&canfd_packet);
+ dev_add_pack(&canxl_packet);
return 0;
@@ -904,6 +902,7 @@ out_pernet:
static __exit void can_exit(void)
{
/* protocol unregister */
+ dev_remove_pack(&canxl_packet);
dev_remove_pack(&canfd_packet);
dev_remove_pack(&can_packet);
sock_unregister(PF_CAN);
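
can_send() and the receive handlers now classify frames with can_is_can_skb()/can_is_canfd_skb()/can_is_canxl_skb() instead of comparing skb->len against CAN_MTU/CANFD_MTU, which also covers CAN XL, whose frame length is variable. A simplified sketch in the spirit of those helpers, assuming the real ones additionally validate the embedded length and flag fields:

static bool is_classic_can(const struct sk_buff *skb)
{
	const struct can_frame *cf = (const struct can_frame *)skb->data;

	return skb->len == CAN_MTU && cf->len <= CAN_MAX_DLEN;
}

static bool is_canfd(const struct sk_buff *skb)
{
	const struct canfd_frame *cfd = (const struct canfd_frame *)skb->data;

	return skb->len == CANFD_MTU && cfd->len <= CANFD_MAX_DLEN;
}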
diff --git a/net/can/bcm.c b/net/can/bcm.c
index bc88d901a1c0..27706f6ace34 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -100,6 +100,7 @@ static inline u64 get_u64(const struct canfd_frame *cp, int offset)
struct bcm_op {
struct list_head list;
+ struct rcu_head rcu;
int ifindex;
canid_t can_id;
u32 flags;
@@ -193,7 +194,7 @@ static int bcm_proc_show(struct seq_file *m, void *v)
{
char ifname[IFNAMSIZ];
struct net *net = m->private;
- struct sock *sk = (struct sock *)PDE_DATA(m->file->f_inode);
+ struct sock *sk = (struct sock *)pde_data(m->file->f_inode);
struct bcm_sock *bo = bcm_sk(sk);
struct bcm_op *op;
@@ -273,6 +274,7 @@ static void bcm_can_tx(struct bcm_op *op)
struct sk_buff *skb;
struct net_device *dev;
struct canfd_frame *cf = op->frames + op->cfsiz * op->currframe;
+ int err;
/* no target device? => exit */
if (!op->ifindex)
@@ -297,11 +299,11 @@ static void bcm_can_tx(struct bcm_op *op)
/* send with loopback */
skb->dev = dev;
can_skb_set_owner(skb, op->sk);
- can_send(skb, 1);
+ err = can_send(skb, 1);
+ if (!err)
+ op->frames_abs++;
- /* update statistics */
op->currframe++;
- op->frames_abs++;
/* reached last frame? */
if (op->currframe >= op->nframes)
@@ -647,8 +649,13 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
return;
/* make sure to handle the correct frame type (CAN / CAN FD) */
- if (skb->len != op->cfsiz)
- return;
+ if (op->flags & CAN_FD_FRAME) {
+ if (!can_is_canfd_skb(skb))
+ return;
+ } else {
+ if (!can_is_can_skb(skb))
+ return;
+ }
/* disable timeout */
hrtimer_cancel(&op->timer);
@@ -718,10 +725,9 @@ static struct bcm_op *bcm_find_op(struct list_head *ops,
return NULL;
}
-static void bcm_remove_op(struct bcm_op *op)
+static void bcm_free_op_rcu(struct rcu_head *rcu_head)
{
- hrtimer_cancel(&op->timer);
- hrtimer_cancel(&op->thrtimer);
+ struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu);
if ((op->frames) && (op->frames != &op->sframe))
kfree(op->frames);
@@ -732,6 +738,14 @@ static void bcm_remove_op(struct bcm_op *op)
kfree(op);
}
+static void bcm_remove_op(struct bcm_op *op)
+{
+ hrtimer_cancel(&op->timer);
+ hrtimer_cancel(&op->thrtimer);
+
+ call_rcu(&op->rcu, bcm_free_op_rcu);
+}
+
static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op)
{
if (op->rx_reg_dev == dev) {
@@ -757,6 +771,9 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) &&
(op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) {
+ /* disable automatic timer on frame reception */
+ op->flags |= RX_NO_AUTOTIMER;
+
/*
* Don't care if we're bound or not (due to netdev
* problems) can_rx_unregister() is always a safe
@@ -785,7 +802,6 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
bcm_rx_handler, op);
list_del(&op->list);
- synchronize_rcu();
bcm_remove_op(op);
return 1; /* done */
}
@@ -1632,12 +1648,9 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
struct sock *sk = sock->sk;
struct sk_buff *skb;
int error = 0;
- int noblock;
int err;
- noblock = flags & MSG_DONTWAIT;
- flags &= ~MSG_DONTWAIT;
- skb = skb_recv_datagram(sk, flags, noblock, &error);
+ skb = skb_recv_datagram(sk, flags, &error);
if (!skb)
return error;
@@ -1650,7 +1663,7 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
return err;
}
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (msg->msg_name) {
__sockaddr_check_size(BCM_MIN_NAMELEN);
@@ -1737,15 +1750,27 @@ static int __init bcm_module_init(void)
pr_info("can: broadcast manager protocol\n");
+ err = register_pernet_subsys(&canbcm_pernet_ops);
+ if (err)
+ return err;
+
+ err = register_netdevice_notifier(&canbcm_notifier);
+ if (err)
+ goto register_notifier_failed;
+
err = can_proto_register(&bcm_can_proto);
if (err < 0) {
printk(KERN_ERR "can: registration of bcm protocol failed\n");
- return err;
+ goto register_proto_failed;
}
- register_pernet_subsys(&canbcm_pernet_ops);
- register_netdevice_notifier(&canbcm_notifier);
return 0;
+
+register_proto_failed:
+ unregister_netdevice_notifier(&canbcm_notifier);
+register_notifier_failed:
+ unregister_pernet_subsys(&canbcm_pernet_ops);
+ return err;
}
static void __exit bcm_module_exit(void)
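
bcm_remove_op() still cancels the hrtimers synchronously but now defers the kfree() through call_rcu(), so a concurrent bcm_rx_handler() walking the op list under RCU can keep dereferencing the op until a grace period passes; this replaces list_del() + synchronize_rcu(), which blocked the caller. The generic shape of the pattern, with hypothetical names:

struct my_obj {
	struct list_head list;
	struct rcu_head rcu;
};

static void my_obj_free_rcu(struct rcu_head *rcu_head)
{
	struct my_obj *o = container_of(rcu_head, struct my_obj, rcu);

	kfree(o);
}

static void my_obj_release(struct my_obj *o)
{
	list_del_rcu(&o->list);             /* unpublish first */
	call_rcu(&o->rcu, my_obj_free_rcu); /* free after grace period */
}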
diff --git a/net/can/gw.c b/net/can/gw.c
index d8861e862f15..23a3d89cad81 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -463,10 +463,10 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
/* process strictly Classic CAN or CAN FD frames */
if (gwj->flags & CGW_FLAGS_CAN_FD) {
- if (skb->len != CANFD_MTU)
+ if (!can_is_canfd_skb(skb))
return;
} else {
- if (skb->len != CAN_MTU)
+ if (!can_is_can_skb(skb))
return;
}
@@ -577,6 +577,13 @@ static inline void cgw_unregister_filter(struct net *net, struct cgw_job *gwj)
gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj);
}
+static void cgw_job_free_rcu(struct rcu_head *rcu_head)
+{
+ struct cgw_job *gwj = container_of(rcu_head, struct cgw_job, rcu);
+
+ kmem_cache_free(cgw_cache, gwj);
+}
+
static int cgw_notifier(struct notifier_block *nb,
unsigned long msg, void *ptr)
{
@@ -596,8 +603,7 @@ static int cgw_notifier(struct notifier_block *nb,
if (gwj->src.dev == dev || gwj->dst.dev == dev) {
hlist_del(&gwj->list);
cgw_unregister_filter(net, gwj);
- synchronize_rcu();
- kmem_cache_free(cgw_cache, gwj);
+ call_rcu(&gwj->rcu, cgw_job_free_rcu);
}
}
}
@@ -1155,8 +1161,7 @@ static void cgw_remove_all_jobs(struct net *net)
hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
hlist_del(&gwj->list);
cgw_unregister_filter(net, gwj);
- synchronize_rcu();
- kmem_cache_free(cgw_cache, gwj);
+ call_rcu(&gwj->rcu, cgw_job_free_rcu);
}
}
@@ -1224,8 +1229,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh,
hlist_del(&gwj->list);
cgw_unregister_filter(net, gwj);
- synchronize_rcu();
- kmem_cache_free(cgw_cache, gwj);
+ call_rcu(&gwj->rcu, cgw_job_free_rcu);
err = 0;
break;
}
@@ -1239,16 +1243,19 @@ static int __net_init cangw_pernet_init(struct net *net)
return 0;
}
-static void __net_exit cangw_pernet_exit(struct net *net)
+static void __net_exit cangw_pernet_exit_batch(struct list_head *net_list)
{
+ struct net *net;
+
rtnl_lock();
- cgw_remove_all_jobs(net);
+ list_for_each_entry(net, net_list, exit_list)
+ cgw_remove_all_jobs(net);
rtnl_unlock();
}
static struct pernet_operations cangw_pernet_ops = {
.init = cangw_pernet_init,
- .exit = cangw_pernet_exit,
+ .exit_batch = cangw_pernet_exit_batch,
};
static __init int cgw_module_init(void)
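
Moving can-gw from .exit to .exit_batch lets cleanup_net() hand over a whole list of dying namespaces at once, so the rtnl lock is taken once per batch instead of once per namespace. A sketch of the shape, with my_cleanup() hypothetical:

static void __net_exit my_pernet_exit_batch(struct list_head *net_list)
{
	struct net *net;

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list)
		my_cleanup(net); /* per-namespace teardown */
	rtnl_unlock();
}

static struct pernet_operations my_pernet_ops = {
	.exit_batch = my_pernet_exit_batch,
};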
diff --git a/net/can/isotp.c b/net/can/isotp.c
index 02cbcb2ecf0d..608f8c24ae46 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -14,7 +14,6 @@
* - use CAN_ISOTP_WAIT_TX_DONE flag to block the caller until the PDU is sent
* - as we have static buffers the check whether the PDU fits into the buffer
* is done at FF reception time (no support for sending 'wait frames')
- * - take care of the tx-queue-len as traffic shaping is still on the TODO list
*
* Copyright (c) 2020 Volkswagen Group Electronic Research
* All rights reserved.
@@ -56,6 +55,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/interrupt.h>
+#include <linux/spinlock.h>
#include <linux/hrtimer.h>
#include <linux/wait.h>
#include <linux/uio.h>
@@ -86,9 +86,9 @@ MODULE_ALIAS("can-proto-6");
/* ISO 15765-2:2016 supports more than 4095 byte per ISO PDU as the FF_DL can
* take full 32 bit values (4 Gbyte). We would need some good concept to handle
* this between user space and kernel space. For now increase the static buffer
- * to something about 8 kbyte to be able to test this new functionality.
+ * to something about 64 kbyte to be able to test this new functionality.
*/
-#define MAX_MSG_LENGTH 8200
+#define MAX_MSG_LENGTH 66000
/* N_PCI type values in bits 7-4 of N_PCI bytes */
#define N_PCI_SF 0x00 /* single frame */
@@ -104,12 +104,16 @@ MODULE_ALIAS("can-proto-6");
#define FC_CONTENT_SZ 3 /* flow control content size in byte (FS/BS/STmin) */
#define ISOTP_CHECK_PADDING (CAN_ISOTP_CHK_PAD_LEN | CAN_ISOTP_CHK_PAD_DATA)
+#define ISOTP_ALL_BC_FLAGS (CAN_ISOTP_SF_BROADCAST | CAN_ISOTP_CF_BROADCAST)
/* Flow Status given in FC frame */
#define ISOTP_FC_CTS 0 /* clear to send */
#define ISOTP_FC_WT 1 /* wait */
#define ISOTP_FC_OVFLW 2 /* overflow */
+#define ISOTP_FC_TIMEOUT 1 /* 1 sec */
+#define ISOTP_ECHO_TIMEOUT 2 /* 2 secs */
+
enum {
ISOTP_IDLE = 0,
ISOTP_WAIT_FIRST_FC,
@@ -140,11 +144,14 @@ struct isotp_sock {
struct can_isotp_options opt;
struct can_isotp_fc_options rxfc, txfc;
struct can_isotp_ll_options ll;
+ u32 frame_txtime;
u32 force_tx_stmin;
u32 force_rx_stmin;
+ u32 cfecho; /* consecutive frame echo tag */
struct tpcon rx, tx;
struct list_head notifier;
wait_queue_head_t wait;
+ spinlock_t rx_lock; /* protect single thread state machine */
};
static LIST_HEAD(isotp_notifier_list);
@@ -156,6 +163,23 @@ static inline struct isotp_sock *isotp_sk(const struct sock *sk)
return (struct isotp_sock *)sk;
}
+static u32 isotp_bc_flags(struct isotp_sock *so)
+{
+ return so->opt.flags & ISOTP_ALL_BC_FLAGS;
+}
+
+static bool isotp_register_rxid(struct isotp_sock *so)
+{
+ /* no broadcast modes => register rx_id for FC frame reception */
+ return (isotp_bc_flags(so) == 0);
+}
+
+static bool isotp_register_txecho(struct isotp_sock *so)
+{
+ /* all modes but SF_BROADCAST register for tx echo skbs */
+ return (isotp_bc_flags(so) != CAN_ISOTP_SF_BROADCAST);
+}
+
static enum hrtimer_restart isotp_rx_timer_handler(struct hrtimer *hrtimer)
{
struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
@@ -237,7 +261,8 @@ static int isotp_send_fc(struct sock *sk, int ae, u8 flowstatus)
so->lastrxcf_tstamp = ktime_set(0, 0);
/* start rx timeout watchdog */
- hrtimer_start(&so->rxtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
+ hrtimer_start(&so->rxtimer, ktime_set(ISOTP_FC_TIMEOUT, 0),
+ HRTIMER_MODE_REL_SOFT);
return 0;
}
@@ -323,6 +348,8 @@ static int check_pad(struct isotp_sock *so, struct canfd_frame *cf,
return 0;
}
+static void isotp_send_cframe(struct isotp_sock *so);
+
static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae)
{
struct sock *sk = &so->sk;
@@ -358,7 +385,7 @@ static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae)
so->tx_gap = ktime_set(0, 0);
/* add transmission time for CAN frame N_As */
- so->tx_gap = ktime_add_ns(so->tx_gap, so->opt.frame_txtime);
+ so->tx_gap = ktime_add_ns(so->tx_gap, so->frame_txtime);
/* add waiting time for consecutive frames N_Cs */
if (so->opt.flags & CAN_ISOTP_FORCE_TXSTMIN)
so->tx_gap = ktime_add_ns(so->tx_gap,
@@ -377,14 +404,15 @@ static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae)
case ISOTP_FC_CTS:
so->tx.bs = 0;
so->tx.state = ISOTP_SENDING;
- /* start cyclic timer for sending CF frame */
- hrtimer_start(&so->txtimer, so->tx_gap,
+ /* send CF frame and enable echo timeout handling */
+ hrtimer_start(&so->txtimer, ktime_set(ISOTP_ECHO_TIMEOUT, 0),
HRTIMER_MODE_REL_SOFT);
+ isotp_send_cframe(so);
break;
case ISOTP_FC_WT:
/* start timer to wait for next FC frame */
- hrtimer_start(&so->txtimer, ktime_set(1, 0),
+ hrtimer_start(&so->txtimer, ktime_set(ISOTP_FC_TIMEOUT, 0),
HRTIMER_MODE_REL_SOFT);
break;
@@ -579,7 +607,7 @@ static int isotp_rcv_cf(struct sock *sk, struct canfd_frame *cf, int ae,
/* perform blocksize handling, if enabled */
if (!so->rxfc.bs || ++so->rx.bs < so->rxfc.bs) {
/* start rx timeout watchdog */
- hrtimer_start(&so->rxtimer, ktime_set(1, 0),
+ hrtimer_start(&so->rxtimer, ktime_set(ISOTP_FC_TIMEOUT, 0),
HRTIMER_MODE_REL_SOFT);
return 0;
}
@@ -615,11 +643,17 @@ static void isotp_rcv(struct sk_buff *skb, void *data)
n_pci_type = cf->data[ae] & 0xF0;
+ /* Make sure the state changes and data structures stay consistent at
+ * CAN frame reception time. This locking is not needed in real world
+ * use cases but the inconsistency can be triggered with syzkaller.
+ */
+ spin_lock(&so->rx_lock);
+
if (so->opt.flags & CAN_ISOTP_HALF_DUPLEX) {
/* check rx/tx path half duplex expectations */
if ((so->tx.state != ISOTP_IDLE && n_pci_type != N_PCI_FC) ||
(so->rx.state != ISOTP_IDLE && n_pci_type == N_PCI_FC))
- return;
+ goto out_unlock;
}
switch (n_pci_type) {
@@ -642,7 +676,7 @@ static void isotp_rcv(struct sk_buff *skb, void *data)
if (cf->len <= CAN_MAX_DLEN) {
isotp_rcv_sf(sk, cf, SF_PCI_SZ4 + ae, skb, sf_dl);
} else {
- if (skb->len == CANFD_MTU) {
+ if (can_is_canfd_skb(skb)) {
/* We have a CAN FD frame and CAN_DL is greater than 8:
* Only frames with the SF_DL == 0 ESC value are valid.
*
@@ -668,6 +702,9 @@ static void isotp_rcv(struct sk_buff *skb, void *data)
isotp_rcv_cf(sk, cf, ae, skb);
break;
}
+
+out_unlock:
+ spin_unlock(&so->rx_lock);
}
static void isotp_fill_dataframe(struct canfd_frame *cf, struct isotp_sock *so,
@@ -701,6 +738,63 @@ static void isotp_fill_dataframe(struct canfd_frame *cf, struct isotp_sock *so,
cf->data[0] = so->opt.ext_address;
}
+static void isotp_send_cframe(struct isotp_sock *so)
+{
+ struct sock *sk = &so->sk;
+ struct sk_buff *skb;
+ struct net_device *dev;
+ struct canfd_frame *cf;
+ int can_send_ret;
+ int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0;
+
+ dev = dev_get_by_index(sock_net(sk), so->ifindex);
+ if (!dev)
+ return;
+
+ skb = alloc_skb(so->ll.mtu + sizeof(struct can_skb_priv), GFP_ATOMIC);
+ if (!skb) {
+ dev_put(dev);
+ return;
+ }
+
+ can_skb_reserve(skb);
+ can_skb_prv(skb)->ifindex = dev->ifindex;
+ can_skb_prv(skb)->skbcnt = 0;
+
+ cf = (struct canfd_frame *)skb->data;
+ skb_put_zero(skb, so->ll.mtu);
+
+ /* create consecutive frame */
+ isotp_fill_dataframe(cf, so, ae, 0);
+
+ /* place consecutive frame N_PCI in appropriate index */
+ cf->data[ae] = N_PCI_CF | so->tx.sn++;
+ so->tx.sn %= 16;
+ so->tx.bs++;
+
+ cf->flags = so->ll.tx_flags;
+
+ skb->dev = dev;
+ can_skb_set_owner(skb, sk);
+
+ /* cfecho should have been zero'ed by init/isotp_rcv_echo() */
+ if (so->cfecho)
+ pr_notice_once("can-isotp: cfecho is %08X != 0\n", so->cfecho);
+
+ /* set consecutive frame echo tag */
+ so->cfecho = *(u32 *)cf->data;
+
+ /* send frame with local echo enabled */
+ can_send_ret = can_send(skb, 1);
+ if (can_send_ret) {
+ pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
+ __func__, ERR_PTR(can_send_ret));
+ if (can_send_ret == -ENOBUFS)
+ pr_notice_once("can-isotp: tx queue is full\n");
+ }
+ dev_put(dev);
+}
+
static void isotp_create_fframe(struct canfd_frame *cf, struct isotp_sock *so,
int ae)
{
@@ -734,7 +828,50 @@ static void isotp_create_fframe(struct canfd_frame *cf, struct isotp_sock *so,
cf->data[i] = so->tx.buf[so->tx.idx++];
so->tx.sn = 1;
- so->tx.state = ISOTP_WAIT_FIRST_FC;
+}
+
+static void isotp_rcv_echo(struct sk_buff *skb, void *data)
+{
+ struct sock *sk = (struct sock *)data;
+ struct isotp_sock *so = isotp_sk(sk);
+ struct canfd_frame *cf = (struct canfd_frame *)skb->data;
+
+ /* only handle my own local echo CF/SF skb's (no FF!) */
+ if (skb->sk != sk || so->cfecho != *(u32 *)cf->data)
+ return;
+
+ /* cancel local echo timeout */
+ hrtimer_cancel(&so->txtimer);
+
+ /* local echo skb with consecutive frame has been consumed */
+ so->cfecho = 0;
+
+ if (so->tx.idx >= so->tx.len) {
+ /* we are done */
+ so->tx.state = ISOTP_IDLE;
+ wake_up_interruptible(&so->wait);
+ return;
+ }
+
+ if (so->txfc.bs && so->tx.bs >= so->txfc.bs) {
+ /* stop and wait for FC with timeout */
+ so->tx.state = ISOTP_WAIT_FC;
+ hrtimer_start(&so->txtimer, ktime_set(ISOTP_FC_TIMEOUT, 0),
+ HRTIMER_MODE_REL_SOFT);
+ return;
+ }
+
+ /* no gap between data frames needed => use burst mode */
+ if (!so->tx_gap) {
+ /* enable echo timeout handling */
+ hrtimer_start(&so->txtimer, ktime_set(ISOTP_ECHO_TIMEOUT, 0),
+ HRTIMER_MODE_REL_SOFT);
+ isotp_send_cframe(so);
+ return;
+ }
+
+ /* start timer to send next consecutive frame with correct delay */
+ hrtimer_start(&so->txtimer, so->tx_gap, HRTIMER_MODE_REL_SOFT);
}
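
With the echo handling above, a PDU transmission only completes once
the local echo of the last CF has been consumed, so user space can
rely on CAN_ISOTP_WAIT_TX_DONE for fully synchronous sends. A minimal
user space sketch (interface name and CAN IDs are examples):

    #include <string.h>
    #include <unistd.h>
    #include <net/if.h>
    #include <sys/socket.h>
    #include <linux/can.h>
    #include <linux/can/isotp.h>

    int main(void)
    {
            int s = socket(PF_CAN, SOCK_DGRAM, CAN_ISOTP);
            struct can_isotp_options opts = {
                    .flags = CAN_ISOTP_WAIT_TX_DONE,
            };
            struct sockaddr_can addr = {
                    .can_family = AF_CAN,
                    .can_ifindex = if_nametoindex("can0"),
                    .can_addr.tp = { .tx_id = 0x712, .rx_id = 0x71a },
            };
            unsigned char pdu[100];

            memset(pdu, 0x55, sizeof(pdu));
            setsockopt(s, SOL_CAN_ISOTP, CAN_ISOTP_OPTS, &opts, sizeof(opts));
            if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0)
                    return 1;
            /* returns only after the FF/CF segmentation finished */
            write(s, pdu, sizeof(pdu));
            close(s);
            return 0;
    }
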
static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
@@ -742,14 +879,28 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
txtimer);
struct sock *sk = &so->sk;
- struct sk_buff *skb;
- struct net_device *dev;
- struct canfd_frame *cf;
enum hrtimer_restart restart = HRTIMER_NORESTART;
- int can_send_ret;
- int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0;
switch (so->tx.state) {
+ case ISOTP_SENDING:
+
+ /* cfecho should be consumed by isotp_rcv_echo() here */
+ if (!so->cfecho) {
+ /* start timeout for unlikely lost echo skb */
+ hrtimer_set_expires(&so->txtimer,
+ ktime_add(ktime_get(),
+ ktime_set(ISOTP_ECHO_TIMEOUT, 0)));
+ restart = HRTIMER_RESTART;
+
+ /* push out the next consecutive frame */
+ isotp_send_cframe(so);
+ break;
+ }
+
+ /* cfecho has not been cleared in isotp_rcv_echo() */
+ pr_notice_once("can-isotp: cfecho %08X timeout\n", so->cfecho);
+ fallthrough;
+
case ISOTP_WAIT_FC:
case ISOTP_WAIT_FIRST_FC:
@@ -765,80 +916,9 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
wake_up_interruptible(&so->wait);
break;
- case ISOTP_SENDING:
-
- /* push out the next segmented pdu */
- dev = dev_get_by_index(sock_net(sk), so->ifindex);
- if (!dev)
- break;
-
-isotp_tx_burst:
- skb = alloc_skb(so->ll.mtu + sizeof(struct can_skb_priv),
- GFP_ATOMIC);
- if (!skb) {
- dev_put(dev);
- break;
- }
-
- can_skb_reserve(skb);
- can_skb_prv(skb)->ifindex = dev->ifindex;
- can_skb_prv(skb)->skbcnt = 0;
-
- cf = (struct canfd_frame *)skb->data;
- skb_put_zero(skb, so->ll.mtu);
-
- /* create consecutive frame */
- isotp_fill_dataframe(cf, so, ae, 0);
-
- /* place consecutive frame N_PCI in appropriate index */
- cf->data[ae] = N_PCI_CF | so->tx.sn++;
- so->tx.sn %= 16;
- so->tx.bs++;
-
- cf->flags = so->ll.tx_flags;
-
- skb->dev = dev;
- can_skb_set_owner(skb, sk);
-
- can_send_ret = can_send(skb, 1);
- if (can_send_ret) {
- pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
- __func__, ERR_PTR(can_send_ret));
- if (can_send_ret == -ENOBUFS)
- pr_notice_once("can-isotp: tx queue is full, increasing txqueuelen may prevent this error\n");
- }
- if (so->tx.idx >= so->tx.len) {
- /* we are done */
- so->tx.state = ISOTP_IDLE;
- dev_put(dev);
- wake_up_interruptible(&so->wait);
- break;
- }
-
- if (so->txfc.bs && so->tx.bs >= so->txfc.bs) {
- /* stop and wait for FC */
- so->tx.state = ISOTP_WAIT_FC;
- dev_put(dev);
- hrtimer_set_expires(&so->txtimer,
- ktime_add(ktime_get(),
- ktime_set(1, 0)));
- restart = HRTIMER_RESTART;
- break;
- }
-
- /* no gap between data frames needed => use burst mode */
- if (!so->tx_gap)
- goto isotp_tx_burst;
-
- /* start timer to send next data frame with correct delay */
- dev_put(dev);
- hrtimer_set_expires(&so->txtimer,
- ktime_add(ktime_get(), so->tx_gap));
- restart = HRTIMER_RESTART;
- break;
-
default:
- WARN_ON_ONCE(1);
+ WARN_ONCE(1, "can-isotp: tx timer state %08X cfecho %08X\n",
+ so->tx.state, so->cfecho);
}
return restart;
@@ -854,6 +934,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
struct canfd_frame *cf;
int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0;
int wait_tx_done = (so->opt.flags & CAN_ISOTP_WAIT_TX_DONE) ? 1 : 0;
+ s64 hrtimer_sec = ISOTP_ECHO_TIMEOUT;
int off;
int err;
@@ -872,38 +953,40 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
if (err)
goto err_out;
+
+ so->tx.state = ISOTP_SENDING;
}
if (!size || size > MAX_MSG_LENGTH) {
err = -EINVAL;
- goto err_out;
+ goto err_out_drop;
}
/* take care of a potential SF_DL ESC offset for TX_DL > 8 */
off = (so->tx.ll_dl > CAN_MAX_DLEN) ? 1 : 0;
/* does the given data fit into a single frame for SF_BROADCAST? */
- if ((so->opt.flags & CAN_ISOTP_SF_BROADCAST) &&
+ if ((isotp_bc_flags(so) == CAN_ISOTP_SF_BROADCAST) &&
(size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off)) {
err = -EINVAL;
- goto err_out;
+ goto err_out_drop;
}
err = memcpy_from_msg(so->tx.buf, msg, size);
if (err < 0)
- goto err_out;
+ goto err_out_drop;
dev = dev_get_by_index(sock_net(sk), so->ifindex);
if (!dev) {
err = -ENXIO;
- goto err_out;
+ goto err_out_drop;
}
skb = sock_alloc_send_skb(sk, so->ll.mtu + sizeof(struct can_skb_priv),
msg->msg_flags & MSG_DONTWAIT, &err);
if (!skb) {
dev_put(dev);
- goto err_out;
+ goto err_out_drop;
}
can_skb_reserve(skb);
@@ -916,6 +999,10 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
cf = (struct canfd_frame *)skb->data;
skb_put_zero(skb, so->ll.mtu);
+ /* cfecho should have been zeroed by init / former isotp_rcv_echo() */
+ if (so->cfecho)
+ pr_notice_once("can-isotp: uninit cfecho %08X\n", so->cfecho);
+
/* check for single frame transmission depending on TX_DL */
if (size <= so->tx.ll_dl - SF_PCI_SZ4 - ae - off) {
/* The message size generally fits into a SingleFrame - good.
@@ -941,20 +1028,40 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
else
cf->data[ae] |= size;
- so->tx.state = ISOTP_IDLE;
- wake_up_interruptible(&so->wait);
-
- /* don't enable wait queue for a single frame transmission */
- wait_tx_done = 0;
+ /* set CF echo tag for isotp_rcv_echo() (SF-mode) */
+ so->cfecho = *(u32 *)cf->data;
} else {
- /* send first frame and wait for FC */
+ /* send first frame */
isotp_create_fframe(cf, so, ae);
- /* start timeout for FC */
- hrtimer_start(&so->txtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
+ if (isotp_bc_flags(so) == CAN_ISOTP_CF_BROADCAST) {
+ /* set timer for FC-less operation (STmin = 0) */
+ if (so->opt.flags & CAN_ISOTP_FORCE_TXSTMIN)
+ so->tx_gap = ktime_set(0, so->force_tx_stmin);
+ else
+ so->tx_gap = ktime_set(0, so->frame_txtime);
+
+ /* disable wait for FCs due to activated block size */
+ so->txfc.bs = 0;
+
+ /* set CF echo tag for isotp_rcv_echo() (CF-mode) */
+ so->cfecho = *(u32 *)cf->data;
+ } else {
+ /* standard flow control check */
+ so->tx.state = ISOTP_WAIT_FIRST_FC;
+
+ /* start timeout for FC */
+ hrtimer_sec = ISOTP_FC_TIMEOUT;
+
+ /* no CF echo tag for isotp_rcv_echo() (FF-mode) */
+ so->cfecho = 0;
+ }
}
+ hrtimer_start(&so->txtimer, ktime_set(hrtimer_sec, 0),
+ HRTIMER_MODE_REL_SOFT);
+
/* send the first or only CAN frame */
cf->flags = so->ll.tx_flags;
@@ -965,7 +1072,14 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (err) {
pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
__func__, ERR_PTR(err));
- goto err_out;
+
+ /* no transmission -> no timeout monitoring */
+ hrtimer_cancel(&so->txtimer);
+
+ /* reset consecutive frame echo tag */
+ so->cfecho = 0;
+
+ goto err_out_drop;
}
if (wait_tx_done) {
@@ -978,6 +1092,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
return size;
+err_out_drop:
+ /* drop this PDU and unlock a potential wait queue */
+ old_state = ISOTP_IDLE;
err_out:
so->tx.state = old_state;
if (so->tx.state == ISOTP_IDLE)
@@ -991,26 +1108,27 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
{
struct sock *sk = sock->sk;
struct sk_buff *skb;
- int err = 0;
- int noblock;
+ struct isotp_sock *so = isotp_sk(sk);
+ int ret = 0;
- noblock = flags & MSG_DONTWAIT;
- flags &= ~MSG_DONTWAIT;
+ if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK))
+ return -EINVAL;
+
+ if (!so->bound)
+ return -EADDRNOTAVAIL;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &ret);
if (!skb)
- return err;
+ return ret;
if (size < skb->len)
msg->msg_flags |= MSG_TRUNC;
else
size = skb->len;
- err = memcpy_to_msg(msg, skb->data, size);
- if (err < 0) {
- skb_free_datagram(sk, skb);
- return err;
- }
+ ret = memcpy_to_msg(msg, skb->data, size);
+ if (ret < 0)
+ goto out_err;
sock_recv_timestamp(msg, sk, skb);
@@ -1020,9 +1138,13 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
}
+ /* set length of return value */
+ ret = (flags & MSG_TRUNC) ? skb->len : size;
+
+out_err:
skb_free_datagram(sk, skb);
- return size;
+ return ret;
}
static int isotp_release(struct socket *sock)
@@ -1052,15 +1174,20 @@ static int isotp_release(struct socket *sock)
lock_sock(sk);
/* remove current filters & unregister */
- if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST))) {
+ if (so->bound && isotp_register_txecho(so)) {
if (so->ifindex) {
struct net_device *dev;
dev = dev_get_by_index(net, so->ifindex);
if (dev) {
- can_rx_unregister(net, dev, so->rxid,
- SINGLE_MASK(so->rxid),
- isotp_rcv, sk);
+ if (isotp_register_rxid(so))
+ can_rx_unregister(net, dev, so->rxid,
+ SINGLE_MASK(so->rxid),
+ isotp_rcv, sk);
+
+ can_rx_unregister(net, dev, so->txid,
+ SINGLE_MASK(so->txid),
+ isotp_rcv_echo, sk);
dev_put(dev);
synchronize_rcu();
}
@@ -1090,42 +1217,51 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
struct net *net = sock_net(sk);
int ifindex;
struct net_device *dev;
+ canid_t tx_id = addr->can_addr.tp.tx_id;
+ canid_t rx_id = addr->can_addr.tp.rx_id;
int err = 0;
int notify_enetdown = 0;
- int do_rx_reg = 1;
if (len < ISOTP_MIN_NAMELEN)
return -EINVAL;
- if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG))
- return -EADDRNOTAVAIL;
+ /* sanitize tx CAN identifier */
+ if (tx_id & CAN_EFF_FLAG)
+ tx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK);
+ else
+ tx_id &= CAN_SFF_MASK;
+
+ /* give feedback on wrong CAN-ID value */
+ if (tx_id != addr->can_addr.tp.tx_id)
+ return -EINVAL;
+
+ /* sanitize rx CAN identifier (if needed) */
+ if (isotp_register_rxid(so)) {
+ if (rx_id & CAN_EFF_FLAG)
+ rx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK);
+ else
+ rx_id &= CAN_SFF_MASK;
+
+ /* give feedback on wrong CAN-ID value */
+ if (rx_id != addr->can_addr.tp.rx_id)
+ return -EINVAL;
+ }
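
The sanitizing above accepts a CAN ID only when its flag bits are
consistent: an extended ID may carry exactly CAN_EFF_FLAG plus 29 ID
bits, a standard ID only its 11 ID bits. A standalone sketch of the
rule (sanitize_can_id is an illustrative name):

    #include <linux/can.h>

    /* returns the sanitized CAN ID, or -1 when stray flag/ID bits make
     * the masked value differ from the input (the -EINVAL case above) */
    static long long sanitize_can_id(canid_t id)
    {
            canid_t clean = (id & CAN_EFF_FLAG) ?
                    (id & (CAN_EFF_FLAG | CAN_EFF_MASK)) : (id & CAN_SFF_MASK);

            return (clean == id) ? (long long)clean : -1;
    }
    /* sanitize_can_id(0x123)                               == 0x123
     * sanitize_can_id(CAN_ERR_FLAG | 0x123)                == -1
     * sanitize_can_id(CAN_EFF_FLAG | 0x1234567)            == 0x81234567
     * sanitize_can_id(CAN_EFF_FLAG | CAN_RTR_FLAG | 0x123) == -1
     */
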
if (!addr->can_ifindex)
return -ENODEV;
lock_sock(sk);
- /* do not register frame reception for functional addressing */
- if (so->opt.flags & CAN_ISOTP_SF_BROADCAST)
- do_rx_reg = 0;
-
- /* do not validate rx address for functional addressing */
- if (do_rx_reg) {
- if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id) {
- err = -EADDRNOTAVAIL;
- goto out;
- }
-
- if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) {
- err = -EADDRNOTAVAIL;
- goto out;
- }
+ if (so->bound) {
+ err = -EINVAL;
+ goto out;
}
- if (so->bound && addr->can_ifindex == so->ifindex &&
- addr->can_addr.tp.rx_id == so->rxid &&
- addr->can_addr.tp.tx_id == so->txid)
+ /* ensure different CAN IDs when the rx_id is to be registered */
+ if (isotp_register_rxid(so) && rx_id == tx_id) {
+ err = -EADDRNOTAVAIL;
goto out;
+ }
dev = dev_get_by_index(net, addr->can_ifindex);
if (!dev) {
@@ -1147,30 +1283,25 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
ifindex = dev->ifindex;
- if (do_rx_reg)
- can_rx_register(net, dev, addr->can_addr.tp.rx_id,
- SINGLE_MASK(addr->can_addr.tp.rx_id),
+ if (isotp_register_rxid(so))
+ can_rx_register(net, dev, rx_id, SINGLE_MASK(rx_id),
isotp_rcv, sk, "isotp", sk);
- dev_put(dev);
+ if (isotp_register_txecho(so)) {
+ /* no consecutive frame echo skb in flight */
+ so->cfecho = 0;
- if (so->bound && do_rx_reg) {
- /* unregister old filter */
- if (so->ifindex) {
- dev = dev_get_by_index(net, so->ifindex);
- if (dev) {
- can_rx_unregister(net, dev, so->rxid,
- SINGLE_MASK(so->rxid),
- isotp_rcv, sk);
- dev_put(dev);
- }
- }
+ /* register for echo skbs */
+ can_rx_register(net, dev, tx_id, SINGLE_MASK(tx_id),
+ isotp_rcv_echo, sk, "isotpe", sk);
}
+ dev_put(dev);
+
/* switch to new settings */
so->ifindex = ifindex;
- so->rxid = addr->can_addr.tp.rx_id;
- so->txid = addr->can_addr.tp.tx_id;
+ so->rxid = rx_id;
+ so->txid = tx_id;
so->bound = 1;
out:
@@ -1224,6 +1355,23 @@ static int isotp_setsockopt_locked(struct socket *sock, int level, int optname,
/* no separate rx_ext_address is given => use ext_address */
if (!(so->opt.flags & CAN_ISOTP_RX_EXT_ADDR))
so->opt.rx_ext_address = so->opt.ext_address;
+
+ /* these broadcast flags are not allowed together */
+ if (isotp_bc_flags(so) == ISOTP_ALL_BC_FLAGS) {
+ /* CAN_ISOTP_SF_BROADCAST is prioritized */
+ so->opt.flags &= ~CAN_ISOTP_CF_BROADCAST;
+
+ /* give user feedback on wrong config attempt */
+ ret = -EINVAL;
+ }
+
+ /* check for frame_txtime changes (0 => no changes) */
+ if (so->opt.frame_txtime) {
+ if (so->opt.frame_txtime == CAN_ISOTP_FRAME_TXTIME_ZERO)
+ so->frame_txtime = 0;
+ else
+ so->frame_txtime = so->opt.frame_txtime;
+ }
break;
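
Because a frame_txtime of 0 now means "keep the current value", user
space needs the dedicated magic constant to request a real zero
N_As/N_Ar gap. A minimal sketch, assuming s is an already-created
CAN_ISOTP socket:

    #include <sys/socket.h>
    #include <linux/can/isotp.h>

    struct can_isotp_options opts = {
            /* 0 would keep the old value; this forces frame_txtime = 0 */
            .frame_txtime = CAN_ISOTP_FRAME_TXTIME_ZERO,
    };
    setsockopt(s, SOL_CAN_ISOTP, CAN_ISOTP_OPTS, &opts, sizeof(opts));
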
case CAN_ISOTP_RECV_FC:
@@ -1367,10 +1515,16 @@ static void isotp_notify(struct isotp_sock *so, unsigned long msg,
case NETDEV_UNREGISTER:
lock_sock(sk);
/* remove current filters & unregister */
- if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST)))
- can_rx_unregister(dev_net(dev), dev, so->rxid,
- SINGLE_MASK(so->rxid),
- isotp_rcv, sk);
+ if (so->bound && isotp_register_txecho(so)) {
+ if (isotp_register_rxid(so))
+ can_rx_unregister(dev_net(dev), dev, so->rxid,
+ SINGLE_MASK(so->rxid),
+ isotp_rcv, sk);
+
+ can_rx_unregister(dev_net(dev), dev, so->txid,
+ SINGLE_MASK(so->txid),
+ isotp_rcv_echo, sk);
+ }
so->ifindex = 0;
so->bound = 0;
@@ -1425,6 +1579,7 @@ static int isotp_init(struct sock *sk)
so->opt.rxpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT;
so->opt.txpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT;
so->opt.frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME;
+ so->frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME;
so->rxfc.bs = CAN_ISOTP_DEFAULT_RECV_BS;
so->rxfc.stmin = CAN_ISOTP_DEFAULT_RECV_STMIN;
so->rxfc.wftmax = CAN_ISOTP_DEFAULT_RECV_WFTMAX;
@@ -1444,6 +1599,7 @@ static int isotp_init(struct sock *sk)
so->txtimer.function = isotp_tx_timer_handler;
init_waitqueue_head(&so->wait);
+ spin_lock_init(&so->rx_lock);
spin_lock(&isotp_notifier_lock);
list_add_tail(&so->notifier, &isotp_notifier_list);
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index 8452b0fbb78c..821d4ff303b3 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -42,6 +42,10 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data)
struct j1939_sk_buff_cb *skcb, *iskcb;
struct can_frame *cf;
+ /* make sure we only get Classical CAN frames */
+ if (!can_is_can_skb(iskb))
+ return;
+
/* create a copy of the skb
* j1939 only delivers the real data bytes,
* the header goes into sockaddr.
@@ -332,6 +336,9 @@ int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb)
/* re-claim the CAN_HDR from the SKB */
cf = skb_push(skb, J1939_CAN_HDR);
+ /* initialize header structure */
+ memset(cf, 0, J1939_CAN_HDR);
+
/* make it a full can frame again */
skb_put(skb, J1939_CAN_FTR + (8 - dlc));
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 6dff4510687a..b670ba03a675 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -178,7 +178,10 @@ activate_next:
if (!first)
return;
- if (WARN_ON_ONCE(j1939_session_activate(first))) {
+ if (j1939_session_activate(first)) {
+ netdev_warn_once(first->priv->ndev,
+ "%s: 0x%p: Identical session is already activated.\n",
+ __func__, first);
first->err = -EBUSY;
goto activate_next;
} else {
@@ -802,7 +805,7 @@ static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg,
return sock_recv_errqueue(sock->sk, msg, size, SOL_CAN_J1939,
SCM_J1939_ERRQUEUE);
- skb = skb_recv_datagram(sk, flags, 0, &ret);
+ skb = skb_recv_datagram(sk, flags, &ret);
if (!skb)
return ret;
@@ -841,7 +844,7 @@ static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg,
paddr->can_addr.j1939.pgn = skcb->addr.pgn;
}
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
msg->msg_flags |= skcb->msg_flags;
skb_free_datagram(sk, skb);
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index a271688780a2..55f29c9f9e08 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -260,6 +260,8 @@ static void __j1939_session_drop(struct j1939_session *session)
static void j1939_session_destroy(struct j1939_session *session)
{
+ struct sk_buff *skb;
+
if (session->transmission) {
if (session->err)
j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ABORT);
@@ -274,7 +276,11 @@ static void j1939_session_destroy(struct j1939_session *session)
WARN_ON_ONCE(!list_empty(&session->sk_session_queue_entry));
WARN_ON_ONCE(!list_empty(&session->active_session_list_entry));
- skb_queue_purge(&session->skb_queue);
+ while ((skb = skb_dequeue(&session->skb_queue)) != NULL) {
+ /* drop ref taken in j1939_session_skb_queue() */
+ skb_unref(skb);
+ kfree_skb(skb);
+ }
__j1939_session_drop(session);
j1939_priv_put(session->priv);
kfree(session);
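
(The dequeue loop added above is needed because each skb on the
session queue still carries the extra reference taken via skb_get()
in j1939_session_skb_queue(); a plain skb_queue_purge() would drop
only one reference per skb and leak the memory.)
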
@@ -336,10 +342,12 @@ static void j1939_session_skb_drop_old(struct j1939_session *session)
__skb_unlink(do_skb, &session->skb_queue);
/* drop ref taken in j1939_session_skb_queue() */
skb_unref(do_skb);
+ spin_unlock_irqrestore(&session->skb_queue.lock, flags);
kfree_skb(do_skb);
+ } else {
+ spin_unlock_irqrestore(&session->skb_queue.lock, flags);
}
- spin_unlock_irqrestore(&session->skb_queue.lock, flags);
}
void j1939_session_skb_queue(struct j1939_session *session,
@@ -2006,7 +2014,7 @@ struct j1939_session *j1939_tp_send(struct j1939_priv *priv,
/* set the end-packet for broadcast */
session->pkt.last = session->pkt.total;
- skcb->tskey = session->sk->sk_tskey++;
+ skcb->tskey = atomic_inc_return(&session->sk->sk_tskey) - 1;
session->tskey = skcb->tskey;
return session;
diff --git a/net/can/proc.c b/net/can/proc.c
index b3099f0a3cb8..bbce97825f13 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -305,7 +305,7 @@ static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx,
static int can_rcvlist_proc_show(struct seq_file *m, void *v)
{
/* double cast to prevent GCC warning */
- int idx = (int)(long)PDE_DATA(m->file->f_inode);
+ int idx = (int)(long)pde_data(m->file->f_inode);
struct net_device *dev;
struct can_dev_rcv_lists *dev_rcv_lists;
struct net *net = m->private;
diff --git a/net/can/raw.c b/net/can/raw.c
index 7105fa4824e4..3eb7d3e2b541 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -50,6 +50,7 @@
#include <linux/skbuff.h>
#include <linux/can.h>
#include <linux/can/core.h>
+#include <linux/can/dev.h> /* for can_is_canxl_dev_mtu() */
#include <linux/can/skb.h>
#include <linux/can/raw.h>
#include <net/sock.h>
@@ -87,6 +88,7 @@ struct raw_sock {
int loopback;
int recv_own_msgs;
int fd_frames;
+ int xl_frames;
int join_filters;
int count; /* number of active filters */
struct can_filter dfilter; /* default/single filter */
@@ -129,21 +131,21 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
if (!ro->recv_own_msgs && oskb->sk == sk)
return;
- /* do not pass non-CAN2.0 frames to a legacy socket */
- if (!ro->fd_frames && oskb->len != CAN_MTU)
+ /* make sure to not pass oversized frames to the socket */
+ if ((can_is_canfd_skb(oskb) && !ro->fd_frames && !ro->xl_frames) ||
+ (can_is_canxl_skb(oskb) && !ro->xl_frames))
return;
/* eliminate multiple filter matches for the same skb */
if (this_cpu_ptr(ro->uniq)->skb == oskb &&
this_cpu_ptr(ro->uniq)->skbcnt == can_skb_prv(oskb)->skbcnt) {
- if (ro->join_filters) {
- this_cpu_inc(ro->uniq->join_rx_count);
- /* drop frame until all enabled filters matched */
- if (this_cpu_ptr(ro->uniq)->join_rx_count < ro->count)
- return;
- } else {
+ if (!ro->join_filters)
+ return;
+
+ this_cpu_inc(ro->uniq->join_rx_count);
+ /* drop frame until all enabled filters matched */
+ if (this_cpu_ptr(ro->uniq)->join_rx_count < ro->count)
return;
- }
} else {
this_cpu_ptr(ro->uniq)->skb = oskb;
this_cpu_ptr(ro->uniq)->skbcnt = can_skb_prv(oskb)->skbcnt;
@@ -346,6 +348,7 @@ static int raw_init(struct sock *sk)
ro->loopback = 1;
ro->recv_own_msgs = 0;
ro->fd_frames = 0;
+ ro->xl_frames = 0;
ro->join_filters = 0;
/* alloc_percpu provides zeroed memory */
@@ -669,6 +672,15 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
break;
+ case CAN_RAW_XL_FRAMES:
+ if (optlen != sizeof(ro->xl_frames))
+ return -EINVAL;
+
+ if (copy_from_sockptr(&ro->xl_frames, optval, optlen))
+ return -EFAULT;
+
+ break;
+
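
Like CAN_RAW_FD_FRAMES, the new option is a plain integer switch; once
enabled, the socket passes Classical CAN, CAN FD and CAN XL frames
(subject to the device MTU checks in raw_sendmsg() below). A minimal
sketch, assuming s is a bound CAN_RAW socket:

    #include <sys/socket.h>
    #include <linux/can/raw.h>

    int enable = 1;

    /* opt in to CAN XL traffic on this socket */
    setsockopt(s, SOL_CAN_RAW, CAN_RAW_XL_FRAMES, &enable, sizeof(enable));
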
case CAN_RAW_JOIN_FILTERS:
if (optlen != sizeof(ro->join_filters))
return -EINVAL;
@@ -751,6 +763,12 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
val = &ro->fd_frames;
break;
+ case CAN_RAW_XL_FRAMES:
+ if (len > sizeof(int))
+ len = sizeof(int);
+ val = &ro->xl_frames;
+ break;
+
case CAN_RAW_JOIN_FILTERS:
if (len > sizeof(int))
len = sizeof(int);
@@ -772,10 +790,15 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
struct raw_sock *ro = raw_sk(sk);
+ struct sockcm_cookie sockc;
struct sk_buff *skb;
struct net_device *dev;
int ifindex;
- int err;
+ int err = -EINVAL;
+
+ /* check for valid CAN frame sizes */
+ if (size < CANXL_HDR_SIZE + CANXL_MIN_DLEN || size > CANXL_MTU)
+ return -EINVAL;
if (msg->msg_name) {
DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name);
@@ -795,15 +818,6 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (!dev)
return -ENXIO;
- err = -EINVAL;
- if (ro->fd_frames && dev->mtu == CANFD_MTU) {
- if (unlikely(size != CANFD_MTU && size != CAN_MTU))
- goto put_dev;
- } else {
- if (unlikely(size != CAN_MTU))
- goto put_dev;
- }
-
skb = sock_alloc_send_skb(sk, size + sizeof(struct can_skb_priv),
msg->msg_flags & MSG_DONTWAIT, &err);
if (!skb)
@@ -813,15 +827,39 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
can_skb_prv(skb)->ifindex = dev->ifindex;
can_skb_prv(skb)->skbcnt = 0;
+ /* fill the skb before testing for valid CAN frames */
err = memcpy_from_msg(skb_put(skb, size), msg, size);
if (err < 0)
goto free_skb;
- skb_setup_tx_timestamp(skb, sk->sk_tsflags);
+ err = -EINVAL;
+ if (ro->xl_frames && can_is_canxl_dev_mtu(dev->mtu)) {
+ /* CAN XL, CAN FD and Classical CAN */
+ if (!can_is_canxl_skb(skb) && !can_is_canfd_skb(skb) &&
+ !can_is_can_skb(skb))
+ goto free_skb;
+ } else if (ro->fd_frames && dev->mtu == CANFD_MTU) {
+ /* CAN FD and Classical CAN */
+ if (!can_is_canfd_skb(skb) && !can_is_can_skb(skb))
+ goto free_skb;
+ } else {
+ /* Classical CAN */
+ if (!can_is_can_skb(skb))
+ goto free_skb;
+ }
+
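
With the checks above, the write() size is no longer pinned to CAN_MTU
or CANFD_MTU: a CAN XL frame is sent as its header plus exactly len
data bytes. A hedged sketch of a valid CAN XL write, assuming s has
CAN_RAW_XL_FRAMES enabled and the device is configured for CAN XL:

    #include <unistd.h>
    #include <linux/can.h>

    struct canxl_frame xlf = {
            .prio  = 0x123,     /* 11-bit priority (replaces the CAN ID) */
            .flags = CANXL_XLF, /* mandatory: marks the frame as CAN XL */
            .sdt   = 0x00,      /* SDU type */
            .len   = 2048,      /* 1..CANXL_MAX_DLEN data bytes */
    };

    /* can_is_canxl_skb() expects exactly header + payload length */
    write(s, &xlf, CANXL_HDR_SIZE + xlf.len);
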
+ sockcm_init(&sockc, sk);
+ if (msg->msg_controllen) {
+ err = sock_cmsg_send(sk, msg, &sockc);
+ if (unlikely(err))
+ goto free_skb;
+ }
skb->dev = dev;
- skb->sk = sk;
skb->priority = sk->sk_priority;
+ skb->tstamp = sockc.transmit_time;
+
+ skb_setup_tx_timestamp(skb, sockc.tsflags);
err = can_send(skb, ro->loopback);
@@ -846,16 +884,12 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
struct sock *sk = sock->sk;
struct sk_buff *skb;
int err = 0;
- int noblock;
-
- noblock = flags & MSG_DONTWAIT;
- flags &= ~MSG_DONTWAIT;
if (flags & MSG_ERRQUEUE)
return sock_recv_errqueue(sk, msg, size,
SOL_CAN_RAW, SCM_CAN_RAW_ERRQUEUE);
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
return err;
@@ -870,7 +904,7 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
return err;
}
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (msg->msg_name) {
__sockaddr_check_size(RAW_MIN_NAMELEN);
@@ -938,12 +972,20 @@ static __init int raw_module_init(void)
pr_info("can: raw protocol\n");
+ err = register_netdevice_notifier(&canraw_notifier);
+ if (err)
+ return err;
+
err = can_proto_register(&raw_can_proto);
- if (err < 0)
+ if (err < 0) {
pr_err("can: registration of raw protocol failed\n");
- else
- register_netdevice_notifier(&canraw_notifier);
+ goto register_proto_failed;
+ }
+
+ return 0;
+register_proto_failed:
+ unregister_netdevice_notifier(&canraw_notifier);
return err;
}
diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c
index 5622763ad402..7e51f128045d 100644
--- a/net/ceph/buffer.c
+++ b/net/ceph/buffer.c
@@ -7,7 +7,7 @@
#include <linux/ceph/buffer.h>
#include <linux/ceph/decode.h>
-#include <linux/ceph/libceph.h> /* for ceph_kvmalloc */
+#include <linux/ceph/libceph.h> /* for kvmalloc */
struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
{
@@ -17,7 +17,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
if (!b)
return NULL;
- b->vec.iov_base = ceph_kvmalloc(len, gfp);
+ b->vec.iov_base = kvmalloc(len, gfp);
if (!b->vec.iov_base) {
kfree(b);
return NULL;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 97d6ea763e32..4c6441536d55 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -190,41 +190,14 @@ int ceph_compare_options(struct ceph_options *new_opt,
}
EXPORT_SYMBOL(ceph_compare_options);
-/*
- * kvmalloc() doesn't fall back to the vmalloc allocator unless flags are
- * compatible with (a superset of) GFP_KERNEL. This is because while the
- * actual pages are allocated with the specified flags, the page table pages
- * are always allocated with GFP_KERNEL.
- *
- * ceph_kvmalloc() may be called with GFP_KERNEL, GFP_NOFS or GFP_NOIO.
- */
-void *ceph_kvmalloc(size_t size, gfp_t flags)
-{
- void *p;
-
- if ((flags & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) {
- p = kvmalloc(size, flags);
- } else if ((flags & (__GFP_IO | __GFP_FS)) == __GFP_IO) {
- unsigned int nofs_flag = memalloc_nofs_save();
- p = kvmalloc(size, GFP_KERNEL);
- memalloc_nofs_restore(nofs_flag);
- } else {
- unsigned int noio_flag = memalloc_noio_save();
- p = kvmalloc(size, GFP_KERNEL);
- memalloc_noio_restore(noio_flag);
- }
-
- return p;
-}
-
-static int parse_fsid(const char *str, struct ceph_fsid *fsid)
+int ceph_parse_fsid(const char *str, struct ceph_fsid *fsid)
{
int i = 0;
char tmp[3];
int err = -EINVAL;
int d;
- dout("parse_fsid '%s'\n", str);
+ dout("%s '%s'\n", __func__, str);
tmp[2] = 0;
while (*str && i < 16) {
if (ispunct(*str)) {
@@ -244,9 +217,10 @@ static int parse_fsid(const char *str, struct ceph_fsid *fsid)
if (i == 16)
err = 0;
- dout("parse_fsid ret %d got fsid %pU\n", err, fsid);
+ dout("%s ret %d got fsid %pU\n", __func__, err, fsid);
return err;
}
+EXPORT_SYMBOL(ceph_parse_fsid);
/*
* ceph options
@@ -272,6 +246,7 @@ enum {
Opt_cephx_sign_messages,
Opt_tcp_nodelay,
Opt_abort_on_full,
+ Opt_rxbounce,
};
enum {
@@ -321,6 +296,7 @@ static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_u32 ("osdkeepalive", Opt_osdkeepalivetimeout),
fsparam_enum ("read_from_replica", Opt_read_from_replica,
ceph_param_read_from_replica),
+ fsparam_flag ("rxbounce", Opt_rxbounce),
fsparam_enum ("ms_mode", Opt_ms_mode,
ceph_param_ms_mode),
fsparam_string ("secret", Opt_secret),
@@ -422,14 +398,14 @@ out:
}
int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt,
- struct fc_log *l)
+ struct fc_log *l, char delim)
{
struct p_log log = {.prefix = "libceph", .log = l};
int ret;
- /* ip1[:port1][,ip2[:port2]...] */
+ /* ip1[:port1][<delim>ip2[:port2]...] */
ret = ceph_parse_ips(buf, buf + len, opt->mon_addr, CEPH_MAX_MON,
- &opt->num_mon);
+ &opt->num_mon, delim);
if (ret) {
error_plog(&log, "Failed to parse monitor IPs: %d", ret);
return ret;
@@ -455,8 +431,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
case Opt_ip:
err = ceph_parse_ips(param->string,
param->string + param->size,
- &opt->my_addr,
- 1, NULL);
+ &opt->my_addr, 1, NULL, ',');
if (err) {
error_plog(&log, "Failed to parse ip: %d", err);
return err;
@@ -465,7 +440,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
break;
case Opt_fsid:
- err = parse_fsid(param->string, &opt->fsid);
+ err = ceph_parse_fsid(param->string, &opt->fsid);
if (err) {
error_plog(&log, "Failed to parse fsid: %d", err);
return err;
@@ -611,6 +586,9 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
case Opt_abort_on_full:
opt->flags |= CEPH_OPT_ABORT_ON_FULL;
break;
+ case Opt_rxbounce:
+ opt->flags |= CEPH_OPT_RXBOUNCE;
+ break;
default:
BUG();
@@ -687,6 +665,8 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
seq_puts(m, "notcp_nodelay,");
if (show_all && (opt->flags & CEPH_OPT_ABORT_ON_FULL))
seq_puts(m, "abort_on_full,");
+ if (opt->flags & CEPH_OPT_RXBOUNCE)
+ seq_puts(m, "rxbounce,");
if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
seq_printf(m, "mount_timeout=%d,",
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 7057f8db4f99..1daf95e17d67 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -906,7 +906,6 @@ int crush_do_rule(const struct crush_map *map,
int recurse_to_leaf;
int wsize = 0;
int osize;
- int *tmp;
const struct crush_rule *rule;
__u32 step;
int i, j;
@@ -1073,9 +1072,7 @@ int crush_do_rule(const struct crush_map *map,
memcpy(o, c, osize*sizeof(*o));
/* swap o and w arrays */
- tmp = o;
- o = w;
- w = tmp;
+ swap(o, w);
wsize = osize;
break;
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 92d89b331645..051d22c0e4ad 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -147,7 +147,7 @@ void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
/*
- * Should be used for buffers allocated with ceph_kvmalloc().
+ * Should be used for buffers allocated with kvmalloc().
* Currently these are encrypt out-buffer (ceph_buffer) and decrypt
* in-buffer (msg front).
*
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 57d043b382ed..dfa237fbd5a3 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -515,6 +515,10 @@ static void ceph_con_reset_protocol(struct ceph_connection *con)
ceph_msg_put(con->out_msg);
con->out_msg = NULL;
}
+ if (con->bounce_page) {
+ __free_page(con->bounce_page);
+ con->bounce_page = NULL;
+ }
if (ceph_msgr2(from_msgr(con->msgr)))
ceph_con_v2_reset_protocol(con);
@@ -724,7 +728,6 @@ static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor,
it->iter.bi_size = cursor->resid;
BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
- cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
}
static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
@@ -750,10 +753,8 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
cursor->resid -= bytes;
bio_advance_iter(it->bio, &it->iter, bytes);
- if (!cursor->resid) {
- BUG_ON(!cursor->last_piece);
+ if (!cursor->resid)
return false; /* no more data */
- }
if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done &&
page == bio_iter_page(it->bio, it->iter)))
@@ -766,9 +767,7 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
it->iter.bi_size = cursor->resid;
}
- BUG_ON(cursor->last_piece);
BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
- cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
return true;
}
#endif /* CONFIG_BLOCK */
@@ -784,8 +783,6 @@ static void ceph_msg_data_bvecs_cursor_init(struct ceph_msg_data_cursor *cursor,
cursor->bvec_iter.bi_size = cursor->resid;
BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter));
- cursor->last_piece =
- cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter);
}
static struct page *ceph_msg_data_bvecs_next(struct ceph_msg_data_cursor *cursor,
@@ -811,19 +808,14 @@ static bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor,
cursor->resid -= bytes;
bvec_iter_advance(bvecs, &cursor->bvec_iter, bytes);
- if (!cursor->resid) {
- BUG_ON(!cursor->last_piece);
+ if (!cursor->resid)
return false; /* no more data */
- }
if (!bytes || (cursor->bvec_iter.bi_bvec_done &&
page == bvec_iter_page(bvecs, cursor->bvec_iter)))
return false; /* more bytes to process in this segment */
- BUG_ON(cursor->last_piece);
BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter));
- cursor->last_piece =
- cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter);
return true;
}
@@ -849,7 +841,6 @@ static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor,
BUG_ON(page_count > (int)USHRT_MAX);
cursor->page_count = (unsigned short)page_count;
BUG_ON(length > SIZE_MAX - cursor->page_offset);
- cursor->last_piece = cursor->page_offset + cursor->resid <= PAGE_SIZE;
}
static struct page *
@@ -864,11 +855,7 @@ ceph_msg_data_pages_next(struct ceph_msg_data_cursor *cursor,
BUG_ON(cursor->page_offset >= PAGE_SIZE);
*page_offset = cursor->page_offset;
- if (cursor->last_piece)
- *length = cursor->resid;
- else
- *length = PAGE_SIZE - *page_offset;
-
+ *length = min_t(size_t, cursor->resid, PAGE_SIZE - *page_offset);
return data->pages[cursor->page_index];
}
@@ -893,8 +880,6 @@ static bool ceph_msg_data_pages_advance(struct ceph_msg_data_cursor *cursor,
BUG_ON(cursor->page_index >= cursor->page_count);
cursor->page_index++;
- cursor->last_piece = cursor->resid <= PAGE_SIZE;
-
return true;
}
@@ -924,7 +909,6 @@ ceph_msg_data_pagelist_cursor_init(struct ceph_msg_data_cursor *cursor,
cursor->resid = min(length, pagelist->length);
cursor->page = page;
cursor->offset = 0;
- cursor->last_piece = cursor->resid <= PAGE_SIZE;
}
static struct page *
@@ -944,11 +928,7 @@ ceph_msg_data_pagelist_next(struct ceph_msg_data_cursor *cursor,
/* offset of first page in pagelist is always 0 */
*page_offset = cursor->offset & ~PAGE_MASK;
- if (cursor->last_piece)
- *length = cursor->resid;
- else
- *length = PAGE_SIZE - *page_offset;
-
+ *length = min_t(size_t, cursor->resid, PAGE_SIZE - *page_offset);
return cursor->page;
}
@@ -981,8 +961,6 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
cursor->page = list_next_entry(cursor->page, lru);
- cursor->last_piece = cursor->resid <= PAGE_SIZE;
-
return true;
}
@@ -1040,8 +1018,7 @@ void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor,
* Indicate whether this is the last piece in this data item.
*/
struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
- size_t *page_offset, size_t *length,
- bool *last_piece)
+ size_t *page_offset, size_t *length)
{
struct page *page;
@@ -1070,8 +1047,6 @@ struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
BUG_ON(*page_offset + *length > PAGE_SIZE);
BUG_ON(!*length);
BUG_ON(*length > cursor->resid);
- if (last_piece)
- *last_piece = cursor->last_piece;
return page;
}
@@ -1108,7 +1083,6 @@ void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes)
cursor->total_resid -= bytes;
if (!cursor->resid && cursor->total_resid) {
- WARN_ON(!cursor->last_piece);
cursor->data++;
__ceph_msg_data_cursor_init(cursor);
new_piece = true;
@@ -1267,30 +1241,31 @@ static int ceph_parse_server_name(const char *name, size_t namelen,
*/
int ceph_parse_ips(const char *c, const char *end,
struct ceph_entity_addr *addr,
- int max_count, int *count)
+ int max_count, int *count, char delim)
{
int i, ret = -EINVAL;
const char *p = c;
dout("parse_ips on '%.*s'\n", (int)(end-c), c);
for (i = 0; i < max_count; i++) {
+ char cur_delim = delim;
const char *ipend;
int port;
- char delim = ',';
if (*p == '[') {
- delim = ']';
+ cur_delim = ']';
p++;
}
- ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend);
+ ret = ceph_parse_server_name(p, end - p, &addr[i], cur_delim,
+ &ipend);
if (ret)
goto bad;
ret = -EINVAL;
p = ipend;
- if (delim == ']') {
+ if (cur_delim == ']') {
if (*p != ']') {
dout("missing matching ']'\n");
goto bad;
@@ -1326,11 +1301,11 @@ int ceph_parse_ips(const char *c, const char *end,
addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY;
addr[i].nonce = 0;
- dout("parse_ips got %s\n", ceph_pr_addr(&addr[i]));
+ dout("%s got %s\n", __func__, ceph_pr_addr(&addr[i]));
if (p == end)
break;
- if (*p != ',')
+ if (*p != delim)
goto bad;
p++;
}
@@ -1920,7 +1895,7 @@ struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
/* front */
if (front_len) {
- m->front.iov_base = ceph_kvmalloc(front_len, flags);
+ m->front.iov_base = kvmalloc(front_len, flags);
if (m->front.iov_base == NULL) {
dout("ceph_msg_new can't allocate %d bytes\n",
front_len);
diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c
index 2cb5ffdf071a..3ddbde87e4d6 100644
--- a/net/ceph/messenger_v1.c
+++ b/net/ceph/messenger_v1.c
@@ -495,7 +495,7 @@ static int write_partial_message_data(struct ceph_connection *con)
continue;
}
- page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
+ page = ceph_msg_data_next(cursor, &page_offset, &length);
if (length == cursor->total_resid)
more = MSG_MORE;
ret = ceph_tcp_sendpage(con->sock, page, page_offset, length,
@@ -992,8 +992,7 @@ static int read_partial_message_section(struct ceph_connection *con,
static int read_partial_msg_data(struct ceph_connection *con)
{
- struct ceph_msg *msg = con->in_msg;
- struct ceph_msg_data_cursor *cursor = &msg->cursor;
+ struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
struct page *page;
size_t page_offset;
@@ -1001,9 +1000,6 @@ static int read_partial_msg_data(struct ceph_connection *con)
u32 crc = 0;
int ret;
- if (!msg->num_data_items)
- return -EIO;
-
if (do_datacrc)
crc = con->in_data_crc;
while (cursor->total_resid) {
@@ -1012,7 +1008,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
continue;
}
- page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
+ page = ceph_msg_data_next(cursor, &page_offset, &length);
ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
if (ret <= 0) {
if (do_datacrc)
@@ -1031,6 +1027,46 @@ static int read_partial_msg_data(struct ceph_connection *con)
return 1; /* must return > 0 to indicate success */
}
+static int read_partial_msg_data_bounce(struct ceph_connection *con)
+{
+ struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
+ struct page *page;
+ size_t off, len;
+ u32 crc;
+ int ret;
+
+ if (unlikely(!con->bounce_page)) {
+ con->bounce_page = alloc_page(GFP_NOIO);
+ if (!con->bounce_page) {
+ pr_err("failed to allocate bounce page\n");
+ return -ENOMEM;
+ }
+ }
+
+ crc = con->in_data_crc;
+ while (cursor->total_resid) {
+ if (!cursor->resid) {
+ ceph_msg_data_advance(cursor, 0);
+ continue;
+ }
+
+ page = ceph_msg_data_next(cursor, &off, &len);
+ ret = ceph_tcp_recvpage(con->sock, con->bounce_page, 0, len);
+ if (ret <= 0) {
+ con->in_data_crc = crc;
+ return ret;
+ }
+
+ crc = crc32c(crc, page_address(con->bounce_page), ret);
+ memcpy_to_page(page, off, page_address(con->bounce_page), ret);
+
+ ceph_msg_data_advance(cursor, ret);
+ }
+ con->in_data_crc = crc;
+
+ return 1; /* must return > 0 to indicate success */
+}
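
The bounce variant exists because a receive page may be mapped by user
space (e.g. an RBD data buffer) and can change while it is being
checksummed, producing spurious CRC errors. With rxbounce the data is
received into a connection-private page, the CRC is computed over that
stable copy, and only then is the data published. A condensed model of
the pattern (names are illustrative):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    uint32_t crc32c(uint32_t crc, const void *buf, size_t len); /* assumed */

    /* checksum the private copy first, then publish it to the
     * destination that may be concurrently mapped elsewhere */
    static uint32_t publish_bounced(void *dst, const void *bounce,
                                    size_t len, uint32_t crc)
    {
            crc = crc32c(crc, bounce, len);
            memcpy(dst, bounce, len);
            return crc;
    }
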
+
/*
* read (part of) a message.
*/
@@ -1141,7 +1177,13 @@ static int read_partial_message(struct ceph_connection *con)
/* (page) data */
if (data_len) {
- ret = read_partial_msg_data(con);
+ if (!m->num_data_items)
+ return -EIO;
+
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
+ ret = read_partial_msg_data_bounce(con);
+ else
+ ret = read_partial_msg_data(con);
if (ret <= 0)
return ret;
}
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index cc40ce4e02fb..cc8ff81a50b7 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -57,8 +57,9 @@
#define IN_S_HANDLE_CONTROL_REMAINDER 3
#define IN_S_PREPARE_READ_DATA 4
#define IN_S_PREPARE_READ_DATA_CONT 5
-#define IN_S_HANDLE_EPILOGUE 6
-#define IN_S_FINISH_SKIP 7
+#define IN_S_PREPARE_READ_ENC_PAGE 6
+#define IN_S_HANDLE_EPILOGUE 7
+#define IN_S_FINISH_SKIP 8
#define OUT_S_QUEUE_DATA 1
#define OUT_S_QUEUE_DATA_CONT 2
@@ -308,7 +309,7 @@ static void *alloc_conn_buf(struct ceph_connection *con, int len)
if (WARN_ON(con->v2.conn_buf_cnt >= ARRAY_SIZE(con->v2.conn_bufs)))
return NULL;
- buf = ceph_kvmalloc(len, GFP_NOIO);
+ buf = kvmalloc(len, GFP_NOIO);
if (!buf)
return NULL;
@@ -861,7 +862,7 @@ static void get_bvec_at(struct ceph_msg_data_cursor *cursor,
ceph_msg_data_advance(cursor, 0);
/* get a piece of data, cursor isn't advanced */
- page = ceph_msg_data_next(cursor, &off, &len, NULL);
+ page = ceph_msg_data_next(cursor, &off, &len);
bv->bv_page = page;
bv->bv_offset = off;
@@ -1032,22 +1033,41 @@ static int decrypt_control_remainder(struct ceph_connection *con)
padded_len(rem_len) + CEPH_GCM_TAG_LEN);
}
-static int decrypt_message(struct ceph_connection *con)
+static int decrypt_tail(struct ceph_connection *con)
{
+ struct sg_table enc_sgt = {};
struct sg_table sgt = {};
+ int tail_len;
int ret;
+ tail_len = tail_onwire_len(con->in_msg, true);
+ ret = sg_alloc_table_from_pages(&enc_sgt, con->v2.in_enc_pages,
+ con->v2.in_enc_page_cnt, 0, tail_len,
+ GFP_NOIO);
+ if (ret)
+ goto out;
+
ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf),
MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
con->v2.in_buf, true);
if (ret)
goto out;
- ret = gcm_crypt(con, false, sgt.sgl, sgt.sgl,
- tail_onwire_len(con->in_msg, true));
+ dout("%s con %p msg %p enc_page_cnt %d sg_cnt %d\n", __func__, con,
+ con->in_msg, con->v2.in_enc_page_cnt, sgt.orig_nents);
+ ret = gcm_crypt(con, false, enc_sgt.sgl, sgt.sgl, tail_len);
+ if (ret)
+ goto out;
+
+ WARN_ON(!con->v2.in_enc_page_cnt);
+ ceph_release_page_vector(con->v2.in_enc_pages,
+ con->v2.in_enc_page_cnt);
+ con->v2.in_enc_pages = NULL;
+ con->v2.in_enc_page_cnt = 0;
out:
sg_free_table(&sgt);
+ sg_free_table(&enc_sgt);
return ret;
}
@@ -1733,54 +1753,153 @@ static int prepare_read_control_remainder(struct ceph_connection *con)
return 0;
}
-static void prepare_read_data(struct ceph_connection *con)
+static int prepare_read_data(struct ceph_connection *con)
{
struct bio_vec bv;
- if (!con_secure(con))
- con->in_data_crc = -1;
+ con->in_data_crc = -1;
ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg,
data_len(con->in_msg));
get_bvec_at(&con->v2.in_cursor, &bv);
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ if (unlikely(!con->bounce_page)) {
+ con->bounce_page = alloc_page(GFP_NOIO);
+ if (!con->bounce_page) {
+ pr_err("failed to allocate bounce page\n");
+ return -ENOMEM;
+ }
+ }
+
+ bv.bv_page = con->bounce_page;
+ bv.bv_offset = 0;
+ }
set_in_bvec(con, &bv);
con->v2.in_state = IN_S_PREPARE_READ_DATA_CONT;
+ return 0;
}
static void prepare_read_data_cont(struct ceph_connection *con)
{
struct bio_vec bv;
- if (!con_secure(con))
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ con->in_data_crc = crc32c(con->in_data_crc,
+ page_address(con->bounce_page),
+ con->v2.in_bvec.bv_len);
+
+ get_bvec_at(&con->v2.in_cursor, &bv);
+ memcpy_to_page(bv.bv_page, bv.bv_offset,
+ page_address(con->bounce_page),
+ con->v2.in_bvec.bv_len);
+ } else {
con->in_data_crc = ceph_crc32c_page(con->in_data_crc,
con->v2.in_bvec.bv_page,
con->v2.in_bvec.bv_offset,
con->v2.in_bvec.bv_len);
+ }
ceph_msg_data_advance(&con->v2.in_cursor, con->v2.in_bvec.bv_len);
if (con->v2.in_cursor.total_resid) {
get_bvec_at(&con->v2.in_cursor, &bv);
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ bv.bv_page = con->bounce_page;
+ bv.bv_offset = 0;
+ }
set_in_bvec(con, &bv);
WARN_ON(con->v2.in_state != IN_S_PREPARE_READ_DATA_CONT);
return;
}
/*
- * We've read all data. Prepare to read data padding (if any)
- * and epilogue.
+ * We've read all data. Prepare to read epilogue.
*/
reset_in_kvecs(con);
- if (con_secure(con)) {
- if (need_padding(data_len(con->in_msg)))
- add_in_kvec(con, DATA_PAD(con->v2.in_buf),
- padding_len(data_len(con->in_msg)));
- add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_SECURE_LEN);
+ add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
+ con->v2.in_state = IN_S_HANDLE_EPILOGUE;
+}
+
+static int prepare_read_tail_plain(struct ceph_connection *con)
+{
+ struct ceph_msg *msg = con->in_msg;
+
+ if (!front_len(msg) && !middle_len(msg)) {
+ WARN_ON(!data_len(msg));
+ return prepare_read_data(con);
+ }
+
+ reset_in_kvecs(con);
+ if (front_len(msg)) {
+ add_in_kvec(con, msg->front.iov_base, front_len(msg));
+ WARN_ON(msg->front.iov_len != front_len(msg));
+ }
+ if (middle_len(msg)) {
+ add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
+ WARN_ON(msg->middle->vec.iov_len != middle_len(msg));
+ }
+
+ if (data_len(msg)) {
+ con->v2.in_state = IN_S_PREPARE_READ_DATA;
} else {
add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
+ con->v2.in_state = IN_S_HANDLE_EPILOGUE;
}
+ return 0;
+}
+
+static void prepare_read_enc_page(struct ceph_connection *con)
+{
+ struct bio_vec bv;
+
+ dout("%s con %p i %d resid %d\n", __func__, con, con->v2.in_enc_i,
+ con->v2.in_enc_resid);
+ WARN_ON(!con->v2.in_enc_resid);
+
+ bv.bv_page = con->v2.in_enc_pages[con->v2.in_enc_i];
+ bv.bv_offset = 0;
+ bv.bv_len = min(con->v2.in_enc_resid, (int)PAGE_SIZE);
+
+ set_in_bvec(con, &bv);
+ con->v2.in_enc_i++;
+ con->v2.in_enc_resid -= bv.bv_len;
+
+ if (con->v2.in_enc_resid) {
+ con->v2.in_state = IN_S_PREPARE_READ_ENC_PAGE;
+ return;
+ }
+
+ /*
+ * We are set to read the last piece of ciphertext (ending
+ * with epilogue) + auth tag.
+ */
+ WARN_ON(con->v2.in_enc_i != con->v2.in_enc_page_cnt);
con->v2.in_state = IN_S_HANDLE_EPILOGUE;
}
+static int prepare_read_tail_secure(struct ceph_connection *con)
+{
+ struct page **enc_pages;
+ int enc_page_cnt;
+ int tail_len;
+
+ tail_len = tail_onwire_len(con->in_msg, true);
+ WARN_ON(!tail_len);
+
+ enc_page_cnt = calc_pages_for(0, tail_len);
+ enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO);
+ if (IS_ERR(enc_pages))
+ return PTR_ERR(enc_pages);
+
+ WARN_ON(con->v2.in_enc_pages || con->v2.in_enc_page_cnt);
+ con->v2.in_enc_pages = enc_pages;
+ con->v2.in_enc_page_cnt = enc_page_cnt;
+ con->v2.in_enc_resid = tail_len;
+ con->v2.in_enc_i = 0;
+
+ prepare_read_enc_page(con);
+ return 0;
+}
+
static void __finish_skip(struct ceph_connection *con)
{
con->in_seq++;
@@ -2589,47 +2708,26 @@ static int __handle_control(struct ceph_connection *con, void *p)
}
msg = con->in_msg; /* set in process_message_header() */
- if (!front_len(msg) && !middle_len(msg)) {
- if (!data_len(msg))
- return process_message(con);
-
- prepare_read_data(con);
- return 0;
- }
-
- reset_in_kvecs(con);
if (front_len(msg)) {
WARN_ON(front_len(msg) > msg->front_alloc_len);
- add_in_kvec(con, msg->front.iov_base, front_len(msg));
msg->front.iov_len = front_len(msg);
-
- if (con_secure(con) && need_padding(front_len(msg)))
- add_in_kvec(con, FRONT_PAD(con->v2.in_buf),
- padding_len(front_len(msg)));
} else {
msg->front.iov_len = 0;
}
if (middle_len(msg)) {
WARN_ON(middle_len(msg) > msg->middle->alloc_len);
- add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
msg->middle->vec.iov_len = middle_len(msg);
-
- if (con_secure(con) && need_padding(middle_len(msg)))
- add_in_kvec(con, MIDDLE_PAD(con->v2.in_buf),
- padding_len(middle_len(msg)));
} else if (msg->middle) {
msg->middle->vec.iov_len = 0;
}
- if (data_len(msg)) {
- con->v2.in_state = IN_S_PREPARE_READ_DATA;
- } else {
- add_in_kvec(con, con->v2.in_buf,
- con_secure(con) ? CEPH_EPILOGUE_SECURE_LEN :
- CEPH_EPILOGUE_PLAIN_LEN);
- con->v2.in_state = IN_S_HANDLE_EPILOGUE;
- }
- return 0;
+ if (!front_len(msg) && !middle_len(msg) && !data_len(msg))
+ return process_message(con);
+
+ if (con_secure(con))
+ return prepare_read_tail_secure(con);
+
+ return prepare_read_tail_plain(con);
}
static int handle_preamble(struct ceph_connection *con)
@@ -2717,7 +2815,7 @@ static int handle_epilogue(struct ceph_connection *con)
int ret;
if (con_secure(con)) {
- ret = decrypt_message(con);
+ ret = decrypt_tail(con);
if (ret) {
if (ret == -EBADMSG)
con->error_msg = "integrity error, bad epilogue auth tag";
@@ -2785,13 +2883,16 @@ static int populate_in_iter(struct ceph_connection *con)
ret = handle_control_remainder(con);
break;
case IN_S_PREPARE_READ_DATA:
- prepare_read_data(con);
- ret = 0;
+ ret = prepare_read_data(con);
break;
case IN_S_PREPARE_READ_DATA_CONT:
prepare_read_data_cont(con);
ret = 0;
break;
+ case IN_S_PREPARE_READ_ENC_PAGE:
+ prepare_read_enc_page(con);
+ ret = 0;
+ break;
case IN_S_HANDLE_EPILOGUE:
ret = handle_epilogue(con);
break;
@@ -3326,20 +3427,16 @@ void ceph_con_v2_revoke(struct ceph_connection *con)
static void revoke_at_prepare_read_data(struct ceph_connection *con)
{
- int remaining; /* data + [data padding] + epilogue */
+ int remaining;
int resid;
+ WARN_ON(con_secure(con));
WARN_ON(!data_len(con->in_msg));
WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
resid = iov_iter_count(&con->v2.in_iter);
WARN_ON(!resid);
- if (con_secure(con))
- remaining = padded_len(data_len(con->in_msg)) +
- CEPH_EPILOGUE_SECURE_LEN;
- else
- remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN;
-
+ remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN;
dout("%s con %p resid %d remaining %d\n", __func__, con, resid,
remaining);
con->v2.in_iter.count -= resid;
@@ -3350,8 +3447,9 @@ static void revoke_at_prepare_read_data(struct ceph_connection *con)
static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
{
int recved, resid; /* current piece of data */
- int remaining; /* [data padding] + epilogue */
+ int remaining;
+ WARN_ON(con_secure(con));
WARN_ON(!data_len(con->in_msg));
WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
resid = iov_iter_count(&con->v2.in_iter);
@@ -3363,12 +3461,7 @@ static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
ceph_msg_data_advance(&con->v2.in_cursor, recved);
WARN_ON(resid > con->v2.in_cursor.total_resid);
- if (con_secure(con))
- remaining = padding_len(data_len(con->in_msg)) +
- CEPH_EPILOGUE_SECURE_LEN;
- else
- remaining = CEPH_EPILOGUE_PLAIN_LEN;
-
+ remaining = CEPH_EPILOGUE_PLAIN_LEN;
dout("%s con %p total_resid %zu remaining %d\n", __func__, con,
con->v2.in_cursor.total_resid, remaining);
con->v2.in_iter.count -= resid;
@@ -3376,11 +3469,26 @@ static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
con->v2.in_state = IN_S_FINISH_SKIP;
}
+static void revoke_at_prepare_read_enc_page(struct ceph_connection *con)
+{
+ int resid; /* current enc page (not necessarily data) */
+
+ WARN_ON(!con_secure(con));
+ WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
+ resid = iov_iter_count(&con->v2.in_iter);
+ WARN_ON(!resid || resid > con->v2.in_bvec.bv_len);
+
+ dout("%s con %p resid %d enc_resid %d\n", __func__, con, resid,
+ con->v2.in_enc_resid);
+ con->v2.in_iter.count -= resid;
+ set_in_skip(con, resid + con->v2.in_enc_resid);
+ con->v2.in_state = IN_S_FINISH_SKIP;
+}
+
static void revoke_at_handle_epilogue(struct ceph_connection *con)
{
int resid;
- WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
resid = iov_iter_count(&con->v2.in_iter);
WARN_ON(!resid);
@@ -3399,6 +3507,9 @@ void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
case IN_S_PREPARE_READ_DATA_CONT:
revoke_at_prepare_read_data_cont(con);
break;
+ case IN_S_PREPARE_READ_ENC_PAGE:
+ revoke_at_prepare_read_enc_page(con);
+ break;
case IN_S_HANDLE_EPILOGUE:
revoke_at_handle_epilogue(con);
break;
@@ -3432,6 +3543,13 @@ void ceph_con_v2_reset_protocol(struct ceph_connection *con)
clear_out_sign_kvecs(con);
free_conn_bufs(con);
+ if (con->v2.in_enc_pages) {
+ WARN_ON(!con->v2.in_enc_page_cnt);
+ ceph_release_page_vector(con->v2.in_enc_pages,
+ con->v2.in_enc_page_cnt);
+ con->v2.in_enc_pages = NULL;
+ con->v2.in_enc_page_cnt = 0;
+ }
if (con->v2.out_enc_pages) {
WARN_ON(!con->v2.out_enc_page_cnt);
ceph_release_page_vector(con->v2.out_enc_pages,
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 6a6898ee4049..db60217f911b 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -222,7 +222,7 @@ static void pick_new_mon(struct ceph_mon_client *monc)
max--;
}
- n = prandom_u32() % max;
+ n = prandom_u32_max(max);
if (o >= 0 && n >= o)
n++;
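
prandom_u32_max(max) replaces the open-coded modulo: it maps a random
32-bit value into [0, max) with a multiply-high instead of a division
(for large max it is slightly biased, just like the modulo it
replaces, but cheaper). A standalone sketch of the technique:

    #include <stdint.h>

    uint32_t prandom_u32(void); /* assumed 32-bit PRNG source */

    /* the high 32 bits of a 32x32 -> 64 bit product land in [0, max) */
    static inline uint32_t bounded_rand(uint32_t max)
    {
            return (uint32_t)(((uint64_t)prandom_u32() * max) >> 32);
    }
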
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 1c5815530e0d..4e4f1e4bc265 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -537,43 +537,6 @@ static void request_init(struct ceph_osd_request *req)
target_init(&req->r_t);
}
-/*
- * This is ugly, but it allows us to reuse linger registration and ping
- * requests, keeping the structure of the code around send_linger{_ping}()
- * reasonable. Setting up a min_nr=2 mempool for each linger request
- * and dealing with copying ops (this blasts req only, watch op remains
- * intact) isn't any better.
- */
-static void request_reinit(struct ceph_osd_request *req)
-{
- struct ceph_osd_client *osdc = req->r_osdc;
- bool mempool = req->r_mempool;
- unsigned int num_ops = req->r_num_ops;
- u64 snapid = req->r_snapid;
- struct ceph_snap_context *snapc = req->r_snapc;
- bool linger = req->r_linger;
- struct ceph_msg *request_msg = req->r_request;
- struct ceph_msg *reply_msg = req->r_reply;
-
- dout("%s req %p\n", __func__, req);
- WARN_ON(kref_read(&req->r_kref) != 1);
- request_release_checks(req);
-
- WARN_ON(kref_read(&request_msg->kref) != 1);
- WARN_ON(kref_read(&reply_msg->kref) != 1);
- target_destroy(&req->r_t);
-
- request_init(req);
- req->r_osdc = osdc;
- req->r_mempool = mempool;
- req->r_num_ops = num_ops;
- req->r_snapid = snapid;
- req->r_snapc = snapc;
- req->r_linger = linger;
- req->r_request = request_msg;
- req->r_reply = reply_msg;
-}
-
struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
struct ceph_snap_context *snapc,
unsigned int num_ops,
@@ -918,14 +881,30 @@ EXPORT_SYMBOL(osd_req_op_xattr_init);
* @watch_opcode: CEPH_OSD_WATCH_OP_*
*/
static void osd_req_op_watch_init(struct ceph_osd_request *req, int which,
- u64 cookie, u8 watch_opcode)
+ u8 watch_opcode, u64 cookie, u32 gen)
{
struct ceph_osd_req_op *op;
op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0);
op->watch.cookie = cookie;
op->watch.op = watch_opcode;
- op->watch.gen = 0;
+ op->watch.gen = gen;
+}
+
+/*
+ * prot_ver, timeout and notify payload (may be empty) should already be
+ * encoded in @request_pl
+ */
+static void osd_req_op_notify_init(struct ceph_osd_request *req, int which,
+ u64 cookie, struct ceph_pagelist *request_pl)
+{
+ struct ceph_osd_req_op *op;
+
+ op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
+ op->notify.cookie = cookie;
+
+ ceph_osd_data_pagelist_init(&op->notify.request_data, request_pl);
+ op->indata_len = request_pl->length;
}
/*
@@ -1500,7 +1479,7 @@ static bool target_should_be_paused(struct ceph_osd_client *osdc,
static int pick_random_replica(const struct ceph_osds *acting)
{
- int i = prandom_u32() % acting->size;
+ int i = prandom_u32_max(acting->size);
dout("%s picked osd%d, primary osd%d\n", __func__,
acting->osds[i], acting->primary);
@@ -2385,7 +2364,11 @@ again:
if (ceph_test_opt(osdc->client, ABORT_ON_FULL)) {
err = -ENOSPC;
} else {
- pr_warn_ratelimited("FULL or reached pool quota\n");
+ if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL))
+ pr_warn_ratelimited("cluster is full (osdmap FULL)\n");
+ else
+ pr_warn_ratelimited("pool %lld is full or reached quota\n",
+ req->r_t.base_oloc.pool);
req->r_t.paused = true;
maybe_request_map(osdc);
}
@@ -2727,10 +2710,13 @@ static void linger_release(struct kref *kref)
WARN_ON(!list_empty(&lreq->pending_lworks));
WARN_ON(lreq->osd);
- if (lreq->reg_req)
- ceph_osdc_put_request(lreq->reg_req);
- if (lreq->ping_req)
- ceph_osdc_put_request(lreq->ping_req);
+ if (lreq->request_pl)
+ ceph_pagelist_release(lreq->request_pl);
+ if (lreq->notify_id_pages)
+ ceph_release_page_vector(lreq->notify_id_pages, 1);
+
+ ceph_osdc_put_request(lreq->reg_req);
+ ceph_osdc_put_request(lreq->ping_req);
target_destroy(&lreq->t);
kfree(lreq);
}
@@ -2999,6 +2985,12 @@ static void linger_commit_cb(struct ceph_osd_request *req)
struct ceph_osd_linger_request *lreq = req->r_priv;
mutex_lock(&lreq->lock);
+ if (req != lreq->reg_req) {
+ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+ __func__, lreq, lreq->linger_id, req, lreq->reg_req);
+ goto out;
+ }
+
dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq,
lreq->linger_id, req->r_result);
linger_reg_commit_complete(lreq, req->r_result);
@@ -3022,6 +3014,7 @@ static void linger_commit_cb(struct ceph_osd_request *req)
}
}
+out:
mutex_unlock(&lreq->lock);
linger_put(lreq);
}
@@ -3044,6 +3037,12 @@ static void linger_reconnect_cb(struct ceph_osd_request *req)
struct ceph_osd_linger_request *lreq = req->r_priv;
mutex_lock(&lreq->lock);
+ if (req != lreq->reg_req) {
+ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+ __func__, lreq, lreq->linger_id, req, lreq->reg_req);
+ goto out;
+ }
+
dout("%s lreq %p linger_id %llu result %d last_error %d\n", __func__,
lreq, lreq->linger_id, req->r_result, lreq->last_error);
if (req->r_result < 0) {
@@ -3053,46 +3052,64 @@ static void linger_reconnect_cb(struct ceph_osd_request *req)
}
}
+out:
mutex_unlock(&lreq->lock);
linger_put(lreq);
}
static void send_linger(struct ceph_osd_linger_request *lreq)
{
- struct ceph_osd_request *req = lreq->reg_req;
- struct ceph_osd_req_op *op = &req->r_ops[0];
+ struct ceph_osd_client *osdc = lreq->osdc;
+ struct ceph_osd_request *req;
+ int ret;
- verify_osdc_wrlocked(req->r_osdc);
+ verify_osdc_wrlocked(osdc);
+ mutex_lock(&lreq->lock);
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
- if (req->r_osd)
- cancel_linger_request(req);
+ if (lreq->reg_req) {
+ if (lreq->reg_req->r_osd)
+ cancel_linger_request(lreq->reg_req);
+ ceph_osdc_put_request(lreq->reg_req);
+ }
+
+ req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
+ BUG_ON(!req);
- request_reinit(req);
target_copy(&req->r_t, &lreq->t);
req->r_mtime = lreq->mtime;
- mutex_lock(&lreq->lock);
if (lreq->is_watch && lreq->committed) {
- WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
- op->watch.cookie != lreq->linger_id);
- op->watch.op = CEPH_OSD_WATCH_OP_RECONNECT;
- op->watch.gen = ++lreq->register_gen;
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_RECONNECT,
+ lreq->linger_id, ++lreq->register_gen);
dout("lreq %p reconnect register_gen %u\n", lreq,
- op->watch.gen);
+ req->r_ops[0].watch.gen);
req->r_callback = linger_reconnect_cb;
} else {
- if (!lreq->is_watch)
+ if (lreq->is_watch) {
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_WATCH,
+ lreq->linger_id, 0);
+ } else {
lreq->notify_id = 0;
- else
- WARN_ON(op->watch.op != CEPH_OSD_WATCH_OP_WATCH);
+
+ refcount_inc(&lreq->request_pl->refcnt);
+ osd_req_op_notify_init(req, 0, lreq->linger_id,
+ lreq->request_pl);
+ ceph_osd_data_pages_init(
+ osd_req_op_data(req, 0, notify, response_data),
+ lreq->notify_id_pages, PAGE_SIZE, 0, false, false);
+ }
dout("lreq %p register\n", lreq);
req->r_callback = linger_commit_cb;
}
- mutex_unlock(&lreq->lock);
+
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ BUG_ON(ret);
req->r_priv = linger_get(lreq);
req->r_linger = true;
+ lreq->reg_req = req;
+ mutex_unlock(&lreq->lock);
submit_request(req, true);
}
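Because reg_req is now reallocated on every (re)send, a completion can arrive for a request that has since been superseded. The callbacks above detect this by pointer identity; reduced to its essentials, the guard looks like this (a sketch):

	mutex_lock(&lreq->lock);
	if (req != lreq->reg_req)	/* stale completion, ignore */
		goto out;
	/* handle the result for the current registration */
out:
	mutex_unlock(&lreq->lock);
	linger_put(lreq);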
@@ -3102,6 +3119,12 @@ static void linger_ping_cb(struct ceph_osd_request *req)
struct ceph_osd_linger_request *lreq = req->r_priv;
mutex_lock(&lreq->lock);
+ if (req != lreq->ping_req) {
+ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+ __func__, lreq, lreq->linger_id, req, lreq->ping_req);
+ goto out;
+ }
+
dout("%s lreq %p linger_id %llu result %d ping_sent %lu last_error %d\n",
__func__, lreq, lreq->linger_id, req->r_result, lreq->ping_sent,
lreq->last_error);
@@ -3117,6 +3140,7 @@ static void linger_ping_cb(struct ceph_osd_request *req)
lreq->register_gen, req->r_ops[0].watch.gen);
}
+out:
mutex_unlock(&lreq->lock);
linger_put(lreq);
}
@@ -3124,8 +3148,8 @@ static void linger_ping_cb(struct ceph_osd_request *req)
static void send_linger_ping(struct ceph_osd_linger_request *lreq)
{
struct ceph_osd_client *osdc = lreq->osdc;
- struct ceph_osd_request *req = lreq->ping_req;
- struct ceph_osd_req_op *op = &req->r_ops[0];
+ struct ceph_osd_request *req;
+ int ret;
if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) {
dout("%s PAUSERD\n", __func__);
@@ -3137,19 +3161,26 @@ static void send_linger_ping(struct ceph_osd_linger_request *lreq)
__func__, lreq, lreq->linger_id, lreq->ping_sent,
lreq->register_gen);
- if (req->r_osd)
- cancel_linger_request(req);
+ if (lreq->ping_req) {
+ if (lreq->ping_req->r_osd)
+ cancel_linger_request(lreq->ping_req);
+ ceph_osdc_put_request(lreq->ping_req);
+ }
- request_reinit(req);
- target_copy(&req->r_t, &lreq->t);
+ req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
+ BUG_ON(!req);
- WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
- op->watch.cookie != lreq->linger_id ||
- op->watch.op != CEPH_OSD_WATCH_OP_PING);
- op->watch.gen = lreq->register_gen;
+ target_copy(&req->r_t, &lreq->t);
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_PING, lreq->linger_id,
+ lreq->register_gen);
req->r_callback = linger_ping_cb;
+
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ BUG_ON(ret);
+
req->r_priv = linger_get(lreq);
req->r_linger = true;
+ lreq->ping_req = req;
ceph_osdc_get_request(req);
account_request(req);
@@ -3165,12 +3196,6 @@ static void linger_submit(struct ceph_osd_linger_request *lreq)
down_write(&osdc->lock);
linger_register(lreq);
- if (lreq->is_watch) {
- lreq->reg_req->r_ops[0].watch.cookie = lreq->linger_id;
- lreq->ping_req->r_ops[0].watch.cookie = lreq->linger_id;
- } else {
- lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id;
- }
calc_target(osdc, &lreq->t, false);
osd = lookup_create_osd(osdc, lreq->t.osd, true);
@@ -3202,9 +3227,9 @@ static void cancel_linger_map_check(struct ceph_osd_linger_request *lreq)
*/
static void __linger_cancel(struct ceph_osd_linger_request *lreq)
{
- if (lreq->is_watch && lreq->ping_req->r_osd)
+ if (lreq->ping_req && lreq->ping_req->r_osd)
cancel_linger_request(lreq->ping_req);
- if (lreq->reg_req->r_osd)
+ if (lreq->reg_req && lreq->reg_req->r_osd)
cancel_linger_request(lreq->reg_req);
cancel_linger_map_check(lreq);
unlink_linger(lreq->osd, lreq);
@@ -4553,21 +4578,23 @@ bad:
/*
* Register request, send initial attempt.
*/
-int ceph_osdc_start_request(struct ceph_osd_client *osdc,
- struct ceph_osd_request *req,
- bool nofail)
+void ceph_osdc_start_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req)
{
down_read(&osdc->lock);
submit_request(req, false);
up_read(&osdc->lock);
-
- return 0;
}
EXPORT_SYMBOL(ceph_osdc_start_request);
/*
- * Unregister a registered request. The request is not completed:
- * ->r_result isn't set and __complete_request() isn't called.
+ * Unregister request. If @req was registered, it isn't completed:
+ * r_result isn't set and __complete_request() isn't invoked.
+ *
+ * If @req wasn't registered, this call may have raced with
+ * handle_reply(), in which case r_result would already be set and
+ * __complete_request() would be getting invoked, possibly even
+ * concurrently with this call.
*/
void ceph_osdc_cancel_request(struct ceph_osd_request *req)
{
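With the nofail argument gone and the return type now void, submission can no longer fail, so the return-value checks at call sites disappear. The call sites converted later in this patch all reduce to a plain submit-then-wait sequence:

	ceph_osdc_start_request(osdc, req);
	ret = ceph_osdc_wait_request(osdc, req);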
@@ -4653,43 +4680,6 @@ again:
}
EXPORT_SYMBOL(ceph_osdc_sync);
-static struct ceph_osd_request *
-alloc_linger_request(struct ceph_osd_linger_request *lreq)
-{
- struct ceph_osd_request *req;
-
- req = ceph_osdc_alloc_request(lreq->osdc, NULL, 1, false, GFP_NOIO);
- if (!req)
- return NULL;
-
- ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
- ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
- return req;
-}
-
-static struct ceph_osd_request *
-alloc_watch_request(struct ceph_osd_linger_request *lreq, u8 watch_opcode)
-{
- struct ceph_osd_request *req;
-
- req = alloc_linger_request(lreq);
- if (!req)
- return NULL;
-
- /*
- * Pass 0 for cookie because we don't know it yet, it will be
- * filled in by linger_submit().
- */
- osd_req_op_watch_init(req, 0, 0, watch_opcode);
-
- if (ceph_osdc_alloc_messages(req, GFP_NOIO)) {
- ceph_osdc_put_request(req);
- return NULL;
- }
-
- return req;
-}
-
/*
* Returns a handle, caller owns a ref.
*/
@@ -4719,18 +4709,6 @@ ceph_osdc_watch(struct ceph_osd_client *osdc,
lreq->t.flags = CEPH_OSD_FLAG_WRITE;
ktime_get_real_ts64(&lreq->mtime);
- lreq->reg_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_WATCH);
- if (!lreq->reg_req) {
- ret = -ENOMEM;
- goto err_put_lreq;
- }
-
- lreq->ping_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_PING);
- if (!lreq->ping_req) {
- ret = -ENOMEM;
- goto err_put_lreq;
- }
-
linger_submit(lreq);
ret = linger_reg_commit_wait(lreq);
if (ret) {
@@ -4768,14 +4746,14 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
req->r_flags = CEPH_OSD_FLAG_WRITE;
ktime_get_real_ts64(&req->r_mtime);
- osd_req_op_watch_init(req, 0, lreq->linger_id,
- CEPH_OSD_WATCH_OP_UNWATCH);
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_UNWATCH,
+ lreq->linger_id, 0);
ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
if (ret)
goto out_put_req;
- ceph_osdc_start_request(osdc, req, false);
+ ceph_osdc_start_request(osdc, req);
linger_cancel(lreq);
linger_put(lreq);
ret = wait_request_timeout(req, opts->mount_timeout);
@@ -4846,7 +4824,7 @@ int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
if (ret)
goto out_put_req;
- ceph_osdc_start_request(osdc, req, false);
+ ceph_osdc_start_request(osdc, req);
ret = ceph_osdc_wait_request(osdc, req);
out_put_req:
@@ -4855,35 +4833,6 @@ out_put_req:
}
EXPORT_SYMBOL(ceph_osdc_notify_ack);
-static int osd_req_op_notify_init(struct ceph_osd_request *req, int which,
- u64 cookie, u32 prot_ver, u32 timeout,
- void *payload, u32 payload_len)
-{
- struct ceph_osd_req_op *op;
- struct ceph_pagelist *pl;
- int ret;
-
- op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
- op->notify.cookie = cookie;
-
- pl = ceph_pagelist_alloc(GFP_NOIO);
- if (!pl)
- return -ENOMEM;
-
- ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */
- ret |= ceph_pagelist_encode_32(pl, timeout);
- ret |= ceph_pagelist_encode_32(pl, payload_len);
- ret |= ceph_pagelist_append(pl, payload, payload_len);
- if (ret) {
- ceph_pagelist_release(pl);
- return -ENOMEM;
- }
-
- ceph_osd_data_pagelist_init(&op->notify.request_data, pl);
- op->indata_len = pl->length;
- return 0;
-}
-
/*
* @timeout: in seconds
*
@@ -4902,7 +4851,6 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
size_t *preply_len)
{
struct ceph_osd_linger_request *lreq;
- struct page **pages;
int ret;
WARN_ON(!timeout);
@@ -4915,41 +4863,35 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
if (!lreq)
return -ENOMEM;
- lreq->preply_pages = preply_pages;
- lreq->preply_len = preply_len;
-
- ceph_oid_copy(&lreq->t.base_oid, oid);
- ceph_oloc_copy(&lreq->t.base_oloc, oloc);
- lreq->t.flags = CEPH_OSD_FLAG_READ;
-
- lreq->reg_req = alloc_linger_request(lreq);
- if (!lreq->reg_req) {
+ lreq->request_pl = ceph_pagelist_alloc(GFP_NOIO);
+ if (!lreq->request_pl) {
ret = -ENOMEM;
goto out_put_lreq;
}
- /*
- * Pass 0 for cookie because we don't know it yet, it will be
- * filled in by linger_submit().
- */
- ret = osd_req_op_notify_init(lreq->reg_req, 0, 0, 1, timeout,
- payload, payload_len);
- if (ret)
+ ret = ceph_pagelist_encode_32(lreq->request_pl, 1); /* prot_ver */
+ ret |= ceph_pagelist_encode_32(lreq->request_pl, timeout);
+ ret |= ceph_pagelist_encode_32(lreq->request_pl, payload_len);
+ ret |= ceph_pagelist_append(lreq->request_pl, payload, payload_len);
+ if (ret) {
+ ret = -ENOMEM;
goto out_put_lreq;
+ }
/* for notify_id */
- pages = ceph_alloc_page_vector(1, GFP_NOIO);
- if (IS_ERR(pages)) {
- ret = PTR_ERR(pages);
+ lreq->notify_id_pages = ceph_alloc_page_vector(1, GFP_NOIO);
+ if (IS_ERR(lreq->notify_id_pages)) {
+ ret = PTR_ERR(lreq->notify_id_pages);
+ lreq->notify_id_pages = NULL;
goto out_put_lreq;
}
- ceph_osd_data_pages_init(osd_req_op_data(lreq->reg_req, 0, notify,
- response_data),
- pages, PAGE_SIZE, 0, false, true);
- ret = ceph_osdc_alloc_messages(lreq->reg_req, GFP_NOIO);
- if (ret)
- goto out_put_lreq;
+ lreq->preply_pages = preply_pages;
+ lreq->preply_len = preply_len;
+
+ ceph_oid_copy(&lreq->t.base_oid, oid);
+ ceph_oloc_copy(&lreq->t.base_oloc, oloc);
+ lreq->t.flags = CEPH_OSD_FLAG_READ;
linger_submit(lreq);
ret = linger_reg_commit_wait(lreq);
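For reference, the pagelist assembled above yields the following wire layout (a sketch inferred from the encode calls; Ceph wire integers are little-endian):

	__le32 prot_ver;	/* always 1 */
	__le32 timeout;		/* seconds */
	__le32 payload_len;
	u8     payload[payload_len];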
@@ -5098,7 +5040,7 @@ int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
if (ret)
goto out_put_req;
- ceph_osdc_start_request(osdc, req, false);
+ ceph_osdc_start_request(osdc, req);
ret = ceph_osdc_wait_request(osdc, req);
if (ret >= 0) {
void *p = page_address(pages[0]);
@@ -5175,7 +5117,7 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
if (ret)
goto out_put_req;
- ceph_osdc_start_request(osdc, req, false);
+ ceph_osdc_start_request(osdc, req);
ret = ceph_osdc_wait_request(osdc, req);
if (ret >= 0) {
ret = req->r_ops[0].rval;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 75b738083523..295098873861 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -11,6 +11,22 @@
#include <linux/crush/hash.h>
#include <linux/crush/mapper.h>
+static __printf(2, 3)
+void osdmap_info(const struct ceph_osdmap *map, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_INFO "%s (%pU e%u): %pV", KBUILD_MODNAME, &map->fsid,
+ map->epoch, &vaf);
+
+ va_end(args);
+}
+
char *ceph_osdmap_state_str(char *str, int len, u32 state)
{
if (!len)
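Every osdmap log line now self-identifies the cluster and map epoch. With the conversions later in this file, a call such as

	osdmap_info(map, "osd%d down\n", osd);

prints something like "libceph (01234567-89ab-cdef-0123-456789abcdef e42): osd3 down" (fsid and epoch values illustrative; "libceph" is KBUILD_MODNAME for net/ceph).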
@@ -571,10 +587,10 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
goto bad;
#endif
r = kmalloc(struct_size(r, steps, yes), GFP_NOFS);
- c->rules[i] = r;
if (r == NULL)
goto badmem;
dout(" rule %d is at %p\n", i, r);
+ c->rules[i] = r;
r->len = yes;
ceph_decode_copy_safe(p, end, &r->mask, 4, bad); /* 4 u8's */
ceph_decode_need(p, end, r->len*3*sizeof(u32), bad);
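The reordering above applies the usual allocate-check-publish discipline: the rule pointer is stored into c->rules[] only after the allocation has been verified, so the array never briefly holds an unchecked entry. In pattern form:

	r = kmalloc(struct_size(r, steps, yes), GFP_NOFS);
	if (!r)
		goto badmem;	/* c->rules[i] left untouched */
	c->rules[i] = r;	/* publish only after the check */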
@@ -980,7 +996,7 @@ static struct crush_work *alloc_workspace(const struct crush_map *c)
work_size = crush_work_size(c, CEPH_PG_MAX_SIZE);
dout("%s work_size %zu bytes\n", __func__, work_size);
- work = ceph_kvmalloc(work_size, GFP_NOIO);
+ work = kvmalloc(work_size, GFP_NOIO);
if (!work)
return NULL;
@@ -1190,9 +1206,9 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max)
if (max == map->max_osd)
return 0;
- state = ceph_kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS);
- weight = ceph_kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS);
- addr = ceph_kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS);
+ state = kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS);
+ weight = kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS);
+ addr = kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS);
if (!state || !weight || !addr) {
kvfree(state);
kvfree(weight);
@@ -1222,7 +1238,7 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max)
if (map->osd_primary_affinity) {
u32 *affinity;
- affinity = ceph_kvmalloc(array_size(max, sizeof(*affinity)),
+ affinity = kvmalloc(array_size(max, sizeof(*affinity)),
GFP_NOFS);
if (!affinity)
return -ENOMEM;
@@ -1503,7 +1519,7 @@ static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff)
if (!map->osd_primary_affinity) {
int i;
- map->osd_primary_affinity = ceph_kvmalloc(
+ map->osd_primary_affinity = kvmalloc(
array_size(map->max_osd, sizeof(*map->osd_primary_affinity)),
GFP_NOFS);
if (!map->osd_primary_affinity)
@@ -1566,7 +1582,7 @@ static int decode_new_primary_affinity(void **p, void *end,
if (ret)
return ret;
- pr_info("osd%d primary-affinity 0x%x\n", osd, aff);
+ osdmap_info(map, "osd%d primary-affinity 0x%x\n", osd, aff);
}
return 0;
@@ -1864,9 +1880,9 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
osd = ceph_decode_32(p);
w = ceph_decode_32(p);
BUG_ON(osd >= map->max_osd);
- pr_info("osd%d weight 0x%x %s\n", osd, w,
- w == CEPH_OSD_IN ? "(in)" :
- (w == CEPH_OSD_OUT ? "(out)" : ""));
+ osdmap_info(map, "osd%d weight 0x%x %s\n", osd, w,
+ w == CEPH_OSD_IN ? "(in)" :
+ (w == CEPH_OSD_OUT ? "(out)" : ""));
map->osd_weight[osd] = w;
/*
@@ -1898,10 +1914,10 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
BUG_ON(osd >= map->max_osd);
if ((map->osd_state[osd] & CEPH_OSD_UP) &&
(xorstate & CEPH_OSD_UP))
- pr_info("osd%d down\n", osd);
+ osdmap_info(map, "osd%d down\n", osd);
if ((map->osd_state[osd] & CEPH_OSD_EXISTS) &&
(xorstate & CEPH_OSD_EXISTS)) {
- pr_info("osd%d does not exist\n", osd);
+ osdmap_info(map, "osd%d does not exist\n", osd);
ret = set_primary_affinity(map, osd,
CEPH_OSD_DEFAULT_PRIMARY_AFFINITY);
if (ret)
@@ -1931,7 +1947,7 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
dout("%s osd%d addr %s\n", __func__, osd, ceph_pr_addr(&addr));
- pr_info("osd%d up\n", osd);
+ osdmap_info(map, "osd%d up\n", osd);
map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
map->osd_addr[osd] = addr;
}
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index 65e34f78b05d..74622b278d57 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -96,7 +96,7 @@ int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len)
EXPORT_SYMBOL(ceph_pagelist_append);
/* Allocate enough pages for a pagelist to append the given amount
- * of data without without allocating.
+ * of data without allocating.
* Returns: 0 on success, -ENOMEM on error.
*/
int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space)
diff --git a/net/compat.c b/net/compat.c
index 210fc3b4d0d8..385f04a6be2f 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -34,20 +34,15 @@
#include <net/compat.h>
int __get_compat_msghdr(struct msghdr *kmsg,
- struct compat_msghdr __user *umsg,
- struct sockaddr __user **save_addr,
- compat_uptr_t *ptr, compat_size_t *len)
+ struct compat_msghdr *msg,
+ struct sockaddr __user **save_addr)
{
- struct compat_msghdr msg;
ssize_t err;
- if (copy_from_user(&msg, umsg, sizeof(*umsg)))
- return -EFAULT;
-
- kmsg->msg_flags = msg.msg_flags;
- kmsg->msg_namelen = msg.msg_namelen;
+ kmsg->msg_flags = msg->msg_flags;
+ kmsg->msg_namelen = msg->msg_namelen;
- if (!msg.msg_name)
+ if (!msg->msg_name)
kmsg->msg_namelen = 0;
if (kmsg->msg_namelen < 0)
@@ -57,15 +52,16 @@ int __get_compat_msghdr(struct msghdr *kmsg,
kmsg->msg_namelen = sizeof(struct sockaddr_storage);
kmsg->msg_control_is_user = true;
- kmsg->msg_control_user = compat_ptr(msg.msg_control);
- kmsg->msg_controllen = msg.msg_controllen;
+ kmsg->msg_get_inq = 0;
+ kmsg->msg_control_user = compat_ptr(msg->msg_control);
+ kmsg->msg_controllen = msg->msg_controllen;
if (save_addr)
- *save_addr = compat_ptr(msg.msg_name);
+ *save_addr = compat_ptr(msg->msg_name);
- if (msg.msg_name && kmsg->msg_namelen) {
+ if (msg->msg_name && kmsg->msg_namelen) {
if (!save_addr) {
- err = move_addr_to_kernel(compat_ptr(msg.msg_name),
+ err = move_addr_to_kernel(compat_ptr(msg->msg_name),
kmsg->msg_namelen,
kmsg->msg_name);
if (err < 0)
@@ -76,12 +72,11 @@ int __get_compat_msghdr(struct msghdr *kmsg,
kmsg->msg_namelen = 0;
}
- if (msg.msg_iovlen > UIO_MAXIOV)
+ if (msg->msg_iovlen > UIO_MAXIOV)
return -EMSGSIZE;
kmsg->msg_iocb = NULL;
- *ptr = msg.msg_iov;
- *len = msg.msg_iovlen;
+ kmsg->msg_ubuf = NULL;
return 0;
}
@@ -90,15 +85,17 @@ int get_compat_msghdr(struct msghdr *kmsg,
struct sockaddr __user **save_addr,
struct iovec **iov)
{
- compat_uptr_t ptr;
- compat_size_t len;
+ struct compat_msghdr msg;
ssize_t err;
- err = __get_compat_msghdr(kmsg, umsg, save_addr, &ptr, &len);
+ if (copy_from_user(&msg, umsg, sizeof(*umsg)))
+ return -EFAULT;
+
+ err = __get_compat_msghdr(kmsg, &msg, save_addr);
if (err)
return err;
- err = import_iovec(save_addr ? READ : WRITE, compat_ptr(ptr), len,
+ err = import_iovec(save_addr ? READ : WRITE, compat_ptr(msg.msg_iov), msg.msg_iovlen,
UIO_FASTIOV, iov, &kmsg->msg_iter);
return err < 0 ? err : 0;
}
diff --git a/net/core/Makefile b/net/core/Makefile
index a8e4f737692b..5857cec87b83 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -4,7 +4,8 @@
#
obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \
- gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o
+ gen_stats.o gen_estimator.o net_namespace.o secure_seq.o \
+ flow_dissector.o
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index d9c37fd10809..94374d529ea4 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -40,7 +40,7 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
if (!sdata)
return -ENOENT;
- bpf_selem_unlink(SELEM(sdata));
+ bpf_selem_unlink(SELEM(sdata), true);
return 0;
}
@@ -75,8 +75,8 @@ void bpf_sk_storage_free(struct sock *sk)
* sk_storage.
*/
bpf_selem_unlink_map(selem);
- free_sk_storage = bpf_selem_unlink_storage_nolock(sk_storage,
- selem, true);
+ free_sk_storage = bpf_selem_unlink_storage_nolock(
+ sk_storage, selem, true, false);
}
raw_spin_unlock_bh(&sk_storage->lock);
rcu_read_unlock();
@@ -141,7 +141,7 @@ static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
if (sock) {
sdata = bpf_local_storage_update(
sock->sk, (struct bpf_local_storage_map *)map, value,
- map_flags);
+ map_flags, GFP_ATOMIC);
sockfd_put(sock);
return PTR_ERR_OR_ZERO(sdata);
}
@@ -172,7 +172,7 @@ bpf_sk_storage_clone_elem(struct sock *newsk,
{
struct bpf_local_storage_elem *copy_selem;
- copy_selem = bpf_selem_alloc(smap, newsk, NULL, true);
+ copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, GFP_ATOMIC);
if (!copy_selem)
return NULL;
@@ -230,7 +230,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
bpf_selem_link_map(smap, copy_selem);
bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
} else {
- ret = bpf_local_storage_alloc(newsk, smap, copy_selem);
+ ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
if (ret) {
kfree(copy_selem);
atomic_sub(smap->elem_size,
@@ -255,8 +255,9 @@ out:
return ret;
}
-BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
- void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
+ void *, value, u64, flags, gfp_t, gfp_flags)
{
struct bpf_local_storage_data *sdata;
@@ -277,7 +278,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
refcount_inc_not_zero(&sk->sk_refcnt)) {
sdata = bpf_local_storage_update(
sk, (struct bpf_local_storage_map *)map, value,
- BPF_NOEXIST);
+ BPF_NOEXIST, gfp_flags);
/* sk must be a fullsock (guaranteed by verifier),
* so sock_gen_put() is unnecessary.
*/
@@ -309,11 +310,12 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
void *owner, u32 size)
{
+ int optmem_max = READ_ONCE(sysctl_optmem_max);
struct sock *sk = (struct sock *)owner;
/* same check as in sock_kmalloc() */
- if (size <= sysctl_optmem_max &&
- atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+ if (size <= optmem_max &&
+ atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
atomic_add(size, &sk->sk_omem_alloc);
return 0;
}
@@ -337,7 +339,7 @@ bpf_sk_storage_ptr(void *owner)
return &sk->sk_bpf_storage;
}
-static int sk_storage_map_btf_id;
+BTF_ID_LIST_SINGLE(sk_storage_map_btf_ids, struct, bpf_local_storage_map)
const struct bpf_map_ops sk_storage_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = bpf_local_storage_map_alloc_check,
@@ -348,8 +350,7 @@ const struct bpf_map_ops sk_storage_map_ops = {
.map_update_elem = bpf_fd_sk_storage_update_elem,
.map_delete_elem = bpf_fd_sk_storage_delete_elem,
.map_check_btf = bpf_local_storage_map_check_btf,
- .map_btf_name = "bpf_local_storage_map",
- .map_btf_id = &sk_storage_map_btf_id,
+ .map_btf_id = &sk_storage_map_btf_ids[0],
.map_local_storage_charge = bpf_sk_storage_charge,
.map_local_storage_uncharge = bpf_sk_storage_uncharge,
.map_owner_storage_ptr = bpf_sk_storage_ptr,
@@ -405,6 +406,8 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
btf_vmlinux = bpf_get_btf_vmlinux();
+ if (IS_ERR_OR_NULL(btf_vmlinux))
+ return false;
btf_id = prog->aux->attach_btf_id;
t = btf_type_by_id(btf_vmlinux, btf_id);
tname = btf_name_by_offset(btf_vmlinux, t->name_off);
@@ -417,14 +420,16 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
return false;
}
-BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
- void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
+ void *, value, u64, flags, gfp_t, gfp_flags)
{
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (in_hardirq() || in_nmi())
return (unsigned long)NULL;
- return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags);
+ return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags,
+ gfp_flags);
}
BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
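The gfp_t argument never appears in the BPF-visible helper signature; as the comments above note, the verifier injects it. Conceptually (an assumption based on those comments, not actual verifier code):

	/* sleepable program   -> hidden arg = GFP_KERNEL
	 * non-sleepable/trace -> hidden arg = GFP_ATOMIC
	 * the helper proto still advertises four arguments */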
@@ -871,10 +876,18 @@ static int bpf_iter_init_sk_storage_map(void *priv_data,
{
struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;
+ bpf_map_inc_with_uref(aux->map);
seq_info->map = aux->map;
return 0;
}
+static void bpf_iter_fini_sk_storage_map(void *priv_data)
+{
+ struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;
+
+ bpf_map_put_with_uref(seq_info->map);
+}
+
static int bpf_iter_attach_map(struct bpf_prog *prog,
union bpf_iter_link_info *linfo,
struct bpf_iter_aux_info *aux)
@@ -892,7 +905,7 @@ static int bpf_iter_attach_map(struct bpf_prog *prog,
if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
goto put_map;
- if (prog->aux->max_rdonly_access > map->value_size) {
+ if (prog->aux->max_rdwr_access > map->value_size) {
err = -EACCES;
goto put_map;
}
@@ -920,7 +933,7 @@ static const struct seq_operations bpf_sk_storage_map_seq_ops = {
static const struct bpf_iter_seq_info iter_seq_info = {
.seq_ops = &bpf_sk_storage_map_seq_ops,
.init_seq_private = bpf_iter_init_sk_storage_map,
- .fini_seq_private = NULL,
+ .fini_seq_private = bpf_iter_fini_sk_storage_map,
.seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info),
};
diff --git a/net/core/datagram.c b/net/core/datagram.c
index ee290776c661..e4ff2db40c98 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -62,8 +62,6 @@
#include <trace/events/skb.h>
#include <net/busy_poll.h>
-#include "datagram.h"
-
/*
* Is a socket 'connection oriented' ?
*/
@@ -310,12 +308,11 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk,
EXPORT_SYMBOL(__skb_recv_datagram);
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
- int noblock, int *err)
+ int *err)
{
int off = 0;
- return __skb_recv_datagram(sk, &sk->sk_receive_queue,
- flags | (noblock ? MSG_DONTWAIT : 0),
+ return __skb_recv_datagram(sk, &sk->sk_receive_queue, flags,
&off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);
@@ -323,7 +320,6 @@ EXPORT_SYMBOL(skb_recv_datagram);
void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
consume_skb(skb);
- sk_mem_reclaim_partial(sk);
}
EXPORT_SYMBOL(skb_free_datagram);
@@ -339,7 +335,6 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
slow = lock_sock_fast(sk);
sk_peek_offset_bwd(sk, len);
skb_orphan(skb);
- sk_mem_reclaim_partial(sk);
unlock_sock_fast(sk, slow);
/* skb is now orphaned, can be freed outside of locked section */
@@ -399,7 +394,6 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
NULL);
kfree_skb(skb);
- sk_mem_reclaim_partial(sk);
return err;
}
EXPORT_SYMBOL(skb_kill_datagram);
@@ -616,10 +610,16 @@ fault:
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);
-int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
- struct iov_iter *from, size_t length)
+int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb, struct iov_iter *from,
+ size_t length)
{
- int frag = skb_shinfo(skb)->nr_frags;
+ int frag;
+
+ if (msg && msg->msg_ubuf && msg->sg_from_iter)
+ return msg->sg_from_iter(sk, skb, from, length);
+
+ frag = skb_shinfo(skb)->nr_frags;
while (length && iov_iter_count(from)) {
struct page *pages[MAX_SKB_FRAGS];
@@ -632,12 +632,11 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
if (frag == MAX_SKB_FRAGS)
return -EMSGSIZE;
- copied = iov_iter_get_pages(from, pages, length,
+ copied = iov_iter_get_pages2(from, pages, length,
MAX_SKB_FRAGS - frag, &start);
if (copied < 0)
return -EFAULT;
- iov_iter_advance(from, copied);
length -= copied;
truesize = PAGE_ALIGN(copied + start);
@@ -678,7 +677,7 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
page_ref_sub(last_head, refs);
refs = 0;
}
- skb_fill_page_desc(skb, frag++, head, start, size);
+ skb_fill_page_desc_noacc(skb, frag++, head, start, size);
}
if (refs)
page_ref_sub(last_head, refs);
@@ -705,7 +704,7 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
if (skb_copy_datagram_from_iter(skb, 0, from, copy))
return -EFAULT;
- return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
+ return __zerocopy_sg_from_iter(NULL, NULL, skb, from, ~0U);
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);
diff --git a/net/core/datagram.h b/net/core/datagram.h
deleted file mode 100644
index bcfb75bfa3b2..000000000000
--- a/net/core/datagram.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef _NET_CORE_DATAGRAM_H_
-#define _NET_CORE_DATAGRAM_H_
-
-#include <linux/types.h>
-
-struct sock;
-struct sk_buff;
-struct iov_iter;
-
-int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
- struct iov_iter *from, size_t length);
-
-#endif /* _NET_CORE_DATAGRAM_H_ */
diff --git a/net/core/dev.c b/net/core/dev.c
index 84a0d9542fe9..3be256051e99 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -151,6 +151,7 @@
#include <linux/prandom.h>
#include <linux/once_lite.h>
+#include "dev.h"
#include "net-sysfs.h"
@@ -216,18 +217,38 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}
-static inline void rps_lock(struct softnet_data *sd)
+static inline void rps_lock_irqsave(struct softnet_data *sd,
+ unsigned long *flags)
{
-#ifdef CONFIG_RPS
- spin_lock(&sd->input_pkt_queue.lock);
-#endif
+ if (IS_ENABLED(CONFIG_RPS))
+ spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags);
+ else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_save(*flags);
}
-static inline void rps_unlock(struct softnet_data *sd)
+static inline void rps_lock_irq_disable(struct softnet_data *sd)
{
-#ifdef CONFIG_RPS
- spin_unlock(&sd->input_pkt_queue.lock);
-#endif
+ if (IS_ENABLED(CONFIG_RPS))
+ spin_lock_irq(&sd->input_pkt_queue.lock);
+ else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable();
+}
+
+static inline void rps_unlock_irq_restore(struct softnet_data *sd,
+ unsigned long *flags)
+{
+ if (IS_ENABLED(CONFIG_RPS))
+ spin_unlock_irqrestore(&sd->input_pkt_queue.lock, *flags);
+ else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_restore(*flags);
+}
+
+static inline void rps_unlock_irq_enable(struct softnet_data *sd)
+{
+ if (IS_ENABLED(CONFIG_RPS))
+ spin_unlock_irq(&sd->input_pkt_queue.lock);
+ else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_enable();
}
static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
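The four helpers collapse the old '#ifdef CONFIG_RPS' lock dance into one place: with RPS the IRQ-safe queue spinlock is taken, without RPS only interrupts are disabled, and on PREEMPT_RT without RPS nothing is done at all. The usage pattern, as in enqueue_to_backlog() later in this diff:

	unsigned long flags;

	rps_lock_irqsave(sd, &flags);
	__skb_queue_tail(&sd->input_pkt_queue, skb);
	rps_unlock_irq_restore(sd, &flags);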
@@ -320,7 +341,6 @@ int netdev_name_node_alt_create(struct net_device *dev, const char *name)
return 0;
}
-EXPORT_SYMBOL(netdev_name_node_alt_create);
static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
{
@@ -348,7 +368,6 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
return 0;
}
-EXPORT_SYMBOL(netdev_name_node_alt_destroy);
static void netdev_name_node_alt_flush(struct net_device *dev)
{
@@ -378,16 +397,18 @@ static void list_netdevice(struct net_device *dev)
/* Device list removal
* caller must respect a RCU grace period before freeing/reusing dev
*/
-static void unlist_netdevice(struct net_device *dev)
+static void unlist_netdevice(struct net_device *dev, bool lock)
{
ASSERT_RTNL();
/* Unlink dev from the device chain */
- write_lock(&dev_base_lock);
+ if (lock)
+ write_lock(&dev_base_lock);
list_del_rcu(&dev->dev_list);
netdev_name_node_del(dev->name_node);
hlist_del_rcu(&dev->index_hlist);
- write_unlock(&dev_base_lock);
+ if (lock)
+ write_unlock(&dev_base_lock);
dev_base_seq_inc(dev_net(dev));
}
@@ -663,11 +684,11 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
const struct net_device *last_dev;
struct net_device_path_ctx ctx = {
.dev = dev,
- .daddr = daddr,
};
struct net_device_path *path;
int ret = 0;
+ memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
stack->num_paths = 0;
while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
last_dev = ctx.dev;
@@ -683,6 +704,10 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
if (WARN_ON_ONCE(last_dev == ctx.dev))
return -1;
}
+
+ if (!ctx.dev)
+ return ret;
+
path = dev_fwd_path(stack);
if (!path)
return -1;
@@ -1037,7 +1062,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
/* avoid cases where sscanf is not exact inverse of printf */
snprintf(buf, IFNAMSIZ, name, i);
if (!strncmp(buf, name_node->name, IFNAMSIZ))
- set_bit(i, inuse);
+ __set_bit(i, inuse);
}
if (!sscanf(d->name, name, &i))
continue;
@@ -1047,7 +1072,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
/* avoid cases where sscanf is not exact inverse of printf */
snprintf(buf, IFNAMSIZ, name, i);
if (!strncmp(buf, d->name, IFNAMSIZ))
- set_bit(i, inuse);
+ __set_bit(i, inuse);
}
i = find_first_zero_bit(inuse, max_netdevices);
@@ -1075,7 +1100,7 @@ static int dev_alloc_name_ns(struct net *net,
BUG_ON(!net);
ret = __dev_alloc_name(net, name, buf);
if (ret >= 0)
- strlcpy(dev->name, buf, IFNAMSIZ);
+ strscpy(dev->name, buf, IFNAMSIZ);
return ret;
}
@@ -1112,7 +1137,7 @@ static int dev_get_valid_name(struct net *net, struct net_device *dev,
else if (netdev_name_in_use(net, name))
return -EEXIST;
else if (dev->name != name)
- strlcpy(dev->name, name, IFNAMSIZ);
+ strscpy(dev->name, name, IFNAMSIZ);
return 0;
}
@@ -1602,7 +1627,8 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
- N(PRE_CHANGEADDR)
+ N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE)
+ N(OFFLOAD_XSTATS_REPORT_USED) N(OFFLOAD_XSTATS_REPORT_DELTA)
}
#undef N
return "UNKNOWN_NETDEV_EVENT";
@@ -1919,6 +1945,32 @@ static int call_netdevice_notifiers_info(unsigned long val,
return raw_notifier_call_chain(&netdev_chain, val, info);
}
+/**
+ * call_netdevice_notifiers_info_robust - call per-netns notifier blocks
+ * and roll back on error
+ * @val_up: value passed unmodified to notifier function
+ * @val_down: value passed unmodified to the notifier function when
+ * recovering from an error on @val_up
+ * @info: notifier information data
+ *
+ * Call all per-netns network notifier blocks, but not notifier blocks on
+ * the global notifier chain. Parameters and return value are as for
+ * raw_notifier_call_chain_robust().
+ */
+
+static int
+call_netdevice_notifiers_info_robust(unsigned long val_up,
+ unsigned long val_down,
+ struct netdev_notifier_info *info)
+{
+ struct net *net = dev_net(info->dev);
+
+ ASSERT_RTNL();
+
+ return raw_notifier_call_chain_robust(&net->netdev_chain,
+ val_up, val_down, info);
+}
+
static int call_netdevice_notifiers_extack(unsigned long val,
struct net_device *dev,
struct netlink_ext_ack *extack)
@@ -2000,7 +2052,8 @@ void net_dec_egress_queue(void)
EXPORT_SYMBOL_GPL(net_dec_egress_queue);
#endif
-static DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
+DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
+EXPORT_SYMBOL(netstamp_needed_key);
#ifdef CONFIG_JUMP_LABEL
static atomic_t netstamp_needed_deferred;
static atomic_t netstamp_wanted;
@@ -2061,14 +2114,15 @@ EXPORT_SYMBOL(net_disable_timestamp);
static inline void net_timestamp_set(struct sk_buff *skb)
{
skb->tstamp = 0;
+ skb->mono_delivery_time = 0;
if (static_branch_unlikely(&netstamp_needed_key))
- __net_timestamp(skb);
+ skb->tstamp = ktime_get_real();
}
#define net_timestamp_check(COND, SKB) \
if (static_branch_unlikely(&netstamp_needed_key)) { \
if ((COND) && !(SKB)->tstamp) \
- __net_timestamp(SKB); \
+ (SKB)->tstamp = ktime_get_real(); \
} \
bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
@@ -2941,15 +2995,73 @@ undo_rx:
EXPORT_SYMBOL(netif_set_real_num_queues);
/**
+ * netif_set_tso_max_size() - set the max size of TSO frames supported
+ * @dev: netdev to update
+ * @size: max skb->len of a TSO frame
+ *
+ * Set the limit on the size of TSO super-frames the device can handle.
+ * Unless explicitly set the stack will assume the value of
+ * %GSO_LEGACY_MAX_SIZE.
+ */
+void netif_set_tso_max_size(struct net_device *dev, unsigned int size)
+{
+ dev->tso_max_size = min(GSO_MAX_SIZE, size);
+ if (size < READ_ONCE(dev->gso_max_size))
+ netif_set_gso_max_size(dev, size);
+}
+EXPORT_SYMBOL(netif_set_tso_max_size);
+
+/**
+ * netif_set_tso_max_segs() - set the max number of segs supported for TSO
+ * @dev: netdev to update
+ * @segs: max number of TCP segments
+ *
+ * Set the limit on the number of TCP segments the device can generate from
+ * a single TSO super-frame.
+ * Unless explicitly set the stack will assume the value of %GSO_MAX_SEGS.
+ */
+void netif_set_tso_max_segs(struct net_device *dev, unsigned int segs)
+{
+ dev->tso_max_segs = segs;
+ if (segs < READ_ONCE(dev->gso_max_segs))
+ netif_set_gso_max_segs(dev, segs);
+}
+EXPORT_SYMBOL(netif_set_tso_max_segs);
+
+/**
+ * netif_inherit_tso_max() - copy all TSO limits from a lower device to an upper
+ * @to: netdev to update
+ * @from: netdev from which to copy the limits
+ */
+void netif_inherit_tso_max(struct net_device *to, const struct net_device *from)
+{
+ netif_set_tso_max_size(to, from->tso_max_size);
+ netif_set_tso_max_segs(to, from->tso_max_segs);
+}
+EXPORT_SYMBOL(netif_inherit_tso_max);
+
+/**
* netif_get_num_default_rss_queues - default number of RSS queues
*
- * This routine should set an upper limit on the number of RSS queues
- * used by default by multiqueue devices.
+ * Default value is the number of physical cores if there are only 1 or 2, or
+ * divided by 2 if there are more.
*/
int netif_get_num_default_rss_queues(void)
{
- return is_kdump_kernel() ?
- 1 : min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
+ cpumask_var_t cpus;
+ int cpu, count = 0;
+
+ if (unlikely(is_kdump_kernel() || !zalloc_cpumask_var(&cpus, GFP_KERNEL)))
+ return 1;
+
+ cpumask_copy(cpus, cpu_online_mask);
+ for_each_cpu(cpu, cpus) {
+ ++count;
+ cpumask_andnot(cpus, cpus, topology_sibling_cpumask(cpu));
+ }
+ free_cpumask_var(cpus);
+
+ return count > 2 ? DIV_ROUND_UP(count, 2) : count;
}
EXPORT_SYMBOL(netif_get_num_default_rss_queues);
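A driver would typically call the new TSO setters once at probe time; a hypothetical example (the 16 kB / 64-segment limits are illustrative, not taken from any real device):

	netif_set_tso_max_size(dev, 16 * 1024);	/* hw can't segment past 16 kB */
	netif_set_tso_max_segs(dev, 64);	/* nor emit more than 64 segments */

For netif_get_num_default_rss_queues(), a worked example: on 16 logical CPUs with 2-way SMT the sibling-mask walk counts 8 physical cores, and since 8 > 2 the default is DIV_ROUND_UP(8, 2) = 4 queues; a 2-core machine gets 2, and a kdump kernel always gets 1.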
@@ -3156,12 +3268,18 @@ int skb_checksum_help(struct sk_buff *skb)
}
offset = skb_checksum_start_offset(skb);
- BUG_ON(offset >= skb_headlen(skb));
+ ret = -EINVAL;
+ if (WARN_ON_ONCE(offset >= skb_headlen(skb))) {
+ DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
+ goto out;
+ }
csum = skb_checksum(skb, offset, skb->len - offset, 0);
offset += skb->csum_offset;
- BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
-
+ if (WARN_ON_ONCE(offset + sizeof(__sum16) > skb_headlen(skb))) {
+ DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
+ goto out;
+ }
ret = skb_ensure_writable(skb, offset + sizeof(__sum16));
if (ret)
goto out;
@@ -3468,7 +3586,6 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
dev_queue_xmit_nit(skb, dev);
len = skb->len;
- PRANDOM_ADD_NOISE(skb, dev, txq, len + jiffies);
trace_net_dev_start_xmit(skb, dev);
rc = netdev_start_xmit(skb, dev, txq, more);
trace_net_dev_xmit(skb, rc, dev, len);
@@ -3586,7 +3703,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
out_kfree_skb:
kfree_skb(skb);
out_null:
- atomic_long_inc(&dev->tx_dropped);
+ dev_core_stats_tx_dropped_inc(dev);
return NULL;
}
@@ -3710,7 +3827,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
no_lock_out:
if (unlikely(to_free))
- kfree_skb_list(to_free);
+ kfree_skb_list_reason(to_free,
+ SKB_DROP_REASON_QDISC_DROP);
return rc;
}
@@ -3765,7 +3883,7 @@ no_lock_out:
}
spin_unlock(root_lock);
if (unlikely(to_free))
- kfree_skb_list(to_free);
+ kfree_skb_list_reason(to_free, SKB_DROP_REASON_QDISC_DROP);
if (unlikely(contended))
spin_unlock(&q->busylock);
return rc;
@@ -3809,9 +3927,9 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
skb->pkt_type = PACKET_LOOPBACK;
if (skb->ip_summed == CHECKSUM_NONE)
skb->ip_summed = CHECKSUM_UNNECESSARY;
- WARN_ON(!skb_dst(skb));
+ DEBUG_NET_WARN_ON_ONCE(!skb_dst(skb));
skb_dst_force(skb);
- netif_rx_ni(skb);
+ netif_rx(skb);
return 0;
}
EXPORT_SYMBOL(dev_loopback_xmit);
@@ -3840,7 +3958,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
case TC_ACT_SHOT:
mini_qdisc_qstats_cpu_drop(miniq);
*ret = NET_XMIT_DROP;
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
return NULL;
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
@@ -3860,6 +3978,25 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
return skb;
}
+
+static struct netdev_queue *
+netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
+{
+ int qm = skb_get_queue_mapping(skb);
+
+ return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm));
+}
+
+static bool netdev_xmit_txqueue_skipped(void)
+{
+ return __this_cpu_read(softnet_data.xmit.skip_txqueue);
+}
+
+void netdev_xmit_skip_txqueue(bool skip)
+{
+ __this_cpu_write(softnet_data.xmit.skip_txqueue, skip);
+}
+EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
#endif /* CONFIG_NET_EGRESS */
#ifdef CONFIG_XPS
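These helpers let the egress hook pin the transmit queue: __dev_queue_xmit() (changed below) clears the flag before running sch_handle_egress() and, if a program set it, honors the queue mapping already on the skb. Condensed, the consumer side is:

	netdev_xmit_skip_txqueue(false);
	skb = sch_handle_egress(skb, &rc, dev);
	if (skb && netdev_xmit_txqueue_skipped())
		txq = netdev_tx_queue_mapping(dev, skb);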
@@ -4002,40 +4139,36 @@ struct netdev_queue *netdev_core_pick_tx(struct net_device *dev,
}
/**
- * __dev_queue_xmit - transmit a buffer
- * @skb: buffer to transmit
- * @sb_dev: suboordinate device used for L2 forwarding offload
+ * __dev_queue_xmit() - transmit a buffer
+ * @skb: buffer to transmit
+ * @sb_dev: subordinate device used for L2 forwarding offload
*
- * Queue a buffer for transmission to a network device. The caller must
- * have set the device and priority and built the buffer before calling
- * this function. The function can be called from an interrupt.
+ * Queue a buffer for transmission to a network device. The caller must
+ * have set the device and priority and built the buffer before calling
+ * this function. The function can be called from an interrupt.
*
- * A negative errno code is returned on a failure. A success does not
- * guarantee the frame will be transmitted as it may be dropped due
- * to congestion or traffic shaping.
+ * When calling this method, interrupts MUST be enabled. This is because
+ * the BH enable code must have IRQs enabled so that it will not deadlock.
*
- * -----------------------------------------------------------------------------------
- * I notice this method can also return errors from the queue disciplines,
- * including NET_XMIT_DROP, which is a positive value. So, errors can also
- * be positive.
+ * Regardless of the return value, the skb is consumed, so it is currently
+ * difficult to retry a send to this method. (You can bump the ref count
+ * before sending to hold a reference for retry if you are careful.)
*
- * Regardless of the return value, the skb is consumed, so it is currently
- * difficult to retry a send to this method. (You can bump the ref count
- * before sending to hold a reference for retry if you are careful.)
- *
- * When calling this method, interrupts MUST be enabled. This is because
- * the BH enable code must have IRQs enabled so that it will not deadlock.
- * --BLG
+ * Return:
+ * * 0 - buffer successfully transmitted
+ * * positive qdisc return code - NET_XMIT_DROP etc.
+ * * negative errno - other errors
*/
-static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
+int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
{
struct net_device *dev = skb->dev;
- struct netdev_queue *txq;
+ struct netdev_queue *txq = NULL;
struct Qdisc *q;
int rc = -ENOMEM;
bool again = false;
skb_reset_mac_header(skb);
+ skb_assert_len(skb);
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
__skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED);
@@ -4058,11 +4191,17 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
if (!skb)
goto out;
}
+
+ netdev_xmit_skip_txqueue(false);
+
nf_skip_egress(skb, true);
skb = sch_handle_egress(skb, &rc, dev);
if (!skb)
goto out;
nf_skip_egress(skb, false);
+
+ if (netdev_xmit_txqueue_skipped())
+ txq = netdev_tx_queue_mapping(dev, skb);
}
#endif
/* If device/qdisc don't need skb->dst, release it right now while
@@ -4073,7 +4212,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
else
skb_dst_force(skb);
- txq = netdev_core_pick_tx(dev, skb, sb_dev);
+ if (!txq)
+ txq = netdev_core_pick_tx(dev, skb, sb_dev);
+
q = rcu_dereference_bh(txq->qdisc);
trace_net_dev_queue(skb);
@@ -4108,7 +4249,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
if (!skb)
goto out;
- PRANDOM_ADD_NOISE(skb, dev, txq, jiffies);
HARD_TX_LOCK(dev, txq, cpu);
if (!netif_xmit_stopped(txq)) {
@@ -4136,25 +4276,14 @@ recursion_alert:
rc = -ENETDOWN;
rcu_read_unlock_bh();
- atomic_long_inc(&dev->tx_dropped);
+ dev_core_stats_tx_dropped_inc(dev);
kfree_skb_list(skb);
return rc;
out:
rcu_read_unlock_bh();
return rc;
}
-
-int dev_queue_xmit(struct sk_buff *skb)
-{
- return __dev_queue_xmit(skb, NULL);
-}
-EXPORT_SYMBOL(dev_queue_xmit);
-
-int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev)
-{
- return __dev_queue_xmit(skb, sb_dev);
-}
-EXPORT_SYMBOL(dev_queue_xmit_accel);
+EXPORT_SYMBOL(__dev_queue_xmit);
int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
{
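With the two exported wrappers removed and __dev_queue_xmit() itself exported, dev_queue_xmit() and dev_queue_xmit_accel() presumably become trivial static inlines in <linux/netdevice.h> (an assumption; the header change is outside this hunk):

	static inline int dev_queue_xmit(struct sk_buff *skb)
	{
		return __dev_queue_xmit(skb, NULL);
	}

	static inline int dev_queue_xmit_accel(struct sk_buff *skb,
					       struct net_device *sb_dev)
	{
		return __dev_queue_xmit(skb, sb_dev);
	}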
@@ -4174,7 +4303,6 @@ int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
skb_set_queue_mapping(skb, queue_id);
txq = skb_get_tx_queue(dev, skb);
- PRANDOM_ADD_NOISE(skb, dev, txq, jiffies);
local_bh_disable();
@@ -4188,7 +4316,7 @@ int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
local_bh_enable();
return ret;
drop:
- atomic_long_inc(&dev->tx_dropped);
+ dev_core_stats_tx_dropped_inc(dev);
kfree_skb_list(skb);
return NET_XMIT_DROP;
}
@@ -4202,6 +4330,7 @@ int netdev_max_backlog __read_mostly = 1000;
EXPORT_SYMBOL(netdev_max_backlog);
int netdev_tstamp_prequeue __read_mostly = 1;
+unsigned int sysctl_skb_defer_max __read_mostly = 64;
int netdev_budget __read_mostly = 300;
/* Must be at least 2 jiffes to guarantee 1 jiffy timeout */
unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ;
@@ -4217,6 +4346,8 @@ static inline void ____napi_schedule(struct softnet_data *sd,
{
struct task_struct *thread;
+ lockdep_assert_irqs_disabled();
+
if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
/* Paired with smp_mb__before_atomic() in
* napi_enable()/dev_set_threaded().
@@ -4451,16 +4582,25 @@ static void rps_trigger_softirq(void *data)
#endif /* CONFIG_RPS */
+/* Called from hardirq (IPI) context */
+static void trigger_rx_softirq(void *data)
+{
+ struct softnet_data *sd = data;
+
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ smp_store_release(&sd->defer_ipi_scheduled, 0);
+}
+
/*
* Check if this softnet_data structure is another cpu one
* If yes, queue it to our IPI list and return 1
* If no, return 0
*/
-static int rps_ipi_queued(struct softnet_data *sd)
+static int napi_schedule_rps(struct softnet_data *sd)
{
-#ifdef CONFIG_RPS
struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
+#ifdef CONFIG_RPS
if (sd != mysd) {
sd->rps_ipi_next = mysd->rps_ipi_list;
mysd->rps_ipi_list = sd;
@@ -4469,6 +4609,7 @@ static int rps_ipi_queued(struct softnet_data *sd)
return 1;
}
#endif /* CONFIG_RPS */
+ __napi_schedule_irqoff(&mysd->backlog);
return 0;
}
@@ -4483,7 +4624,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
struct softnet_data *sd;
unsigned int old_flow, new_flow;
- if (qlen < (netdev_max_backlog >> 1))
+ if (qlen < (READ_ONCE(netdev_max_backlog) >> 1))
return false;
sd = this_cpu_ptr(&softnet_data);
@@ -4519,46 +4660,42 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
unsigned int *qtail)
{
+ enum skb_drop_reason reason;
struct softnet_data *sd;
unsigned long flags;
unsigned int qlen;
+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
sd = &per_cpu(softnet_data, cpu);
- local_irq_save(flags);
-
- rps_lock(sd);
+ rps_lock_irqsave(sd, &flags);
if (!netif_running(skb->dev))
goto drop;
qlen = skb_queue_len(&sd->input_pkt_queue);
- if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
+ if (qlen <= READ_ONCE(netdev_max_backlog) && !skb_flow_limit(skb, qlen)) {
if (qlen) {
enqueue:
__skb_queue_tail(&sd->input_pkt_queue, skb);
input_queue_tail_incr_save(sd, qtail);
- rps_unlock(sd);
- local_irq_restore(flags);
+ rps_unlock_irq_restore(sd, &flags);
return NET_RX_SUCCESS;
}
/* Schedule NAPI for backlog device
* We can use non atomic operation since we own the queue lock
*/
- if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
- if (!rps_ipi_queued(sd))
- ____napi_schedule(sd, &sd->backlog);
- }
+ if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state))
+ napi_schedule_rps(sd);
goto enqueue;
}
+ reason = SKB_DROP_REASON_CPU_BACKLOG;
drop:
sd->dropped++;
- rps_unlock(sd);
+ rps_unlock_irq_restore(sd, &flags);
- local_irq_restore(flags);
-
- atomic_long_inc(&skb->dev->rx_dropped);
- kfree_skb(skb);
+ dev_core_stats_rx_dropped_inc(skb->dev);
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
}
@@ -4727,7 +4864,10 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
}
/* When doing generic XDP we have to bypass the qdisc layer and the
- * network taps in order to match in-driver-XDP behavior.
+ * network taps in order to match in-driver-XDP behavior. This also means
+ * that XDP packets are able to starve other packets going through a qdisc,
+ * and DDOS attacks will be more effective. In-driver-XDP use dedicated TX
+ * queues, so they do not have this starvation issue.
*/
void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
{
@@ -4739,7 +4879,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
txq = netdev_core_pick_tx(dev, skb, NULL);
cpu = smp_processor_id();
HARD_TX_LOCK(dev, txq, cpu);
- if (!netif_xmit_stopped(txq)) {
+ if (!netif_xmit_frozen_or_drv_stopped(txq)) {
rc = netdev_start_xmit(skb, dev, txq, 0);
if (dev_xmit_complete(rc))
free_skb = false;
@@ -4747,6 +4887,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
HARD_TX_UNLOCK(dev, txq);
if (free_skb) {
trace_xdp_exception(dev, xdp_prog, XDP_TX);
+ dev_core_stats_tx_dropped_inc(dev);
kfree_skb(skb);
}
}
@@ -4778,7 +4919,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
}
return XDP_PASS;
out_redir:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_XDP);
return XDP_DROP;
}
EXPORT_SYMBOL_GPL(do_xdp_generic);
@@ -4787,7 +4928,7 @@ static int netif_rx_internal(struct sk_buff *skb)
{
int ret;
- net_timestamp_check(netdev_tstamp_prequeue, skb);
+ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
trace_netif_rx(skb);
@@ -4796,7 +4937,6 @@ static int netif_rx_internal(struct sk_buff *skb)
struct rps_dev_flow voidflow, *rflow = &voidflow;
int cpu;
- preempt_disable();
rcu_read_lock();
cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -4806,78 +4946,72 @@ static int netif_rx_internal(struct sk_buff *skb)
ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
rcu_read_unlock();
- preempt_enable();
} else
#endif
{
unsigned int qtail;
- ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
- put_cpu();
+ ret = enqueue_to_backlog(skb, smp_processor_id(), &qtail);
}
return ret;
}
/**
+ * __netif_rx - Slightly optimized version of netif_rx
+ * @skb: buffer to post
+ *
+ * This behaves as netif_rx except that it does not disable bottom halves.
+ * As a result this function may only be invoked from the interrupt context
+ * (either hard or soft interrupt).
+ */
+int __netif_rx(struct sk_buff *skb)
+{
+ int ret;
+
+ lockdep_assert_once(hardirq_count() | softirq_count());
+
+ trace_netif_rx_entry(skb);
+ ret = netif_rx_internal(skb);
+ trace_netif_rx_exit(ret);
+ return ret;
+}
+EXPORT_SYMBOL(__netif_rx);
+
+/**
* netif_rx - post buffer to the network code
* @skb: buffer to post
*
* This function receives a packet from a device driver and queues it for
- * the upper (protocol) levels to process. It always succeeds. The buffer
- * may be dropped during processing for congestion control or by the
- * protocol layers.
+ * the upper (protocol) levels to process via the backlog NAPI device. It
+ * always succeeds. The buffer may be dropped during processing for
+ * congestion control or by the protocol layers.
+ * The network buffer is passed via the backlog NAPI device. Modern NIC
+ * drivers should use NAPI and GRO.
+ * This function can be used from interrupt and from process context. The
+ * caller from process context must not disable interrupts before invoking
+ * this function.
*
* return values:
* NET_RX_SUCCESS (no congestion)
* NET_RX_DROP (packet was dropped)
*
*/
-
int netif_rx(struct sk_buff *skb)
{
+ bool need_bh_off = !(hardirq_count() | softirq_count());
int ret;
+ if (need_bh_off)
+ local_bh_disable();
trace_netif_rx_entry(skb);
-
ret = netif_rx_internal(skb);
trace_netif_rx_exit(ret);
-
+ if (need_bh_off)
+ local_bh_enable();
return ret;
}
EXPORT_SYMBOL(netif_rx);
-int netif_rx_ni(struct sk_buff *skb)
-{
- int err;
-
- trace_netif_rx_ni_entry(skb);
-
- preempt_disable();
- err = netif_rx_internal(skb);
- if (local_softirq_pending())
- do_softirq();
- preempt_enable();
- trace_netif_rx_ni_exit(err);
-
- return err;
-}
-EXPORT_SYMBOL(netif_rx_ni);
-
-int netif_rx_any_context(struct sk_buff *skb)
-{
- /*
- * If invoked from contexts which do not invoke bottom half
- * processing either at return from interrupt or when softrqs are
- * reenabled, use netif_rx_ni() which invokes bottomhalf processing
- * directly.
- */
- if (in_interrupt())
- return netif_rx(skb);
- else
- return netif_rx_ni(skb);
-}
-EXPORT_SYMBOL(netif_rx_any_context);
-
static __latent_entropy void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
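Migration implied by the removals above (call sites are converted elsewhere in this series):

	netif_rx_ni(skb);		/* old */	->	netif_rx(skb);
	netif_rx_any_context(skb);	/* old */	->	netif_rx(skb);

and hard/soft-IRQ-only paths that want to skip the bh_disable/enable pair call __netif_rx(skb) directly.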
@@ -5001,12 +5135,14 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
break;
case TC_ACT_SHOT:
mini_qdisc_qstats_cpu_drop(miniq);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
+ *ret = NET_RX_DROP;
return NULL;
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
consume_skb(skb);
+ *ret = NET_RX_SUCCESS;
return NULL;
case TC_ACT_REDIRECT:
/* skb_mac_header check was done by cls/act_bpf, so
@@ -5019,8 +5155,10 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
*another = true;
break;
}
+ *ret = NET_RX_SUCCESS;
return NULL;
case TC_ACT_CONSUMED:
+ *ret = NET_RX_SUCCESS;
return NULL;
default:
break;
@@ -5147,7 +5285,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
int ret = NET_RX_DROP;
__be16 type;
- net_timestamp_check(!netdev_tstamp_prequeue, skb);
+ net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb);
trace_netif_receive_skb(skb);
@@ -5319,10 +5457,10 @@ check_vlan_id:
} else {
drop:
if (!deliver_exact)
- atomic_long_inc(&skb->dev->rx_dropped);
+ dev_core_stats_rx_dropped_inc(skb->dev);
else
- atomic_long_inc(&skb->dev->rx_nohandler);
- kfree_skb(skb);
+ dev_core_stats_rx_nohandler_inc(skb->dev);
+ kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
*/
@@ -5530,7 +5668,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
{
int ret;
- net_timestamp_check(netdev_tstamp_prequeue, skb);
+ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
if (skb_defer_rx_timestamp(skb))
return NET_RX_SUCCESS;
@@ -5560,7 +5698,7 @@ void netif_receive_skb_list_internal(struct list_head *head)
INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
- net_timestamp_check(netdev_tstamp_prequeue, skb);
+ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
skb_list_del_init(skb);
if (!skb_defer_rx_timestamp(skb))
list_add_tail(&skb->list, &sublist);
@@ -5650,8 +5788,7 @@ static void flush_backlog(struct work_struct *work)
local_bh_disable();
sd = this_cpu_ptr(&softnet_data);
- local_irq_disable();
- rps_lock(sd);
+ rps_lock_irq_disable(sd);
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
if (skb->dev->reg_state == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->input_pkt_queue);
@@ -5659,8 +5796,7 @@ static void flush_backlog(struct work_struct *work)
input_queue_head_incr(sd);
}
}
- rps_unlock(sd);
- local_irq_enable();
+ rps_unlock_irq_enable(sd);
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
if (skb->dev->reg_state == NETREG_UNREGISTERING) {
@@ -5678,16 +5814,14 @@ static bool flush_required(int cpu)
struct softnet_data *sd = &per_cpu(softnet_data, cpu);
bool do_flush;
- local_irq_disable();
- rps_lock(sd);
+ rps_lock_irq_disable(sd);
/* as insertion into process_queue happens with the rps lock held,
* process_queue access may race only with dequeue
*/
do_flush = !skb_queue_empty(&sd->input_pkt_queue) ||
!skb_queue_empty_lockless(&sd->process_queue);
- rps_unlock(sd);
- local_irq_enable();
+ rps_unlock_irq_enable(sd);
return do_flush;
#endif
@@ -5788,7 +5922,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
net_rps_action_and_irq_enable(sd);
}
- napi->weight = dev_rx_weight;
+ napi->weight = READ_ONCE(dev_rx_weight);
while (again) {
struct sk_buff *skb;
@@ -5802,8 +5936,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
}
- local_irq_disable();
- rps_lock(sd);
+ rps_lock_irq_disable(sd);
if (skb_queue_empty(&sd->input_pkt_queue)) {
/*
* Inline a custom version of __napi_complete().
@@ -5819,8 +5952,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
skb_queue_splice_tail_init(&sd->input_pkt_queue,
&sd->process_queue);
}
- rps_unlock(sd);
- local_irq_enable();
+ rps_unlock_irq_enable(sd);
}
return work;
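The hunks above replace the open-coded local_irq_disable()/rps_lock() pairs with dedicated helpers. A minimal sketch of what rps_lock_irq_disable() and rps_unlock_irq_enable() plausibly look like, assuming they take the input_pkt_queue lock when RPS is compiled in and otherwise fall back to plain IRQ toggling (skipped on PREEMPT_RT):

static inline void rps_lock_irq_disable(struct softnet_data *sd)
{
	if (IS_ENABLED(CONFIG_RPS))
		spin_lock_irq(&sd->input_pkt_queue.lock);
	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		local_irq_disable();
}

static inline void rps_unlock_irq_enable(struct softnet_data *sd)
{
	if (IS_ENABLED(CONFIG_RPS))
		spin_unlock_irq(&sd->input_pkt_queue.lock);
	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		local_irq_enable();
}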
@@ -6230,8 +6362,8 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
}
EXPORT_SYMBOL(dev_set_threaded);
-void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
- int (*poll)(struct napi_struct *, int), int weight)
+void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
+ int (*poll)(struct napi_struct *, int), int weight)
{
if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state)))
return;
@@ -6257,6 +6389,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
set_bit(NAPI_STATE_NPSVC, &napi->state);
list_add_rcu(&napi->dev_list, &dev->napi_list);
napi_hash_add(napi);
+ napi_get_frags_check(napi);
/* Create kthread for this napi if dev->threaded is set.
* Clear dev->threaded if kthread creation failed so that
* threaded mode will not be enabled in napi_enable().
@@ -6264,7 +6397,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
if (dev->threaded && napi_kthread_create(napi))
dev->threaded = 0;
}
-EXPORT_SYMBOL(netif_napi_add);
+EXPORT_SYMBOL(netif_napi_add_weight);
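With the symbol renamed to netif_napi_add_weight(), existing netif_napi_add() callers would keep working through a thin wrapper. A plausible shape for that shim, assuming the conventional NAPI_POLL_WEIGHT default; the wrapper itself is illustrative, not taken from this patch:

/* Hypothetical compatibility wrapper: supply the default poll weight
 * and delegate to the weighted variant.
 */
static inline void netif_napi_add(struct net_device *dev,
				  struct napi_struct *napi,
				  int (*poll)(struct napi_struct *, int))
{
	netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT);
}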
void napi_disable(struct napi_struct *n)
{
@@ -6493,12 +6626,34 @@ static int napi_threaded_poll(void *data)
return 0;
}
+static void skb_defer_free_flush(struct softnet_data *sd)
+{
+ struct sk_buff *skb, *next;
+ unsigned long flags;
+
+ /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
+ if (!READ_ONCE(sd->defer_list))
+ return;
+
+ spin_lock_irqsave(&sd->defer_lock, flags);
+ skb = sd->defer_list;
+ sd->defer_list = NULL;
+ sd->defer_count = 0;
+ spin_unlock_irqrestore(&sd->defer_lock, flags);
+
+ while (skb != NULL) {
+ next = skb->next;
+ napi_consume_skb(skb, 1);
+ skb = next;
+ }
+}
+
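skb_defer_free_flush() is the consumer half of a lockless handoff; the WRITE_ONCE() it pairs with lives in skb_attempt_defer_free(), which is not part of this hunk. A simplified sketch of that producer side, assuming skbs record their allocating CPU in skb->alloc_cpu and that the remote CPU is kicked through the new defer_csd; queue-length clamping and the same-CPU fast path are omitted:

/* Simplified producer sketch: chain the skb onto the allocating CPU's
 * defer list and kick that CPU's NET_RX softirq when the list goes
 * from empty to non-empty.
 */
void skb_attempt_defer_free(struct sk_buff *skb)
{
	int cpu = skb->alloc_cpu;
	struct softnet_data *sd = &per_cpu(softnet_data, cpu);
	unsigned long flags;
	bool kick;

	spin_lock_irqsave(&sd->defer_lock, flags);
	skb->next = sd->defer_list;
	/* Paired with the READ_ONCE() in skb_defer_free_flush() */
	WRITE_ONCE(sd->defer_list, skb);
	kick = sd->defer_count == 0;
	sd->defer_count++;
	spin_unlock_irqrestore(&sd->defer_lock, flags);

	if (kick)
		smp_call_function_single_async(cpu, &sd->defer_csd);
}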
static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies +
- usecs_to_jiffies(netdev_budget_usecs);
- int budget = netdev_budget;
+ usecs_to_jiffies(READ_ONCE(netdev_budget_usecs));
+ int budget = READ_ONCE(netdev_budget);
LIST_HEAD(list);
LIST_HEAD(repoll);
@@ -6509,9 +6664,11 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
for (;;) {
struct napi_struct *n;
+ skb_defer_free_flush(sd);
+
if (list_empty(&list)) {
if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
- return;
+ goto end;
break;
}
@@ -6538,6 +6695,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
net_rps_action_and_irq_enable(sd);
+end:;
}
struct netdev_adjacent {
@@ -7145,6 +7303,16 @@ static int __netdev_update_upper_level(struct net_device *dev,
return 0;
}
+#ifdef CONFIG_LOCKDEP
+static LIST_HEAD(net_unlink_list);
+
+static void net_unlink_todo(struct net_device *dev)
+{
+ if (list_empty(&dev->unlink_list))
+ list_add_tail(&dev->unlink_list, &net_unlink_list);
+}
+#endif
+
static int __netdev_update_lower_level(struct net_device *dev,
struct netdev_nested_priv *priv)
{
@@ -7307,7 +7475,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
adj->ref_nr = 1;
adj->private = private;
adj->ignore = false;
- dev_hold_track(adj_dev, &adj->dev_tracker, GFP_KERNEL);
+ netdev_hold(adj_dev, &adj->dev_tracker, GFP_KERNEL);
pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n",
dev->name, adj_dev->name, adj->ref_nr, adj_dev->name);
@@ -7336,7 +7504,7 @@ remove_symlinks:
if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
free_adj:
- dev_put_track(adj_dev, &adj->dev_tracker);
+ netdev_put(adj_dev, &adj->dev_tracker);
kfree(adj);
return ret;
@@ -7378,7 +7546,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
list_del_rcu(&adj->list);
pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n",
adj_dev->name, dev->name, adj_dev->name);
- dev_put_track(adj_dev, &adj->dev_tracker);
+ netdev_put(adj_dev, &adj->dev_tracker);
kfree_rcu(adj, rcu);
}
@@ -7727,6 +7895,242 @@ void netdev_bonding_info_change(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_bonding_info_change);
+static int netdev_offload_xstats_enable_l3(struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_offload_xstats_info info = {
+ .info.dev = dev,
+ .info.extack = extack,
+ .type = NETDEV_OFFLOAD_XSTATS_TYPE_L3,
+ };
+ int err;
+ int rc;
+
+ dev->offload_xstats_l3 = kzalloc(sizeof(*dev->offload_xstats_l3),
+ GFP_KERNEL);
+ if (!dev->offload_xstats_l3)
+ return -ENOMEM;
+
+ rc = call_netdevice_notifiers_info_robust(NETDEV_OFFLOAD_XSTATS_ENABLE,
+ NETDEV_OFFLOAD_XSTATS_DISABLE,
+ &info.info);
+ err = notifier_to_errno(rc);
+ if (err)
+ goto free_stats;
+
+ return 0;
+
+free_stats:
+ kfree(dev->offload_xstats_l3);
+ dev->offload_xstats_l3 = NULL;
+ return err;
+}
+
+int netdev_offload_xstats_enable(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ struct netlink_ext_ack *extack)
+{
+ ASSERT_RTNL();
+
+ if (netdev_offload_xstats_enabled(dev, type))
+ return -EALREADY;
+
+ switch (type) {
+ case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
+ return netdev_offload_xstats_enable_l3(dev, extack);
+ }
+
+ WARN_ON(1);
+ return -EINVAL;
+}
+EXPORT_SYMBOL(netdev_offload_xstats_enable);
+
+static void netdev_offload_xstats_disable_l3(struct net_device *dev)
+{
+ struct netdev_notifier_offload_xstats_info info = {
+ .info.dev = dev,
+ .type = NETDEV_OFFLOAD_XSTATS_TYPE_L3,
+ };
+
+ call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_DISABLE,
+ &info.info);
+ kfree(dev->offload_xstats_l3);
+ dev->offload_xstats_l3 = NULL;
+}
+
+int netdev_offload_xstats_disable(struct net_device *dev,
+ enum netdev_offload_xstats_type type)
+{
+ ASSERT_RTNL();
+
+ if (!netdev_offload_xstats_enabled(dev, type))
+ return -EALREADY;
+
+ switch (type) {
+ case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
+ netdev_offload_xstats_disable_l3(dev);
+ return 0;
+ }
+
+ WARN_ON(1);
+ return -EINVAL;
+}
+EXPORT_SYMBOL(netdev_offload_xstats_disable);
+
+static void netdev_offload_xstats_disable_all(struct net_device *dev)
+{
+ netdev_offload_xstats_disable(dev, NETDEV_OFFLOAD_XSTATS_TYPE_L3);
+}
+
+static struct rtnl_hw_stats64 *
+netdev_offload_xstats_get_ptr(const struct net_device *dev,
+ enum netdev_offload_xstats_type type)
+{
+ switch (type) {
+ case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
+ return dev->offload_xstats_l3;
+ }
+
+ WARN_ON(1);
+ return NULL;
+}
+
+bool netdev_offload_xstats_enabled(const struct net_device *dev,
+ enum netdev_offload_xstats_type type)
+{
+ ASSERT_RTNL();
+
+ return netdev_offload_xstats_get_ptr(dev, type);
+}
+EXPORT_SYMBOL(netdev_offload_xstats_enabled);
+
+struct netdev_notifier_offload_xstats_ru {
+ bool used;
+};
+
+struct netdev_notifier_offload_xstats_rd {
+ struct rtnl_hw_stats64 stats;
+ bool used;
+};
+
+static void netdev_hw_stats64_add(struct rtnl_hw_stats64 *dest,
+ const struct rtnl_hw_stats64 *src)
+{
+ dest->rx_packets += src->rx_packets;
+ dest->tx_packets += src->tx_packets;
+ dest->rx_bytes += src->rx_bytes;
+ dest->tx_bytes += src->tx_bytes;
+ dest->rx_errors += src->rx_errors;
+ dest->tx_errors += src->tx_errors;
+ dest->rx_dropped += src->rx_dropped;
+ dest->tx_dropped += src->tx_dropped;
+ dest->multicast += src->multicast;
+}
+
+static int netdev_offload_xstats_get_used(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ bool *p_used,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_offload_xstats_ru report_used = {};
+ struct netdev_notifier_offload_xstats_info info = {
+ .info.dev = dev,
+ .info.extack = extack,
+ .type = type,
+ .report_used = &report_used,
+ };
+ int rc;
+
+ WARN_ON(!netdev_offload_xstats_enabled(dev, type));
+ rc = call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_REPORT_USED,
+ &info.info);
+ *p_used = report_used.used;
+ return notifier_to_errno(rc);
+}
+
+static int netdev_offload_xstats_get_stats(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ struct rtnl_hw_stats64 *p_stats,
+ bool *p_used,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_offload_xstats_rd report_delta = {};
+ struct netdev_notifier_offload_xstats_info info = {
+ .info.dev = dev,
+ .info.extack = extack,
+ .type = type,
+ .report_delta = &report_delta,
+ };
+ struct rtnl_hw_stats64 *stats;
+ int rc;
+
+ stats = netdev_offload_xstats_get_ptr(dev, type);
+ if (WARN_ON(!stats))
+ return -EINVAL;
+
+ rc = call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_REPORT_DELTA,
+ &info.info);
+
+ /* Cache whatever we got, even if there was an error, otherwise the
+ * successful stats retrievals would get lost.
+ */
+ netdev_hw_stats64_add(stats, &report_delta.stats);
+
+ if (p_stats)
+ *p_stats = *stats;
+ *p_used = report_delta.used;
+
+ return notifier_to_errno(rc);
+}
+
+int netdev_offload_xstats_get(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ struct rtnl_hw_stats64 *p_stats, bool *p_used,
+ struct netlink_ext_ack *extack)
+{
+ ASSERT_RTNL();
+
+ if (p_stats)
+ return netdev_offload_xstats_get_stats(dev, type, p_stats,
+ p_used, extack);
+ else
+ return netdev_offload_xstats_get_used(dev, type, p_used,
+ extack);
+}
+EXPORT_SYMBOL(netdev_offload_xstats_get);
+
+void
+netdev_offload_xstats_report_delta(struct netdev_notifier_offload_xstats_rd *report_delta,
+ const struct rtnl_hw_stats64 *stats)
+{
+ report_delta->used = true;
+ netdev_hw_stats64_add(&report_delta->stats, stats);
+}
+EXPORT_SYMBOL(netdev_offload_xstats_report_delta);
+
+void
+netdev_offload_xstats_report_used(struct netdev_notifier_offload_xstats_ru *report_used)
+{
+ report_used->used = true;
+}
+EXPORT_SYMBOL(netdev_offload_xstats_report_used);
+
+void netdev_offload_xstats_push_delta(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ const struct rtnl_hw_stats64 *p_stats)
+{
+ struct rtnl_hw_stats64 *stats;
+
+ ASSERT_RTNL();
+
+ stats = netdev_offload_xstats_get_ptr(dev, type);
+ if (WARN_ON(!stats))
+ return;
+
+ netdev_hw_stats64_add(stats, p_stats);
+}
+EXPORT_SYMBOL(netdev_offload_xstats_push_delta);
+
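The REPORT_USED/REPORT_DELTA notifiers give drivers a pull interface: the core asks, and the driver answers through the report helpers exported above. A hypothetical driver-side handler, assuming a driver that accumulates hardware counters into its own rtnl_hw_stats64 snapshot (struct my_drv_priv and its fields are illustrative, not from this patch):

/* Hypothetical notifier sketch: hand the accumulated counters to the
 * core as a delta, then zero the local snapshot so the next report
 * starts fresh.
 */
struct my_drv_priv {
	struct notifier_block nb;
	struct rtnl_hw_stats64 hw_stats;	/* gathered from hardware */
};

static int my_drv_netdevice_event(struct notifier_block *nb,
				  unsigned long event, void *ptr)
{
	struct netdev_notifier_offload_xstats_info *info = ptr;
	struct my_drv_priv *priv = container_of(nb, struct my_drv_priv, nb);

	if (event != NETDEV_OFFLOAD_XSTATS_REPORT_DELTA ||
	    info->type != NETDEV_OFFLOAD_XSTATS_TYPE_L3)
		return NOTIFY_DONE;

	netdev_offload_xstats_report_delta(info->report_delta,
					   &priv->hw_stats);
	memset(&priv->hw_stats, 0, sizeof(priv->hw_stats));
	return NOTIFY_OK;
}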
/**
* netdev_get_xmit_slave - Get the xmit slave of master device
* @dev: device
@@ -8347,7 +8751,6 @@ void dev_set_group(struct net_device *dev, int new_group)
{
dev->group = new_group;
}
-EXPORT_SYMBOL(dev_set_group);
/**
* dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR.
@@ -8462,7 +8865,6 @@ int dev_change_carrier(struct net_device *dev, bool new_carrier)
return -ENODEV;
return ops->ndo_change_carrier(dev, new_carrier);
}
-EXPORT_SYMBOL(dev_change_carrier);
/**
* dev_get_phys_port_id - Get device physical port ID
@@ -8480,7 +8882,6 @@ int dev_get_phys_port_id(struct net_device *dev,
return -EOPNOTSUPP;
return ops->ndo_get_phys_port_id(dev, ppid);
}
-EXPORT_SYMBOL(dev_get_phys_port_id);
/**
* dev_get_phys_port_name - Get device physical port name
@@ -8503,7 +8904,6 @@ int dev_get_phys_port_name(struct net_device *dev,
}
return devlink_compat_phys_port_name_get(dev, name, len);
}
-EXPORT_SYMBOL(dev_get_phys_port_name);
/**
* dev_get_port_parent_id - Get the device's port parent identifier
@@ -8585,7 +8985,6 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
dev->proto_down = proto_down;
return 0;
}
-EXPORT_SYMBOL(dev_change_proto_down);
/**
* dev_change_proto_down_reason - proto down reason
@@ -8610,7 +9009,6 @@ void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
}
}
}
-EXPORT_SYMBOL(dev_change_proto_down_reason);
struct bpf_xdp_link {
struct bpf_link link;
@@ -8981,6 +9379,12 @@ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
goto out_unlock;
}
old_prog = link->prog;
+ if (old_prog->type != new_prog->type ||
+ old_prog->expected_attach_type != new_prog->expected_attach_type) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
if (old_prog == new_prog) {
/* no-op, don't disturb drivers */
bpf_prog_put(new_prog);
@@ -9131,13 +9535,13 @@ static int dev_new_index(struct net *net)
}
/* Delayed registration/unregistration */
-static LIST_HEAD(net_todo_list);
+LIST_HEAD(net_todo_list);
DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
static void net_set_todo(struct net_device *dev)
{
list_add_tail(&dev->todo_list, &net_todo_list);
- dev_net(dev)->dev_unreg_count++;
+ atomic_inc(&dev_net(dev)->dev_unreg_count);
}
static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
@@ -9538,22 +9942,14 @@ void netif_tx_stop_all_queues(struct net_device *dev)
EXPORT_SYMBOL(netif_tx_stop_all_queues);
/**
- * register_netdevice - register a network device
- * @dev: device to register
+ * register_netdevice() - register a network device
+ * @dev: device to register
*
- * Take a completed network device structure and add it to the kernel
- * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
- * chain. 0 is returned on success. A negative errno code is returned
- * on a failure to set up the device, or if the name is a duplicate.
- *
- * Callers must hold the rtnl semaphore. You may want
- * register_netdev() instead of this.
- *
- * BUGS:
- * The locking appears insufficient to guarantee two parallel registers
- * will not get the same name.
+ * Take a prepared network device structure and make it externally accessible.
+ * A %NETDEV_REGISTER message is sent to the netdev notifier chain.
+ * Callers must hold the rtnl lock - you may want register_netdev()
+ * instead of this.
*/
-
int register_netdevice(struct net_device *dev)
{
int ret;
@@ -9659,11 +10055,11 @@ int register_netdevice(struct net_device *dev)
goto err_uninit;
ret = netdev_register_kobject(dev);
- if (ret) {
- dev->reg_state = NETREG_UNREGISTERED;
+ write_lock(&dev_base_lock);
+ dev->reg_state = ret ? NETREG_UNREGISTERED : NETREG_REGISTERED;
+ write_unlock(&dev_base_lock);
+ if (ret)
goto err_uninit;
- }
- dev->reg_state = NETREG_REGISTERED;
__netdev_update_features(dev);
@@ -9677,8 +10073,10 @@ int register_netdevice(struct net_device *dev)
linkwatch_init_dev(dev);
dev_init_scheduler(dev);
- dev_hold(dev);
+
+ netdev_hold(dev, &dev->dev_registered_tracker, GFP_KERNEL);
list_netdevice(dev);
+
add_device_randomness(dev->dev_addr, dev->addr_len);
/* If the device has permanent device address, driver should
@@ -9807,8 +10205,8 @@ int netdev_unregister_timeout_secs __read_mostly = 10;
#define WAIT_REFS_MIN_MSECS 1
#define WAIT_REFS_MAX_MSECS 250
/**
- * netdev_wait_allrefs - wait until all references are gone.
- * @dev: target net_device
+ * netdev_wait_allrefs_any - wait until all references are gone.
+ * @list: list of net_devices to wait on
*
* This is called when unregistering network devices.
*
@@ -9818,37 +10216,42 @@ int netdev_unregister_timeout_secs __read_mostly = 10;
* We can get stuck here if buggy protocols don't correctly
* call dev_put.
*/
-static void netdev_wait_allrefs(struct net_device *dev)
+static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
{
unsigned long rebroadcast_time, warning_time;
- int wait = 0, refcnt;
-
- linkwatch_forget_dev(dev);
+ struct net_device *dev;
+ int wait = 0;
rebroadcast_time = warning_time = jiffies;
- refcnt = netdev_refcnt_read(dev);
- while (refcnt != 1) {
+ list_for_each_entry(dev, list, todo_list)
+ if (netdev_refcnt_read(dev) == 1)
+ return dev;
+
+ while (true) {
if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
rtnl_lock();
/* Rebroadcast unregister notification */
- call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+ list_for_each_entry(dev, list, todo_list)
+ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
__rtnl_unlock();
rcu_barrier();
rtnl_lock();
- if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
- &dev->state)) {
- /* We must not have linkwatch events
- * pending on unregister. If this
- * happens, we simply run the queue
- * unscheduled, resulting in a noop
- * for this device.
- */
- linkwatch_run_queue();
- }
+ list_for_each_entry(dev, list, todo_list)
+ if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
+ &dev->state)) {
+ /* We must not have linkwatch events
+ * pending on unregister. If this
+ * happens, we simply run the queue
+ * unscheduled, resulting in a noop
+ * for this device.
+ */
+ linkwatch_run_queue();
+ break;
+ }
__rtnl_unlock();
@@ -9863,14 +10266,18 @@ static void netdev_wait_allrefs(struct net_device *dev)
wait = min(wait << 1, WAIT_REFS_MAX_MSECS);
}
- refcnt = netdev_refcnt_read(dev);
+ list_for_each_entry(dev, list, todo_list)
+ if (netdev_refcnt_read(dev) == 1)
+ return dev;
+
+ if (time_after(jiffies, warning_time +
+ READ_ONCE(netdev_unregister_timeout_secs) * HZ)) {
+ list_for_each_entry(dev, list, todo_list) {
+ pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
+ dev->name, netdev_refcnt_read(dev));
+ ref_tracker_dir_print(&dev->refcnt_tracker, 10);
+ }
- if (refcnt != 1 &&
- time_after(jiffies, warning_time +
- netdev_unregister_timeout_secs * HZ)) {
- pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
- dev->name, refcnt);
- ref_tracker_dir_print(&dev->refcnt_tracker, 10);
warning_time = jiffies;
}
}
@@ -9902,6 +10309,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
*/
void netdev_run_todo(void)
{
+ struct net_device *dev, *tmp;
struct list_head list;
#ifdef CONFIG_LOCKDEP
struct list_head unlink_list;
@@ -9922,26 +10330,26 @@ void netdev_run_todo(void)
__rtnl_unlock();
-
/* Wait for rcu callbacks to finish before next phase */
if (!list_empty(&list))
rcu_barrier();
- while (!list_empty(&list)) {
- struct net_device *dev
- = list_first_entry(&list, struct net_device, todo_list);
- list_del(&dev->todo_list);
-
+ list_for_each_entry_safe(dev, tmp, &list, todo_list) {
if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
- pr_err("network todo '%s' but state %d\n",
- dev->name, dev->reg_state);
- dump_stack();
+ netdev_WARN(dev, "run_todo but not unregistering\n");
+ list_del(&dev->todo_list);
continue;
}
+ write_lock(&dev_base_lock);
dev->reg_state = NETREG_UNREGISTERED;
+ write_unlock(&dev_base_lock);
+ linkwatch_forget_dev(dev);
+ }
- netdev_wait_allrefs(dev);
+ while (!list_empty(&list)) {
+ dev = netdev_wait_allrefs_any(&list);
+ list_del(&dev->todo_list);
/* paranoia */
BUG_ON(netdev_refcnt_read(dev) != 1);
@@ -9949,19 +10357,14 @@ void netdev_run_todo(void)
BUG_ON(!list_empty(&dev->ptype_specific));
WARN_ON(rcu_access_pointer(dev->ip_ptr));
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
-#if IS_ENABLED(CONFIG_DECNET)
- WARN_ON(dev->dn_ptr);
-#endif
+
if (dev->priv_destructor)
dev->priv_destructor(dev);
if (dev->needs_free_netdev)
free_netdev(dev);
- /* Report a network device has been unregistered */
- rtnl_lock();
- dev_net(dev)->dev_unreg_count--;
- __rtnl_unlock();
- wake_up(&netdev_unregistering_wq);
+ if (atomic_dec_and_test(&dev_net(dev)->dev_unreg_count))
+ wake_up(&netdev_unregistering_wq);
/* Free network device */
kobject_put(&dev->dev.kobj);
@@ -9997,6 +10400,21 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
}
EXPORT_SYMBOL(netdev_stats_to_stats64);
+struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev)
+{
+ struct net_device_core_stats __percpu *p;
+
+ p = alloc_percpu_gfp(struct net_device_core_stats,
+ GFP_ATOMIC | __GFP_NOWARN);
+
+ if (p && cmpxchg(&dev->core_stats, NULL, p))
+ free_percpu(p);
+
+ /* This READ_ONCE() pairs with the cmpxchg() above */
+ return READ_ONCE(dev->core_stats);
+}
+EXPORT_SYMBOL(netdev_core_stats_alloc);
+
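netdev_core_stats_alloc() only materializes the per-CPU counters on first use, so the increment helpers are expected to try the published pointer first and fall back to allocation. A sketch of one such helper under that assumption (the name mirrors the dev_core_stats_rx_dropped_inc() call seen earlier in this patch):

/* Lockless per-CPU increment sketch: allocate on the first event and
 * silently skip the count if the atomic allocation fails.
 */
static inline void dev_core_stats_rx_dropped_inc(struct net_device *dev)
{
	struct net_device_core_stats __percpu *p;

	p = READ_ONCE(dev->core_stats);
	if (unlikely(!p))
		p = netdev_core_stats_alloc(dev);
	if (p)
		this_cpu_inc(p->rx_dropped);
}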
/**
* dev_get_stats - get network device statistics
* @dev: device to get statistics from
@@ -10011,6 +10429,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
struct rtnl_link_stats64 *storage)
{
const struct net_device_ops *ops = dev->netdev_ops;
+ const struct net_device_core_stats __percpu *p;
if (ops->ndo_get_stats64) {
memset(storage, 0, sizeof(*storage));
@@ -10020,9 +10439,21 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
} else {
netdev_stats_to_stats64(storage, &dev->stats);
}
- storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped);
- storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped);
- storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler);
+
+ /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */
+ p = READ_ONCE(dev->core_stats);
+ if (p) {
+ const struct net_device_core_stats *core_stats;
+ int i;
+
+ for_each_possible_cpu(i) {
+ core_stats = per_cpu_ptr(p, i);
+ storage->rx_dropped += READ_ONCE(core_stats->rx_dropped);
+ storage->tx_dropped += READ_ONCE(core_stats->tx_dropped);
+ storage->rx_nohandler += READ_ONCE(core_stats->rx_nohandler);
+ storage->rx_otherhost_dropped += READ_ONCE(core_stats->rx_otherhost_dropped);
+ }
+ }
return storage;
}
EXPORT_SYMBOL(dev_get_stats);
@@ -10040,23 +10471,23 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
int cpu;
for_each_possible_cpu(cpu) {
+ u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
const struct pcpu_sw_netstats *stats;
- struct pcpu_sw_netstats tmp;
unsigned int start;
stats = per_cpu_ptr(netstats, cpu);
do {
start = u64_stats_fetch_begin_irq(&stats->syncp);
- tmp.rx_packets = stats->rx_packets;
- tmp.rx_bytes = stats->rx_bytes;
- tmp.tx_packets = stats->tx_packets;
- tmp.tx_bytes = stats->tx_bytes;
+ rx_packets = u64_stats_read(&stats->rx_packets);
+ rx_bytes = u64_stats_read(&stats->rx_bytes);
+ tx_packets = u64_stats_read(&stats->tx_packets);
+ tx_bytes = u64_stats_read(&stats->tx_bytes);
} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
- s->rx_packets += tmp.rx_packets;
- s->rx_bytes += tmp.rx_bytes;
- s->tx_packets += tmp.tx_packets;
- s->tx_bytes += tmp.tx_bytes;
+ s->rx_packets += rx_packets;
+ s->rx_bytes += rx_bytes;
+ s->tx_packets += tx_packets;
+ s->tx_bytes += tx_bytes;
}
}
EXPORT_SYMBOL_GPL(dev_fetch_sw_netstats);
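The fetch_begin/fetch_retry loop above is the reader half of the u64_stats seqcount protocol: readers retry until they see a consistent pair of 64-bit counters, which matters on 32-bit hosts. A minimal writer-side sketch, assuming a driver RX path updating a struct pcpu_sw_netstats (the function name is illustrative):

/* Writer-side sketch: bracket the updates with the syncp sequence so
 * a concurrent reader either sees both counters updated or retries.
 */
static void my_drv_count_rx(struct pcpu_sw_netstats *stats,
			    unsigned int len)
{
	u64_stats_update_begin(&stats->syncp);
	u64_stats_inc(&stats->rx_packets);
	u64_stats_add(&stats->rx_bytes, len);
	u64_stats_update_end(&stats->syncp);
}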
@@ -10166,7 +10597,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->pcpu_refcnt = alloc_percpu(int);
if (!dev->pcpu_refcnt)
goto free_dev;
- dev_hold(dev);
+ __dev_hold(dev);
#else
refcount_set(&dev->dev_refcnt, 1);
#endif
@@ -10179,9 +10610,11 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev_net_set(dev, &init_net);
- dev->gso_max_size = GSO_MAX_SIZE;
+ dev->gso_max_size = GSO_LEGACY_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS;
- dev->gro_max_size = GRO_MAX_SIZE;
+ dev->gro_max_size = GRO_LEGACY_MAX_SIZE;
+ dev->tso_max_size = TSO_LEGACY_MAX_SIZE;
+ dev->tso_max_segs = TSO_MAX_SEGS;
dev->upper_level = 1;
dev->lower_level = 1;
#ifdef CONFIG_LOCKDEP
@@ -10284,6 +10717,8 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->pcpu_refcnt);
dev->pcpu_refcnt = NULL;
#endif
+ free_percpu(dev->core_stats);
+ dev->core_stats = NULL;
free_percpu(dev->xdp_bulkq);
dev->xdp_bulkq = NULL;
@@ -10387,9 +10822,10 @@ void unregister_netdevice_many(struct list_head *head)
list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
- unlist_netdevice(dev);
-
+ write_lock(&dev_base_lock);
+ unlist_netdevice(dev, false);
dev->reg_state = NETREG_UNREGISTERING;
+ write_unlock(&dev_base_lock);
}
flush_all_backlogs();
@@ -10403,6 +10839,8 @@ void unregister_netdevice_many(struct list_head *head)
dev_xdp_uninstall(dev);
+ netdev_offload_xstats_disable_all(dev);
+
/* Notify protocols, that we are about to destroy
* this device. They should clean all the things.
*/
@@ -10443,7 +10881,7 @@ void unregister_netdevice_many(struct list_head *head)
synchronize_net();
list_for_each_entry(dev, head, unreg_list) {
- dev_put(dev);
+ netdev_put(dev, &dev->dev_registered_tracker);
net_set_todo(dev);
}
@@ -10534,7 +10972,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
dev_close(dev);
/* And unlink it from device chain */
- unlist_netdevice(dev);
+ unlist_netdevice(dev, true);
synchronize_net();
@@ -10668,11 +11106,11 @@ static int dev_cpu_dead(unsigned int oldcpu)
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->process_queue))) {
- netif_rx_ni(skb);
+ netif_rx(skb);
input_queue_head_incr(oldsd);
}
while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
- netif_rx_ni(skb);
+ netif_rx(skb);
input_queue_head_incr(oldsd);
}
@@ -10726,8 +11164,7 @@ static int __net_init netdev_init(struct net *net)
BUILD_BUG_ON(GRO_HASH_BUCKETS >
8 * sizeof_field(struct napi_struct, gro_bitmask));
- if (net != &init_net)
- INIT_LIST_HEAD(&net->dev_base_head);
+ INIT_LIST_HEAD(&net->dev_base_head);
net->dev_name_head = netdev_create_hash();
if (net->dev_name_head == NULL)
@@ -10843,14 +11280,14 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
.exit = netdev_exit,
};
-static void __net_exit default_device_exit(struct net *net)
+static void __net_exit default_device_exit_net(struct net *net)
{
struct net_device *dev, *aux;
/*
* Push all migratable network devices back to the
* initial network namespace
*/
- rtnl_lock();
+ ASSERT_RTNL();
for_each_netdev_safe(net, dev, aux) {
int err;
char fb_name[IFNAMSIZ];
@@ -10874,35 +11311,6 @@ static void __net_exit default_device_exit(struct net *net)
BUG();
}
}
- rtnl_unlock();
-}
-
-static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
-{
- /* Return with the rtnl_lock held when there are no network
- * devices unregistering in any network namespace in net_list.
- */
- struct net *net;
- bool unregistering;
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
-
- add_wait_queue(&netdev_unregistering_wq, &wait);
- for (;;) {
- unregistering = false;
- rtnl_lock();
- list_for_each_entry(net, net_list, exit_list) {
- if (net->dev_unreg_count > 0) {
- unregistering = true;
- break;
- }
- }
- if (!unregistering)
- break;
- __rtnl_unlock();
-
- wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
- }
- remove_wait_queue(&netdev_unregistering_wq, &wait);
}
static void __net_exit default_device_exit_batch(struct list_head *net_list)
@@ -10916,18 +11324,12 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
struct net *net;
LIST_HEAD(dev_kill_list);
- /* To prevent network device cleanup code from dereferencing
- * loopback devices or network devices that have been freed
- * wait here for all pending unregistrations to complete,
- * before unregistring the loopback device and allowing the
- * network namespace be freed.
- *
- * The netdev todo list containing all network devices
- * unregistrations that happen in default_device_exit_batch
- * will run in the rtnl_unlock() at the end of
- * default_device_exit_batch.
- */
- rtnl_lock_unregistering(net_list);
+ rtnl_lock();
+ list_for_each_entry(net, net_list, exit_list) {
+ default_device_exit_net(net);
+ cond_resched();
+ }
+
list_for_each_entry(net, net_list, exit_list) {
for_each_netdev_reverse(net, dev) {
if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
@@ -10941,7 +11343,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
}
static struct pernet_operations __net_initdata default_device_ops = {
- .exit = default_device_exit,
.exit_batch = default_device_exit_batch,
};
@@ -10996,6 +11397,8 @@ static int __init net_dev_init(void)
INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
sd->cpu = i;
#endif
+ INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
+ spin_lock_init(&sd->defer_lock);
init_gro_hash(&sd->backlog);
sd->backlog.poll = process_backlog;
diff --git a/net/core/dev.h b/net/core/dev.h
new file mode 100644
index 000000000000..cbb8a925175a
--- /dev/null
+++ b/net/core/dev.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_CORE_DEV_H
+#define _NET_CORE_DEV_H
+
+#include <linux/types.h>
+
+struct net;
+struct net_device;
+struct netdev_bpf;
+struct netdev_phys_item_id;
+struct netlink_ext_ack;
+
+/* Random bits of netdevice that don't need to be exposed */
+#define FLOW_LIMIT_HISTORY (1 << 7) /* must be a power of 2; history must not overflow buckets */
+struct sd_flow_limit {
+ u64 count;
+ unsigned int num_buckets;
+ unsigned int history_head;
+ u16 history[FLOW_LIMIT_HISTORY];
+ u8 buckets[];
+};
+
+extern int netdev_flow_limit_table_len;
+
+#ifdef CONFIG_PROC_FS
+int __init dev_proc_init(void);
+#else
+#define dev_proc_init() 0
+#endif
+
+void linkwatch_init_dev(struct net_device *dev);
+void linkwatch_forget_dev(struct net_device *dev);
+void linkwatch_run_queue(void);
+
+void dev_addr_flush(struct net_device *dev);
+int dev_addr_init(struct net_device *dev);
+void dev_addr_check(struct net_device *dev);
+
+/* sysctls not referred to from outside net/core/ */
+extern int netdev_budget;
+extern unsigned int netdev_budget_usecs;
+extern unsigned int sysctl_skb_defer_max;
+extern int netdev_tstamp_prequeue;
+extern int netdev_unregister_timeout_secs;
+extern int weight_p;
+extern int dev_weight_rx_bias;
+extern int dev_weight_tx_bias;
+
+/* rtnl helpers */
+extern struct list_head net_todo_list;
+void netdev_run_todo(void);
+
+/* netdev management, shared between various uAPI entry points */
+struct netdev_name_node {
+ struct hlist_node hlist;
+ struct list_head list;
+ struct net_device *dev;
+ const char *name;
+};
+
+int netdev_get_name(struct net *net, char *name, int ifindex);
+int dev_change_name(struct net_device *dev, const char *newname);
+
+int netdev_name_node_alt_create(struct net_device *dev, const char *name);
+int netdev_name_node_alt_destroy(struct net_device *dev, const char *name);
+
+int dev_validate_mtu(struct net_device *dev, int mtu,
+ struct netlink_ext_ack *extack);
+int dev_set_mtu_ext(struct net_device *dev, int mtu,
+ struct netlink_ext_ack *extack);
+
+int dev_get_phys_port_id(struct net_device *dev,
+ struct netdev_phys_item_id *ppid);
+int dev_get_phys_port_name(struct net_device *dev,
+ char *name, size_t len);
+
+int dev_change_proto_down(struct net_device *dev, bool proto_down);
+void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
+ u32 value);
+
+typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
+int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
+ int fd, int expected_fd, u32 flags);
+
+int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len);
+void dev_set_group(struct net_device *dev, int new_group);
+int dev_change_carrier(struct net_device *dev, bool new_carrier);
+
+void __dev_set_rx_mode(struct net_device *dev);
+
+static inline void netif_set_gso_max_size(struct net_device *dev,
+ unsigned int size)
+{
+ /* dev->gso_max_size is read locklessly from sk_setup_caps() */
+ WRITE_ONCE(dev->gso_max_size, size);
+}
+
+static inline void netif_set_gso_max_segs(struct net_device *dev,
+ unsigned int segs)
+{
+ /* dev->gso_max_segs is read locklessly from sk_setup_caps() */
+ WRITE_ONCE(dev->gso_max_segs, segs);
+}
+
+static inline void netif_set_gro_max_size(struct net_device *dev,
+ unsigned int size)
+{
+ /* This pairs with the READ_ONCE() in skb_gro_receive() */
+ WRITE_ONCE(dev->gro_max_size, size);
+}
+
+#endif
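The netif_set_gso_max_*() setters above publish with WRITE_ONCE() because readers sample the limits without any lock; per the comments, sk_setup_caps() is one such reader. A reader-side sketch under that assumption (helper name illustrative):

/* Lockless reader sketch, pairing with the WRITE_ONCE() in the
 * netif_set_gso_max_*() helpers above.
 */
static unsigned int my_sample_gso_limits(const struct net_device *dev,
					 unsigned int *segs)
{
	*segs = READ_ONCE(dev->gso_max_segs);
	return READ_ONCE(dev->gso_max_size);
}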
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index bead38ca50bd..baa63dee2829 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -12,6 +12,8 @@
#include <linux/export.h>
#include <linux/list.h>
+#include "dev.h"
+
/*
* General list handling functions
*/
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 1b807d119da5..7674bb9f3076 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -10,6 +10,8 @@
#include <net/dsa.h>
#include <net/wext.h>
+#include "dev.h"
+
/*
* Map an interface index to its name (SIOCGIFNAME)
*/
@@ -382,10 +384,10 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data,
return -ENODEV;
if (!netif_is_bridge_master(dev))
return -EOPNOTSUPP;
- dev_hold_track(dev, &dev_tracker, GFP_KERNEL);
+ netdev_hold(dev, &dev_tracker, GFP_KERNEL);
rtnl_unlock();
err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL);
- dev_put_track(dev, &dev_tracker);
+ netdev_put(dev, &dev_tracker);
rtnl_lock();
return err;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index fcd9f6d85cf1..89baa7c0938b 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -54,6 +54,8 @@ struct devlink {
struct list_head trap_list;
struct list_head trap_group_list;
struct list_head trap_policer_list;
+ struct list_head linecard_list;
+ struct mutex linecards_lock; /* protects linecard_list */
const struct devlink_ops *ops;
u64 features;
struct xarray snapshot_ids;
@@ -64,12 +66,32 @@ struct devlink {
* port, sb, dpipe, resource, params, region, traps and more.
*/
struct mutex lock;
+ struct lock_class_key lock_key;
u8 reload_failed:1;
refcount_t refcount;
struct completion comp;
+ struct rcu_head rcu;
char priv[] __aligned(NETDEV_ALIGN);
};
+struct devlink_linecard_ops;
+struct devlink_linecard_type;
+
+struct devlink_linecard {
+ struct list_head list;
+ struct devlink *devlink;
+ unsigned int index;
+ refcount_t refcount;
+ const struct devlink_linecard_ops *ops;
+ void *priv;
+ enum devlink_linecard_state state;
+ struct mutex state_lock; /* Protects state */
+ const char *type;
+ struct devlink_linecard_type *types;
+ unsigned int types_count;
+ struct devlink *nested_devlink;
+};
+
/**
* struct devlink_resource - devlink resource
* @name: name of the resource
@@ -179,8 +201,13 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_
DEVLINK_PORT_FN_STATE_ACTIVE),
};
+static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = {
+ [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG },
+};
+
static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
#define DEVLINK_REGISTERED XA_MARK_1
+#define DEVLINK_UNREGISTERING XA_MARK_2
/* devlink instances are open to access from user space after the
* devlink_register() call. Such a logical barrier allows us to have certain
@@ -198,24 +225,27 @@ static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
#define ASSERT_DEVLINK_NOT_REGISTERED(d) \
WARN_ON_ONCE(xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED))
-/* devlink_mutex
- *
- * An overall lock guarding every operation coming from userspace.
- * It also guards devlink devices list and it is taken when
- * driver registers/unregisters it.
- */
-static DEFINE_MUTEX(devlink_mutex);
-
struct net *devlink_net(const struct devlink *devlink)
{
return read_pnet(&devlink->_net);
}
EXPORT_SYMBOL_GPL(devlink_net);
+static void __devlink_put_rcu(struct rcu_head *head)
+{
+ struct devlink *devlink = container_of(head, struct devlink, rcu);
+
+ complete(&devlink->comp);
+}
+
void devlink_put(struct devlink *devlink)
{
if (refcount_dec_and_test(&devlink->refcount))
- complete(&devlink->comp);
+ /* Make sure an unregister operation that may be awaiting the
+ * completion is unblocked only after all users are past the
+ * end of the RCU grace period.
+ */
+ call_rcu(&devlink->rcu, __devlink_put_rcu);
}
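With the completion now signalled from an RCU callback, an unregister path can drop its own reference and block until every transient user from the RCU-protected iteration is gone. A sketch of that waiting side, assuming the caller holds the reference taken at allocation (the helper name is illustrative):

/* Unregister-side wait sketch: release the last long-lived reference
 * and sleep until the final devlink_put() fires __devlink_put_rcu().
 */
static void my_devlink_wait_for_users(struct devlink *devlink)
{
	devlink_put(devlink);
	wait_for_completion(&devlink->comp);
}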
struct devlink *__must_check devlink_try_get(struct devlink *devlink)
@@ -225,12 +255,103 @@ struct devlink *__must_check devlink_try_get(struct devlink *devlink)
return NULL;
}
+void devl_assert_locked(struct devlink *devlink)
+{
+ lockdep_assert_held(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_assert_locked);
+
+#ifdef CONFIG_LOCKDEP
+/* For use in conjunction with LOCKDEP only, e.g. rcu_dereference_protected() */
+bool devl_lock_is_held(struct devlink *devlink)
+{
+ return lockdep_is_held(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_lock_is_held);
+#endif
+
+void devl_lock(struct devlink *devlink)
+{
+ mutex_lock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_lock);
+
+int devl_trylock(struct devlink *devlink)
+{
+ return mutex_trylock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_trylock);
+
+void devl_unlock(struct devlink *devlink)
+{
+ mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_unlock);
+
+static struct devlink *
+devlinks_xa_find_get(struct net *net, unsigned long *indexp, xa_mark_t filter,
+ void * (*xa_find_fn)(struct xarray *, unsigned long *,
+ unsigned long, xa_mark_t))
+{
+ struct devlink *devlink;
+
+ rcu_read_lock();
+retry:
+ devlink = xa_find_fn(&devlinks, indexp, ULONG_MAX, DEVLINK_REGISTERED);
+ if (!devlink)
+ goto unlock;
+
+ /* In case devlink_unregister() was already called and "unregistering"
+ * mark was set, do not allow taking a devlink reference here. This
+ * prevents a live-lock with devlink_unregister() waiting for the
+ * completion.
+ */
+ if (xa_get_mark(&devlinks, *indexp, DEVLINK_UNREGISTERING))
+ goto retry;
+
+ /* For a possible retry, the xa_find_after() should always be used */
+ xa_find_fn = xa_find_after;
+ if (!devlink_try_get(devlink))
+ goto retry;
+ if (!net_eq(devlink_net(devlink), net)) {
+ devlink_put(devlink);
+ goto retry;
+ }
+unlock:
+ rcu_read_unlock();
+ return devlink;
+}
+
+static struct devlink *devlinks_xa_find_get_first(struct net *net,
+ unsigned long *indexp,
+ xa_mark_t filter)
+{
+ return devlinks_xa_find_get(net, indexp, filter, xa_find);
+}
+
+static struct devlink *devlinks_xa_find_get_next(struct net *net,
+ unsigned long *indexp,
+ xa_mark_t filter)
+{
+ return devlinks_xa_find_get(net, indexp, filter, xa_find_after);
+}
+
+/* Iterate over devlink pointers for which a reference could be taken.
+ * devlink_put() must be called on each iterated devlink pointer in the
+ * loop body to release the reference.
+ */
+#define devlinks_xa_for_each_get(net, index, devlink, filter) \
+ for (index = 0, \
+ devlink = devlinks_xa_find_get_first(net, &index, filter); \
+ devlink; devlink = devlinks_xa_find_get_next(net, &index, filter))
+
+#define devlinks_xa_for_each_registered_get(net, index, devlink) \
+ devlinks_xa_for_each_get(net, index, devlink, DEVLINK_REGISTERED)
+
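Each devlink yielded by these iterators carries a reference, which shapes every dump loop converted later in this patch. A minimal usage sketch (function name illustrative):

/* Usage sketch: the loop body must drop the reference the iterator
 * took before moving to the next instance.
 */
static void my_walk_devlinks(struct net *net)
{
	struct devlink *devlink;
	unsigned long index;

	devlinks_xa_for_each_registered_get(net, index, devlink) {
		/* ... inspect the instance, under devl_lock() if needed ... */
		devlink_put(devlink);
	}
}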
static struct devlink *devlink_get_from_attrs(struct net *net,
struct nlattr **attrs)
{
struct devlink *devlink;
unsigned long index;
- bool found = false;
char *busname;
char *devname;
@@ -240,23 +361,23 @@ static struct devlink *devlink_get_from_attrs(struct net *net,
busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]);
devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]);
- lockdep_assert_held(&devlink_mutex);
-
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ devlinks_xa_for_each_registered_get(net, index, devlink) {
if (strcmp(devlink->dev->bus->name, busname) == 0 &&
- strcmp(dev_name(devlink->dev), devname) == 0 &&
- net_eq(devlink_net(devlink), net)) {
- found = true;
- break;
- }
+ strcmp(dev_name(devlink->dev), devname) == 0)
+ return devlink;
+ devlink_put(devlink);
}
- if (!found || !devlink_try_get(devlink))
- devlink = ERR_PTR(-ENODEV);
-
- return devlink;
+ return ERR_PTR(-ENODEV);
}
+#define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) \
+ WARN_ON_ONCE(!(devlink_port)->registered)
+#define ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port) \
+ WARN_ON_ONCE((devlink_port)->registered)
+#define ASSERT_DEVLINK_PORT_INITIALIZED(devlink_port) \
+ WARN_ON_ONCE(!(devlink_port)->initialized)
+
static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
unsigned int port_index)
{
@@ -370,6 +491,58 @@ devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
return ERR_PTR(-EINVAL);
}
+static struct devlink_linecard *
+devlink_linecard_get_by_index(struct devlink *devlink,
+ unsigned int linecard_index)
+{
+ struct devlink_linecard *devlink_linecard;
+
+ list_for_each_entry(devlink_linecard, &devlink->linecard_list, list) {
+ if (devlink_linecard->index == linecard_index)
+ return devlink_linecard;
+ }
+ return NULL;
+}
+
+static bool devlink_linecard_index_exists(struct devlink *devlink,
+ unsigned int linecard_index)
+{
+ return devlink_linecard_get_by_index(devlink, linecard_index);
+}
+
+static struct devlink_linecard *
+devlink_linecard_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
+{
+ if (attrs[DEVLINK_ATTR_LINECARD_INDEX]) {
+ u32 linecard_index = nla_get_u32(attrs[DEVLINK_ATTR_LINECARD_INDEX]);
+ struct devlink_linecard *linecard;
+
+ mutex_lock(&devlink->linecards_lock);
+ linecard = devlink_linecard_get_by_index(devlink, linecard_index);
+ if (linecard)
+ refcount_inc(&linecard->refcount);
+ mutex_unlock(&devlink->linecards_lock);
+ if (!linecard)
+ return ERR_PTR(-ENODEV);
+ return linecard;
+ }
+ return ERR_PTR(-EINVAL);
+}
+
+static struct devlink_linecard *
+devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+ return devlink_linecard_get_from_attrs(devlink, info->attrs);
+}
+
+static void devlink_linecard_put(struct devlink_linecard *linecard)
+{
+ if (refcount_dec_and_test(&linecard->refcount)) {
+ mutex_destroy(&linecard->state_lock);
+ kfree(linecard);
+ }
+}
+
struct devlink_sb {
struct list_head list;
unsigned int index;
@@ -536,6 +709,10 @@ struct devlink_region {
const struct devlink_region_ops *ops;
const struct devlink_port_region_ops *port_ops;
};
+ struct mutex snapshot_lock; /* protects snapshot_list,
+ * max_snapshots and cur_snapshots
+ * consistency.
+ */
struct list_head snapshot_list;
u32 max_snapshots;
u32 cur_snapshots;
@@ -590,28 +767,20 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
#define DEVLINK_NL_FLAG_NEED_RATE BIT(2)
#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3)
-
-/* The per devlink instance lock is taken by default in the pre-doit
- * operation, yet several commands do not require this. The global
- * devlink lock is taken and protects from disruption by user-calls.
- */
-#define DEVLINK_NL_FLAG_NO_LOCK BIT(4)
+#define DEVLINK_NL_FLAG_NEED_LINECARD BIT(4)
static int devlink_nl_pre_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
{
+ struct devlink_linecard *linecard;
struct devlink_port *devlink_port;
struct devlink *devlink;
int err;
- mutex_lock(&devlink_mutex);
devlink = devlink_get_from_attrs(genl_info_net(info), info->attrs);
- if (IS_ERR(devlink)) {
- mutex_unlock(&devlink_mutex);
+ if (IS_ERR(devlink))
return PTR_ERR(devlink);
- }
- if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
info->user_ptr[0] = devlink;
if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
devlink_port = devlink_port_get_from_info(devlink, info);
@@ -642,27 +811,35 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
goto unlock;
}
info->user_ptr[1] = rate_node;
+ } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) {
+ linecard = devlink_linecard_get_from_info(devlink, info);
+ if (IS_ERR(linecard)) {
+ err = PTR_ERR(linecard);
+ goto unlock;
+ }
+ info->user_ptr[1] = linecard;
}
return 0;
unlock:
- if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
- mutex_unlock(&devlink_mutex);
return err;
}
static void devlink_nl_post_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
{
+ struct devlink_linecard *linecard;
struct devlink *devlink;
devlink = info->user_ptr[0];
- if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
- mutex_unlock(&devlink->lock);
+ if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) {
+ linecard = info->user_ptr[1];
+ devlink_linecard_put(linecard);
+ }
+ devl_unlock(devlink);
devlink_put(devlink);
- mutex_unlock(&devlink_mutex);
}
static struct genl_family devlink_nl_family;
@@ -684,6 +861,24 @@ static int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
return 0;
}
+static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink)
+{
+ struct nlattr *nested_attr;
+
+ nested_attr = nla_nest_start(msg, DEVLINK_ATTR_NESTED_DEVLINK);
+ if (!nested_attr)
+ return -EMSGSIZE;
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, nested_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, nested_attr);
+ return -EMSGSIZE;
+}
+
struct devlink_reload_combination {
enum devlink_reload_action action;
enum devlink_reload_limit limit;
@@ -1131,6 +1326,10 @@ static int devlink_nl_port_fill(struct sk_buff *msg,
goto nla_put_failure;
if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack))
goto nla_put_failure;
+ if (devlink_port->linecard &&
+ nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX,
+ devlink_port->linecard->index))
+ goto nla_put_failure;
genlmsg_end(msg, hdr);
return 0;
@@ -1205,15 +1404,8 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err = 0;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
- mutex_lock(&devlink->lock);
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ devl_lock(devlink);
list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
u32 id = NETLINK_CB(cb->skb).portid;
@@ -1226,18 +1418,16 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
cb->nlh->nlmsg_seq,
NLM_F_MULTI, NULL);
if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
idx++;
}
- mutex_unlock(&devlink->lock);
-retry:
+ devl_unlock(devlink);
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
if (err != -EMSGSIZE)
return err;
@@ -1308,16 +1498,7 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) {
- devlink_put(devlink);
- continue;
- }
-
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
if (idx < start) {
idx++;
devlink_put(devlink);
@@ -1333,8 +1514,6 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg,
idx++;
}
out:
- mutex_unlock(&devlink_mutex);
-
cb->args[0] = idx;
return msg->len;
}
@@ -1371,15 +1550,8 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
- mutex_lock(&devlink->lock);
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ devl_lock(devlink);
list_for_each_entry(devlink_port, &devlink->port_list, list) {
if (idx < start) {
idx++;
@@ -1391,19 +1563,16 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
cb->nlh->nlmsg_seq,
NLM_F_MULTI, cb->extack);
if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
idx++;
}
- mutex_unlock(&devlink->lock);
-retry:
+ devl_unlock(devlink);
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
cb->args[0] = idx;
return msg->len;
}
@@ -1541,35 +1710,20 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
return 0;
}
-static int devlink_port_split(struct devlink *devlink, u32 port_index,
- u32 count, struct netlink_ext_ack *extack)
-
-{
- if (devlink->ops->port_split)
- return devlink->ops->port_split(devlink, port_index, count,
- extack);
- return -EOPNOTSUPP;
-}
-
static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
struct genl_info *info)
{
+ struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = info->user_ptr[0];
- struct devlink_port *devlink_port;
- u32 port_index;
u32 count;
- if (!info->attrs[DEVLINK_ATTR_PORT_INDEX] ||
- !info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT])
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_SPLIT_COUNT))
return -EINVAL;
+ if (!devlink->ops->port_split)
+ return -EOPNOTSUPP;
- devlink_port = devlink_port_get_from_info(devlink, info);
- port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]);
- if (IS_ERR(devlink_port))
- return -EINVAL;
-
if (!devlink_port->attrs.splittable) {
/* Split ports cannot be split. */
if (devlink_port->attrs.split)
@@ -1584,34 +1738,24 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
return -EINVAL;
}
- return devlink_port_split(devlink, port_index, count, info->extack);
-}
-
-static int devlink_port_unsplit(struct devlink *devlink, u32 port_index,
- struct netlink_ext_ack *extack)
-
-{
- if (devlink->ops->port_unsplit)
- return devlink->ops->port_unsplit(devlink, port_index, extack);
- return -EOPNOTSUPP;
+ return devlink->ops->port_split(devlink, devlink_port, count,
+ info->extack);
}
static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
struct genl_info *info)
{
+ struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = info->user_ptr[0];
- u32 port_index;
- if (!info->attrs[DEVLINK_ATTR_PORT_INDEX])
- return -EINVAL;
-
- port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
- return devlink_port_unsplit(devlink, port_index, info->extack);
+ if (!devlink->ops->port_unsplit)
+ return -EOPNOTSUPP;
+ return devlink->ops->port_unsplit(devlink, devlink_port, info->extack);
}
-static int devlink_port_new_notifiy(struct devlink *devlink,
- unsigned int port_index,
- struct genl_info *info)
+static int devlink_port_new_notify(struct devlink *devlink,
+ unsigned int port_index,
+ struct genl_info *info)
{
struct devlink_port *devlink_port;
struct sk_buff *msg;
@@ -1621,7 +1765,7 @@ static int devlink_port_new_notifiy(struct devlink *devlink,
if (!msg)
return -ENOMEM;
- mutex_lock(&devlink->lock);
+ lockdep_assert_held(&devlink->lock);
devlink_port = devlink_port_get_by_index(devlink, port_index);
if (!devlink_port) {
err = -ENODEV;
@@ -1633,12 +1777,9 @@ static int devlink_port_new_notifiy(struct devlink *devlink,
if (err)
goto out;
- err = genlmsg_reply(msg, info);
- mutex_unlock(&devlink->lock);
- return err;
+ return genlmsg_reply(msg, info);
out:
- mutex_unlock(&devlink->lock);
nlmsg_free(msg);
return err;
}
@@ -1686,7 +1827,7 @@ static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb,
if (err)
return err;
- err = devlink_port_new_notifiy(devlink, new_port_index, info);
+ err = devlink_port_new_notify(devlink, new_port_index, info);
if (err && err != -ENODEV) {
/* Fail to send the response; destroy newly created port. */
devlink->ops->port_del(devlink, new_port_index, extack);
@@ -1704,7 +1845,7 @@ static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb,
if (!devlink->ops->port_del)
return -EOPNOTSUPP;
- if (!info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_INDEX)) {
NL_SET_ERR_MSG_MOD(extack, "Port index is not specified");
return -EINVAL;
}
@@ -1962,6 +2103,316 @@ static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb,
return err;
}
+struct devlink_linecard_type {
+ const char *type;
+ const void *priv;
+};
+
+static int devlink_nl_linecard_fill(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct devlink_linecard *linecard,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink_linecard_type *linecard_type;
+ struct nlattr *attr;
+ void *hdr;
+ int i;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, linecard->index))
+ goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_LINECARD_STATE, linecard->state))
+ goto nla_put_failure;
+ if (linecard->type &&
+ nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE, linecard->type))
+ goto nla_put_failure;
+
+ if (linecard->types_count) {
+ attr = nla_nest_start(msg,
+ DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES);
+ if (!attr)
+ goto nla_put_failure;
+ for (i = 0; i < linecard->types_count; i++) {
+ linecard_type = &linecard->types[i];
+ if (nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE,
+ linecard_type->type)) {
+ nla_nest_cancel(msg, attr);
+ goto nla_put_failure;
+ }
+ }
+ nla_nest_end(msg, attr);
+ }
+
+ if (linecard->nested_devlink &&
+ devlink_nl_put_nested_handle(msg, linecard->nested_devlink))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void devlink_linecard_notify(struct devlink_linecard *linecard,
+ enum devlink_command cmd)
+{
+ struct devlink *devlink = linecard->devlink;
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_LINECARD_NEW &&
+ cmd != DEVLINK_CMD_LINECARD_DEL);
+
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_linecard_fill(msg, devlink, linecard, cmd, 0, 0, 0,
+ NULL);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static int devlink_nl_cmd_linecard_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_linecard *linecard = info->user_ptr[1];
+ struct devlink *devlink = linecard->devlink;
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ mutex_lock(&linecard->state_lock);
+ err = devlink_nl_linecard_fill(msg, devlink, linecard,
+ DEVLINK_CMD_LINECARD_NEW,
+ info->snd_portid, info->snd_seq, 0,
+ info->extack);
+ mutex_unlock(&linecard->state_lock);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct devlink_linecard *linecard;
+ struct devlink *devlink;
+ int start = cb->args[0];
+ unsigned long index;
+ int idx = 0;
+ int err;
+
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ mutex_lock(&devlink->linecards_lock);
+ list_for_each_entry(linecard, &devlink->linecard_list, list) {
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+ mutex_lock(&linecard->state_lock);
+ err = devlink_nl_linecard_fill(msg, devlink, linecard,
+ DEVLINK_CMD_LINECARD_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ cb->extack);
+ mutex_unlock(&linecard->state_lock);
+ if (err) {
+ mutex_unlock(&devlink->linecards_lock);
+ devlink_put(devlink);
+ goto out;
+ }
+ idx++;
+ }
+ mutex_unlock(&devlink->linecards_lock);
+ devlink_put(devlink);
+ }
+out:
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static struct devlink_linecard_type *
+devlink_linecard_type_lookup(struct devlink_linecard *linecard,
+ const char *type)
+{
+ struct devlink_linecard_type *linecard_type;
+ int i;
+
+ for (i = 0; i < linecard->types_count; i++) {
+ linecard_type = &linecard->types[i];
+ if (!strcmp(type, linecard_type->type))
+ return linecard_type;
+ }
+ return NULL;
+}
+
+static int devlink_linecard_type_set(struct devlink_linecard *linecard,
+ const char *type,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_linecard_ops *ops = linecard->ops;
+ struct devlink_linecard_type *linecard_type;
+ int err;
+
+ mutex_lock(&linecard->state_lock);
+ if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
+ NL_SET_ERR_MSG_MOD(extack, "Line card is currently being provisioned");
+ err = -EBUSY;
+ goto out;
+ }
+ if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
+ NL_SET_ERR_MSG_MOD(extack, "Line card is currently being unprovisioned");
+ err = -EBUSY;
+ goto out;
+ }
+
+ linecard_type = devlink_linecard_type_lookup(linecard, type);
+ if (!linecard_type) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported line card type provided");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (linecard->state != DEVLINK_LINECARD_STATE_UNPROVISIONED &&
+ linecard->state != DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
+ NL_SET_ERR_MSG_MOD(extack, "Line card already provisioned");
+ err = -EBUSY;
+ /* Check if the line card is provisioned in the same
+ * way the user asks. If it is, make the operation
+ * return success.
+ */
+ if (ops->same_provision &&
+ ops->same_provision(linecard, linecard->priv,
+ linecard_type->type,
+ linecard_type->priv))
+ err = 0;
+ goto out;
+ }
+
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING;
+ linecard->type = linecard_type->type;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ err = ops->provision(linecard, linecard->priv, linecard_type->type,
+ linecard_type->priv, extack);
+ if (err) {
+ /* Provisioning failed. Assume the linecard is unprovisioned
+ * for future operations.
+ */
+ mutex_lock(&linecard->state_lock);
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ }
+ return err;
+
+out:
+ mutex_unlock(&linecard->state_lock);
+ return err;
+}
+
+static int devlink_linecard_type_unset(struct devlink_linecard *linecard,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ mutex_lock(&linecard->state_lock);
+ if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
+ NL_SET_ERR_MSG_MOD(extack, "Line card is currently being provisioned");
+ err = -EBUSY;
+ goto out;
+ }
+ if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
+ NL_SET_ERR_MSG_MOD(extack, "Line card is currently being unprovisioned");
+ err = -EBUSY;
+ goto out;
+ }
+ if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ err = 0;
+ goto out;
+ }
+
+ if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONED) {
+ NL_SET_ERR_MSG_MOD(extack, "Line card is not provisioned");
+ err = 0;
+ goto out;
+ }
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONING;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ err = linecard->ops->unprovision(linecard, linecard->priv,
+ extack);
+ if (err) {
+ /* Unprovisioning failed. Assume the linecard is unprovisioned
+ * for future operations.
+ */
+ mutex_lock(&linecard->state_lock);
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ }
+ return err;
+
+out:
+ mutex_unlock(&linecard->state_lock);
+ return err;
+}
+
+static int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_linecard *linecard = info->user_ptr[1];
+ struct netlink_ext_ack *extack = info->extack;
+ int err;
+
+ if (info->attrs[DEVLINK_ATTR_LINECARD_TYPE]) {
+ const char *type;
+
+ type = nla_data(info->attrs[DEVLINK_ATTR_LINECARD_TYPE]);
+ if (strcmp(type, "")) {
+ err = devlink_linecard_type_set(linecard, type, extack);
+ if (err)
+ return err;
+ } else {
+ err = devlink_linecard_type_unset(linecard, extack);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
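
To make the provisioning state machine above concrete, here is a minimal driver-side sketch of the callbacks it exercises (hypothetical foo_* names; signatures inferred from the call sites in this patch):

	static int foo_linecard_provision(struct devlink_linecard *linecard,
					  void *priv, const char *type,
					  const void *type_priv,
					  struct netlink_ext_ack *extack)
	{
		/* Kick off provisioning of the given type; completion is
		 * typically reported back to devlink asynchronously.
		 */
		return 0;
	}

	static int foo_linecard_unprovision(struct devlink_linecard *linecard,
					    void *priv,
					    struct netlink_ext_ack *extack)
	{
		/* Tear the line card down; may also complete asynchronously. */
		return 0;
	}

	static const struct devlink_linecard_ops foo_linecard_ops = {
		.provision	= foo_linecard_provision,
		.unprovision	= foo_linecard_unprovision,
	};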
+
static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
struct devlink_sb *devlink_sb,
enum devlink_command cmd, u32 portid,
@@ -2037,15 +2488,8 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
- mutex_lock(&devlink->lock);
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ devl_lock(devlink);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
if (idx < start) {
idx++;
@@ -2057,19 +2501,16 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
cb->nlh->nlmsg_seq,
NLM_F_MULTI);
if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
idx++;
}
- mutex_unlock(&devlink->lock);
-retry:
+ devl_unlock(devlink);
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
cb->args[0] = idx;
return msg->len;
}
@@ -2189,16 +2630,11 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err = 0;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
- !devlink->ops->sb_pool_get)
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ if (!devlink->ops->sb_pool_get)
goto retry;
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
err = __sb_pool_get_dumpit(msg, start, &idx, devlink,
devlink_sb,
@@ -2207,18 +2643,16 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
if (err == -EOPNOTSUPP) {
err = 0;
} else if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
}
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
retry:
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
if (err != -EMSGSIZE)
return err;
@@ -2263,7 +2697,7 @@ static int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb,
if (err)
return err;
- if (!info->attrs[DEVLINK_ATTR_SB_POOL_SIZE])
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_POOL_SIZE))
return -EINVAL;
size = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]);
@@ -2410,16 +2844,11 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err = 0;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
- !devlink->ops->sb_port_pool_get)
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ if (!devlink->ops->sb_port_pool_get)
goto retry;
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
err = __sb_port_pool_get_dumpit(msg, start, &idx,
devlink, devlink_sb,
@@ -2428,18 +2857,16 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
if (err == -EOPNOTSUPP) {
err = 0;
} else if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
}
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
retry:
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
if (err != -EMSGSIZE)
return err;
@@ -2480,7 +2907,7 @@ static int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb,
if (err)
return err;
- if (!info->attrs[DEVLINK_ATTR_SB_THRESHOLD])
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_THRESHOLD))
return -EINVAL;
threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
@@ -2659,16 +3086,11 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err = 0;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
- !devlink->ops->sb_tc_pool_bind_get)
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ if (!devlink->ops->sb_tc_pool_bind_get)
goto retry;
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
err = __sb_tc_pool_bind_get_dumpit(msg, start, &idx,
devlink,
@@ -2678,18 +3100,16 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
if (err == -EOPNOTSUPP) {
err = 0;
} else if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
}
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
retry:
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
if (err != -EMSGSIZE)
return err;
@@ -2743,7 +3163,7 @@ static int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
if (err)
return err;
- if (!info->attrs[DEVLINK_ATTR_SB_THRESHOLD])
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_THRESHOLD))
return -EINVAL;
threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
@@ -2866,15 +3286,11 @@ static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
{
struct devlink_rate *devlink_rate;
- /* Take the lock to sync with devlink_rate_nodes_destroy() */
- mutex_lock(&devlink->lock);
list_for_each_entry(devlink_rate, &devlink->rate_list, list)
if (devlink_rate_is_node(devlink_rate)) {
- mutex_unlock(&devlink->lock);
NL_SET_ERR_MSG_MOD(extack, "Rate node(s) exists.");
return -EBUSY;
}
- mutex_unlock(&devlink->lock);
return 0;
}
@@ -3436,7 +3852,7 @@ static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
struct devlink_dpipe_table *table;
const char *table_name;
- if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME])
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_DPIPE_TABLE_NAME))
return -EINVAL;
table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
@@ -3620,8 +4036,9 @@ static int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
const char *table_name;
bool counters_enable;
- if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME] ||
- !info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED])
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_DPIPE_TABLE_NAME) ||
+ GENL_REQ_ATTR_CHECK(info,
+ DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED))
return -EINVAL;
table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
@@ -3710,8 +4127,8 @@ static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
u64 size;
int err;
- if (!info->attrs[DEVLINK_ATTR_RESOURCE_ID] ||
- !info->attrs[DEVLINK_ATTR_RESOURCE_SIZE])
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_RESOURCE_ID) ||
+ GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_RESOURCE_SIZE))
return -EINVAL;
resource_id = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_ID]);
@@ -4333,10 +4750,76 @@ void devlink_flash_update_timeout_notify(struct devlink *devlink,
}
EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify);
+struct devlink_info_req {
+ struct sk_buff *msg;
+ void (*version_cb)(const char *version_name,
+ enum devlink_info_version_type version_type,
+ void *version_cb_priv);
+ void *version_cb_priv;
+};
+
+struct devlink_flash_component_lookup_ctx {
+ const char *lookup_name;
+ bool lookup_name_found;
+};
+
+static void
+devlink_flash_component_lookup_cb(const char *version_name,
+ enum devlink_info_version_type version_type,
+ void *version_cb_priv)
+{
+ struct devlink_flash_component_lookup_ctx *lookup_ctx = version_cb_priv;
+
+ if (version_type != DEVLINK_INFO_VERSION_TYPE_COMPONENT ||
+ lookup_ctx->lookup_name_found)
+ return;
+
+ lookup_ctx->lookup_name_found =
+ !strcmp(lookup_ctx->lookup_name, version_name);
+}
+
+static int devlink_flash_component_get(struct devlink *devlink,
+ struct nlattr *nla_component,
+ const char **p_component,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink_flash_component_lookup_ctx lookup_ctx = {};
+ struct devlink_info_req req = {};
+ const char *component;
+ int ret;
+
+ if (!nla_component)
+ return 0;
+
+ component = nla_data(nla_component);
+
+ if (!devlink->ops->info_get) {
+ NL_SET_ERR_MSG_ATTR(extack, nla_component,
+ "component update is not supported by this device");
+ return -EOPNOTSUPP;
+ }
+
+ lookup_ctx.lookup_name = component;
+ req.version_cb = devlink_flash_component_lookup_cb;
+ req.version_cb_priv = &lookup_ctx;
+
+ ret = devlink->ops->info_get(devlink, &req, NULL);
+ if (ret)
+ return ret;
+
+ if (!lookup_ctx.lookup_name_found) {
+ NL_SET_ERR_MSG_ATTR(extack, nla_component,
+ "selected component is not supported by this device");
+ return -EINVAL;
+ }
+ *p_component = component;
+ return 0;
+}
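
Note that the component lookup above runs the driver's ->info_get() with req->msg left NULL: only the version callback fires, and no netlink message is built. A driver makes a component selectable for flashing by reporting it with the component version type — a sketch (hypothetical foo_* names):

	static int foo_info_get(struct devlink *devlink,
				struct devlink_info_req *req,
				struct netlink_ext_ack *extack)
	{
		/* "fw.mgmt" becomes selectable via
		 * "devlink dev flash ... component fw.mgmt".
		 */
		return devlink_info_version_running_put_ext(req, "fw.mgmt",
				"1.2.3", DEVLINK_INFO_VERSION_TYPE_COMPONENT);
	}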
+
static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
struct genl_info *info)
{
- struct nlattr *nla_component, *nla_overwrite_mask, *nla_file_name;
+ struct nlattr *nla_overwrite_mask, *nla_file_name;
struct devlink_flash_update_params params = {};
struct devlink *devlink = info->user_ptr[0];
const char *file_name;
@@ -4346,20 +4829,16 @@ static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
if (!devlink->ops->flash_update)
return -EOPNOTSUPP;
- if (!info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME])
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME))
return -EINVAL;
- supported_params = devlink->ops->supported_flash_update_params;
+ ret = devlink_flash_component_get(devlink,
+ info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT],
+ &params.component, info->extack);
+ if (ret)
+ return ret;
- nla_component = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT];
- if (nla_component) {
- if (!(supported_params & DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT)) {
- NL_SET_ERR_MSG_ATTR(info->extack, nla_component,
- "component update is not supported by this device");
- return -EOPNOTSUPP;
- }
- params.component = nla_data(nla_component);
- }
+ supported_params = devlink->ops->supported_flash_update_params;
nla_overwrite_mask = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK];
if (nla_overwrite_mask) {
@@ -4391,6 +4870,202 @@ static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
return ret;
}
+static int
+devlink_nl_selftests_fill(struct sk_buff *msg, struct devlink *devlink,
+ u32 portid, u32 seq, int flags,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *selftests;
+ void *hdr;
+ int err;
+ int i;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags,
+ DEVLINK_CMD_SELFTESTS_GET);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ err = -EMSGSIZE;
+ if (devlink_nl_put_handle(msg, devlink))
+ goto err_cancel_msg;
+
+ selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
+ if (!selftests)
+ goto err_cancel_msg;
+
+ for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
+ i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
+ if (devlink->ops->selftest_check(devlink, i, extack)) {
+ err = nla_put_flag(msg, i);
+ if (err)
+ goto err_cancel_msg;
+ }
+ }
+
+ nla_nest_end(msg, selftests);
+ genlmsg_end(msg, hdr);
+ return 0;
+
+err_cancel_msg:
+ genlmsg_cancel(msg, hdr);
+ return err;
+}
+
+static int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ if (!devlink->ops->selftest_check)
+ return -EOPNOTSUPP;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_selftests_fill(msg, devlink, info->snd_portid,
+ info->snd_seq, 0, info->extack);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_selftests_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct devlink *devlink;
+ int start = cb->args[0];
+ unsigned long index;
+ int idx = 0;
+ int err = 0;
+
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ if (idx < start || !devlink->ops->selftest_check)
+ goto inc;
+
+ devl_lock(devlink);
+ err = devlink_nl_selftests_fill(msg, devlink,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->extack);
+ devl_unlock(devlink);
+ if (err) {
+ devlink_put(devlink);
+ break;
+ }
+inc:
+ idx++;
+ devlink_put(devlink);
+ }
+
+ if (err != -EMSGSIZE)
+ return err;
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id,
+ enum devlink_selftest_status test_status)
+{
+ struct nlattr *result_attr;
+
+ result_attr = nla_nest_start(skb, DEVLINK_ATTR_SELFTEST_RESULT);
+ if (!result_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, DEVLINK_ATTR_SELFTEST_RESULT_ID, id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_SELFTEST_RESULT_STATUS,
+ test_status))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, result_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, result_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_selftests_run(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct nlattr *tb[DEVLINK_ATTR_SELFTEST_ID_MAX + 1];
+ struct devlink *devlink = info->user_ptr[0];
+ struct nlattr *attrs, *selftests;
+ struct sk_buff *msg;
+ void *hdr;
+ int err;
+ int i;
+
+ if (!devlink->ops->selftest_run || !devlink->ops->selftest_check)
+ return -EOPNOTSUPP;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SELFTESTS))
+ return -EINVAL;
+
+ attrs = info->attrs[DEVLINK_ATTR_SELFTESTS];
+
+ err = nla_parse_nested(tb, DEVLINK_ATTR_SELFTEST_ID_MAX, attrs,
+ devlink_selftest_nl_policy, info->extack);
+ if (err < 0)
+ return err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = -EMSGSIZE;
+ hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, 0, DEVLINK_CMD_SELFTESTS_RUN);
+ if (!hdr)
+ goto free_msg;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto genlmsg_cancel;
+
+ selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
+ if (!selftests)
+ goto genlmsg_cancel;
+
+ for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
+ i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
+ enum devlink_selftest_status test_status;
+
+ if (nla_get_flag(tb[i])) {
+ if (!devlink->ops->selftest_check(devlink, i,
+ info->extack)) {
+ if (devlink_selftest_result_put(msg, i,
+ DEVLINK_SELFTEST_STATUS_SKIP))
+ goto selftests_nest_cancel;
+ continue;
+ }
+
+ test_status = devlink->ops->selftest_run(devlink, i,
+ info->extack);
+ if (devlink_selftest_result_put(msg, i, test_status))
+ goto selftests_nest_cancel;
+ }
+ }
+
+ nla_nest_end(msg, selftests);
+ genlmsg_end(msg, hdr);
+ return genlmsg_reply(msg, info);
+
+selftests_nest_cancel:
+ nla_nest_cancel(msg, selftests);
+genlmsg_cancel:
+ genlmsg_cancel(msg, hdr);
+free_msg:
+ nlmsg_free(msg);
+ return err;
+}
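
A driver opts into this interface by implementing both callbacks; a minimal sketch follows (hypothetical foo_* names; the flash selftest id is assumed here for illustration):

	static bool foo_selftest_check(struct devlink *devlink, unsigned int id,
				       struct netlink_ext_ack *extack)
	{
		return id == DEVLINK_ATTR_SELFTEST_ID_FLASH;
	}

	static enum devlink_selftest_status
	foo_selftest_run(struct devlink *devlink, unsigned int id,
			 struct netlink_ext_ack *extack)
	{
		/* Exercise the device, then report PASS/FAIL/SKIP. */
		return DEVLINK_SELFTEST_STATUS_PASS;
	}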
+
static const struct devlink_param devlink_param_generic[] = {
{
.id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
@@ -4540,7 +5215,7 @@ static int devlink_param_get(struct devlink *devlink,
const struct devlink_param *param,
struct devlink_param_gset_ctx *ctx)
{
- if (!param->get)
+ if (!param->get || devlink->reload_failed)
return -EOPNOTSUPP;
return param->get(devlink, param->id, ctx);
}
@@ -4549,7 +5224,7 @@ static int devlink_param_set(struct devlink *devlink,
const struct devlink_param *param,
struct devlink_param_gset_ctx *ctx)
{
- if (!param->set)
+ if (!param->set || devlink->reload_failed)
return -EOPNOTSUPP;
return param->set(devlink, param->id, ctx);
}
@@ -4750,15 +5425,8 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err = 0;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
- mutex_lock(&devlink->lock);
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ devl_lock(devlink);
list_for_each_entry(param_item, &devlink->param_list, list) {
if (idx < start) {
idx++;
@@ -4772,19 +5440,16 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
if (err == -EOPNOTSUPP) {
err = 0;
} else if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
idx++;
}
- mutex_unlock(&devlink->lock);
-retry:
+ devl_unlock(devlink);
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
if (err != -EMSGSIZE)
return err;
@@ -4796,7 +5461,7 @@ static int
devlink_param_type_get_from_info(struct genl_info *info,
enum devlink_param_type *param_type)
{
- if (!info->attrs[DEVLINK_ATTR_PARAM_TYPE])
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_TYPE))
return -EINVAL;
switch (nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_TYPE])) {
@@ -4873,7 +5538,7 @@ devlink_param_get_from_info(struct list_head *param_list,
{
char *param_name;
- if (!info->attrs[DEVLINK_ATTR_PARAM_NAME])
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_NAME))
return NULL;
param_name = nla_data(info->attrs[DEVLINK_ATTR_PARAM_NAME]);
@@ -4939,7 +5604,7 @@ static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
return err;
}
- if (!info->attrs[DEVLINK_ATTR_PARAM_VALUE_CMODE])
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_VALUE_CMODE))
return -EINVAL;
cmode = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_CMODE]);
if (!devlink_param_cmode_is_supported(param, cmode))
@@ -4977,99 +5642,22 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
struct netlink_callback *cb)
{
- struct devlink_param_item *param_item;
- struct devlink_port *devlink_port;
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
- int idx = 0;
- int err = 0;
-
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
- mutex_lock(&devlink->lock);
- list_for_each_entry(devlink_port, &devlink->port_list, list) {
- list_for_each_entry(param_item,
- &devlink_port->param_list, list) {
- if (idx < start) {
- idx++;
- continue;
- }
- err = devlink_nl_param_fill(msg,
- devlink_port->devlink,
- devlink_port->index, param_item,
- DEVLINK_CMD_PORT_PARAM_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err == -EOPNOTSUPP) {
- err = 0;
- } else if (err) {
- mutex_unlock(&devlink->lock);
- devlink_put(devlink);
- goto out;
- }
- idx++;
- }
- }
- mutex_unlock(&devlink->lock);
-retry:
- devlink_put(devlink);
- }
-out:
- mutex_unlock(&devlink_mutex);
-
- if (err != -EMSGSIZE)
- return err;
-
- cb->args[0] = idx;
+ NL_SET_ERR_MSG_MOD(cb->extack, "Port params are not supported");
return msg->len;
}
static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[1];
- struct devlink_param_item *param_item;
- struct sk_buff *msg;
- int err;
-
- param_item = devlink_param_get_from_info(&devlink_port->param_list,
- info);
- if (!param_item)
- return -EINVAL;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_param_fill(msg, devlink_port->devlink,
- devlink_port->index, param_item,
- DEVLINK_CMD_PORT_PARAM_GET,
- info->snd_portid, info->snd_seq, 0);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
+ NL_SET_ERR_MSG_MOD(info->extack, "Port params are not supported");
+ return -EINVAL;
}
static int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[1];
-
- return __devlink_nl_cmd_param_set_doit(devlink_port->devlink,
- devlink_port->index,
- &devlink_port->param_list, info,
- DEVLINK_CMD_PORT_PARAM_NEW);
+ NL_SET_ERR_MSG_MOD(info->extack, "Port params are not supported");
+ return -EINVAL;
}
static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg,
@@ -5269,21 +5857,28 @@ static int __devlink_snapshot_id_increment(struct devlink *devlink, u32 id)
{
unsigned long count;
void *p;
+ int err;
- lockdep_assert_held(&devlink->lock);
-
+ xa_lock(&devlink->snapshot_ids);
p = xa_load(&devlink->snapshot_ids, id);
- if (WARN_ON(!p))
- return -EINVAL;
+ if (WARN_ON(!p)) {
+ err = -EINVAL;
+ goto unlock;
+ }
- if (WARN_ON(!xa_is_value(p)))
- return -EINVAL;
+ if (WARN_ON(!xa_is_value(p))) {
+ err = -EINVAL;
+ goto unlock;
+ }
count = xa_to_value(p);
count++;
- return xa_err(xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
- GFP_KERNEL));
+ err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
+ GFP_ATOMIC));
+unlock:
+ xa_unlock(&devlink->snapshot_ids);
+ return err;
}
/**
@@ -5306,25 +5901,26 @@ static void __devlink_snapshot_id_decrement(struct devlink *devlink, u32 id)
unsigned long count;
void *p;
- lockdep_assert_held(&devlink->lock);
-
+ xa_lock(&devlink->snapshot_ids);
p = xa_load(&devlink->snapshot_ids, id);
if (WARN_ON(!p))
- return;
+ goto unlock;
if (WARN_ON(!xa_is_value(p)))
- return;
+ goto unlock;
count = xa_to_value(p);
if (count > 1) {
count--;
- xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
- GFP_KERNEL);
+ __xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
+ GFP_ATOMIC);
} else {
/* If this was the last user, we can erase this id */
- xa_erase(&devlink->snapshot_ids, id);
+ __xa_erase(&devlink->snapshot_ids, id);
}
+unlock:
+ xa_unlock(&devlink->snapshot_ids);
}
/**
@@ -5345,13 +5941,17 @@ static void __devlink_snapshot_id_decrement(struct devlink *devlink, u32 id)
*/
static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id)
{
- lockdep_assert_held(&devlink->lock);
+ int err;
- if (xa_load(&devlink->snapshot_ids, id))
+ xa_lock(&devlink->snapshot_ids);
+ if (xa_load(&devlink->snapshot_ids, id)) {
+ xa_unlock(&devlink->snapshot_ids);
return -EEXIST;
-
- return xa_err(xa_store(&devlink->snapshot_ids, id, xa_mk_value(0),
- GFP_KERNEL));
+ }
+ err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(0),
+ GFP_ATOMIC));
+ xa_unlock(&devlink->snapshot_ids);
+ return err;
}
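
The conversion above swaps the devlink instance lock for the xarray's own spinlock: xa_lock() serializes the xa_load() check against the __xa_store(), and because a spinlock is held, the store must use GFP_ATOMIC rather than GFP_KERNEL. The idiom, reduced to its core:

	int err = 0;

	xa_lock(&xa);
	entry = xa_load(&xa, id);
	if (!entry)
		err = xa_err(__xa_store(&xa, id, xa_mk_value(1), GFP_ATOMIC));
	xa_unlock(&xa);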
/**
@@ -5372,8 +5972,6 @@ static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id)
*/
static int __devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id)
{
- lockdep_assert_held(&devlink->lock);
-
return xa_alloc(&devlink->snapshot_ids, id, xa_mk_value(1),
xa_limit_32b, GFP_KERNEL);
}
@@ -5386,7 +5984,7 @@ static int __devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id)
* Multiple snapshots can be created on a region.
* The @snapshot_id should be obtained using the getter function.
*
- * Must be called only while holding the devlink instance lock.
+ * Must be called only while holding the region snapshot lock.
*
* @region: devlink region of the snapshot
* @data: snapshot data
@@ -5400,7 +5998,7 @@ __devlink_region_snapshot_create(struct devlink_region *region,
struct devlink_snapshot *snapshot;
int err;
- lockdep_assert_held(&devlink->lock);
+ lockdep_assert_held(&region->snapshot_lock);
/* check if region can hold one more snapshot */
if (region->cur_snapshots == region->max_snapshots)
@@ -5438,7 +6036,7 @@ static void devlink_region_snapshot_del(struct devlink_region *region,
{
struct devlink *devlink = region->devlink;
- lockdep_assert_held(&devlink->lock);
+ lockdep_assert_held(&region->snapshot_lock);
devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL);
region->cur_snapshots--;
@@ -5459,7 +6057,7 @@ static int devlink_nl_cmd_region_get_doit(struct sk_buff *skb,
unsigned int index;
int err;
- if (!info->attrs[DEVLINK_ATTR_REGION_NAME])
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME))
return -EINVAL;
if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
@@ -5532,7 +6130,7 @@ static int devlink_nl_cmd_region_get_devlink_dumpit(struct sk_buff *msg,
struct devlink_port *port;
int err = 0;
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
list_for_each_entry(region, &devlink->region_list, list) {
if (*idx < start) {
(*idx)++;
@@ -5556,7 +6154,7 @@ static int devlink_nl_cmd_region_get_devlink_dumpit(struct sk_buff *msg,
}
out:
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
return err;
}
@@ -5569,23 +6167,14 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err = 0;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
err = devlink_nl_cmd_region_get_devlink_dumpit(msg, cb, devlink,
&idx, start);
-retry:
devlink_put(devlink);
if (err)
goto out;
}
out:
- mutex_unlock(&devlink_mutex);
cb->args[0] = idx;
return msg->len;
}
@@ -5601,8 +6190,8 @@ static int devlink_nl_cmd_region_del(struct sk_buff *skb,
unsigned int index;
u32 snapshot_id;
- if (!info->attrs[DEVLINK_ATTR_REGION_NAME] ||
- !info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID])
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME) ||
+ GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_SNAPSHOT_ID))
return -EINVAL;
region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
@@ -5624,11 +6213,15 @@ static int devlink_nl_cmd_region_del(struct sk_buff *skb,
if (!region)
return -EINVAL;
+ mutex_lock(&region->snapshot_lock);
snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
- if (!snapshot)
+ if (!snapshot) {
+ mutex_unlock(&region->snapshot_lock);
return -EINVAL;
+ }
devlink_region_snapshot_del(region, snapshot);
+ mutex_unlock(&region->snapshot_lock);
return 0;
}
@@ -5646,7 +6239,7 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
u8 *data;
int err;
- if (!info->attrs[DEVLINK_ATTR_REGION_NAME]) {
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME)) {
NL_SET_ERR_MSG_MOD(info->extack, "No region name provided");
return -EINVAL;
}
@@ -5676,9 +6269,12 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
return -EOPNOTSUPP;
}
+ mutex_lock(&region->snapshot_lock);
+
if (region->cur_snapshots == region->max_snapshots) {
NL_SET_ERR_MSG_MOD(info->extack, "The region has reached the maximum number of stored snapshots");
- return -ENOSPC;
+ err = -ENOSPC;
+ goto unlock;
}
snapshot_id_attr = info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID];
@@ -5687,17 +6283,18 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
if (devlink_region_snapshot_get_by_id(region, snapshot_id)) {
NL_SET_ERR_MSG_MOD(info->extack, "The requested snapshot id is already in use");
- return -EEXIST;
+ err = -EEXIST;
+ goto unlock;
}
err = __devlink_snapshot_id_insert(devlink, snapshot_id);
if (err)
- return err;
+ goto unlock;
} else {
err = __devlink_region_snapshot_id_get(devlink, &snapshot_id);
if (err) {
NL_SET_ERR_MSG_MOD(info->extack, "Failed to allocate a new snapshot id");
- return err;
+ goto unlock;
}
}
@@ -5719,8 +6316,10 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
snapshot = devlink_region_snapshot_get_by_id(region,
snapshot_id);
- if (WARN_ON(!snapshot))
- return -EINVAL;
+ if (WARN_ON(!snapshot)) {
+ err = -EINVAL;
+ goto unlock;
+ }
msg = devlink_nl_region_notify_build(region, snapshot,
DEVLINK_CMD_REGION_NEW,
@@ -5735,16 +6334,20 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
goto err_notify;
}
+ mutex_unlock(&region->snapshot_lock);
return 0;
err_snapshot_create:
region->ops->destructor(data);
err_snapshot_capture:
__devlink_snapshot_id_decrement(devlink, snapshot_id);
+ mutex_unlock(&region->snapshot_lock);
return err;
err_notify:
devlink_region_snapshot_del(region, snapshot);
+unlock:
+ mutex_unlock(&region->snapshot_lock);
return err;
}
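
Driver-initiated snapshots take the same per-region lock internally; a sketch of the typical sequence, using the existing exported helpers:

	u32 snapshot_id;
	int err;

	err = devlink_region_snapshot_id_get(devlink, &snapshot_id);
	if (err)
		return err;
	err = devlink_region_snapshot_create(region, data, snapshot_id);
	/* drop the extra id reference taken by _id_get(); on success the
	 * snapshot itself holds a reference on the id
	 */
	devlink_region_snapshot_id_put(devlink, snapshot_id);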
@@ -5839,14 +6442,11 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
start_offset = *((u64 *)&cb->args[0]);
- mutex_lock(&devlink_mutex);
devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
- if (IS_ERR(devlink)) {
- err = PTR_ERR(devlink);
- goto out_dev;
- }
+ if (IS_ERR(devlink))
+ return PTR_ERR(devlink);
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
if (!attrs[DEVLINK_ATTR_REGION_NAME] ||
!attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) {
@@ -5942,34 +6542,30 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
nla_nest_end(skb, chunks_attr);
genlmsg_end(skb, hdr);
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
- mutex_unlock(&devlink_mutex);
-
return skb->len;
nla_put_failure:
genlmsg_cancel(skb, hdr);
out_unlock:
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
-out_dev:
- mutex_unlock(&devlink_mutex);
return err;
}
-struct devlink_info_req {
- struct sk_buff *msg;
-};
-
int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name)
{
+ if (!req->msg)
+ return 0;
return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, name);
}
EXPORT_SYMBOL_GPL(devlink_info_driver_name_put);
int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
{
+ if (!req->msg)
+ return 0;
return nla_put_string(req->msg, DEVLINK_ATTR_INFO_SERIAL_NUMBER, sn);
}
EXPORT_SYMBOL_GPL(devlink_info_serial_number_put);
@@ -5977,6 +6573,8 @@ EXPORT_SYMBOL_GPL(devlink_info_serial_number_put);
int devlink_info_board_serial_number_put(struct devlink_info_req *req,
const char *bsn)
{
+ if (!req->msg)
+ return 0;
return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER,
bsn);
}
@@ -5984,11 +6582,19 @@ EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put);
static int devlink_info_version_put(struct devlink_info_req *req, int attr,
const char *version_name,
- const char *version_value)
+ const char *version_value,
+ enum devlink_info_version_type version_type)
{
struct nlattr *nest;
int err;
+ if (req->version_cb)
+ req->version_cb(version_name, version_type,
+ req->version_cb_priv);
+
+ if (!req->msg)
+ return 0;
+
nest = nla_nest_start_noflag(req->msg, attr);
if (!nest)
return -EMSGSIZE;
@@ -6017,7 +6623,8 @@ int devlink_info_version_fixed_put(struct devlink_info_req *req,
const char *version_value)
{
return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_FIXED,
- version_name, version_value);
+ version_name, version_value,
+ DEVLINK_INFO_VERSION_TYPE_NONE);
}
EXPORT_SYMBOL_GPL(devlink_info_version_fixed_put);
@@ -6026,25 +6633,49 @@ int devlink_info_version_stored_put(struct devlink_info_req *req,
const char *version_value)
{
return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
- version_name, version_value);
+ version_name, version_value,
+ DEVLINK_INFO_VERSION_TYPE_NONE);
}
EXPORT_SYMBOL_GPL(devlink_info_version_stored_put);
+int devlink_info_version_stored_put_ext(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
+ version_name, version_value,
+ version_type);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_stored_put_ext);
+
int devlink_info_version_running_put(struct devlink_info_req *req,
const char *version_name,
const char *version_value)
{
return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
- version_name, version_value);
+ version_name, version_value,
+ DEVLINK_INFO_VERSION_TYPE_NONE);
}
EXPORT_SYMBOL_GPL(devlink_info_version_running_put);
+int devlink_info_version_running_put_ext(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
+ version_name, version_value,
+ version_type);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext);
+
static int
devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink,
enum devlink_command cmd, u32 portid,
u32 seq, int flags, struct netlink_ext_ack *extack)
{
- struct devlink_info_req req;
+ struct devlink_info_req req = {};
void *hdr;
int err;
@@ -6103,23 +6734,16 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err = 0;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
if (idx < start || !devlink->ops->info_get)
goto inc;
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
cb->extack);
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
if (err == -EOPNOTSUPP)
err = 0;
else if (err) {
@@ -6128,10 +6752,8 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
}
inc:
idx++;
-retry:
devlink_put(devlink);
}
- mutex_unlock(&devlink_mutex);
if (err != -EMSGSIZE)
return err;
@@ -7120,6 +7742,7 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
enum devlink_health_reporter_state prev_health_state;
struct devlink *devlink = reporter->devlink;
unsigned long recover_ts_threshold;
+ int ret;
/* write a log message of the current error */
WARN_ON(!msg);
@@ -7153,11 +7776,14 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
mutex_unlock(&reporter->dump_lock);
}
- if (reporter->auto_recover)
- return devlink_health_reporter_recover(reporter,
- priv_ctx, NULL);
+ if (!reporter->auto_recover)
+ return 0;
- return 0;
+ devl_lock(devlink);
+ ret = devlink_health_reporter_recover(reporter, priv_ctx, NULL);
+ devl_unlock(devlink);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(devlink_health_report);
@@ -7206,18 +7832,13 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
struct nlattr **attrs = info->attrs;
struct devlink *devlink;
- mutex_lock(&devlink_mutex);
devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
if (IS_ERR(devlink))
- goto unlock;
+ return NULL;
reporter = devlink_health_reporter_get_from_attrs(devlink, attrs);
devlink_put(devlink);
- mutex_unlock(&devlink_mutex);
return reporter;
-unlock:
- mutex_unlock(&devlink_mutex);
- return NULL;
}
void
@@ -7283,14 +7904,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry_rep;
-
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
mutex_lock(&devlink->reporters_lock);
list_for_each_entry(reporter, &devlink->reporter_list,
list) {
@@ -7310,18 +7924,11 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
idx++;
}
mutex_unlock(&devlink->reporters_lock);
-retry_rep:
devlink_put(devlink);
}
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry_port;
-
- mutex_lock(&devlink->lock);
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ devl_lock(devlink);
list_for_each_entry(port, &devlink->port_list, list) {
mutex_lock(&port->reporters_lock);
list_for_each_entry(reporter, &port->reporter_list, list) {
@@ -7336,7 +7943,7 @@ retry_rep:
cb->nlh->nlmsg_seq, NLM_F_MULTI);
if (err) {
mutex_unlock(&port->reporters_lock);
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
@@ -7344,13 +7951,10 @@ retry_rep:
}
mutex_unlock(&port->reporters_lock);
}
- mutex_unlock(&devlink->lock);
-retry_port:
+ devl_unlock(devlink);
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
cb->args[0] = idx;
return msg->len;
}
@@ -7543,8 +8147,8 @@ static int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb,
}
struct devlink_stats {
- u64 rx_bytes;
- u64 rx_packets;
+ u64_stats_t rx_bytes;
+ u64_stats_t rx_packets;
struct u64_stats_sync syncp;
};
@@ -7701,12 +8305,12 @@ static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats,
cpu_stats = per_cpu_ptr(trap_stats, i);
do {
start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
- rx_packets = cpu_stats->rx_packets;
- rx_bytes = cpu_stats->rx_bytes;
+ rx_packets = u64_stats_read(&cpu_stats->rx_packets);
+ rx_bytes = u64_stats_read(&cpu_stats->rx_bytes);
} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
- stats->rx_packets += rx_packets;
- stats->rx_bytes += rx_bytes;
+ u64_stats_add(&stats->rx_packets, rx_packets);
+ u64_stats_add(&stats->rx_bytes, rx_bytes);
}
}
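
The read side above pairs with a writer that updates the per-CPU counters under u64_stats_update_begin()/end(); a sketch of the RX-path update:

	struct devlink_stats *stats = this_cpu_ptr(trap_stats);

	u64_stats_update_begin(&stats->syncp);
	u64_stats_inc(&stats->rx_packets);
	u64_stats_add(&stats->rx_bytes, skb->len);
	u64_stats_update_end(&stats->syncp);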
@@ -7724,11 +8328,13 @@ devlink_trap_group_stats_put(struct sk_buff *msg,
return -EMSGSIZE;
if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
- stats.rx_packets, DEVLINK_ATTR_PAD))
+ u64_stats_read(&stats.rx_packets),
+ DEVLINK_ATTR_PAD))
goto nla_put_failure;
if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
- stats.rx_bytes, DEVLINK_ATTR_PAD))
+ u64_stats_read(&stats.rx_bytes),
+ DEVLINK_ATTR_PAD))
goto nla_put_failure;
nla_nest_end(msg, attr);
@@ -7768,11 +8374,13 @@ static int devlink_trap_stats_put(struct sk_buff *msg, struct devlink *devlink,
goto nla_put_failure;
if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
- stats.rx_packets, DEVLINK_ATTR_PAD))
+ u64_stats_read(&stats.rx_packets),
+ DEVLINK_ATTR_PAD))
goto nla_put_failure;
if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
- stats.rx_bytes, DEVLINK_ATTR_PAD))
+ u64_stats_read(&stats.rx_bytes),
+ DEVLINK_ATTR_PAD))
goto nla_put_failure;
nla_nest_end(msg, attr);
@@ -7879,15 +8487,8 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
- mutex_lock(&devlink->lock);
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ devl_lock(devlink);
list_for_each_entry(trap_item, &devlink->trap_list, list) {
if (idx < start) {
idx++;
@@ -7899,19 +8500,16 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
cb->nlh->nlmsg_seq,
NLM_F_MULTI);
if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
idx++;
}
- mutex_unlock(&devlink->lock);
-retry:
+ devl_unlock(devlink);
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
cb->args[0] = idx;
return msg->len;
}
@@ -8106,15 +8704,8 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
- mutex_lock(&devlink->lock);
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ devl_lock(devlink);
list_for_each_entry(group_item, &devlink->trap_group_list,
list) {
if (idx < start) {
@@ -8127,19 +8718,16 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
cb->nlh->nlmsg_seq,
NLM_F_MULTI);
if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
idx++;
}
- mutex_unlock(&devlink->lock);
-retry:
+ devl_unlock(devlink);
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
cb->args[0] = idx;
return msg->len;
}
@@ -8420,15 +9008,8 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg,
int idx = 0;
int err;
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
- mutex_lock(&devlink->lock);
+ devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
+ devl_lock(devlink);
list_for_each_entry(policer_item, &devlink->trap_policer_list,
list) {
if (idx < start) {
@@ -8441,19 +9022,16 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg,
cb->nlh->nlmsg_seq,
NLM_F_MULTI);
if (err) {
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
devlink_put(devlink);
goto out;
}
idx++;
}
- mutex_unlock(&devlink->lock);
-retry:
+ devl_unlock(devlink);
devlink_put(devlink);
}
out:
- mutex_unlock(&devlink_mutex);
-
cb->args[0] = idx;
return msg->len;
}
@@ -8591,6 +9169,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 },
[DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED },
};
static const struct genl_small_ops devlink_nl_ops[] = {
@@ -8645,26 +9226,37 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_port_split_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
},
{
.cmd = DEVLINK_CMD_PORT_UNSPLIT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_port_unsplit_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
},
{
.cmd = DEVLINK_CMD_PORT_NEW,
.doit = devlink_nl_cmd_port_new_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_PORT_DEL,
.doit = devlink_nl_cmd_port_del_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
+ },
+ {
+ .cmd = DEVLINK_CMD_LINECARD_GET,
+ .doit = devlink_nl_cmd_linecard_get_doit,
+ .dumpit = devlink_nl_cmd_linecard_get_dumpit,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_LINECARD_SET,
+ .doit = devlink_nl_cmd_linecard_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD,
},
{
.cmd = DEVLINK_CMD_SB_GET,
@@ -8733,14 +9325,12 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_eswitch_get_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_ESWITCH_SET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_eswitch_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
@@ -8783,7 +9373,6 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_reload,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_PARAM_GET,
@@ -8851,8 +9440,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_get_doit,
.dumpit = devlink_nl_cmd_health_reporter_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
/* can be retrieved by unprivileged users */
},
{
@@ -8860,24 +9448,21 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
},
{
.cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_recover_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
},
{
.cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_diagnose_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
},
{
.cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
@@ -8891,16 +9476,14 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_dump_clear_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
},
{
.cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_test_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
},
{
.cmd = DEVLINK_CMD_FLASH_UPDATE,
@@ -8941,6 +9524,17 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.doit = devlink_nl_cmd_trap_policer_set_doit,
.flags = GENL_ADMIN_PERM,
},
+ {
+ .cmd = DEVLINK_CMD_SELFTESTS_GET,
+ .doit = devlink_nl_cmd_selftests_get_doit,
+ .dumpit = devlink_nl_cmd_selftests_get_dumpit,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_SELFTESTS_RUN,
+ .doit = devlink_nl_cmd_selftests_run,
+ .flags = GENL_ADMIN_PERM,
+ },
};
static struct genl_family devlink_nl_family __ro_after_init = {
@@ -8949,11 +9543,13 @@ static struct genl_family devlink_nl_family __ro_after_init = {
.maxattr = DEVLINK_ATTR_MAX,
.policy = devlink_nl_policy,
.netnsok = true,
+ .parallel_ops = true,
.pre_doit = devlink_nl_pre_doit,
.post_doit = devlink_nl_post_doit,
.module = THIS_MODULE,
.small_ops = devlink_nl_ops,
.n_small_ops = ARRAY_SIZE(devlink_nl_ops),
+ .resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1,
.mcgrps = devlink_nl_mcgrps,
.n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps),
};
@@ -9047,6 +9643,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
write_pnet(&devlink->_net, net);
INIT_LIST_HEAD(&devlink->port_list);
INIT_LIST_HEAD(&devlink->rate_list);
+ INIT_LIST_HEAD(&devlink->linecard_list);
INIT_LIST_HEAD(&devlink->sb_list);
INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
INIT_LIST_HEAD(&devlink->resource_list);
@@ -9056,8 +9653,11 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
INIT_LIST_HEAD(&devlink->trap_list);
INIT_LIST_HEAD(&devlink->trap_group_list);
INIT_LIST_HEAD(&devlink->trap_policer_list);
+ lockdep_register_key(&devlink->lock_key);
mutex_init(&devlink->lock);
+ lockdep_set_class(&devlink->lock, &devlink->lock_key);
mutex_init(&devlink->reporters_lock);
+ mutex_init(&devlink->linecards_lock);
refcount_set(&devlink->refcount, 1);
init_completion(&devlink->comp);
@@ -9084,10 +9684,14 @@ static void devlink_notify_register(struct devlink *devlink)
struct devlink_param_item *param_item;
struct devlink_trap_item *trap_item;
struct devlink_port *devlink_port;
+ struct devlink_linecard *linecard;
struct devlink_rate *rate_node;
struct devlink_region *region;
devlink_notify(devlink, DEVLINK_CMD_NEW);
+ list_for_each_entry(linecard, &devlink->linecard_list, list)
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+
list_for_each_entry(devlink_port, &devlink->port_list, list)
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
@@ -9159,10 +9763,8 @@ void devlink_register(struct devlink *devlink)
ASSERT_DEVLINK_NOT_REGISTERED(devlink);
/* Make sure that we are in .probe() routine */
- mutex_lock(&devlink_mutex);
xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
devlink_notify_register(devlink);
- mutex_unlock(&devlink_mutex);
}
EXPORT_SYMBOL_GPL(devlink_register);
@@ -9176,13 +9778,13 @@ void devlink_unregister(struct devlink *devlink)
ASSERT_DEVLINK_REGISTERED(devlink);
/* Make sure that we are in .remove() routine */
+ xa_set_mark(&devlinks, devlink->index, DEVLINK_UNREGISTERING);
devlink_put(devlink);
wait_for_completion(&devlink->comp);
- mutex_lock(&devlink_mutex);
devlink_notify_unregister(devlink);
xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
- mutex_unlock(&devlink_mutex);
+ xa_clear_mark(&devlinks, devlink->index, DEVLINK_UNREGISTERING);
}
EXPORT_SYMBOL_GPL(devlink_unregister);
@@ -9195,8 +9797,10 @@ void devlink_free(struct devlink *devlink)
{
ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+ mutex_destroy(&devlink->linecards_lock);
mutex_destroy(&devlink->reporters_lock);
mutex_destroy(&devlink->lock);
+ lockdep_unregister_key(&devlink->lock_key);
WARN_ON(!list_empty(&devlink->trap_policer_list));
WARN_ON(!list_empty(&devlink->trap_group_list));
WARN_ON(!list_empty(&devlink->trap_list));
@@ -9207,6 +9811,7 @@ void devlink_free(struct devlink *devlink)
WARN_ON(!list_empty(&devlink->dpipe_table_list));
WARN_ON(!list_empty(&devlink->sb_list));
WARN_ON(!list_empty(&devlink->rate_list));
+ WARN_ON(!list_empty(&devlink->linecard_list));
WARN_ON(!list_empty(&devlink->port_list));
xa_destroy(&devlink->snapshot_ids);
@@ -9250,6 +9855,83 @@ static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port)
}
/**
+ * devlink_port_init() - Init devlink port
+ *
+ * @devlink: devlink
+ * @devlink_port: devlink port
+ *
+ * Initialize essential fields that are needed by functions
+ * that may be called before devlink port registration.
+ * Calling this function is optional and not needed
+ * in case the driver does not use such functions.
+ */
+void devlink_port_init(struct devlink *devlink,
+ struct devlink_port *devlink_port)
+{
+ if (devlink_port->initialized)
+ return;
+ devlink_port->devlink = devlink;
+ INIT_LIST_HEAD(&devlink_port->region_list);
+ devlink_port->initialized = true;
+}
+EXPORT_SYMBOL_GPL(devlink_port_init);
+
+/**
+ * devlink_port_fini() - Deinitialize devlink port
+ *
+ * @devlink_port: devlink port
+ *
+ * Deinitialize essential fields that are used by functions
+ * that may be called after devlink port unregistration.
+ * Calling this function is optional and not needed
+ * in case the driver does not use such functions.
+ */
+void devlink_port_fini(struct devlink_port *devlink_port)
+{
+ WARN_ON(!list_empty(&devlink_port->region_list));
+}
+EXPORT_SYMBOL_GPL(devlink_port_fini);
+
+/**
+ * devl_port_register() - Register devlink port
+ *
+ * @devlink: devlink
+ * @devlink_port: devlink port
+ * @port_index: driver-specific numerical identifier of the port
+ *
+ * Register devlink port with provided port index. The driver can use
+ * any indexing, even a hw-related one. The devlink_port structure
+ * is convenient to embed inside the driver's private structure.
+ * Note that the caller should take care of zeroing the devlink_port
+ * structure.
+ */
+int devl_port_register(struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ unsigned int port_index)
+{
+ devl_assert_locked(devlink);
+
+ if (devlink_port_index_exists(devlink, port_index))
+ return -EEXIST;
+
+ ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+ devlink_port_init(devlink, devlink_port);
+ devlink_port->registered = true;
+ devlink_port->index = port_index;
+ spin_lock_init(&devlink_port->type_lock);
+ INIT_LIST_HEAD(&devlink_port->reporter_list);
+ mutex_init(&devlink_port->reporters_lock);
+ list_add_tail(&devlink_port->list, &devlink->port_list);
+
+ INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
+ devlink_port_type_warn_schedule(devlink_port);
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_port_register);
+
+/**
* devlink_port_register - Register devlink port
*
* @devlink: devlink
@@ -9261,51 +9943,54 @@ static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port)
* is convenient to be embedded inside user driver private structure.
* Note that the caller should take care of zeroing the devlink_port
* structure.
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
*/
int devlink_port_register(struct devlink *devlink,
struct devlink_port *devlink_port,
unsigned int port_index)
{
- mutex_lock(&devlink->lock);
- if (devlink_port_index_exists(devlink, port_index)) {
- mutex_unlock(&devlink->lock);
- return -EEXIST;
- }
+ int err;
- WARN_ON(devlink_port->devlink);
- devlink_port->devlink = devlink;
- devlink_port->index = port_index;
- spin_lock_init(&devlink_port->type_lock);
- INIT_LIST_HEAD(&devlink_port->reporter_list);
- mutex_init(&devlink_port->reporters_lock);
- list_add_tail(&devlink_port->list, &devlink->port_list);
- INIT_LIST_HEAD(&devlink_port->param_list);
- INIT_LIST_HEAD(&devlink_port->region_list);
- mutex_unlock(&devlink->lock);
- INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
- devlink_port_type_warn_schedule(devlink_port);
- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
- return 0;
+ devl_lock(devlink);
+ err = devl_port_register(devlink, devlink_port, port_index);
+ devl_unlock(devlink);
+ return err;
}
EXPORT_SYMBOL_GPL(devlink_port_register);
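
With the split in place, callers pick the variant that matches their locking context — for example:

	/* from a context that already holds the instance lock
	 * (e.g. a devlink reload op), use the devl_ variant:
	 */
	err = devl_port_register(devlink, devlink_port, port_index);

	/* from probe paths that do not hold the lock, the wrapper
	 * takes and releases it internally:
	 */
	err = devlink_port_register(devlink, devlink_port, port_index);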
/**
- * devlink_port_unregister - Unregister devlink port
+ * devl_port_unregister() - Unregister devlink port
*
- * @devlink_port: devlink port
+ * @devlink_port: devlink port
*/
-void devlink_port_unregister(struct devlink_port *devlink_port)
+void devl_port_unregister(struct devlink_port *devlink_port)
{
- struct devlink *devlink = devlink_port->devlink;
+ lockdep_assert_held(&devlink_port->devlink->lock);
devlink_port_type_warn_cancel(devlink_port);
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
- mutex_lock(&devlink->lock);
list_del(&devlink_port->list);
- mutex_unlock(&devlink->lock);
WARN_ON(!list_empty(&devlink_port->reporter_list));
- WARN_ON(!list_empty(&devlink_port->region_list));
mutex_destroy(&devlink_port->reporters_lock);
+ devlink_port->registered = false;
+}
+EXPORT_SYMBOL_GPL(devl_port_unregister);
+
+/**
+ * devlink_port_unregister - Unregister devlink port
+ *
+ * @devlink_port: devlink port
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+void devlink_port_unregister(struct devlink_port *devlink_port)
+{
+ struct devlink *devlink = devlink_port->devlink;
+
+ devl_lock(devlink);
+ devl_port_unregister(devlink_port);
+ devl_unlock(devlink);
}
EXPORT_SYMBOL_GPL(devlink_port_unregister);
@@ -9313,8 +9998,8 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port,
enum devlink_port_type type,
void *type_dev)
{
- if (WARN_ON(!devlink_port->devlink))
- return;
+ ASSERT_DEVLINK_PORT_REGISTERED(devlink_port);
+
devlink_port_type_warn_cancel(devlink_port);
spin_lock_bh(&devlink_port->type_lock);
devlink_port->type = type;
@@ -9433,8 +10118,8 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port,
{
int ret;
- if (WARN_ON(devlink_port->devlink))
- return;
+ ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
devlink_port->attrs = *attrs;
ret = __devlink_port_attrs_set(devlink_port, attrs->flavour);
if (ret)
@@ -9457,8 +10142,8 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 contro
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
- if (WARN_ON(devlink_port->devlink))
- return;
+ ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
ret = __devlink_port_attrs_set(devlink_port,
DEVLINK_PORT_FLAVOUR_PCI_PF);
if (ret)
@@ -9484,8 +10169,8 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
- if (WARN_ON(devlink_port->devlink))
- return;
+ ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
ret = __devlink_port_attrs_set(devlink_port,
DEVLINK_PORT_FLAVOUR_PCI_VF);
if (ret)
@@ -9512,8 +10197,8 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
- if (WARN_ON(devlink_port->devlink))
- return;
+ ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
ret = __devlink_port_attrs_set(devlink_port,
DEVLINK_PORT_FLAVOUR_PCI_SF);
if (ret)
@@ -9526,30 +10211,26 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro
EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
/**
- * devlink_rate_leaf_create - create devlink rate leaf
- *
+ * devl_rate_leaf_create - create devlink rate leaf
* @devlink_port: devlink port object to create rate object on
* @priv: driver private data
*
* Create devlink rate object of type leaf on provided @devlink_port.
- * Throws call trace if @devlink_port already has a devlink rate object.
- *
- * Context: Takes and release devlink->lock <mutex>.
- *
- * Return: -ENOMEM if failed to allocate rate object, 0 otherwise.
*/
-int
-devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
+int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
{
struct devlink *devlink = devlink_port->devlink;
struct devlink_rate *devlink_rate;
+ devl_assert_locked(devlink_port->devlink);
+
+ if (WARN_ON(devlink_port->devlink_rate))
+ return -EBUSY;
+
devlink_rate = kzalloc(sizeof(*devlink_rate), GFP_KERNEL);
if (!devlink_rate)
return -ENOMEM;
- mutex_lock(&devlink->lock);
- WARN_ON(devlink_port->devlink_rate);
devlink_rate->type = DEVLINK_RATE_TYPE_LEAF;
devlink_rate->devlink = devlink;
devlink_rate->devlink_port = devlink_port;
@@ -9557,54 +10238,49 @@ devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
list_add_tail(&devlink_rate->list, &devlink->rate_list);
devlink_port->devlink_rate = devlink_rate;
devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
- mutex_unlock(&devlink->lock);
return 0;
}
-EXPORT_SYMBOL_GPL(devlink_rate_leaf_create);
+EXPORT_SYMBOL_GPL(devl_rate_leaf_create);
/**
- * devlink_rate_leaf_destroy - destroy devlink rate leaf
+ * devl_rate_leaf_destroy - destroy devlink rate leaf
*
* @devlink_port: devlink port linked to the rate object
*
- * Context: Takes and release devlink->lock <mutex>.
+ * Destroy the devlink rate object of type leaf on provided @devlink_port.
*/
-void devlink_rate_leaf_destroy(struct devlink_port *devlink_port)
+void devl_rate_leaf_destroy(struct devlink_port *devlink_port)
{
struct devlink_rate *devlink_rate = devlink_port->devlink_rate;
- struct devlink *devlink = devlink_port->devlink;
+ devl_assert_locked(devlink_port->devlink);
if (!devlink_rate)
return;
- mutex_lock(&devlink->lock);
devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
if (devlink_rate->parent)
refcount_dec(&devlink_rate->parent->refcnt);
list_del(&devlink_rate->list);
devlink_port->devlink_rate = NULL;
- mutex_unlock(&devlink->lock);
kfree(devlink_rate);
}
-EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy);
+EXPORT_SYMBOL_GPL(devl_rate_leaf_destroy);
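A minimal sketch of the expected pairing for the locked rate-leaf API; the example_* wrappers are hypothetical and assume the caller already holds the instance lock:

/* Hypothetical: create the rate leaf after port registration, destroy it
 * before the port goes away; both run under devlink->lock.
 */
static int example_port_rate_init(struct devlink_port *dl_port, void *priv)
{
        devl_assert_locked(dl_port->devlink);
        return devl_rate_leaf_create(dl_port, priv);
}

static void example_port_rate_fini(struct devlink_port *dl_port)
{
        /* returns quietly if no rate object was ever created */
        devl_rate_leaf_destroy(dl_port);
}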
/**
- * devlink_rate_nodes_destroy - destroy all devlink rate nodes on device
- *
+ * devl_rate_nodes_destroy - destroy all devlink rate nodes on device
* @devlink: devlink instance
*
* Unset parent for all rate objects and destroy all rate nodes
* on specified device.
- *
- * Context: Takes and release devlink->lock <mutex>.
*/
-void devlink_rate_nodes_destroy(struct devlink *devlink)
+void devl_rate_nodes_destroy(struct devlink *devlink)
{
struct devlink_rate *devlink_rate, *tmp;
const struct devlink_ops *ops = devlink->ops;
- mutex_lock(&devlink->lock);
+ devl_assert_locked(devlink);
+
list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
if (!devlink_rate->parent)
continue;
@@ -9625,9 +10301,23 @@ void devlink_rate_nodes_destroy(struct devlink *devlink)
kfree(devlink_rate);
}
}
- mutex_unlock(&devlink->lock);
}
-EXPORT_SYMBOL_GPL(devlink_rate_nodes_destroy);
+EXPORT_SYMBOL_GPL(devl_rate_nodes_destroy);
+
+/**
+ * devlink_port_linecard_set - Link port with a linecard
+ *
+ * @devlink_port: devlink port
+ * @linecard: devlink linecard
+ */
+void devlink_port_linecard_set(struct devlink_port *devlink_port,
+ struct devlink_linecard *linecard)
+{
+ ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+ devlink_port->linecard = linecard;
+}
+EXPORT_SYMBOL_GPL(devlink_port_linecard_set);
static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
char *name, size_t len)
@@ -9640,7 +10330,12 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
switch (attrs->flavour) {
case DEVLINK_PORT_FLAVOUR_PHYSICAL:
- n = snprintf(name, len, "p%u", attrs->phys.port_number);
+ if (devlink_port->linecard)
+ n = snprintf(name, len, "l%u",
+ devlink_port->linecard->index);
+ if (n < len)
+ n += snprintf(name + n, len - n, "p%u",
+ attrs->phys.port_number);
if (n < len && attrs->split)
n += snprintf(name + n, len - n, "s%u",
attrs->phys.split_subport_number);
@@ -9695,25 +10390,241 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
return 0;
}
-int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
- u32 size, u16 ingress_pools_count,
- u16 egress_pools_count, u16 ingress_tc_count,
- u16 egress_tc_count)
+static int devlink_linecard_types_init(struct devlink_linecard *linecard)
{
- struct devlink_sb *devlink_sb;
- int err = 0;
+ struct devlink_linecard_type *linecard_type;
+ unsigned int count;
+ int i;
- mutex_lock(&devlink->lock);
- if (devlink_sb_index_exists(devlink, sb_index)) {
- err = -EEXIST;
- goto unlock;
+ count = linecard->ops->types_count(linecard, linecard->priv);
+ linecard->types = kmalloc_array(count, sizeof(*linecard_type),
+ GFP_KERNEL);
+ if (!linecard->types)
+ return -ENOMEM;
+ linecard->types_count = count;
+
+ for (i = 0; i < count; i++) {
+ linecard_type = &linecard->types[i];
+ linecard->ops->types_get(linecard, linecard->priv, i,
+ &linecard_type->type,
+ &linecard_type->priv);
}
+ return 0;
+}
- devlink_sb = kzalloc(sizeof(*devlink_sb), GFP_KERNEL);
- if (!devlink_sb) {
- err = -ENOMEM;
- goto unlock;
+static void devlink_linecard_types_fini(struct devlink_linecard *linecard)
+{
+ kfree(linecard->types);
+}
+
+/**
+ * devlink_linecard_create - Create devlink linecard
+ *
+ * @devlink: devlink
+ * @linecard_index: driver-specific numerical identifier of the linecard
+ * @ops: linecard ops
+ * @priv: user priv pointer
+ *
+ * Create devlink linecard instance with provided linecard index.
+ * The caller can use any indexing, even a hw-related one.
+ *
+ * Return: Line card structure or an ERR_PTR() encoded error code.
+ */
+struct devlink_linecard *
+devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index,
+ const struct devlink_linecard_ops *ops, void *priv)
+{
+ struct devlink_linecard *linecard;
+ int err;
+
+ if (WARN_ON(!ops || !ops->provision || !ops->unprovision ||
+ !ops->types_count || !ops->types_get))
+ return ERR_PTR(-EINVAL);
+
+ mutex_lock(&devlink->linecards_lock);
+ if (devlink_linecard_index_exists(devlink, linecard_index)) {
+ mutex_unlock(&devlink->linecards_lock);
+ return ERR_PTR(-EEXIST);
+ }
+
+ linecard = kzalloc(sizeof(*linecard), GFP_KERNEL);
+ if (!linecard) {
+ mutex_unlock(&devlink->linecards_lock);
+ return ERR_PTR(-ENOMEM);
}
+
+ linecard->devlink = devlink;
+ linecard->index = linecard_index;
+ linecard->ops = ops;
+ linecard->priv = priv;
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ mutex_init(&linecard->state_lock);
+
+ err = devlink_linecard_types_init(linecard);
+ if (err) {
+ mutex_destroy(&linecard->state_lock);
+ kfree(linecard);
+ mutex_unlock(&devlink->linecards_lock);
+ return ERR_PTR(err);
+ }
+
+ list_add_tail(&linecard->list, &devlink->linecard_list);
+ refcount_set(&linecard->refcount, 1);
+ mutex_unlock(&devlink->linecards_lock);
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ return linecard;
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_create);
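A hedged sketch of a line card driver wiring up the new API; the callback signatures follow the callers in this patch (types_count/types_get) and the provision flow documented below, while the example_* implementations and the "16x100G" type string are invented for illustration:

/* Hypothetical line card ops. provision()/unprovision() may complete
 * asynchronously; they report completion via the provision_set/clear
 * helpers below.
 */
static int example_lc_provision(struct devlink_linecard *linecard, void *priv,
                                const char *type, const void *type_priv,
                                struct netlink_ext_ack *extack)
{
        /* kick off hardware provisioning here, then (or later): */
        devlink_linecard_provision_set(linecard, type);
        return 0;
}

static int example_lc_unprovision(struct devlink_linecard *linecard,
                                  void *priv, struct netlink_ext_ack *extack)
{
        devlink_linecard_provision_clear(linecard);
        return 0;
}

static unsigned int example_lc_types_count(struct devlink_linecard *linecard,
                                           void *priv)
{
        return 1;
}

static void example_lc_types_get(struct devlink_linecard *linecard, void *priv,
                                 unsigned int index, const char **type,
                                 const void **type_priv)
{
        *type = "16x100G";      /* illustrative type name */
        *type_priv = NULL;
}

static const struct devlink_linecard_ops example_lc_ops = {
        .provision      = example_lc_provision,
        .unprovision    = example_lc_unprovision,
        .types_count    = example_lc_types_count,
        .types_get      = example_lc_types_get,
};

A driver would then call devlink_linecard_create(devlink, 0, &example_lc_ops, priv) once per slot and devlink_linecard_destroy() on teardown.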
+
+/**
+ * devlink_linecard_destroy - Destroy devlink linecard
+ *
+ * @linecard: devlink linecard
+ */
+void devlink_linecard_destroy(struct devlink_linecard *linecard)
+{
+ struct devlink *devlink = linecard->devlink;
+
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
+ mutex_lock(&devlink->linecards_lock);
+ list_del(&linecard->list);
+ devlink_linecard_types_fini(linecard);
+ mutex_unlock(&devlink->linecards_lock);
+ devlink_linecard_put(linecard);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_destroy);
+
+/**
+ * devlink_linecard_provision_set - Set provisioning on linecard
+ *
+ * @linecard: devlink linecard
+ * @type: linecard type
+ *
+ * This is called either directly from within the provision() op, or
+ * asynchronously as a later result of the provision() op.
+ */
+void devlink_linecard_provision_set(struct devlink_linecard *linecard,
+ const char *type)
+{
+ mutex_lock(&linecard->state_lock);
+ WARN_ON(linecard->type && strcmp(linecard->type, type));
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED;
+ linecard->type = type;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_provision_set);
+
+/**
+ * devlink_linecard_provision_clear - Clear provisioning on linecard
+ *
+ * @linecard: devlink linecard
+ *
+ * This is called either directly from within the unprovision() op, or
+ * asynchronously as a later result of the unprovision() op.
+ */
+void devlink_linecard_provision_clear(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ WARN_ON(linecard->nested_devlink);
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear);
+
+/**
+ * devlink_linecard_provision_fail - Fail provisioning on linecard
+ *
+ * @linecard: devlink linecard
+ *
+ * This is called either directly from within the provision() op, or
+ * asynchronously as a later result of the provision() op.
+ */
+void devlink_linecard_provision_fail(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ WARN_ON(linecard->nested_devlink);
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_provision_fail);
+
+/**
+ * devlink_linecard_activate - Set linecard active
+ *
+ * @linecard: devlink linecard
+ */
+void devlink_linecard_activate(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ WARN_ON(linecard->state != DEVLINK_LINECARD_STATE_PROVISIONED);
+ linecard->state = DEVLINK_LINECARD_STATE_ACTIVE;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_activate);
+
+/**
+ * devlink_linecard_deactivate - Set linecard inactive
+ *
+ * @linecard: devlink linecard
+ */
+void devlink_linecard_deactivate(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ switch (linecard->state) {
+ case DEVLINK_LINECARD_STATE_ACTIVE:
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ break;
+ case DEVLINK_LINECARD_STATE_UNPROVISIONING:
+ /* Line card is being deactivated as part
+ * of unprovisioning flow.
+ */
+ break;
+ default:
+ WARN_ON(1);
+ break;
+ }
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_deactivate);
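A short sketch of how the two state helpers compose with a driver event handler; the handler itself is hypothetical:

/* Hypothetical: hardware reports the provisioned line card going up or
 * down; activate moves PROVISIONED -> ACTIVE, deactivate moves it back.
 */
static void example_lc_port_event(struct devlink_linecard *linecard, bool up)
{
        if (up)
                devlink_linecard_activate(linecard);
        else
                devlink_linecard_deactivate(linecard);
}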
+
+/**
+ * devlink_linecard_nested_dl_set - Attach/detach nested devlink instance to linecard
+ *
+ * @linecard: devlink linecard
+ * @nested_devlink: devlink instance to attach or NULL to detach
+ */
+void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
+ struct devlink *nested_devlink)
+{
+ mutex_lock(&linecard->state_lock);
+ linecard->nested_devlink = nested_devlink;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set);
+
+int devl_sb_register(struct devlink *devlink, unsigned int sb_index,
+ u32 size, u16 ingress_pools_count,
+ u16 egress_pools_count, u16 ingress_tc_count,
+ u16 egress_tc_count)
+{
+ struct devlink_sb *devlink_sb;
+
+ lockdep_assert_held(&devlink->lock);
+
+ if (devlink_sb_index_exists(devlink, sb_index))
+ return -EEXIST;
+
+ devlink_sb = kzalloc(sizeof(*devlink_sb), GFP_KERNEL);
+ if (!devlink_sb)
+ return -ENOMEM;
devlink_sb->index = sb_index;
devlink_sb->size = size;
devlink_sb->ingress_pools_count = ingress_pools_count;
@@ -9721,57 +10632,78 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
devlink_sb->ingress_tc_count = ingress_tc_count;
devlink_sb->egress_tc_count = egress_tc_count;
list_add_tail(&devlink_sb->list, &devlink->sb_list);
-unlock:
- mutex_unlock(&devlink->lock);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_sb_register);
+
+int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
+ u32 size, u16 ingress_pools_count,
+ u16 egress_pools_count, u16 ingress_tc_count,
+ u16 egress_tc_count)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_sb_register(devlink, sb_index, size, ingress_pools_count,
+ egress_pools_count, ingress_tc_count,
+ egress_tc_count);
+ devl_unlock(devlink);
return err;
}
EXPORT_SYMBOL_GPL(devlink_sb_register);
-void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
+void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index)
{
struct devlink_sb *devlink_sb;
- mutex_lock(&devlink->lock);
+ lockdep_assert_held(&devlink->lock);
+
devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
WARN_ON(!devlink_sb);
list_del(&devlink_sb->list);
- mutex_unlock(&devlink->lock);
kfree(devlink_sb);
}
+EXPORT_SYMBOL_GPL(devl_sb_unregister);
+
+void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
+{
+ devl_lock(devlink);
+ devl_sb_unregister(devlink, sb_index);
+ devl_unlock(devlink);
+}
EXPORT_SYMBOL_GPL(devlink_sb_unregister);
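A minimal sketch of shared-buffer registration through the locked variant above; the buffer size and pool/TC counts are illustrative assumptions:

/* Hypothetical: one 1 MiB shared buffer with 4+4 pools and 8+8 TCs. */
static int example_sb_init(struct devlink *devlink)
{
        int err;

        devl_lock(devlink);
        err = devl_sb_register(devlink, 0 /* sb_index */, SZ_1M,
                               4 /* ingress pools */, 4 /* egress pools */,
                               8 /* ingress TCs */, 8 /* egress TCs */);
        devl_unlock(devlink);
        return err;
}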
/**
- * devlink_dpipe_headers_register - register dpipe headers
+ * devl_dpipe_headers_register - register dpipe headers
*
- * @devlink: devlink
- * @dpipe_headers: dpipe header array
+ * @devlink: devlink
+ * @dpipe_headers: dpipe header array
*
- * Register the headers supported by hardware.
+ * Register the headers supported by hardware.
*/
-int devlink_dpipe_headers_register(struct devlink *devlink,
- struct devlink_dpipe_headers *dpipe_headers)
+void devl_dpipe_headers_register(struct devlink *devlink,
+ struct devlink_dpipe_headers *dpipe_headers)
{
- mutex_lock(&devlink->lock);
+ lockdep_assert_held(&devlink->lock);
+
devlink->dpipe_headers = dpipe_headers;
- mutex_unlock(&devlink->lock);
- return 0;
}
-EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register);
+EXPORT_SYMBOL_GPL(devl_dpipe_headers_register);
/**
- * devlink_dpipe_headers_unregister - unregister dpipe headers
+ * devl_dpipe_headers_unregister - unregister dpipe headers
*
- * @devlink: devlink
+ * @devlink: devlink
*
- * Unregister the headers supported by hardware.
+ * Unregister the headers supported by hardware.
*/
-void devlink_dpipe_headers_unregister(struct devlink *devlink)
+void devl_dpipe_headers_unregister(struct devlink *devlink)
{
- mutex_lock(&devlink->lock);
+ lockdep_assert_held(&devlink->lock);
+
devlink->dpipe_headers = NULL;
- mutex_unlock(&devlink->lock);
}
-EXPORT_SYMBOL_GPL(devlink_dpipe_headers_unregister);
+EXPORT_SYMBOL_GPL(devl_dpipe_headers_unregister);
/**
* devlink_dpipe_table_counter_enabled - check if counter allocation
@@ -9805,38 +10737,33 @@ bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled);
/**
- * devlink_dpipe_table_register - register dpipe table
+ * devl_dpipe_table_register - register dpipe table
*
- * @devlink: devlink
- * @table_name: table name
- * @table_ops: table ops
- * @priv: priv
- * @counter_control_extern: external control for counters
+ * @devlink: devlink
+ * @table_name: table name
+ * @table_ops: table ops
+ * @priv: priv
+ * @counter_control_extern: external control for counters
*/
-int devlink_dpipe_table_register(struct devlink *devlink,
- const char *table_name,
- struct devlink_dpipe_table_ops *table_ops,
- void *priv, bool counter_control_extern)
+int devl_dpipe_table_register(struct devlink *devlink,
+ const char *table_name,
+ struct devlink_dpipe_table_ops *table_ops,
+ void *priv, bool counter_control_extern)
{
struct devlink_dpipe_table *table;
- int err = 0;
+
+ lockdep_assert_held(&devlink->lock);
if (WARN_ON(!table_ops->size_get))
return -EINVAL;
- mutex_lock(&devlink->lock);
-
if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name,
- devlink)) {
- err = -EEXIST;
- goto unlock;
- }
+ devlink))
+ return -EEXIST;
table = kzalloc(sizeof(*table), GFP_KERNEL);
- if (!table) {
- err = -ENOMEM;
- goto unlock;
- }
+ if (!table)
+ return -ENOMEM;
table->name = table_name;
table->table_ops = table_ops;
@@ -9844,77 +10771,69 @@ int devlink_dpipe_table_register(struct devlink *devlink,
table->counter_control_extern = counter_control_extern;
list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
-unlock:
- mutex_unlock(&devlink->lock);
- return err;
+
+ return 0;
}
-EXPORT_SYMBOL_GPL(devlink_dpipe_table_register);
+EXPORT_SYMBOL_GPL(devl_dpipe_table_register);
/**
- * devlink_dpipe_table_unregister - unregister dpipe table
+ * devl_dpipe_table_unregister - unregister dpipe table
*
- * @devlink: devlink
- * @table_name: table name
+ * @devlink: devlink
+ * @table_name: table name
*/
-void devlink_dpipe_table_unregister(struct devlink *devlink,
- const char *table_name)
+void devl_dpipe_table_unregister(struct devlink *devlink,
+ const char *table_name)
{
struct devlink_dpipe_table *table;
- mutex_lock(&devlink->lock);
+ lockdep_assert_held(&devlink->lock);
+
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
table_name, devlink);
if (!table)
- goto unlock;
+ return;
list_del_rcu(&table->list);
- mutex_unlock(&devlink->lock);
kfree_rcu(table, rcu);
- return;
-unlock:
- mutex_unlock(&devlink->lock);
}
-EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
+EXPORT_SYMBOL_GPL(devl_dpipe_table_unregister);
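A hedged sketch of registering a dpipe table with only the mandatory callback; this patch rejects table_ops lacking size_get, and the remaining dump callbacks are omitted here for brevity (the ops struct layout is an assumption from <net/devlink.h>):

static u64 example_table_size_get(void *priv)
{
        return 2048;    /* illustrative entry capacity */
}

static struct devlink_dpipe_table_ops example_table_ops = {
        .size_get = example_table_size_get,
};

static int example_dpipe_init(struct devlink *devlink, void *priv)
{
        devl_assert_locked(devlink);
        return devl_dpipe_table_register(devlink, "example_host_table",
                                         &example_table_ops, priv, false);
}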
/**
- * devlink_resource_register - devlink resource register
+ * devl_resource_register - devlink resource register
*
- * @devlink: devlink
- * @resource_name: resource's name
- * @resource_size: resource's size
- * @resource_id: resource's id
- * @parent_resource_id: resource's parent id
- * @size_params: size parameters
+ * @devlink: devlink
+ * @resource_name: resource's name
+ * @resource_size: resource's size
+ * @resource_id: resource's id
+ * @parent_resource_id: resource's parent id
+ * @size_params: size parameters
*
- * Generic resources should reuse the same names across drivers.
- * Please see the generic resources list at:
- * Documentation/networking/devlink/devlink-resource.rst
+ * Generic resources should reuse the same names across drivers.
+ * Please see the generic resources list at:
+ * Documentation/networking/devlink/devlink-resource.rst
*/
-int devlink_resource_register(struct devlink *devlink,
- const char *resource_name,
- u64 resource_size,
- u64 resource_id,
- u64 parent_resource_id,
- const struct devlink_resource_size_params *size_params)
+int devl_resource_register(struct devlink *devlink,
+ const char *resource_name,
+ u64 resource_size,
+ u64 resource_id,
+ u64 parent_resource_id,
+ const struct devlink_resource_size_params *size_params)
{
struct devlink_resource *resource;
struct list_head *resource_list;
bool top_hierarchy;
- int err = 0;
+
+ lockdep_assert_held(&devlink->lock);
top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP;
- mutex_lock(&devlink->lock);
resource = devlink_resource_find(devlink, NULL, resource_id);
- if (resource) {
- err = -EINVAL;
- goto out;
- }
+ if (resource)
+ return -EINVAL;
resource = kzalloc(sizeof(*resource), GFP_KERNEL);
- if (!resource) {
- err = -ENOMEM;
- goto out;
- }
+ if (!resource)
+ return -ENOMEM;
if (top_hierarchy) {
resource_list = &devlink->resource_list;
@@ -9928,8 +10847,7 @@ int devlink_resource_register(struct devlink *devlink,
resource->parent = parent_resource;
} else {
kfree(resource);
- err = -EINVAL;
- goto out;
+ return -EINVAL;
}
}
@@ -9942,8 +10860,40 @@ int devlink_resource_register(struct devlink *devlink,
sizeof(resource->size_params));
INIT_LIST_HEAD(&resource->resource_list);
list_add_tail(&resource->list, resource_list);
-out:
- mutex_unlock(&devlink->lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_resource_register);
+
+/**
+ * devlink_resource_register - devlink resource register
+ *
+ * @devlink: devlink
+ * @resource_name: resource's name
+ * @resource_size: resource's size
+ * @resource_id: resource's id
+ * @parent_resource_id: resource's parent id
+ * @size_params: size parameters
+ *
+ * Generic resources should reuse the same names across drivers.
+ * Please see the generic resources list at:
+ * Documentation/networking/devlink/devlink-resource.rst
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+int devlink_resource_register(struct devlink *devlink,
+ const char *resource_name,
+ u64 resource_size,
+ u64 resource_id,
+ u64 parent_resource_id,
+ const struct devlink_resource_size_params *size_params)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_resource_register(devlink, resource_name, resource_size,
+ resource_id, parent_resource_id, size_params);
+ devl_unlock(devlink);
return err;
}
EXPORT_SYMBOL_GPL(devlink_resource_register);
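A minimal sketch of registering one top-level resource via the unlocked wrapper; the resource id, the limits, and the use of the devlink_resource_size_params_init() helper are illustrative assumptions:

#define EXAMPLE_RESOURCE_ID_KVD 1

static int example_resources_init(struct devlink *devlink)
{
        struct devlink_resource_size_params params;

        devlink_resource_size_params_init(&params, 0, SZ_1M, 1,
                                          DEVLINK_RESOURCE_UNIT_ENTRY);
        return devlink_resource_register(devlink, "kvd", SZ_1M,
                                         EXAMPLE_RESOURCE_ID_KVD,
                                         DEVLINK_RESOURCE_ID_PARENT_TOP,
                                         &params);
}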
@@ -9962,15 +10912,15 @@ static void devlink_resource_unregister(struct devlink *devlink,
}
/**
- * devlink_resources_unregister - free all resources
+ * devl_resources_unregister - free all resources
*
- * @devlink: devlink
+ * @devlink: devlink
*/
-void devlink_resources_unregister(struct devlink *devlink)
+void devl_resources_unregister(struct devlink *devlink)
{
struct devlink_resource *tmp, *child_resource;
- mutex_lock(&devlink->lock);
+ lockdep_assert_held(&devlink->lock);
list_for_each_entry_safe(child_resource, tmp, &devlink->resource_list,
list) {
@@ -9978,69 +10928,100 @@ void devlink_resources_unregister(struct devlink *devlink)
list_del(&child_resource->list);
kfree(child_resource);
}
+}
+EXPORT_SYMBOL_GPL(devl_resources_unregister);
- mutex_unlock(&devlink->lock);
+/**
+ * devlink_resources_unregister - free all resources
+ *
+ * @devlink: devlink
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+void devlink_resources_unregister(struct devlink *devlink)
+{
+ devl_lock(devlink);
+ devl_resources_unregister(devlink);
+ devl_unlock(devlink);
}
EXPORT_SYMBOL_GPL(devlink_resources_unregister);
/**
- * devlink_resource_size_get - get and update size
+ * devl_resource_size_get - get and update size
*
- * @devlink: devlink
- * @resource_id: the requested resource id
- * @p_resource_size: ptr to update
+ * @devlink: devlink
+ * @resource_id: the requested resource id
+ * @p_resource_size: ptr to update
*/
-int devlink_resource_size_get(struct devlink *devlink,
- u64 resource_id,
- u64 *p_resource_size)
+int devl_resource_size_get(struct devlink *devlink,
+ u64 resource_id,
+ u64 *p_resource_size)
{
struct devlink_resource *resource;
- int err = 0;
- mutex_lock(&devlink->lock);
+ lockdep_assert_held(&devlink->lock);
+
resource = devlink_resource_find(devlink, NULL, resource_id);
- if (!resource) {
- err = -EINVAL;
- goto out;
- }
+ if (!resource)
+ return -EINVAL;
*p_resource_size = resource->size_new;
resource->size = resource->size_new;
-out:
- mutex_unlock(&devlink->lock);
- return err;
+ return 0;
}
-EXPORT_SYMBOL_GPL(devlink_resource_size_get);
+EXPORT_SYMBOL_GPL(devl_resource_size_get);
/**
- * devlink_dpipe_table_resource_set - set the resource id
+ * devl_dpipe_table_resource_set - set the resource id
*
- * @devlink: devlink
- * @table_name: table name
- * @resource_id: resource id
- * @resource_units: number of resource's units consumed per table's entry
+ * @devlink: devlink
+ * @table_name: table name
+ * @resource_id: resource id
+ * @resource_units: number of resource's units consumed per table's entry
*/
-int devlink_dpipe_table_resource_set(struct devlink *devlink,
- const char *table_name, u64 resource_id,
- u64 resource_units)
+int devl_dpipe_table_resource_set(struct devlink *devlink,
+ const char *table_name, u64 resource_id,
+ u64 resource_units)
{
struct devlink_dpipe_table *table;
- int err = 0;
- mutex_lock(&devlink->lock);
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
table_name, devlink);
- if (!table) {
- err = -EINVAL;
- goto out;
- }
+ if (!table)
+ return -EINVAL;
+
table->resource_id = resource_id;
table->resource_units = resource_units;
table->resource_valid = true;
-out:
- mutex_unlock(&devlink->lock);
- return err;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_dpipe_table_resource_set);
+
+/**
+ * devl_resource_occ_get_register - register occupancy getter
+ *
+ * @devlink: devlink
+ * @resource_id: resource id
+ * @occ_get: occupancy getter callback
+ * @occ_get_priv: occupancy getter callback priv
+ */
+void devl_resource_occ_get_register(struct devlink *devlink,
+ u64 resource_id,
+ devlink_resource_occ_get_t *occ_get,
+ void *occ_get_priv)
+{
+ struct devlink_resource *resource;
+
+ lockdep_assert_held(&devlink->lock);
+
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (WARN_ON(!resource))
+ return;
+ WARN_ON(resource->occ_get);
+
+ resource->occ_get = occ_get;
+ resource->occ_get_priv = occ_get_priv;
}
-EXPORT_SYMBOL_GPL(devlink_dpipe_table_resource_set);
+EXPORT_SYMBOL_GPL(devl_resource_occ_get_register);
/**
* devlink_resource_occ_get_register - register occupancy getter
@@ -10049,48 +11030,58 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_resource_set);
* @resource_id: resource id
* @occ_get: occupancy getter callback
* @occ_get_priv: occupancy getter callback priv
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
*/
void devlink_resource_occ_get_register(struct devlink *devlink,
u64 resource_id,
devlink_resource_occ_get_t *occ_get,
void *occ_get_priv)
{
+ devl_lock(devlink);
+ devl_resource_occ_get_register(devlink, resource_id,
+ occ_get, occ_get_priv);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register);
+
+/**
+ * devl_resource_occ_get_unregister - unregister occupancy getter
+ *
+ * @devlink: devlink
+ * @resource_id: resource id
+ */
+void devl_resource_occ_get_unregister(struct devlink *devlink,
+ u64 resource_id)
+{
struct devlink_resource *resource;
- mutex_lock(&devlink->lock);
+ lockdep_assert_held(&devlink->lock);
+
resource = devlink_resource_find(devlink, NULL, resource_id);
if (WARN_ON(!resource))
- goto out;
- WARN_ON(resource->occ_get);
+ return;
+ WARN_ON(!resource->occ_get);
- resource->occ_get = occ_get;
- resource->occ_get_priv = occ_get_priv;
-out:
- mutex_unlock(&devlink->lock);
+ resource->occ_get = NULL;
+ resource->occ_get_priv = NULL;
}
-EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register);
+EXPORT_SYMBOL_GPL(devl_resource_occ_get_unregister);
/**
* devlink_resource_occ_get_unregister - unregister occupancy getter
*
* @devlink: devlink
* @resource_id: resource id
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
*/
void devlink_resource_occ_get_unregister(struct devlink *devlink,
u64 resource_id)
{
- struct devlink_resource *resource;
-
- mutex_lock(&devlink->lock);
- resource = devlink_resource_find(devlink, NULL, resource_id);
- if (WARN_ON(!resource))
- goto out;
- WARN_ON(!resource->occ_get);
-
- resource->occ_get = NULL;
- resource->occ_get_priv = NULL;
-out:
- mutex_unlock(&devlink->lock);
+ devl_lock(devlink);
+ devl_resource_occ_get_unregister(devlink, resource_id);
+ devl_unlock(devlink);
}
EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister);
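A short sketch of wiring an occupancy getter to the resource registered in the previous example; the example_priv layout and its atomic counter are assumptions:

static u64 example_kvd_occ_get(void *priv)
{
        struct example_priv *ep = priv;

        return atomic64_read(&ep->kvd_entries_in_use);
}

static void example_occ_init(struct devlink *devlink, struct example_priv *ep)
{
        devl_lock(devlink);
        devl_resource_occ_get_register(devlink, EXAMPLE_RESOURCE_ID_KVD,
                                       example_kvd_occ_get, ep);
        devl_unlock(devlink);
}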
@@ -10311,51 +11302,67 @@ void devlink_param_value_changed(struct devlink *devlink, u32 param_id)
EXPORT_SYMBOL_GPL(devlink_param_value_changed);
/**
- * devlink_region_create - create a new address region
+ * devl_region_create - create a new address region
*
- * @devlink: devlink
- * @ops: region operations and name
- * @region_max_snapshots: Maximum supported number of snapshots for region
- * @region_size: size of region
+ * @devlink: devlink
+ * @ops: region operations and name
+ * @region_max_snapshots: Maximum supported number of snapshots for region
+ * @region_size: size of region
*/
-struct devlink_region *
-devlink_region_create(struct devlink *devlink,
- const struct devlink_region_ops *ops,
- u32 region_max_snapshots, u64 region_size)
+struct devlink_region *devl_region_create(struct devlink *devlink,
+ const struct devlink_region_ops *ops,
+ u32 region_max_snapshots,
+ u64 region_size)
{
struct devlink_region *region;
- int err = 0;
+
+ devl_assert_locked(devlink);
if (WARN_ON(!ops) || WARN_ON(!ops->destructor))
return ERR_PTR(-EINVAL);
- mutex_lock(&devlink->lock);
-
- if (devlink_region_get_by_name(devlink, ops->name)) {
- err = -EEXIST;
- goto unlock;
- }
+ if (devlink_region_get_by_name(devlink, ops->name))
+ return ERR_PTR(-EEXIST);
region = kzalloc(sizeof(*region), GFP_KERNEL);
- if (!region) {
- err = -ENOMEM;
- goto unlock;
- }
+ if (!region)
+ return ERR_PTR(-ENOMEM);
region->devlink = devlink;
region->max_snapshots = region_max_snapshots;
region->ops = ops;
region->size = region_size;
INIT_LIST_HEAD(&region->snapshot_list);
+ mutex_init(&region->snapshot_lock);
list_add_tail(&region->list, &devlink->region_list);
devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
- mutex_unlock(&devlink->lock);
return region;
+}
+EXPORT_SYMBOL_GPL(devl_region_create);
-unlock:
- mutex_unlock(&devlink->lock);
- return ERR_PTR(err);
+/**
+ * devlink_region_create - create a new address region
+ *
+ * @devlink: devlink
+ * @ops: region operations and name
+ * @region_max_snapshots: Maximum supported number of snapshots for region
+ * @region_size: size of region
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+struct devlink_region *
+devlink_region_create(struct devlink *devlink,
+ const struct devlink_region_ops *ops,
+ u32 region_max_snapshots, u64 region_size)
+{
+ struct devlink_region *region;
+
+ devl_lock(devlink);
+ region = devl_region_create(devlink, ops, region_max_snapshots,
+ region_size);
+ devl_unlock(devlink);
+ return region;
}
EXPORT_SYMBOL_GPL(devlink_region_create);
@@ -10366,6 +11373,8 @@ EXPORT_SYMBOL_GPL(devlink_region_create);
* @ops: region operations and name
* @region_max_snapshots: Maximum supported number of snapshots for region
* @region_size: size of region
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
*/
struct devlink_region *
devlink_port_region_create(struct devlink_port *port,
@@ -10376,10 +11385,12 @@ devlink_port_region_create(struct devlink_port *port,
struct devlink_region *region;
int err = 0;
+ ASSERT_DEVLINK_PORT_INITIALIZED(port);
+
if (WARN_ON(!ops) || WARN_ON(!ops->destructor))
return ERR_PTR(-EINVAL);
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
if (devlink_port_region_get_by_name(port, ops->name)) {
err = -EEXIST;
@@ -10398,40 +11409,58 @@ devlink_port_region_create(struct devlink_port *port,
region->port_ops = ops;
region->size = region_size;
INIT_LIST_HEAD(&region->snapshot_list);
+ mutex_init(&region->snapshot_lock);
list_add_tail(&region->list, &port->region_list);
devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
return region;
unlock:
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(devlink_port_region_create);
/**
- * devlink_region_destroy - destroy address region
+ * devl_region_destroy - destroy address region
*
- * @region: devlink region to destroy
+ * @region: devlink region to destroy
*/
-void devlink_region_destroy(struct devlink_region *region)
+void devl_region_destroy(struct devlink_region *region)
{
struct devlink *devlink = region->devlink;
struct devlink_snapshot *snapshot, *ts;
- mutex_lock(&devlink->lock);
+ devl_assert_locked(devlink);
/* Free all snapshots of region */
list_for_each_entry_safe(snapshot, ts, &region->snapshot_list, list)
devlink_region_snapshot_del(region, snapshot);
list_del(&region->list);
+ mutex_destroy(&region->snapshot_lock);
devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
- mutex_unlock(&devlink->lock);
kfree(region);
}
+EXPORT_SYMBOL_GPL(devl_region_destroy);
+
+/**
+ * devlink_region_destroy - destroy address region
+ *
+ * @region: devlink region to destroy
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+void devlink_region_destroy(struct devlink_region *region)
+{
+ struct devlink *devlink = region->devlink;
+
+ devl_lock(devlink);
+ devl_region_destroy(region);
+ devl_unlock(devlink);
+}
EXPORT_SYMBOL_GPL(devlink_region_destroy);
/**
@@ -10451,13 +11480,7 @@ EXPORT_SYMBOL_GPL(devlink_region_destroy);
*/
int devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id)
{
- int err;
-
- mutex_lock(&devlink->lock);
- err = __devlink_region_snapshot_id_get(devlink, id);
- mutex_unlock(&devlink->lock);
-
- return err;
+ return __devlink_region_snapshot_id_get(devlink, id);
}
EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get);
@@ -10473,9 +11496,7 @@ EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get);
*/
void devlink_region_snapshot_id_put(struct devlink *devlink, u32 id)
{
- mutex_lock(&devlink->lock);
__devlink_snapshot_id_decrement(devlink, id);
- mutex_unlock(&devlink->lock);
}
EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_put);
@@ -10494,13 +11515,11 @@ EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_put);
int devlink_region_snapshot_create(struct devlink_region *region,
u8 *data, u32 snapshot_id)
{
- struct devlink *devlink = region->devlink;
int err;
- mutex_lock(&devlink->lock);
+ mutex_lock(&region->snapshot_lock);
err = __devlink_region_snapshot_create(region, data, snapshot_id);
- mutex_unlock(&devlink->lock);
-
+ mutex_unlock(&region->snapshot_lock);
return err;
}
EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);
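A sketch of the driver-side snapshot flow these three helpers now support without devlink->lock; the example function is hypothetical, and data must be a heap buffer whose ownership passes to the region on success:

static int example_region_snapshot(struct devlink *devlink,
                                   struct devlink_region *region,
                                   u8 *data)
{
        u32 id;
        int err;

        err = devlink_region_snapshot_id_get(devlink, &id);
        if (err)
                return err;
        err = devlink_region_snapshot_create(region, data, id);
        devlink_region_snapshot_id_put(devlink, id);    /* drop our id ref */
        return err;
}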
@@ -10865,7 +11884,7 @@ static void devlink_trap_disable(struct devlink *devlink,
}
/**
- * devlink_traps_register - Register packet traps with devlink.
+ * devl_traps_register - Register packet traps with devlink.
* @devlink: devlink.
* @traps: Packet traps.
* @traps_count: Count of provided packet traps.
@@ -10873,16 +11892,16 @@ static void devlink_trap_disable(struct devlink *devlink,
*
* Return: Non-zero value on failure.
*/
-int devlink_traps_register(struct devlink *devlink,
- const struct devlink_trap *traps,
- size_t traps_count, void *priv)
+int devl_traps_register(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count, void *priv)
{
int i, err;
if (!devlink->ops->trap_init || !devlink->ops->trap_action_set)
return -EINVAL;
- mutex_lock(&devlink->lock);
+ devl_assert_locked(devlink);
for (i = 0; i < traps_count; i++) {
const struct devlink_trap *trap = &traps[i];
@@ -10894,7 +11913,6 @@ int devlink_traps_register(struct devlink *devlink,
if (err)
goto err_trap_register;
}
- mutex_unlock(&devlink->lock);
return 0;
@@ -10902,24 +11920,47 @@ err_trap_register:
err_trap_verify:
for (i--; i >= 0; i--)
devlink_trap_unregister(devlink, &traps[i]);
- mutex_unlock(&devlink->lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devl_traps_register);
+
+/**
+ * devlink_traps_register - Register packet traps with devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ * @priv: Driver private information.
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ *
+ * Return: Non-zero value on failure.
+ */
+int devlink_traps_register(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count, void *priv)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_traps_register(devlink, traps, traps_count, priv);
+ devl_unlock(devlink);
return err;
}
EXPORT_SYMBOL_GPL(devlink_traps_register);
/**
- * devlink_traps_unregister - Unregister packet traps from devlink.
+ * devl_traps_unregister - Unregister packet traps from devlink.
* @devlink: devlink.
* @traps: Packet traps.
* @traps_count: Count of provided packet traps.
*/
-void devlink_traps_unregister(struct devlink *devlink,
- const struct devlink_trap *traps,
- size_t traps_count)
+void devl_traps_unregister(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count)
{
int i;
- mutex_lock(&devlink->lock);
+ devl_assert_locked(devlink);
/* Make sure we do not have any packets in-flight while unregistering
* traps by disabling all of them and waiting for a grace period.
*/
@@ -10928,7 +11969,24 @@ void devlink_traps_unregister(struct devlink *devlink,
synchronize_rcu();
for (i = traps_count - 1; i >= 0; i--)
devlink_trap_unregister(devlink, &traps[i]);
- mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_traps_unregister);
+
+/**
+ * devlink_traps_unregister - Unregister packet traps from devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+void devlink_traps_unregister(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count)
+{
+ devl_lock(devlink);
+ devl_traps_unregister(devlink, traps, traps_count);
+ devl_unlock(devlink);
}
EXPORT_SYMBOL_GPL(devlink_traps_unregister);
@@ -10940,8 +11998,8 @@ devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats,
stats = this_cpu_ptr(trap_stats);
u64_stats_update_begin(&stats->syncp);
- stats->rx_bytes += skb_len;
- stats->rx_packets++;
+ u64_stats_add(&stats->rx_bytes, skb_len);
+ u64_stats_inc(&stats->rx_packets);
u64_stats_update_end(&stats->syncp);
}
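The writer side above pairs with a fetch/retry loop on the reader side; a minimal sketch, mirroring the reader pattern used in the drop_monitor changes later in this patch (the helper name is hypothetical):

static u64 example_trap_rx_packets(struct devlink_stats __percpu *trap_stats,
                                   int cpu)
{
        struct devlink_stats *stats = per_cpu_ptr(trap_stats, cpu);
        unsigned int start;
        u64 packets;

        do {
                start = u64_stats_fetch_begin_irq(&stats->syncp);
                packets = u64_stats_read(&stats->rx_packets);
        } while (u64_stats_fetch_retry_irq(&stats->syncp, start));

        return packets;
}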
@@ -11087,20 +12145,20 @@ devlink_trap_group_unregister(struct devlink *devlink,
}
/**
- * devlink_trap_groups_register - Register packet trap groups with devlink.
+ * devl_trap_groups_register - Register packet trap groups with devlink.
* @devlink: devlink.
* @groups: Packet trap groups.
* @groups_count: Count of provided packet trap groups.
*
* Return: Non-zero value on failure.
*/
-int devlink_trap_groups_register(struct devlink *devlink,
- const struct devlink_trap_group *groups,
- size_t groups_count)
+int devl_trap_groups_register(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count)
{
int i, err;
- mutex_lock(&devlink->lock);
+ devl_assert_locked(devlink);
for (i = 0; i < groups_count; i++) {
const struct devlink_trap_group *group = &groups[i];
@@ -11112,7 +12170,6 @@ int devlink_trap_groups_register(struct devlink *devlink,
if (err)
goto err_trap_group_register;
}
- mutex_unlock(&devlink->lock);
return 0;
@@ -11120,27 +12177,66 @@ err_trap_group_register:
err_trap_group_verify:
for (i--; i >= 0; i--)
devlink_trap_group_unregister(devlink, &groups[i]);
- mutex_unlock(&devlink->lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devl_trap_groups_register);
+
+/**
+ * devlink_trap_groups_register - Register packet trap groups with devlink.
+ * @devlink: devlink.
+ * @groups: Packet trap groups.
+ * @groups_count: Count of provided packet trap groups.
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ *
+ * Return: Non-zero value on failure.
+ */
+int devlink_trap_groups_register(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_trap_groups_register(devlink, groups, groups_count);
+ devl_unlock(devlink);
return err;
}
EXPORT_SYMBOL_GPL(devlink_trap_groups_register);
/**
- * devlink_trap_groups_unregister - Unregister packet trap groups from devlink.
+ * devl_trap_groups_unregister - Unregister packet trap groups from devlink.
* @devlink: devlink.
* @groups: Packet trap groups.
* @groups_count: Count of provided packet trap groups.
*/
-void devlink_trap_groups_unregister(struct devlink *devlink,
- const struct devlink_trap_group *groups,
- size_t groups_count)
+void devl_trap_groups_unregister(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count)
{
int i;
- mutex_lock(&devlink->lock);
+ devl_assert_locked(devlink);
for (i = groups_count - 1; i >= 0; i--)
devlink_trap_group_unregister(devlink, &groups[i]);
- mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_trap_groups_unregister);
+
+/**
+ * devlink_trap_groups_unregister - Unregister packet trap groups from devlink.
+ * @devlink: devlink.
+ * @groups: Packet trap groups.
+ * @groups_count: Count of provided packet trap groups.
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+void devlink_trap_groups_unregister(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count)
+{
+ devl_lock(devlink);
+ devl_trap_groups_unregister(devlink, groups, groups_count);
+ devl_unlock(devlink);
}
EXPORT_SYMBOL_GPL(devlink_trap_groups_unregister);
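A hedged sketch of composing the locked trap APIs in one critical section; the generic trap/group macros and IDs shown are assumptions pulled from <net/devlink.h>, and this presumes the devlink ops implement trap_init and trap_action_set as required above:

static const struct devlink_trap_group example_trap_groups[] = {
        DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 0),
};

static const struct devlink_trap example_traps[] = {
        DEVLINK_TRAP_GENERIC(DROP, DROP, SMAC_MC,
                             DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS, 0),
};

static int example_traps_init(struct devlink *devlink, void *priv)
{
        int err;

        devl_lock(devlink);
        err = devl_trap_groups_register(devlink, example_trap_groups,
                                        ARRAY_SIZE(example_trap_groups));
        if (err)
                goto out;
        err = devl_traps_register(devlink, example_traps,
                                  ARRAY_SIZE(example_traps), priv);
        if (err)
                devl_trap_groups_unregister(devlink, example_trap_groups,
                                            ARRAY_SIZE(example_trap_groups));
out:
        devl_unlock(devlink);
        return err;
}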
@@ -11226,7 +12322,7 @@ devlink_trap_policer_unregister(struct devlink *devlink,
}
/**
- * devlink_trap_policers_register - Register packet trap policers with devlink.
+ * devl_trap_policers_register - Register packet trap policers with devlink.
* @devlink: devlink.
* @policers: Packet trap policers.
* @policers_count: Count of provided packet trap policers.
@@ -11234,13 +12330,13 @@ devlink_trap_policer_unregister(struct devlink *devlink,
* Return: Non-zero value on failure.
*/
int
-devlink_trap_policers_register(struct devlink *devlink,
- const struct devlink_trap_policer *policers,
- size_t policers_count)
+devl_trap_policers_register(struct devlink *devlink,
+ const struct devlink_trap_policer *policers,
+ size_t policers_count)
{
int i, err;
- mutex_lock(&devlink->lock);
+ devl_assert_locked(devlink);
for (i = 0; i < policers_count; i++) {
const struct devlink_trap_policer *policer = &policers[i];
@@ -11255,44 +12351,40 @@ devlink_trap_policers_register(struct devlink *devlink,
if (err)
goto err_trap_policer_register;
}
- mutex_unlock(&devlink->lock);
-
return 0;
err_trap_policer_register:
err_trap_policer_verify:
for (i--; i >= 0; i--)
devlink_trap_policer_unregister(devlink, &policers[i]);
- mutex_unlock(&devlink->lock);
return err;
}
-EXPORT_SYMBOL_GPL(devlink_trap_policers_register);
+EXPORT_SYMBOL_GPL(devl_trap_policers_register);
/**
- * devlink_trap_policers_unregister - Unregister packet trap policers from devlink.
+ * devl_trap_policers_unregister - Unregister packet trap policers from devlink.
* @devlink: devlink.
* @policers: Packet trap policers.
* @policers_count: Count of provided packet trap policers.
*/
void
-devlink_trap_policers_unregister(struct devlink *devlink,
- const struct devlink_trap_policer *policers,
- size_t policers_count)
+devl_trap_policers_unregister(struct devlink *devlink,
+ const struct devlink_trap_policer *policers,
+ size_t policers_count)
{
int i;
- mutex_lock(&devlink->lock);
+ devl_assert_locked(devlink);
for (i = policers_count - 1; i >= 0; i--)
devlink_trap_policer_unregister(devlink, &policers[i]);
- mutex_unlock(&devlink->lock);
}
-EXPORT_SYMBOL_GPL(devlink_trap_policers_unregister);
+EXPORT_SYMBOL_GPL(devl_trap_policers_unregister);
static void __devlink_compat_running_version(struct devlink *devlink,
char *buf, size_t len)
{
+ struct devlink_info_req req = {};
const struct nlattr *nlattr;
- struct devlink_info_req req;
struct sk_buff *msg;
int rem, err;
@@ -11338,9 +12430,9 @@ void devlink_compat_running_version(struct devlink *devlink,
if (!devlink->ops->info_get)
return;
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
__devlink_compat_running_version(devlink, buf, len);
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
}
int devlink_compat_flash_update(struct devlink *devlink, const char *file_name)
@@ -11355,11 +12447,11 @@ int devlink_compat_flash_update(struct devlink *devlink, const char *file_name)
if (ret)
return ret;
- mutex_lock(&devlink->lock);
+ devl_lock(devlink);
devlink_flash_update_begin_notify(devlink);
ret = devlink->ops->flash_update(devlink, &params, NULL);
devlink_flash_update_end_notify(devlink);
- mutex_unlock(&devlink->lock);
+ devl_unlock(devlink);
release_firmware(params.fw);
@@ -11412,25 +12504,18 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
/* In case network namespace is getting destroyed, reload
* all devlink instances from this namespace into init_net.
*/
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), net))
- goto retry;
-
+ devlinks_xa_for_each_registered_get(net, index, devlink) {
WARN_ON(!(devlink->features & DEVLINK_F_RELOAD));
+ mutex_lock(&devlink->lock);
err = devlink_reload(devlink, &init_net,
DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
DEVLINK_RELOAD_LIMIT_UNSPEC,
&actions_performed, NULL);
+ mutex_unlock(&devlink->lock);
if (err && err != -EOPNOTSUPP)
pr_warn("Failed to reload devlink instance into init_net\n");
-retry:
devlink_put(devlink);
}
- mutex_unlock(&devlink_mutex);
}
static struct pernet_operations devlink_pernet_ops __net_initdata = {
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 7b288a121a41..f084a4a6b7ab 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -51,12 +51,11 @@ static bool monitor_hw;
/* net_dm_mutex
*
* An overall lock guarding every operation coming from userspace.
- * It also guards the global 'hw_stats_list' list.
*/
static DEFINE_MUTEX(net_dm_mutex);
struct net_dm_stats {
- u64 dropped;
+ u64_stats_t dropped;
struct u64_stats_sync syncp;
};
@@ -87,11 +86,9 @@ struct per_cpu_dm_data {
};
struct dm_hw_stat_delta {
- struct net_device *dev;
unsigned long last_rx;
- struct list_head list;
- struct rcu_head rcu;
unsigned long last_drop_val;
+ struct rcu_head rcu;
};
static struct genl_family net_drop_monitor_family;
@@ -102,7 +99,6 @@ static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data);
static int dm_hit_limit = 64;
static int dm_delay = 1;
static unsigned long dm_hw_check_delta = 2*HZ;
-static LIST_HEAD(hw_stats_list);
static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
static u32 net_dm_trunc_len;
@@ -126,6 +122,7 @@ struct net_dm_skb_cb {
struct devlink_trap_metadata *hw_metadata;
void *pc;
};
+ enum skb_drop_reason reason;
};
#define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))
@@ -273,29 +270,27 @@ static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb,
static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
int work, int budget)
{
- struct dm_hw_stat_delta *new_stat;
-
+ struct net_device *dev = napi->dev;
+ struct dm_hw_stat_delta *stat;
/*
* Don't check napi structures with no associated device
*/
- if (!napi->dev)
+ if (!dev)
return;
rcu_read_lock();
- list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
+ stat = rcu_dereference(dev->dm_private);
+ if (stat) {
/*
* only add a note to our monitor buffer if:
- * 1) this is the dev we received on
- * 2) its after the last_rx delta
- * 3) our rx_dropped count has gone up
+ * 1) its after the last_rx delta
+ * 2) our rx_dropped count has gone up
*/
- if ((new_stat->dev == napi->dev) &&
- (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) &&
- (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
+ if (time_after(jiffies, stat->last_rx + dm_hw_check_delta) &&
+ (dev->stats.rx_dropped != stat->last_drop_val)) {
trace_drop_common(NULL, NULL);
- new_stat->last_drop_val = napi->dev->stats.rx_dropped;
- new_stat->last_rx = jiffies;
- break;
+ stat->last_drop_val = dev->stats.rx_dropped;
+ stat->last_rx = jiffies;
}
}
rcu_read_unlock();
@@ -469,7 +464,7 @@ net_dm_hw_trap_summary_probe(void *ignore, const struct devlink *devlink,
goto out;
hw_entry = &hw_entries->entries[hw_entries->num_entries];
- strlcpy(hw_entry->trap_name, metadata->trap_name,
+ strscpy(hw_entry->trap_name, metadata->trap_name,
NET_DM_MAX_HW_TRAP_NAME_LEN - 1);
hw_entry->count = 1;
hw_entries->num_entries++;
@@ -498,6 +493,7 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
{
ktime_t tstamp = ktime_get_real();
struct per_cpu_dm_data *data;
+ struct net_dm_skb_cb *cb;
struct sk_buff *nskb;
unsigned long flags;
@@ -508,7 +504,11 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
if (!nskb)
return;
- NET_DM_SKB_CB(nskb)->pc = location;
+ if (unlikely(reason >= SKB_DROP_REASON_MAX || reason <= 0))
+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
+ cb = NET_DM_SKB_CB(nskb);
+ cb->reason = reason;
+ cb->pc = location;
/* Override the timestamp because we care about the time when the
* packet was dropped.
*/
@@ -530,7 +530,7 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
unlock_free:
spin_unlock_irqrestore(&data->drop_queue.lock, flags);
u64_stats_update_begin(&data->stats.syncp);
- data->stats.dropped++;
+ u64_stats_inc(&data->stats.dropped);
u64_stats_update_end(&data->stats.syncp);
consume_skb(nskb);
}
@@ -553,7 +553,8 @@ static size_t net_dm_in_port_size(void)
#define NET_DM_MAX_SYMBOL_LEN 40
-static size_t net_dm_packet_report_size(size_t payload_len)
+static size_t net_dm_packet_report_size(size_t payload_len,
+ enum skb_drop_reason reason)
{
size_t size;
@@ -574,6 +575,8 @@ static size_t net_dm_packet_report_size(size_t payload_len)
nla_total_size(sizeof(u32)) +
/* NET_DM_ATTR_PROTO */
nla_total_size(sizeof(u16)) +
+ /* NET_DM_ATTR_REASON */
+ nla_total_size(strlen(drop_reasons[reason]) + 1) +
/* NET_DM_ATTR_PAYLOAD */
nla_total_size(payload_len);
}
@@ -606,7 +609,7 @@ nla_put_failure:
static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
size_t payload_len)
{
- u64 pc = (u64)(uintptr_t) NET_DM_SKB_CB(skb)->pc;
+ struct net_dm_skb_cb *cb = NET_DM_SKB_CB(skb);
char buf[NET_DM_MAX_SYMBOL_LEN];
struct nlattr *attr;
void *hdr;
@@ -620,10 +623,15 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_SW))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, pc, NET_DM_ATTR_PAD))
+ if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, (u64)(uintptr_t)cb->pc,
+ NET_DM_ATTR_PAD))
goto nla_put_failure;
- snprintf(buf, sizeof(buf), "%pS", NET_DM_SKB_CB(skb)->pc);
+ if (nla_put_string(msg, NET_DM_ATTR_REASON,
+ drop_reasons[cb->reason]))
+ goto nla_put_failure;
+
+ snprintf(buf, sizeof(buf), "%pS", cb->pc);
if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf))
goto nla_put_failure;
@@ -679,7 +687,9 @@ static void net_dm_packet_report(struct sk_buff *skb)
if (net_dm_trunc_len)
payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
- msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL);
+ msg = nlmsg_new(net_dm_packet_report_size(payload_len,
+ NET_DM_SKB_CB(skb)->reason),
+ GFP_KERNEL);
if (!msg)
goto out;
@@ -854,7 +864,8 @@ net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata)
}
hw_metadata->input_dev = metadata->input_dev;
- dev_hold_track(hw_metadata->input_dev, &hw_metadata->dev_tracker, GFP_ATOMIC);
+ netdev_hold(hw_metadata->input_dev, &hw_metadata->dev_tracker,
+ GFP_ATOMIC);
return hw_metadata;
@@ -870,7 +881,7 @@ free_hw_metadata:
static void
net_dm_hw_metadata_free(struct devlink_trap_metadata *hw_metadata)
{
- dev_put_track(hw_metadata->input_dev, &hw_metadata->dev_tracker);
+ netdev_put(hw_metadata->input_dev, &hw_metadata->dev_tracker);
kfree(hw_metadata->fa_cookie);
kfree(hw_metadata->trap_name);
kfree(hw_metadata->trap_group_name);
@@ -975,7 +986,7 @@ net_dm_hw_trap_packet_probe(void *ignore, const struct devlink *devlink,
unlock_free:
spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
u64_stats_update_begin(&hw_data->stats.syncp);
- hw_data->stats.dropped++;
+ u64_stats_inc(&hw_data->stats.dropped);
u64_stats_update_end(&hw_data->stats.syncp);
net_dm_hw_metadata_free(n_hw_metadata);
free:
@@ -1165,7 +1176,6 @@ err_module_put:
static void net_dm_trace_off_set(void)
{
- struct dm_hw_stat_delta *new_stat, *temp;
const struct net_dm_alert_ops *ops;
int cpu;
@@ -1189,13 +1199,6 @@ static void net_dm_trace_off_set(void)
consume_skb(skb);
}
- list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
- if (new_stat->dev == NULL) {
- list_del_rcu(&new_stat->list);
- kfree_rcu(new_stat, rcu);
- }
- }
-
module_put(THIS_MODULE);
}
@@ -1430,10 +1433,10 @@ static void net_dm_stats_read(struct net_dm_stats *stats)
do {
start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
- dropped = cpu_stats->dropped;
+ dropped = u64_stats_read(&cpu_stats->dropped);
} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
- stats->dropped += dropped;
+ u64_stats_add(&stats->dropped, dropped);
}
}
@@ -1449,7 +1452,7 @@ static int net_dm_stats_put(struct sk_buff *msg)
return -EMSGSIZE;
if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
- stats.dropped, NET_DM_ATTR_PAD))
+ u64_stats_read(&stats.dropped), NET_DM_ATTR_PAD))
goto nla_put_failure;
nla_nest_end(msg, attr);
@@ -1474,10 +1477,10 @@ static void net_dm_hw_stats_read(struct net_dm_stats *stats)
do {
start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
- dropped = cpu_stats->dropped;
+ dropped = u64_stats_read(&cpu_stats->dropped);
} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
- stats->dropped += dropped;
+ u64_stats_add(&stats->dropped, dropped);
}
}
@@ -1493,7 +1496,7 @@ static int net_dm_hw_stats_put(struct sk_buff *msg)
return -EMSGSIZE;
if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
- stats.dropped, NET_DM_ATTR_PAD))
+ u64_stats_read(&stats.dropped), NET_DM_ATTR_PAD))
goto nla_put_failure;
nla_nest_end(msg, attr);
@@ -1556,38 +1559,28 @@ static int dropmon_net_event(struct notifier_block *ev_block,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct dm_hw_stat_delta *new_stat = NULL;
- struct dm_hw_stat_delta *tmp;
+ struct dm_hw_stat_delta *stat;
switch (event) {
case NETDEV_REGISTER:
- new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);
+ if (WARN_ON_ONCE(rtnl_dereference(dev->dm_private)))
+ break;
+ stat = kzalloc(sizeof(*stat), GFP_KERNEL);
+ if (!stat)
+ break;
- if (!new_stat)
- goto out;
+ stat->last_rx = jiffies;
+ rcu_assign_pointer(dev->dm_private, stat);
- new_stat->dev = dev;
- new_stat->last_rx = jiffies;
- mutex_lock(&net_dm_mutex);
- list_add_rcu(&new_stat->list, &hw_stats_list);
- mutex_unlock(&net_dm_mutex);
break;
case NETDEV_UNREGISTER:
- mutex_lock(&net_dm_mutex);
- list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
- if (new_stat->dev == dev) {
- new_stat->dev = NULL;
- if (trace_state == TRACE_OFF) {
- list_del_rcu(&new_stat->list);
- kfree_rcu(new_stat, rcu);
- break;
- }
- }
+ stat = rtnl_dereference(dev->dm_private);
+ if (stat) {
+ rcu_assign_pointer(dev->dm_private, NULL);
+ kfree_rcu(stat, rcu);
}
- mutex_unlock(&net_dm_mutex);
break;
}
-out:
return NOTIFY_DONE;
}
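The notifier above replaces the global list with a per-device RCU pointer; a minimal sketch of that publish/retire contract in isolation (the example_* wrappers are hypothetical, the calls mirror the patch):

static void example_dm_attach(struct net_device *dev,
                              struct dm_hw_stat_delta *stat)
{
        ASSERT_RTNL();                  /* notifier context: RTNL held */
        rcu_assign_pointer(dev->dm_private, stat);
}

static void example_dm_detach(struct net_device *dev)
{
        struct dm_hw_stat_delta *stat = rtnl_dereference(dev->dm_private);

        if (stat) {
                rcu_assign_pointer(dev->dm_private, NULL);
                kfree_rcu(stat, rcu);   /* readers may still see the old ptr */
        }
}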
@@ -1652,6 +1645,7 @@ static struct genl_family net_drop_monitor_family __ro_after_init = {
.module = THIS_MODULE,
.small_ops = dropmon_ops,
.n_small_ops = ARRAY_SIZE(dropmon_ops),
+ .resv_start_op = NET_DM_CMD_STATS_GET + 1,
.mcgrps = dropmon_mcgrps,
.n_mcgrps = ARRAY_SIZE(dropmon_mcgrps),
};
diff --git a/net/core/dst.c b/net/core/dst.c
index d16c2c9bfebd..bc9c9be4e080 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -49,7 +49,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
unsigned short flags)
{
dst->dev = dev;
- dev_hold_track(dev, &dst->dev_tracker, GFP_ATOMIC);
+ netdev_hold(dev, &dst->dev_tracker, GFP_ATOMIC);
dst->ops = ops;
dst_init_metrics(dst, dst_default_metrics.metrics, true);
dst->expires = 0UL;
@@ -117,7 +117,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
if (dst->ops->destroy)
dst->ops->destroy(dst);
- dev_put_track(dst->dev, &dst->dev_tracker);
+ netdev_put(dst->dev, &dst->dev_tracker);
lwtstate_put(dst->lwtstate);
@@ -159,8 +159,8 @@ void dst_dev_put(struct dst_entry *dst)
dst->input = dst_discard;
dst->output = dst_discard_out;
dst->dev = blackhole_netdev;
- dev_replace_track(dev, blackhole_netdev, &dst->dev_tracker,
- GFP_ATOMIC);
+ netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker,
+ GFP_ATOMIC);
}
EXPORT_SYMBOL(dst_dev_put);
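
The dst.c hunks above (and the failover.c ones below) are the mechanical rename from dev_hold_track()/dev_put_track()/dev_replace_track() to netdev_hold()/netdev_put()/netdev_ref_replace(); the tracked-reference semantics are unchanged. A minimal sketch of the pattern, with a hypothetical holder struct:

    #include <linux/netdevice.h>

    struct demo_holder {
    	struct net_device *dev;
    	netdevice_tracker dev_tracker;	/* pairs each hold with its put */
    };

    static void demo_take(struct demo_holder *h, struct net_device *dev)
    {
    	h->dev = dev;
    	netdev_hold(dev, &h->dev_tracker, GFP_KERNEL);
    }

    static void demo_release(struct demo_holder *h)
    {
    	netdev_put(h->dev, &h->dev_tracker);	/* must match the hold above */
    	h->dev = NULL;
    }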
diff --git a/net/core/failover.c b/net/core/failover.c
index dcaa92a85ea2..864d2d83eff4 100644
--- a/net/core/failover.c
+++ b/net/core/failover.c
@@ -252,7 +252,7 @@ struct failover *failover_register(struct net_device *dev,
return ERR_PTR(-ENOMEM);
rcu_assign_pointer(failover->ops, ops);
- dev_hold_track(dev, &failover->dev_tracker, GFP_KERNEL);
+ netdev_hold(dev, &failover->dev_tracker, GFP_KERNEL);
dev->priv_flags |= IFF_FAILOVER;
rcu_assign_pointer(failover->failover_dev, dev);
@@ -285,7 +285,7 @@ void failover_unregister(struct failover *failover)
failover_dev->name);
failover_dev->priv_flags &= ~IFF_FAILOVER;
- dev_put_track(failover_dev, &failover->dev_tracker);
+ netdev_put(failover_dev, &failover->dev_tracker);
spin_lock(&failover_lock);
list_del(&failover->list);
diff --git a/net/core/filter.c b/net/core/filter.c
index 4603b7cd3cd1..bb0136e7a8e4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -18,6 +18,7 @@
*/
#include <linux/atomic.h>
+#include <linux/bpf_verifier.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
@@ -78,6 +79,7 @@
#include <linux/btf_ids.h>
#include <net/tls.h>
#include <net/xdp.h>
+#include <net/mptcp.h>
static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id);
@@ -236,7 +238,7 @@ BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
data, int, headlen, int, offset)
{
- u16 tmp, *ptr;
+ __be16 tmp, *ptr;
const int len = sizeof(tmp);
if (offset >= 0) {
@@ -263,7 +265,7 @@ BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
data, int, headlen, int, offset)
{
- u32 tmp, *ptr;
+ __be32 tmp, *ptr;
const int len = sizeof(tmp);
if (likely(offset >= 0)) {
@@ -1213,10 +1215,11 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
u32 filter_size = bpf_prog_size(fp->prog->len);
+ int optmem_max = READ_ONCE(sysctl_optmem_max);
/* same check as in sock_kmalloc() */
- if (filter_size <= sysctl_optmem_max &&
- atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
+ if (filter_size <= optmem_max &&
+ atomic_read(&sk->sk_omem_alloc) + filter_size < optmem_max) {
atomic_add(filter_size, &sk->sk_omem_alloc);
return true;
}
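
sysctl_optmem_max is written locklessly from the sysctl path, so the hunk above snapshots it once with READ_ONCE() instead of re-reading it across the two comparisons. A minimal sketch of the idiom:

    #include <net/sock.h>

    /* Sketch only: snapshot the knob so a concurrent sysctl write cannot
     * yield two different values within a single admission check.
     */
    static bool demo_within_optmem(struct sock *sk, u32 size)
    {
    	int optmem_max = READ_ONCE(sysctl_optmem_max);

    	return size <= optmem_max &&
    	       atomic_read(&sk->sk_omem_alloc) + size < optmem_max;
    }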
@@ -1547,7 +1550,7 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
if (IS_ERR(prog))
return PTR_ERR(prog);
- if (bpf_prog_size(prog->len) > sysctl_optmem_max)
+ if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max))
err = -ENOMEM;
else
err = reuseport_attach_prog(sk, prog);
@@ -1614,7 +1617,7 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
}
} else {
/* BPF_PROG_TYPE_SOCKET_FILTER */
- if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
+ if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) {
err = -ENOMEM;
goto err_prog_put;
}
@@ -1687,7 +1690,7 @@ BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
return -EINVAL;
- if (unlikely(offset > 0xffff))
+ if (unlikely(offset > INT_MAX))
return -EFAULT;
if (unlikely(bpf_try_make_writable(skb, offset + len)))
return -EFAULT;
@@ -1722,7 +1725,7 @@ BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
{
void *ptr;
- if (unlikely(offset > 0xffff))
+ if (unlikely(offset > INT_MAX))
goto err_clear;
ptr = skb_header_pointer(skb, offset, len, to);
@@ -2107,7 +2110,7 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
}
skb->dev = dev;
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
dev_xmit_recursion_inc();
ret = dev_queue_xmit(skb);
@@ -2176,7 +2179,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
}
skb->dev = dev;
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
skb = skb_expand_head(skb, hh_len);
@@ -2274,7 +2277,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
}
skb->dev = dev;
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
skb = skb_expand_head(skb, hh_len);
@@ -2603,7 +2606,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
* account for the headroom.
*/
bytes_sg_total = start - offset + bytes;
- if (!test_bit(i, &msg->sg.copy) && bytes_sg_total <= len)
+ if (!test_bit(i, msg->sg.copy) && bytes_sg_total <= len)
goto out;
/* At this point we need to linearize multiple scatterlist
@@ -2710,6 +2713,9 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
if (unlikely(flags))
return -EINVAL;
+ if (unlikely(len == 0))
+ return 0;
+
/* First find the starting scatterlist element */
i = msg->sg.start;
do {
@@ -2809,7 +2815,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
/* Place newly allocated data buffer */
sk_mem_charge(msg->sk, len);
msg->sg.size += len;
- __clear_bit(new, &msg->sg.copy);
+ __clear_bit(new, msg->sg.copy);
sg_set_page(&msg->sg.data[new], page, len + copy, 0);
if (rsge.length) {
get_page(sg_page(&rsge));
@@ -3005,7 +3011,7 @@ BPF_CALL_0(bpf_get_cgroup_classid_curr)
return __task_get_classid(current);
}
-static const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = {
+const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = {
.func = bpf_get_cgroup_classid_curr,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -3783,6 +3789,28 @@ static const struct bpf_func_proto sk_skb_change_head_proto = {
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_ANYTHING,
};
+
+BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp)
+{
+ return xdp_get_buff_len(xdp);
+}
+
+static const struct bpf_func_proto bpf_xdp_get_buff_len_proto = {
+ .func = bpf_xdp_get_buff_len,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff)
+
+const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = {
+ .func = bpf_xdp_get_buff_len,
+ .gpl_only = false,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_xdp_get_buff_len_bpf_ids[0],
+};
+
static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
{
return xdp_data_meta_unsupported(xdp) ? 0 :
@@ -3817,11 +3845,208 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
.arg2_type = ARG_ANYTHING,
};
+static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
+ void *buf, unsigned long len, bool flush)
+{
+ unsigned long ptr_len, ptr_off = 0;
+ skb_frag_t *next_frag, *end_frag;
+ struct skb_shared_info *sinfo;
+ void *src, *dst;
+ u8 *ptr_buf;
+
+ if (likely(xdp->data_end - xdp->data >= off + len)) {
+ src = flush ? buf : xdp->data + off;
+ dst = flush ? xdp->data + off : buf;
+ memcpy(dst, src, len);
+ return;
+ }
+
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ end_frag = &sinfo->frags[sinfo->nr_frags];
+ next_frag = &sinfo->frags[0];
+
+ ptr_len = xdp->data_end - xdp->data;
+ ptr_buf = xdp->data;
+
+ while (true) {
+ if (off < ptr_off + ptr_len) {
+ unsigned long copy_off = off - ptr_off;
+ unsigned long copy_len = min(len, ptr_len - copy_off);
+
+ src = flush ? buf : ptr_buf + copy_off;
+ dst = flush ? ptr_buf + copy_off : buf;
+ memcpy(dst, src, copy_len);
+
+ off += copy_len;
+ len -= copy_len;
+ buf += copy_len;
+ }
+
+ if (!len || next_frag == end_frag)
+ break;
+
+ ptr_off += ptr_len;
+ ptr_buf = skb_frag_address(next_frag);
+ ptr_len = skb_frag_size(next_frag);
+ next_frag++;
+ }
+}
+
+static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
+{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ u32 size = xdp->data_end - xdp->data;
+ void *addr = xdp->data;
+ int i;
+
+ if (unlikely(offset > 0xffff || len > 0xffff))
+ return ERR_PTR(-EFAULT);
+
+ if (offset + len > xdp_get_buff_len(xdp))
+ return ERR_PTR(-EINVAL);
+
+ if (offset < size) /* linear area */
+ goto out;
+
+ offset -= size;
+ for (i = 0; i < sinfo->nr_frags; i++) { /* paged area */
+ u32 frag_size = skb_frag_size(&sinfo->frags[i]);
+
+ if (offset < frag_size) {
+ addr = skb_frag_address(&sinfo->frags[i]);
+ size = frag_size;
+ break;
+ }
+ offset -= frag_size;
+ }
+out:
+ return offset + len <= size ? addr + offset : NULL;
+}
+
+BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset,
+ void *, buf, u32, len)
+{
+ void *ptr;
+
+ ptr = bpf_xdp_pointer(xdp, offset, len);
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+
+ if (!ptr)
+ bpf_xdp_copy_buf(xdp, offset, buf, len, false);
+ else
+ memcpy(buf, ptr, len);
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_load_bytes_proto = {
+ .func = bpf_xdp_load_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg4_type = ARG_CONST_SIZE,
+};
+
+BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset,
+ void *, buf, u32, len)
+{
+ void *ptr;
+
+ ptr = bpf_xdp_pointer(xdp, offset, len);
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+
+ if (!ptr)
+ bpf_xdp_copy_buf(xdp, offset, buf, len, true);
+ else
+ memcpy(ptr, buf, len);
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_store_bytes_proto = {
+ .func = bpf_xdp_store_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg4_type = ARG_CONST_SIZE,
+};
+
+static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
+{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1];
+ struct xdp_rxq_info *rxq = xdp->rxq;
+ unsigned int tailroom;
+
+ if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz)
+ return -EOPNOTSUPP;
+
+ tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);
+ if (unlikely(offset > tailroom))
+ return -EINVAL;
+
+ memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset);
+ skb_frag_size_add(frag, offset);
+ sinfo->xdp_frags_size += offset;
+
+ return 0;
+}
+
+static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
+{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ int i, n_frags_free = 0, len_free = 0;
+
+ if (unlikely(offset > (int)xdp_get_buff_len(xdp) - ETH_HLEN))
+ return -EINVAL;
+
+ for (i = sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ int shrink = min_t(int, offset, skb_frag_size(frag));
+
+ len_free += shrink;
+ offset -= shrink;
+
+ if (skb_frag_size(frag) == shrink) {
+ struct page *page = skb_frag_page(frag);
+
+ __xdp_return(page_address(page), &xdp->rxq->mem,
+ false, NULL);
+ n_frags_free++;
+ } else {
+ skb_frag_size_sub(frag, shrink);
+ break;
+ }
+ }
+ sinfo->nr_frags -= n_frags_free;
+ sinfo->xdp_frags_size -= len_free;
+
+ if (unlikely(!sinfo->nr_frags)) {
+ xdp_buff_clear_frags_flag(xdp);
+ xdp->data_end -= offset;
+ }
+
+ return 0;
+}
+
BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
{
void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
void *data_end = xdp->data_end + offset;
+ if (unlikely(xdp_buff_has_frags(xdp))) { /* non-linear xdp buff */
+ if (offset < 0)
+ return bpf_xdp_frags_shrink_tail(xdp, -offset);
+
+ return bpf_xdp_frags_increase_tail(xdp, offset);
+ }
+
/* Notice that xdp_data_hard_end have reserved some tailroom */
if (unlikely(data_end > data_hard_end))
return -EINVAL;
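
bpf_xdp_pointer() above returns a direct pointer when the requested span sits inside one buffer and NULL when it straddles a frag boundary, in which case the load/store helpers fall back to bpf_xdp_copy_buf(). From BPF, that makes multi-buffer access look linear; a minimal sketch, assuming libbpf's xdp.frags section naming:

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("xdp.frags")
    int demo_peek(struct xdp_md *ctx)
    {
    	__u8 buf[16];
    	__u64 len = bpf_xdp_get_buff_len(ctx);	/* linear + frags */

    	if (len < sizeof(buf))
    		return XDP_PASS;

    	/* copies across the linear/paged boundary when needed */
    	if (bpf_xdp_load_bytes(ctx, 0, buf, sizeof(buf)) < 0)
    		return XDP_ABORTED;

    	return XDP_PASS;
    }

    char _license[] SEC("license") = "GPL";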
@@ -4047,6 +4272,14 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
enum bpf_map_type map_type = ri->map_type;
+ /* XDP_REDIRECT is not fully supported yet for xdp frags since
+ * not all XDP-capable drivers can map a non-linear xdp_frame in
+ * ndo_xdp_xmit.
+ */
+ if (unlikely(xdp_buff_has_frags(xdp) &&
+ map_type != BPF_MAP_TYPE_CPUMAP))
+ return -EOPNOTSUPP;
+
if (map_type == BPF_MAP_TYPE_XSKMAP)
return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
@@ -4257,7 +4490,8 @@ BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key
void *to_orig = to;
int err;
- if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) {
+ if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6 |
+ BPF_F_TUNINFO_FLAGS)))) {
err = -EINVAL;
goto err_clear;
}
@@ -4268,6 +4502,7 @@ BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
err = -EINVAL;
switch (size) {
+ case offsetof(struct bpf_tunnel_key, local_ipv6[0]):
case offsetof(struct bpf_tunnel_key, tunnel_label):
case offsetof(struct bpf_tunnel_key, tunnel_ext):
goto set_compat;
@@ -4288,15 +4523,22 @@ set_compat:
to->tunnel_id = be64_to_cpu(info->key.tun_id);
to->tunnel_tos = info->key.tos;
to->tunnel_ttl = info->key.ttl;
- to->tunnel_ext = 0;
+ if (flags & BPF_F_TUNINFO_FLAGS)
+ to->tunnel_flags = info->key.tun_flags;
+ else
+ to->tunnel_ext = 0;
if (flags & BPF_F_TUNINFO_IPV6) {
memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
sizeof(to->remote_ipv6));
+ memcpy(to->local_ipv6, &info->key.u.ipv6.dst,
+ sizeof(to->local_ipv6));
to->tunnel_label = be32_to_cpu(info->key.label);
} else {
to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
+ to->local_ipv4 = be32_to_cpu(info->key.u.ipv4.dst);
+ memset(&to->local_ipv6[1], 0, sizeof(__u32) * 3);
to->tunnel_label = 0;
}
@@ -4367,6 +4609,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
return -EINVAL;
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
switch (size) {
+ case offsetof(struct bpf_tunnel_key, local_ipv6[0]):
case offsetof(struct bpf_tunnel_key, tunnel_label):
case offsetof(struct bpf_tunnel_key, tunnel_ext):
case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
@@ -4409,10 +4652,14 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
info->mode |= IP_TUNNEL_INFO_IPV6;
memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
sizeof(from->remote_ipv6));
+ memcpy(&info->key.u.ipv6.src, from->local_ipv6,
+ sizeof(from->local_ipv6));
info->key.label = cpu_to_be32(from->tunnel_label) &
IPV6_FLOWLABEL_MASK;
} else {
info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
+ info->key.u.ipv4.src = cpu_to_be32(from->local_ipv4);
+ info->key.flow_flags = FLOWI_FLAG_ANYSRC;
}
return 0;
@@ -4590,10 +4837,12 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
};
#endif
-static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
+static unsigned long bpf_xdp_copy(void *dst, const void *ctx,
unsigned long off, unsigned long len)
{
- memcpy(dst_buff, src_buff + off, len);
+ struct xdp_buff *xdp = (struct xdp_buff *)ctx;
+
+ bpf_xdp_copy_buf(xdp, off, dst, len, false);
return 0;
}
@@ -4604,11 +4853,11 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
return -EINVAL;
- if (unlikely(!xdp ||
- xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+
+ if (unlikely(!xdp || xdp_size > xdp_get_buff_len(xdp)))
return -EFAULT;
- return bpf_event_output(map, flags, meta, meta_size, xdp->data,
+ return bpf_event_output(map, flags, meta, meta_size, xdp,
xdp_size, bpf_xdp_copy);
}
@@ -4770,353 +5019,316 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
-static int _bpf_setsockopt(struct sock *sk, int level, int optname,
- char *optval, int optlen)
+static int sol_socket_sockopt(struct sock *sk, int optname,
+ char *optval, int *optlen,
+ bool getopt)
+{
+ switch (optname) {
+ case SO_REUSEADDR:
+ case SO_SNDBUF:
+ case SO_RCVBUF:
+ case SO_KEEPALIVE:
+ case SO_PRIORITY:
+ case SO_REUSEPORT:
+ case SO_RCVLOWAT:
+ case SO_MARK:
+ case SO_MAX_PACING_RATE:
+ case SO_BINDTOIFINDEX:
+ case SO_TXREHASH:
+ if (*optlen != sizeof(int))
+ return -EINVAL;
+ break;
+ case SO_BINDTODEVICE:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (getopt) {
+ if (optname == SO_BINDTODEVICE)
+ return -EINVAL;
+ return sk_getsockopt(sk, SOL_SOCKET, optname,
+ KERNEL_SOCKPTR(optval),
+ KERNEL_SOCKPTR(optlen));
+ }
+
+ return sk_setsockopt(sk, SOL_SOCKET, optname,
+ KERNEL_SOCKPTR(optval), *optlen);
+}
+
+static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname,
+ char *optval, int optlen)
{
- char devname[IFNAMSIZ];
- int val, valbool;
- struct net *net;
- int ifindex;
- int ret = 0;
+ struct tcp_sock *tp = tcp_sk(sk);
+ unsigned long timeout;
+ int val;
- if (!sk_fullsock(sk))
+ if (optlen != sizeof(int))
return -EINVAL;
- sock_owned_by_me(sk);
+ val = *(int *)optval;
- if (level == SOL_SOCKET) {
- if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
+ /* Only some options are supported */
+ switch (optname) {
+ case TCP_BPF_IW:
+ if (val <= 0 || tp->data_segs_out > tp->syn_data)
return -EINVAL;
- val = *((int *)optval);
- valbool = val ? 1 : 0;
-
- /* Only some socketops are supported */
- switch (optname) {
- case SO_RCVBUF:
- val = min_t(u32, val, sysctl_rmem_max);
- val = min_t(int, val, INT_MAX / 2);
- sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
- WRITE_ONCE(sk->sk_rcvbuf,
- max_t(int, val * 2, SOCK_MIN_RCVBUF));
- break;
- case SO_SNDBUF:
- val = min_t(u32, val, sysctl_wmem_max);
- val = min_t(int, val, INT_MAX / 2);
- sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
- WRITE_ONCE(sk->sk_sndbuf,
- max_t(int, val * 2, SOCK_MIN_SNDBUF));
- break;
- case SO_MAX_PACING_RATE: /* 32bit version */
- if (val != ~0U)
- cmpxchg(&sk->sk_pacing_status,
- SK_PACING_NONE,
- SK_PACING_NEEDED);
- sk->sk_max_pacing_rate = (val == ~0U) ?
- ~0UL : (unsigned int)val;
- sk->sk_pacing_rate = min(sk->sk_pacing_rate,
- sk->sk_max_pacing_rate);
- break;
- case SO_PRIORITY:
- sk->sk_priority = val;
- break;
- case SO_RCVLOWAT:
- if (val < 0)
- val = INT_MAX;
- WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
- break;
- case SO_MARK:
- if (sk->sk_mark != val) {
- sk->sk_mark = val;
- sk_dst_reset(sk);
- }
- break;
- case SO_BINDTODEVICE:
- optlen = min_t(long, optlen, IFNAMSIZ - 1);
- strncpy(devname, optval, optlen);
- devname[optlen] = 0;
+ tcp_snd_cwnd_set(tp, val);
+ break;
+ case TCP_BPF_SNDCWND_CLAMP:
+ if (val <= 0)
+ return -EINVAL;
+ tp->snd_cwnd_clamp = val;
+ tp->snd_ssthresh = val;
+ break;
+ case TCP_BPF_DELACK_MAX:
+ timeout = usecs_to_jiffies(val);
+ if (timeout > TCP_DELACK_MAX ||
+ timeout < TCP_TIMEOUT_MIN)
+ return -EINVAL;
+ inet_csk(sk)->icsk_delack_max = timeout;
+ break;
+ case TCP_BPF_RTO_MIN:
+ timeout = usecs_to_jiffies(val);
+ if (timeout > TCP_RTO_MIN ||
+ timeout < TCP_TIMEOUT_MIN)
+ return -EINVAL;
+ inet_csk(sk)->icsk_rto_min = timeout;
+ break;
+ default:
+ return -EINVAL;
+ }
- ifindex = 0;
- if (devname[0] != '\0') {
- struct net_device *dev;
+ return 0;
+}
- ret = -ENODEV;
+static int sol_tcp_sockopt_congestion(struct sock *sk, char *optval,
+ int *optlen, bool getopt)
+{
+ struct tcp_sock *tp;
+ int ret;
- net = sock_net(sk);
- dev = dev_get_by_name(net, devname);
- if (!dev)
- break;
- ifindex = dev->ifindex;
- dev_put(dev);
- }
- fallthrough;
- case SO_BINDTOIFINDEX:
- if (optname == SO_BINDTOIFINDEX)
- ifindex = val;
- ret = sock_bindtoindex(sk, ifindex, false);
- break;
- case SO_KEEPALIVE:
- if (sk->sk_prot->keepalive)
- sk->sk_prot->keepalive(sk, valbool);
- sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
- break;
- case SO_REUSEPORT:
- sk->sk_reuseport = valbool;
- break;
- default:
- ret = -EINVAL;
- }
-#ifdef CONFIG_INET
- } else if (level == SOL_IP) {
- if (optlen != sizeof(int) || sk->sk_family != AF_INET)
+ if (*optlen < 2)
+ return -EINVAL;
+
+ if (getopt) {
+ if (!inet_csk(sk)->icsk_ca_ops)
return -EINVAL;
+ /* BPF expects NULL-terminated tcp-cc string */
+ optval[--(*optlen)] = '\0';
+ return do_tcp_getsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ KERNEL_SOCKPTR(optval),
+ KERNEL_SOCKPTR(optlen));
+ }
- val = *((int *)optval);
- /* Only some options are supported */
- switch (optname) {
- case IP_TOS:
- if (val < -1 || val > 0xff) {
- ret = -EINVAL;
- } else {
- struct inet_sock *inet = inet_sk(sk);
+ /* "cdg" is the only cc that alloc a ptr
+ * in inet_csk_ca area. The bpf-tcp-cc may
+ * overwrite this ptr after switching to cdg.
+ */
+ if (*optlen >= sizeof("cdg") - 1 && !strncmp("cdg", optval, *optlen))
+ return -ENOTSUPP;
- if (val == -1)
- val = 0;
- inet->tos = val;
- }
- break;
- default:
- ret = -EINVAL;
- }
-#if IS_ENABLED(CONFIG_IPV6)
- } else if (level == SOL_IPV6) {
- if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
- return -EINVAL;
+ /* It stops this looping:
+ *
+ * .init => bpf_setsockopt(tcp_cc) => .init =>
+ * bpf_setsockopt(tcp_cc) => .init => ....
+ *
+ * The second bpf_setsockopt(tcp_cc) is not allowed
+ * in order to break the loop when both .init
+ * are the same bpf prog.
+ *
+ * This applies even if the second bpf_setsockopt(tcp_cc)
+ * does not cause a loop. It limits only the first
+ * '.init' to calling bpf_setsockopt(TCP_CONGESTION) to
+ * pick a fallback cc (e.g. peer does not support ECN),
+ * so the second '.init' cannot fall back to
+ * another.
+ */
+ tp = tcp_sk(sk);
+ if (tp->bpf_chg_cc_inprogress)
+ return -EBUSY;
+
+ tp->bpf_chg_cc_inprogress = 1;
+ ret = do_tcp_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ KERNEL_SOCKPTR(optval), *optlen);
+ tp->bpf_chg_cc_inprogress = 0;
+ return ret;
+}
- val = *((int *)optval);
- /* Only some options are supported */
- switch (optname) {
- case IPV6_TCLASS:
- if (val < -1 || val > 0xff) {
- ret = -EINVAL;
- } else {
- struct ipv6_pinfo *np = inet6_sk(sk);
+static int sol_tcp_sockopt(struct sock *sk, int optname,
+ char *optval, int *optlen,
+ bool getopt)
+{
+ if (sk->sk_prot->setsockopt != tcp_setsockopt)
+ return -EINVAL;
- if (val == -1)
- val = 0;
- np->tclass = val;
- }
- break;
- default:
- ret = -EINVAL;
- }
-#endif
- } else if (level == SOL_TCP &&
- sk->sk_prot->setsockopt == tcp_setsockopt) {
- if (optname == TCP_CONGESTION) {
- char name[TCP_CA_NAME_MAX];
-
- strncpy(name, optval, min_t(long, optlen,
- TCP_CA_NAME_MAX-1));
- name[TCP_CA_NAME_MAX-1] = 0;
- ret = tcp_set_congestion_control(sk, name, false, true);
- } else {
- struct inet_connection_sock *icsk = inet_csk(sk);
+ switch (optname) {
+ case TCP_NODELAY:
+ case TCP_MAXSEG:
+ case TCP_KEEPIDLE:
+ case TCP_KEEPINTVL:
+ case TCP_KEEPCNT:
+ case TCP_SYNCNT:
+ case TCP_WINDOW_CLAMP:
+ case TCP_THIN_LINEAR_TIMEOUTS:
+ case TCP_USER_TIMEOUT:
+ case TCP_NOTSENT_LOWAT:
+ case TCP_SAVE_SYN:
+ if (*optlen != sizeof(int))
+ return -EINVAL;
+ break;
+ case TCP_CONGESTION:
+ return sol_tcp_sockopt_congestion(sk, optval, optlen, getopt);
+ case TCP_SAVED_SYN:
+ if (*optlen < 1)
+ return -EINVAL;
+ break;
+ default:
+ if (getopt)
+ return -EINVAL;
+ return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen);
+ }
+
+ if (getopt) {
+ if (optname == TCP_SAVED_SYN) {
struct tcp_sock *tp = tcp_sk(sk);
- unsigned long timeout;
- if (optlen != sizeof(int))
+ if (!tp->saved_syn ||
+ *optlen > tcp_saved_syn_len(tp->saved_syn))
return -EINVAL;
-
- val = *((int *)optval);
- /* Only some options are supported */
- switch (optname) {
- case TCP_BPF_IW:
- if (val <= 0 || tp->data_segs_out > tp->syn_data)
- ret = -EINVAL;
- else
- tp->snd_cwnd = val;
- break;
- case TCP_BPF_SNDCWND_CLAMP:
- if (val <= 0) {
- ret = -EINVAL;
- } else {
- tp->snd_cwnd_clamp = val;
- tp->snd_ssthresh = val;
- }
- break;
- case TCP_BPF_DELACK_MAX:
- timeout = usecs_to_jiffies(val);
- if (timeout > TCP_DELACK_MAX ||
- timeout < TCP_TIMEOUT_MIN)
- return -EINVAL;
- inet_csk(sk)->icsk_delack_max = timeout;
- break;
- case TCP_BPF_RTO_MIN:
- timeout = usecs_to_jiffies(val);
- if (timeout > TCP_RTO_MIN ||
- timeout < TCP_TIMEOUT_MIN)
- return -EINVAL;
- inet_csk(sk)->icsk_rto_min = timeout;
- break;
- case TCP_SAVE_SYN:
- if (val < 0 || val > 1)
- ret = -EINVAL;
- else
- tp->save_syn = val;
- break;
- case TCP_KEEPIDLE:
- ret = tcp_sock_set_keepidle_locked(sk, val);
- break;
- case TCP_KEEPINTVL:
- if (val < 1 || val > MAX_TCP_KEEPINTVL)
- ret = -EINVAL;
- else
- tp->keepalive_intvl = val * HZ;
- break;
- case TCP_KEEPCNT:
- if (val < 1 || val > MAX_TCP_KEEPCNT)
- ret = -EINVAL;
- else
- tp->keepalive_probes = val;
- break;
- case TCP_SYNCNT:
- if (val < 1 || val > MAX_TCP_SYNCNT)
- ret = -EINVAL;
- else
- icsk->icsk_syn_retries = val;
- break;
- case TCP_USER_TIMEOUT:
- if (val < 0)
- ret = -EINVAL;
- else
- icsk->icsk_user_timeout = val;
- break;
- case TCP_NOTSENT_LOWAT:
- tp->notsent_lowat = val;
- sk->sk_write_space(sk);
- break;
- case TCP_WINDOW_CLAMP:
- ret = tcp_set_window_clamp(sk, val);
- break;
- default:
- ret = -EINVAL;
- }
+ memcpy(optval, tp->saved_syn->data, *optlen);
+ /* It cannot free tp->saved_syn here because it
+ * does not know whether user space still needs it.
+ */
+ return 0;
}
-#endif
- } else {
- ret = -EINVAL;
+
+ return do_tcp_getsockopt(sk, SOL_TCP, optname,
+ KERNEL_SOCKPTR(optval),
+ KERNEL_SOCKPTR(optlen));
}
- return ret;
+
+ return do_tcp_setsockopt(sk, SOL_TCP, optname,
+ KERNEL_SOCKPTR(optval), *optlen);
}
-static int _bpf_getsockopt(struct sock *sk, int level, int optname,
- char *optval, int optlen)
+static int sol_ip_sockopt(struct sock *sk, int optname,
+ char *optval, int *optlen,
+ bool getopt)
{
- if (!sk_fullsock(sk))
- goto err_clear;
+ if (sk->sk_family != AF_INET)
+ return -EINVAL;
- sock_owned_by_me(sk);
+ switch (optname) {
+ case IP_TOS:
+ if (*optlen != sizeof(int))
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
- if (level == SOL_SOCKET) {
- if (optlen != sizeof(int))
- goto err_clear;
+ if (getopt)
+ return do_ip_getsockopt(sk, SOL_IP, optname,
+ KERNEL_SOCKPTR(optval),
+ KERNEL_SOCKPTR(optlen));
- switch (optname) {
- case SO_RCVBUF:
- *((int *)optval) = sk->sk_rcvbuf;
- break;
- case SO_SNDBUF:
- *((int *)optval) = sk->sk_sndbuf;
- break;
- case SO_MARK:
- *((int *)optval) = sk->sk_mark;
- break;
- case SO_PRIORITY:
- *((int *)optval) = sk->sk_priority;
- break;
- case SO_BINDTOIFINDEX:
- *((int *)optval) = sk->sk_bound_dev_if;
- break;
- case SO_REUSEPORT:
- *((int *)optval) = sk->sk_reuseport;
- break;
- default:
- goto err_clear;
- }
-#ifdef CONFIG_INET
- } else if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
- struct inet_connection_sock *icsk;
- struct tcp_sock *tp;
+ return do_ip_setsockopt(sk, SOL_IP, optname,
+ KERNEL_SOCKPTR(optval), *optlen);
+}
- switch (optname) {
- case TCP_CONGESTION:
- icsk = inet_csk(sk);
+static int sol_ipv6_sockopt(struct sock *sk, int optname,
+ char *optval, int *optlen,
+ bool getopt)
+{
+ if (sk->sk_family != AF_INET6)
+ return -EINVAL;
- if (!icsk->icsk_ca_ops || optlen <= 1)
- goto err_clear;
- strncpy(optval, icsk->icsk_ca_ops->name, optlen);
- optval[optlen - 1] = 0;
- break;
- case TCP_SAVED_SYN:
- tp = tcp_sk(sk);
+ switch (optname) {
+ case IPV6_TCLASS:
+ case IPV6_AUTOFLOWLABEL:
+ if (*optlen != sizeof(int))
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
- if (optlen <= 0 || !tp->saved_syn ||
- optlen > tcp_saved_syn_len(tp->saved_syn))
- goto err_clear;
- memcpy(optval, tp->saved_syn->data, optlen);
- break;
- default:
- goto err_clear;
- }
- } else if (level == SOL_IP) {
- struct inet_sock *inet = inet_sk(sk);
+ if (getopt)
+ return ipv6_bpf_stub->ipv6_getsockopt(sk, SOL_IPV6, optname,
+ KERNEL_SOCKPTR(optval),
+ KERNEL_SOCKPTR(optlen));
- if (optlen != sizeof(int) || sk->sk_family != AF_INET)
- goto err_clear;
+ return ipv6_bpf_stub->ipv6_setsockopt(sk, SOL_IPV6, optname,
+ KERNEL_SOCKPTR(optval), *optlen);
+}
- /* Only some options are supported */
- switch (optname) {
- case IP_TOS:
- *((int *)optval) = (int)inet->tos;
- break;
- default:
- goto err_clear;
- }
-#if IS_ENABLED(CONFIG_IPV6)
- } else if (level == SOL_IPV6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
+static int __bpf_setsockopt(struct sock *sk, int level, int optname,
+ char *optval, int optlen)
+{
+ if (!sk_fullsock(sk))
+ return -EINVAL;
- if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
- goto err_clear;
+ if (level == SOL_SOCKET)
+ return sol_socket_sockopt(sk, optname, optval, &optlen, false);
+ else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP)
+ return sol_ip_sockopt(sk, optname, optval, &optlen, false);
+ else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6)
+ return sol_ipv6_sockopt(sk, optname, optval, &optlen, false);
+ else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP)
+ return sol_tcp_sockopt(sk, optname, optval, &optlen, false);
- /* Only some options are supported */
- switch (optname) {
- case IPV6_TCLASS:
- *((int *)optval) = (int)np->tclass;
- break;
- default:
- goto err_clear;
- }
-#endif
-#endif
- } else {
- goto err_clear;
- }
- return 0;
-err_clear:
- memset(optval, 0, optlen);
return -EINVAL;
}
-BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level,
- int, optname, char *, optval, int, optlen)
+static int _bpf_setsockopt(struct sock *sk, int level, int optname,
+ char *optval, int optlen)
{
- if (level == SOL_TCP && optname == TCP_CONGESTION) {
- if (optlen >= sizeof("cdg") - 1 &&
- !strncmp("cdg", optval, optlen))
- return -ENOTSUPP;
+ if (sk_fullsock(sk))
+ sock_owned_by_me(sk);
+ return __bpf_setsockopt(sk, level, optname, optval, optlen);
+}
+
+static int __bpf_getsockopt(struct sock *sk, int level, int optname,
+ char *optval, int optlen)
+{
+ int err, saved_optlen = optlen;
+
+ if (!sk_fullsock(sk)) {
+ err = -EINVAL;
+ goto done;
}
+ if (level == SOL_SOCKET)
+ err = sol_socket_sockopt(sk, optname, optval, &optlen, true);
+ else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP)
+ err = sol_tcp_sockopt(sk, optname, optval, &optlen, true);
+ else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP)
+ err = sol_ip_sockopt(sk, optname, optval, &optlen, true);
+ else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6)
+ err = sol_ipv6_sockopt(sk, optname, optval, &optlen, true);
+ else
+ err = -EINVAL;
+
+done:
+ if (err)
+ optlen = 0;
+ if (optlen < saved_optlen)
+ memset(optval + optlen, 0, saved_optlen - optlen);
+ return err;
+}
+
+static int _bpf_getsockopt(struct sock *sk, int level, int optname,
+ char *optval, int optlen)
+{
+ if (sk_fullsock(sk))
+ sock_owned_by_me(sk);
+ return __bpf_getsockopt(sk, level, optname, optval, optlen);
+}
+
+BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level,
+ int, optname, char *, optval, int, optlen)
+{
return _bpf_setsockopt(sk, level, optname, optval, optlen);
}
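
The refactor above replaces the open-coded option handling with per-level sol_*_sockopt() dispatchers that reuse the kernel's own do_tcp_setsockopt()-style entry points, keeping get and set symmetric. The BPF-side interface is unchanged; a minimal sockops sketch selecting a congestion control:

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    #define SOL_TCP		6	/* not exported by linux/bpf.h */
    #define TCP_CONGESTION	13

    SEC("sockops")
    int demo_set_cc(struct bpf_sock_ops *skops)
    {
    	char cc[] = "cubic";

    	if (skops->op == BPF_SOCK_OPS_TCP_CONNECT_CB)
    		/* lands in sol_tcp_sockopt() -> do_tcp_setsockopt() */
    		bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION,
    			       cc, sizeof(cc));
    	return 1;
    }

    char _license[] SEC("license") = "GPL";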
@@ -5148,6 +5360,40 @@ const struct bpf_func_proto bpf_sk_getsockopt_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level,
+ int, optname, char *, optval, int, optlen)
+{
+ return __bpf_setsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto = {
+ .func = bpf_unlocked_sk_setsockopt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg5_type = ARG_CONST_SIZE,
+};
+
+BPF_CALL_5(bpf_unlocked_sk_getsockopt, struct sock *, sk, int, level,
+ int, optname, char *, optval, int, optlen)
+{
+ return __bpf_getsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto = {
+ .func = bpf_unlocked_sk_getsockopt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg5_type = ARG_CONST_SIZE,
+};
+
BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
int, level, int, optname, char *, optval, int, optlen)
{
@@ -5906,7 +6152,6 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len
if (err)
return err;
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
return seg6_lookup_nexthop(skb, NULL, 0);
@@ -6167,6 +6412,7 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
int dif, int sdif, u8 family, u8 proto)
{
+ struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo;
bool refcounted = false;
struct sock *sk = NULL;
@@ -6175,7 +6421,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
__be32 dst4 = tuple->ipv4.daddr;
if (proto == IPPROTO_TCP)
- sk = __inet_lookup(net, &tcp_hashinfo, NULL, 0,
+ sk = __inet_lookup(net, hinfo, NULL, 0,
src4, tuple->ipv4.sport,
dst4, tuple->ipv4.dport,
dif, sdif, &refcounted);
@@ -6189,7 +6435,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
if (proto == IPPROTO_TCP)
- sk = __inet6_lookup(net, &tcp_hashinfo, NULL, 0,
+ sk = __inet6_lookup(net, hinfo, NULL, 0,
src6, tuple->ipv6.sport,
dst6, ntohs(tuple->ipv6.dport),
dif, sdif, &refcounted);
@@ -6211,8 +6457,6 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
/* bpf_skc_lookup performs the core lookup for different types of sockets,
* taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
- * Returns the socket as an 'unsigned long' to simplify the casting in the
- * callers to satisfy BPF_CALL declarations.
*/
static struct sock *
__bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
@@ -6220,8 +6464,8 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
u64 flags)
{
struct sock *sk = NULL;
- u8 family = AF_UNSPEC;
struct net *net;
+ u8 family;
int sdif;
if (len == sizeof(tuple->ipv4))
@@ -6231,8 +6475,7 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
else
return NULL;
- if (unlikely(family == AF_UNSPEC || flags ||
- !((s32)netns_id < 0 || netns_id <= S32_MAX)))
+ if (unlikely(flags || !((s32)netns_id < 0 || netns_id <= S32_MAX)))
goto out;
if (family == AF_INET)
@@ -6264,10 +6507,21 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
ifindex, proto, netns_id, flags);
if (sk) {
- sk = sk_to_full_sk(sk);
- if (!sk_fullsock(sk)) {
+ struct sock *sk2 = sk_to_full_sk(sk);
+
+ /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the
+ * original sk's refcount is dropped to prevent a request_sock leak.
+ */
+ if (!sk_fullsock(sk2))
+ sk2 = NULL;
+ if (sk2 != sk) {
sock_gen_put(sk);
- return NULL;
+ /* Ensure there is no need to bump sk2 refcnt */
+ if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
+ WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
+ return NULL;
+ }
+ sk = sk2;
}
}
@@ -6301,10 +6555,21 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
flags);
if (sk) {
- sk = sk_to_full_sk(sk);
- if (!sk_fullsock(sk)) {
+ struct sock *sk2 = sk_to_full_sk(sk);
+
+ /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the
+ * original sk's refcount is dropped to prevent a request_sock leak.
+ */
+ if (!sk_fullsock(sk2))
+ sk2 = NULL;
+ if (sk2 != sk) {
sock_gen_put(sk);
- return NULL;
+ /* Ensure there is no need to bump sk2 refcnt */
+ if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
+ WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
+ return NULL;
+ }
+ sk = sk2;
}
}
@@ -6379,7 +6644,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
.func = bpf_sk_release,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON | OBJ_RELEASE,
};
BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
@@ -6768,30 +7033,39 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
return -EINVAL;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
return -EINVAL;
if (!th->ack || th->rst || th->syn)
return -ENOENT;
+ if (unlikely(iph_len < sizeof(struct iphdr)))
+ return -EINVAL;
+
if (tcp_synq_no_recent_overflow(sk))
return -ENOENT;
cookie = ntohl(th->ack_seq) - 1;
- switch (sk->sk_family) {
- case AF_INET:
- if (unlikely(iph_len < sizeof(struct iphdr)))
+ /* Both struct iphdr and struct ipv6hdr have the version field at the
+ * same offset, so we can cast to the shorter header (struct iphdr).
+ */
+ switch (((struct iphdr *)iph)->version) {
+ case 4:
+ if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
return -EINVAL;
ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
break;
#if IS_BUILTIN(CONFIG_IPV6)
- case AF_INET6:
+ case 6:
if (unlikely(iph_len < sizeof(struct ipv6hdr)))
return -EINVAL;
+ if (sk->sk_family != AF_INET6)
+ return -EINVAL;
+
ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
break;
#endif /* CONFIG_IPV6 */
@@ -6834,7 +7108,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
return -EINVAL;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
return -ENOENT;
if (!th->syn || th->ack || th->fin || th->rst)
@@ -6848,7 +7122,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
*/
switch (((struct iphdr *)iph)->version) {
case 4:
- if (sk->sk_family == AF_INET6 && sk->sk_ipv6only)
+ if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
return -EINVAL;
mss = tcp_v4_get_syncookie(sk, iph, th, &cookie);
@@ -7146,6 +7420,151 @@ static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = {
.arg3_type = ARG_ANYTHING,
};
+BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb,
+ u64, tstamp, u32, tstamp_type)
+{
+ /* skb_clear_delivery_time() is done for inet protocol */
+ if (skb->protocol != htons(ETH_P_IP) &&
+ skb->protocol != htons(ETH_P_IPV6))
+ return -EOPNOTSUPP;
+
+ switch (tstamp_type) {
+ case BPF_SKB_TSTAMP_DELIVERY_MONO:
+ if (!tstamp)
+ return -EINVAL;
+ skb->tstamp = tstamp;
+ skb->mono_delivery_time = 1;
+ break;
+ case BPF_SKB_TSTAMP_UNSPEC:
+ if (tstamp)
+ return -EINVAL;
+ skb->tstamp = 0;
+ skb->mono_delivery_time = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_skb_set_tstamp_proto = {
+ .func = bpf_skb_set_tstamp,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
+#ifdef CONFIG_SYN_COOKIES
+BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv4, struct iphdr *, iph,
+ struct tcphdr *, th, u32, th_len)
+{
+ u32 cookie;
+ u16 mss;
+
+ if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
+ return -EINVAL;
+
+ mss = tcp_parse_mss_option(th, 0) ?: TCP_MSS_DEFAULT;
+ cookie = __cookie_v4_init_sequence(iph, th, &mss);
+
+ return cookie | ((u64)mss << 32);
+}
+
+static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = {
+ .func = bpf_tcp_raw_gen_syncookie_ipv4,
+ .gpl_only = true, /* __cookie_v4_init_sequence() is GPL */
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_size = sizeof(struct iphdr),
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
+BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph,
+ struct tcphdr *, th, u32, th_len)
+{
+#if IS_BUILTIN(CONFIG_IPV6)
+ const u16 mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
+ sizeof(struct ipv6hdr);
+ u32 cookie;
+ u16 mss;
+
+ if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
+ return -EINVAL;
+
+ mss = tcp_parse_mss_option(th, 0) ?: mss_clamp;
+ cookie = __cookie_v6_init_sequence(iph, th, &mss);
+
+ return cookie | ((u64)mss << 32);
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = {
+ .func = bpf_tcp_raw_gen_syncookie_ipv6,
+ .gpl_only = true, /* __cookie_v6_init_sequence() is GPL */
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_size = sizeof(struct ipv6hdr),
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
+BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph,
+ struct tcphdr *, th)
+{
+ u32 cookie = ntohl(th->ack_seq) - 1;
+
+ if (__cookie_v4_check(iph, th, cookie) > 0)
+ return 0;
+
+ return -EACCES;
+}
+
+static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv4_proto = {
+ .func = bpf_tcp_raw_check_syncookie_ipv4,
+ .gpl_only = true, /* __cookie_v4_check is GPL */
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_size = sizeof(struct iphdr),
+ .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg2_size = sizeof(struct tcphdr),
+};
+
+BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph,
+ struct tcphdr *, th)
+{
+#if IS_BUILTIN(CONFIG_IPV6)
+ u32 cookie = ntohl(th->ack_seq) - 1;
+
+ if (__cookie_v6_check(iph, th, cookie) > 0)
+ return 0;
+
+ return -EACCES;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+}
+
+static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv6_proto = {
+ .func = bpf_tcp_raw_check_syncookie_ipv6,
+ .gpl_only = true, /* __cookie_v6_check is GPL */
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg1_size = sizeof(struct ipv6hdr),
+ .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
+ .arg2_size = sizeof(struct tcphdr),
+};
+#endif /* CONFIG_SYN_COOKIES */
+
#endif /* CONFIG_INET */
bool bpf_helper_changes_pkt_data(void *func)
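
The raw syncookie helpers above return the cookie in the low 32 bits and the MSS in the high 32 bits, with a negative value on error. A minimal XDP sketch for the IPv4 variant; it sidesteps variable-length headers by handling only packets without IP or TCP options:

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <linux/if_ether.h>
    #include <linux/in.h>
    #include <linux/ip.h>
    #include <linux/tcp.h>
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_endian.h>

    SEC("xdp")
    int demo_syncookie(struct xdp_md *ctx)
    {
    	void *data = (void *)(long)ctx->data;
    	void *data_end = (void *)(long)ctx->data_end;
    	struct ethhdr *eth = data;
    	struct iphdr *iph;
    	struct tcphdr *th;
    	__s64 ret;

    	if ((void *)(eth + 1) > data_end ||
    	    eth->h_proto != bpf_htons(ETH_P_IP))
    		return XDP_PASS;
    	iph = (void *)(eth + 1);
    	if ((void *)(iph + 1) > data_end ||
    	    iph->protocol != IPPROTO_TCP || iph->ihl != 5)
    		return XDP_PASS;
    	th = (void *)(iph + 1);
    	if ((void *)(th + 1) > data_end || !th->syn || th->ack ||
    	    th->doff != 5)	/* keep th_len a verifier-known constant */
    		return XDP_PASS;

    	ret = bpf_tcp_raw_gen_syncookie_ipv4(iph, th, sizeof(*th));
    	if (ret < 0)
    		return XDP_PASS;
    	/* ret & 0xffffffff: ISN cookie; ret >> 32: MSS for the SYNACK */
    	return XDP_PASS;
    }

    char _license[] SEC("license") = "GPL";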
@@ -7192,34 +7611,23 @@ const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto __weak;
static const struct bpf_func_proto *
sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
+ const struct bpf_func_proto *func_proto;
+
+ func_proto = cgroup_common_func_proto(func_id, prog);
+ if (func_proto)
+ return func_proto;
+
+ func_proto = cgroup_current_func_proto(func_id, prog);
+ if (func_proto)
+ return func_proto;
+
switch (func_id) {
- /* inet and inet6 sockets are created in a process
- * context so there is always a valid uid/gid
- */
- case BPF_FUNC_get_current_uid_gid:
- return &bpf_get_current_uid_gid_proto;
- case BPF_FUNC_get_local_storage:
- return &bpf_get_local_storage_proto;
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_cookie_sock_proto;
case BPF_FUNC_get_netns_cookie:
return &bpf_get_netns_cookie_sock_proto;
case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto;
- case BPF_FUNC_get_current_pid_tgid:
- return &bpf_get_current_pid_tgid_proto;
- case BPF_FUNC_get_current_comm:
- return &bpf_get_current_comm_proto;
-#ifdef CONFIG_CGROUPS
- case BPF_FUNC_get_current_cgroup_id:
- return &bpf_get_current_cgroup_id_proto;
- case BPF_FUNC_get_current_ancestor_cgroup_id:
- return &bpf_get_current_ancestor_cgroup_id_proto;
-#endif
-#ifdef CONFIG_CGROUP_NET_CLASSID
- case BPF_FUNC_get_cgroup_classid:
- return &bpf_get_cgroup_classid_curr_proto;
-#endif
case BPF_FUNC_sk_storage_get:
return &bpf_sk_storage_get_cg_sock_proto;
case BPF_FUNC_ktime_get_coarse_ns:
@@ -7232,12 +7640,17 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
static const struct bpf_func_proto *
sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
+ const struct bpf_func_proto *func_proto;
+
+ func_proto = cgroup_common_func_proto(func_id, prog);
+ if (func_proto)
+ return func_proto;
+
+ func_proto = cgroup_current_func_proto(func_id, prog);
+ if (func_proto)
+ return func_proto;
+
switch (func_id) {
- /* inet and inet6 sockets are created in a process
- * context so there is always a valid uid/gid
- */
- case BPF_FUNC_get_current_uid_gid:
- return &bpf_get_current_uid_gid_proto;
case BPF_FUNC_bind:
switch (prog->expected_attach_type) {
case BPF_CGROUP_INET4_CONNECT:
@@ -7250,24 +7663,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_socket_cookie_sock_addr_proto;
case BPF_FUNC_get_netns_cookie:
return &bpf_get_netns_cookie_sock_addr_proto;
- case BPF_FUNC_get_local_storage:
- return &bpf_get_local_storage_proto;
case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto;
- case BPF_FUNC_get_current_pid_tgid:
- return &bpf_get_current_pid_tgid_proto;
- case BPF_FUNC_get_current_comm:
- return &bpf_get_current_comm_proto;
-#ifdef CONFIG_CGROUPS
- case BPF_FUNC_get_current_cgroup_id:
- return &bpf_get_current_cgroup_id_proto;
- case BPF_FUNC_get_current_ancestor_cgroup_id:
- return &bpf_get_current_ancestor_cgroup_id_proto;
-#endif
-#ifdef CONFIG_CGROUP_NET_CLASSID
- case BPF_FUNC_get_cgroup_classid:
- return &bpf_get_cgroup_classid_curr_proto;
-#endif
#ifdef CONFIG_INET
case BPF_FUNC_sk_lookup_tcp:
return &bpf_sock_addr_sk_lookup_tcp_proto;
@@ -7348,9 +7745,13 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto __weak;
static const struct bpf_func_proto *
cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
+ const struct bpf_func_proto *func_proto;
+
+ func_proto = cgroup_common_func_proto(func_id, prog);
+ if (func_proto)
+ return func_proto;
+
switch (func_id) {
- case BPF_FUNC_get_local_storage:
- return &bpf_get_local_storage_proto;
case BPF_FUNC_sk_fullsock:
return &bpf_sk_fullsock_proto;
case BPF_FUNC_sk_storage_get:
@@ -7507,6 +7908,18 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_gen_syncookie_proto;
case BPF_FUNC_sk_assign:
return &bpf_sk_assign_proto;
+ case BPF_FUNC_skb_set_tstamp:
+ return &bpf_skb_set_tstamp_proto;
+#ifdef CONFIG_SYN_COOKIES
+ case BPF_FUNC_tcp_raw_gen_syncookie_ipv4:
+ return &bpf_tcp_raw_gen_syncookie_ipv4_proto;
+ case BPF_FUNC_tcp_raw_gen_syncookie_ipv6:
+ return &bpf_tcp_raw_gen_syncookie_ipv6_proto;
+ case BPF_FUNC_tcp_raw_check_syncookie_ipv4:
+ return &bpf_tcp_raw_check_syncookie_ipv4_proto;
+ case BPF_FUNC_tcp_raw_check_syncookie_ipv6:
+ return &bpf_tcp_raw_check_syncookie_ipv6_proto;
+#endif
#endif
default:
return bpf_sk_base_func_proto(func_id);
@@ -7533,6 +7946,12 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_xdp_redirect_map_proto;
case BPF_FUNC_xdp_adjust_tail:
return &bpf_xdp_adjust_tail_proto;
+ case BPF_FUNC_xdp_get_buff_len:
+ return &bpf_xdp_get_buff_len_proto;
+ case BPF_FUNC_xdp_load_bytes:
+ return &bpf_xdp_load_bytes_proto;
+ case BPF_FUNC_xdp_store_bytes:
+ return &bpf_xdp_store_bytes_proto;
case BPF_FUNC_fib_lookup:
return &bpf_xdp_fib_lookup_proto;
case BPF_FUNC_check_mtu:
@@ -7550,6 +7969,16 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_check_syncookie_proto;
case BPF_FUNC_tcp_gen_syncookie:
return &bpf_tcp_gen_syncookie_proto;
+#ifdef CONFIG_SYN_COOKIES
+ case BPF_FUNC_tcp_raw_gen_syncookie_ipv4:
+ return &bpf_tcp_raw_gen_syncookie_ipv4_proto;
+ case BPF_FUNC_tcp_raw_gen_syncookie_ipv6:
+ return &bpf_tcp_raw_gen_syncookie_ipv6_proto;
+ case BPF_FUNC_tcp_raw_check_syncookie_ipv4:
+ return &bpf_tcp_raw_check_syncookie_ipv4_proto;
+ case BPF_FUNC_tcp_raw_check_syncookie_ipv6:
+ return &bpf_tcp_raw_check_syncookie_ipv6_proto;
+#endif
#endif
default:
return bpf_sk_base_func_proto(func_id);
@@ -7562,6 +7991,12 @@ const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
static const struct bpf_func_proto *
sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
+ const struct bpf_func_proto *func_proto;
+
+ func_proto = cgroup_common_func_proto(func_id, prog);
+ if (func_proto)
+ return func_proto;
+
switch (func_id) {
case BPF_FUNC_setsockopt:
return &bpf_sock_ops_setsockopt_proto;
@@ -7575,8 +8010,6 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sock_hash_update_proto;
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_cookie_sock_ops_proto;
- case BPF_FUNC_get_local_storage:
- return &bpf_get_local_storage_proto;
case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto;
case BPF_FUNC_sk_storage_get:
@@ -7840,7 +8273,9 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
return false;
info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
break;
- case offsetofend(struct __sk_buff, gso_size) ... offsetof(struct __sk_buff, hwtstamp) - 1:
+ case offsetof(struct __sk_buff, tstamp_type):
+ return false;
+ case offsetofend(struct __sk_buff, tstamp_type) ... offsetof(struct __sk_buff, hwtstamp) - 1:
/* Explicitly prohibit access to padding in __sk_buff. */
return false;
default:
@@ -8030,6 +8465,7 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
+ int field_size;
if (off < 0 || off >= sizeof(struct bpf_sock))
return false;
@@ -8041,7 +8477,6 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
case offsetof(struct bpf_sock, family):
case offsetof(struct bpf_sock, type):
case offsetof(struct bpf_sock, protocol):
- case offsetof(struct bpf_sock, dst_port):
case offsetof(struct bpf_sock, src_port):
case offsetof(struct bpf_sock, rx_queue_mapping):
case bpf_ctx_range(struct bpf_sock, src_ip4):
@@ -8050,6 +8485,14 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
+ case bpf_ctx_range(struct bpf_sock, dst_port):
+ field_size = size == size_default ?
+ size_default : sizeof_field(struct bpf_sock, dst_port);
+ bpf_ctx_record_field_size(info, field_size);
+ return bpf_ctx_narrow_access_ok(off, size, field_size);
+ case offsetofend(struct bpf_sock, dst_port) ...
+ offsetof(struct bpf_sock, dst_ip4) - 1:
+ return false;
}
return size == size_default;
@@ -8187,11 +8630,50 @@ static bool tc_cls_act_is_valid_access(int off, int size,
break;
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
+ case offsetof(struct __sk_buff, tstamp_type):
+ /* The convert_ctx_access() on reading and writing
+ * __sk_buff->tstamp depends on whether the bpf prog
+ * has used __sk_buff->tstamp_type or not.
+ * Thus, we need to set prog->tstamp_type_access
+ * here, during is_valid_access(), before the
+ * conversion runs.
+ */
+ ((struct bpf_prog *)prog)->tstamp_type_access = 1;
+ return size == sizeof(__u8);
}
return bpf_skb_is_valid_access(off, size, type, prog, info);
}
+DEFINE_MUTEX(nf_conn_btf_access_lock);
+EXPORT_SYMBOL_GPL(nf_conn_btf_access_lock);
+
+int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct btf *btf,
+ const struct btf_type *t, int off, int size,
+ enum bpf_access_type atype, u32 *next_btf_id,
+ enum bpf_type_flag *flag);
+EXPORT_SYMBOL_GPL(nfct_btf_struct_access);
+
+static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log,
+ const struct btf *btf,
+ const struct btf_type *t, int off,
+ int size, enum bpf_access_type atype,
+ u32 *next_btf_id,
+ enum bpf_type_flag *flag)
+{
+ int ret = -EACCES;
+
+ if (atype == BPF_READ)
+ return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
+ flag);
+
+ mutex_lock(&nf_conn_btf_access_lock);
+ if (nfct_btf_struct_access)
+ ret = nfct_btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag);
+ mutex_unlock(&nf_conn_btf_access_lock);
+
+ return ret;
+}
+
static bool __is_valid_xdp_access(int off, int size)
{
if (off < 0 || off >= sizeof(struct xdp_md))
@@ -8251,6 +8733,27 @@ void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog,
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
+static int xdp_btf_struct_access(struct bpf_verifier_log *log,
+ const struct btf *btf,
+ const struct btf_type *t, int off,
+ int size, enum bpf_access_type atype,
+ u32 *next_btf_id,
+ enum bpf_type_flag *flag)
+{
+ int ret = -EACCES;
+
+ if (atype == BPF_READ)
+ return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
+ flag);
+
+ mutex_lock(&nf_conn_btf_access_lock);
+ if (nfct_btf_struct_access)
+ ret = nfct_btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag);
+ mutex_unlock(&nf_conn_btf_access_lock);
+
+ return ret;
+}
+
static bool sock_addr_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
@@ -8582,6 +9085,25 @@ static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
+static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
+ struct bpf_insn *insn)
+{
+ __u8 value_reg = si->dst_reg;
+ __u8 skb_reg = si->src_reg;
+ /* AX is needed because src_reg and dst_reg could be the same */
+ __u8 tmp_reg = BPF_REG_AX;
+
+ *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
+ PKT_VLAN_PRESENT_OFFSET);
+ *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
+ SKB_MONO_DELIVERY_TIME_MASK, 2);
+ *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC);
+ *insn++ = BPF_JMP_A(1);
+ *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO);
+
+ return insn;
+}
+
static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si,
struct bpf_insn *insn)
{
@@ -8603,6 +9125,74 @@ static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si,
return insn;
}
+static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn)
+{
+ __u8 value_reg = si->dst_reg;
+ __u8 skb_reg = si->src_reg;
+
+#ifdef CONFIG_NET_CLS_ACT
+ /* If the tstamp_type is read,
+ * the bpf prog is aware that the tstamp could be a delivery time.
+ * Thus, read skb->tstamp as-is if tstamp_type_access is true.
+ */
+ if (!prog->tstamp_type_access) {
+ /* AX is needed because src_reg and dst_reg could be the same */
+ __u8 tmp_reg = BPF_REG_AX;
+
+ *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, PKT_VLAN_PRESENT_OFFSET);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
+ TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK);
+ *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg,
+ TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2);
+ /* skb->tc_at_ingress && skb->mono_delivery_time,
+ * read 0 as the (rcv) timestamp.
+ */
+ *insn++ = BPF_MOV64_IMM(value_reg, 0);
+ *insn++ = BPF_JMP_A(1);
+ }
+#endif
+
+ *insn++ = BPF_LDX_MEM(BPF_DW, value_reg, skb_reg,
+ offsetof(struct sk_buff, tstamp));
+ return insn;
+}
+
+static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn)
+{
+ __u8 value_reg = si->src_reg;
+ __u8 skb_reg = si->dst_reg;
+
+#ifdef CONFIG_NET_CLS_ACT
+ /* If the tstamp_type is read,
+ * the bpf prog is aware that the tstamp could be a delivery time.
+ * Thus, write skb->tstamp as-is if tstamp_type_access is true.
+ * Otherwise, a write at ingress also has to clear the
+ * mono_delivery_time bit.
+ */
+ if (!prog->tstamp_type_access) {
+ __u8 tmp_reg = BPF_REG_AX;
+
+ *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, PKT_VLAN_PRESENT_OFFSET);
+ /* Writing __sk_buff->tstamp at ingress: goto <clear> */
+ *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1);
+ /* goto <store> */
+ *insn++ = BPF_JMP_A(2);
+ /* <clear>: mono_delivery_time */
+ *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK);
+ *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, PKT_VLAN_PRESENT_OFFSET);
+ }
+#endif
+
+ /* <store>: skb->tstamp = tstamp */
+ *insn++ = BPF_STX_MEM(BPF_DW, skb_reg, value_reg,
+ offsetof(struct sk_buff, tstamp));
+ return insn;
+}
+
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@@ -8911,17 +9501,13 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
BUILD_BUG_ON(sizeof_field(struct sk_buff, tstamp) != 8);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_DW,
- si->dst_reg, si->src_reg,
- bpf_target_off(struct sk_buff,
- tstamp, 8,
- target_size));
+ insn = bpf_convert_tstamp_write(prog, si, insn);
else
- *insn++ = BPF_LDX_MEM(BPF_DW,
- si->dst_reg, si->src_reg,
- bpf_target_off(struct sk_buff,
- tstamp, 8,
- target_size));
+ insn = bpf_convert_tstamp_read(prog, si, insn);
+ break;
+
+ case offsetof(struct __sk_buff, tstamp_type):
+ insn = bpf_convert_tstamp_type_read(si, insn);
break;
case offsetof(struct __sk_buff, gso_segs):
@@ -10062,7 +10648,7 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
.convert_ctx_access = tc_cls_act_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
.gen_ld_abs = bpf_gen_ld_abs,
- .check_kfunc_call = bpf_prog_test_check_kfunc_call,
+ .btf_struct_access = tc_cls_act_btf_struct_access,
};
const struct bpf_prog_ops tc_cls_act_prog_ops = {
@@ -10074,6 +10660,7 @@ const struct bpf_verifier_ops xdp_verifier_ops = {
.is_valid_access = xdp_is_valid_access,
.convert_ctx_access = xdp_convert_ctx_access,
.gen_prologue = bpf_noop_prologue,
+ .btf_struct_access = xdp_btf_struct_access,
};
const struct bpf_prog_ops xdp_prog_ops = {
@@ -10208,14 +10795,13 @@ int sk_detach_filter(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_detach_filter);
-int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
- unsigned int len)
+int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len)
{
struct sock_fprog_kern *fprog;
struct sk_filter *filter;
int ret = 0;
- lock_sock(sk);
+ sockopt_lock_sock(sk);
filter = rcu_dereference_protected(sk->sk_filter,
lockdep_sock_is_held(sk));
if (!filter)
@@ -10240,7 +10826,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
goto out;
ret = -EFAULT;
- if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
+ if (copy_to_sockptr(optval, fprog->filter, bpf_classic_proglen(fprog)))
goto out;
/* Instead of bytes, the API requests to return the number
@@ -10248,7 +10834,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
*/
ret = fprog->len;
out:
- release_sock(sk);
+ sockopt_release_sock(sk);
return ret;
}
@@ -10601,12 +11187,24 @@ static bool sk_lookup_is_valid_access(int off, int size,
case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]):
case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
- case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
case bpf_ctx_range(struct bpf_sk_lookup, local_port):
case bpf_ctx_range(struct bpf_sk_lookup, ingress_ifindex):
bpf_ctx_record_field_size(info, sizeof(__u32));
return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));
+ case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
+ /* Allow 4-byte access to 2-byte field for backward compatibility */
+ if (size == sizeof(__u32))
+ return true;
+ bpf_ctx_record_field_size(info, sizeof(__be16));
+ return bpf_ctx_narrow_access_ok(off, size, sizeof(__be16));
+
+ case offsetofend(struct bpf_sk_lookup, remote_port) ...
+ offsetof(struct bpf_sk_lookup, local_ip4) - 1:
+ /* Allow access to zero padding for backward compatibility */
+ bpf_ctx_record_field_size(info, sizeof(__u16));
+ return bpf_ctx_narrow_access_ok(off, size, sizeof(__u16));
+
default:
return false;
}
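With the case above, both access widths verify (a sketch; ctx is the program's struct bpf_sk_lookup pointer):

	__u16 port = ctx->remote_port;	/* natural 2-byte load */

	/* A legacy 4-byte load at the same offset, covering remote_port
	 * plus the two bytes of zero padding behind it, is still accepted
	 * so programs built against the old __u32 field keep loading.
	 */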
@@ -10688,6 +11286,11 @@ static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
sport, 2, target_size));
break;
+ case offsetofend(struct bpf_sk_lookup, remote_port):
+ *target_size = 2;
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+ break;
+
case offsetof(struct bpf_sk_lookup, local_port):
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
bpf_target_off(struct bpf_sk_lookup_kern,
@@ -10858,6 +11461,20 @@ const struct bpf_func_proto bpf_skc_to_unix_sock_proto = {
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UNIX],
};
+BPF_CALL_1(bpf_skc_to_mptcp_sock, struct sock *, sk)
+{
+ BTF_TYPE_EMIT(struct mptcp_sock);
+ return (unsigned long)bpf_mptcp_sock_from_subflow(sk);
+}
+
+const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
+ .func = bpf_skc_to_mptcp_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_SOCK_COMMON,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
+};
+
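A hypothetical BPF-side use of the new helper (a sketch; the surrounding program logic is illustrative):

	struct mptcp_sock *msk = bpf_skc_to_mptcp_sock(sk);

	if (msk) {
		/* sk is an MPTCP subflow; msk fields are readable via BTF */
	}

bpf_mptcp_sock_from_subflow() resolves a subflow's owning mptcp_sock or returns NULL, hence RET_PTR_TO_BTF_ID_OR_NULL.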
BPF_CALL_1(bpf_sock_from_file, struct file *, file)
{
return (unsigned long)sock_from_file(file);
@@ -10900,6 +11517,9 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_skc_to_unix_sock:
func = &bpf_skc_to_unix_sock_proto;
break;
+ case BPF_FUNC_skc_to_mptcp_sock:
+ func = &bpf_skc_to_mptcp_sock_proto;
+ break;
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 15833e1d6ea1..25cd35f5922e 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -22,6 +22,7 @@
#include <linux/ppp_defs.h>
#include <linux/stddef.h>
#include <linux/if_ether.h>
+#include <linux/if_hsr.h>
#include <linux/mpls.h>
#include <linux/tcp.h>
#include <linux/ptp_classify.h>
@@ -203,6 +204,30 @@ static void __skb_flow_dissect_icmp(const struct sk_buff *skb,
skb_flow_get_icmp_tci(skb, key_icmp, data, thoff, hlen);
}
+static void __skb_flow_dissect_l2tpv3(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, const void *data,
+ int nhoff, int hlen)
+{
+ struct flow_dissector_key_l2tpv3 *key_l2tpv3;
+ struct {
+ __be32 session_id;
+ } *hdr, _hdr;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_L2TPV3))
+ return;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
+ if (!hdr)
+ return;
+
+ key_l2tpv3 = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_L2TPV3,
+ target_container);
+
+ key_l2tpv3->session_id = hdr->session_id;
+}
+
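The dissector pulls only the first 32-bit word of the payload because, for L2TPv3 over IP (handled via IPPROTO_L2TP further down), the data message leads with the session ID (a sketch of the RFC 3931 layout):

	/* L2TPv3-over-IP data message:
	 *	__be32 session_id;	<- copied into the flow key
	 *	(optional cookie and L2-specific sublayer follow)
	 */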
void skb_flow_dissect_meta(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container)
@@ -865,8 +890,8 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
}
}
-bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
- __be16 proto, int nhoff, int hlen, unsigned int flags)
+u32 bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
+ __be16 proto, int nhoff, int hlen, unsigned int flags)
{
struct bpf_flow_keys *flow_keys = ctx->flow_keys;
u32 result;
@@ -891,7 +916,12 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
flow_keys->nhoff, hlen);
- return result == BPF_OK;
+ return result;
+}
+
+static bool is_pppoe_ses_hdr_valid(const struct pppoe_hdr *hdr)
+{
+ return hdr->ver == 1 && hdr->type == 1 && hdr->code == 0;
}
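The accepted values follow RFC 2516: session-stage PPPoE frames carry VER = 1, TYPE = 1 and CODE = 0x00, so discovery traffic and malformed frames now terminate dissection as bad instead of being parsed further.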
/**
@@ -1002,6 +1032,7 @@ bool __skb_flow_dissect(const struct net *net,
};
__be16 n_proto = proto;
struct bpf_prog *prog;
+ u32 result;
if (skb) {
ctx.skb = skb;
@@ -1013,13 +1044,16 @@ bool __skb_flow_dissect(const struct net *net,
}
prog = READ_ONCE(run_array->items[0].prog);
- ret = bpf_flow_dissect(prog, &ctx, n_proto, nhoff,
- hlen, flags);
+ result = bpf_flow_dissect(prog, &ctx, n_proto, nhoff,
+ hlen, flags);
+ if (result == BPF_FLOW_DISSECTOR_CONTINUE)
+ goto dissect_continue;
__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
target_container);
rcu_read_unlock();
- return ret;
+ return result == BPF_OK;
}
+dissect_continue:
rcu_read_unlock();
}
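A flow-dissector program can now decline a packet rather than fail it; a hypothetical sketch (the predicate is illustrative):

	SEC("flow_dissector")
	int dissect(struct __sk_buff *skb)
	{
		if (!interesting(skb))	/* illustrative predicate */
			return BPF_FLOW_DISSECTOR_CONTINUE; /* fall back to the C dissector */
		/* ... fill skb->flow_keys ... */
		return BPF_OK;
	}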
@@ -1031,7 +1065,17 @@ bool __skb_flow_dissect(const struct net *net,
key_eth_addrs = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_ETH_ADDRS,
target_container);
- memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
+ memcpy(key_eth_addrs, eth, sizeof(*key_eth_addrs));
+ }
+
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_NUM_OF_VLANS)) {
+ struct flow_dissector_key_num_of_vlans *key_num_of_vlans;
+
+ key_num_of_vlans = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_NUM_OF_VLANS,
+ target_container);
+ key_num_of_vlans->num_of_vlans = 0;
}
proto_again:
@@ -1157,6 +1201,16 @@ proto_again:
nhoff += sizeof(*vlan);
}
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_NUM_OF_VLANS) &&
+ !(key_control->flags & FLOW_DIS_ENCAPSULATION)) {
+ struct flow_dissector_key_num_of_vlans *key_nvs;
+
+ key_nvs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_NUM_OF_VLANS,
+ target_container);
+ key_nvs->num_of_vlans++;
+ }
+
if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX) {
dissector_vlan = FLOW_DISSECTOR_KEY_VLAN;
} else if (dissector_vlan == FLOW_DISSECTOR_KEY_VLAN) {
@@ -1182,6 +1236,7 @@ proto_again:
VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
}
key_vlan->vlan_tpid = saved_vlan_tpid;
+ key_vlan->vlan_eth_type = proto;
}
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
@@ -1192,26 +1247,60 @@ proto_again:
struct pppoe_hdr hdr;
__be16 proto;
} *hdr, _hdr;
+ u16 ppp_proto;
+
hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
if (!hdr) {
fdret = FLOW_DISSECT_RET_OUT_BAD;
break;
}
- nhoff += PPPOE_SES_HLEN;
- switch (hdr->proto) {
- case htons(PPP_IP):
+ if (!is_pppoe_ses_hdr_valid(&hdr->hdr)) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
+
+ /* The least significant bit of the most significant octet
+ * indicates whether the protocol field was compressed.
+ */
+ ppp_proto = ntohs(hdr->proto);
+ if (ppp_proto & 0x0100) {
+ ppp_proto = ppp_proto >> 8;
+ nhoff += PPPOE_SES_HLEN - 1;
+ } else {
+ nhoff += PPPOE_SES_HLEN;
+ }
+
+ if (ppp_proto == PPP_IP) {
proto = htons(ETH_P_IP);
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
- break;
- case htons(PPP_IPV6):
+ } else if (ppp_proto == PPP_IPV6) {
proto = htons(ETH_P_IPV6);
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
- break;
- default:
+ } else if (ppp_proto == PPP_MPLS_UC) {
+ proto = htons(ETH_P_MPLS_UC);
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ } else if (ppp_proto == PPP_MPLS_MC) {
+ proto = htons(ETH_P_MPLS_MC);
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ } else if (ppp_proto_is_valid(ppp_proto)) {
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ } else {
fdret = FLOW_DISSECT_RET_OUT_BAD;
break;
}
+
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_PPPOE)) {
+ struct flow_dissector_key_pppoe *key_pppoe;
+
+ key_pppoe = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_PPPOE,
+ target_container);
+ key_pppoe->session_id = hdr->hdr.sid;
+ key_pppoe->ppp_proto = htons(ppp_proto);
+ key_pppoe->type = htons(ETH_P_PPP_SES);
+ }
break;
}
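On the wire, the compression check distinguishes these two encodings (PPP protocol numbers are odd, so the first octet of a compressed field always has its low bit set):

	/* uncompressed IPv4:   0x00 0x21      -> ntohs() == 0x0021, bit 0x0100 clear
	 * PFC-compressed IPv4: 0x21 <payload> -> ntohs() == 0x21xx, bit 0x0100 set;
	 *	only the high octet is the protocol, and nhoff advances
	 *	one byte less (PPPOE_SES_HLEN - 1).
	 */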
case htons(ETH_P_TIPC): {
@@ -1282,6 +1371,23 @@ proto_again:
break;
}
+ case htons(ETH_P_PRP):
+ case htons(ETH_P_HSR): {
+ struct hsr_tag *hdr, _hdr;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen,
+ &_hdr);
+ if (!hdr) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
+
+ proto = hdr->encap_proto;
+ nhoff += HSR_HLEN;
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ break;
+ }
+
default:
fdret = FLOW_DISSECT_RET_OUT_BAD;
break;
@@ -1419,6 +1525,10 @@ ip_proto_again:
__skb_flow_dissect_icmp(skb, flow_dissector, target_container,
data, nhoff, hlen);
break;
+ case IPPROTO_L2TP:
+ __skb_flow_dissect_l2tpv3(skb, flow_dissector, target_container,
+ data, nhoff, hlen);
+ break;
default:
break;
@@ -1533,9 +1643,8 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys)
switch (keys->control.addr_type) {
case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
- addr_diff = (__force u32)keys->addrs.v4addrs.dst -
- (__force u32)keys->addrs.v4addrs.src;
- if (addr_diff < 0)
+ if ((__force u32)keys->addrs.v4addrs.dst <
+ (__force u32)keys->addrs.v4addrs.src)
swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);
if ((__force u16)keys->ports.dst <
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 73f68d4625f3..abe423fd5736 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -125,6 +125,13 @@ void flow_rule_match_ports(const struct flow_rule *rule,
}
EXPORT_SYMBOL(flow_rule_match_ports);
+void flow_rule_match_ports_range(const struct flow_rule *rule,
+ struct flow_match_ports_range *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_PORTS_RANGE, out);
+}
+EXPORT_SYMBOL(flow_rule_match_ports_range);
+
void flow_rule_match_tcp(const struct flow_rule *rule,
struct flow_match_tcp *out)
{
@@ -223,6 +230,20 @@ void flow_rule_match_ct(const struct flow_rule *rule,
}
EXPORT_SYMBOL(flow_rule_match_ct);
+void flow_rule_match_pppoe(const struct flow_rule *rule,
+ struct flow_match_pppoe *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_PPPOE, out);
+}
+EXPORT_SYMBOL(flow_rule_match_pppoe);
+
+void flow_rule_match_l2tpv3(const struct flow_rule *rule,
+ struct flow_match_l2tpv3 *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_L2TPV3, out);
+}
+EXPORT_SYMBOL(flow_rule_match_l2tpv3);
+
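A hypothetical consumer of the new match helpers, e.g. in a driver's flower offload callback (a sketch; 'rule' comes from a struct flow_cls_offload):

	struct flow_match_pppoe match;

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PPPOE)) {
		flow_rule_match_pppoe(rule, &match);
		/* program hardware from match.key->session_id and
		 * match.key->ppp_proto, honouring match.mask
		 */
	}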
struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb,
void *cb_ident, void *cb_priv,
void (*release)(void *cb_priv))
@@ -595,3 +616,9 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
return (bo && list_empty(&bo->cb_list)) ? -EOPNOTSUPP : count;
}
EXPORT_SYMBOL(flow_indr_dev_setup_offload);
+
+bool flow_indr_dev_exists(void)
+{
+ return !list_empty(&flow_block_indr_dev_list);
+}
+EXPORT_SYMBOL(flow_indr_dev_exists);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index a10335b4ba2d..c8d137ef5980 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -345,7 +345,7 @@ static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats,
for_each_possible_cpu(i) {
const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i);
- qstats->qlen += qcpu->backlog;
+ qstats->qlen += qcpu->qlen;
qstats->backlog += qcpu->backlog;
qstats->drops += qcpu->drops;
qstats->requeues += qcpu->requeues;
diff --git a/net/core/gro.c b/net/core/gro.c
index a11b286d1495..bc9451743307 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -93,6 +93,31 @@ void dev_remove_offload(struct packet_offload *po)
EXPORT_SYMBOL(dev_remove_offload);
/**
+ * skb_eth_gso_segment - segmentation handler for ethernet protocols.
+ * @skb: buffer to segment
+ * @features: features for the output path (see dev->features)
+ * @type: Ethernet Protocol ID
+ */
+struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb,
+ netdev_features_t features, __be16 type)
+{
+ struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+ struct packet_offload *ptype;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, &offload_base, list) {
+ if (ptype->type == type && ptype->callbacks.gso_segment) {
+ segs = ptype->callbacks.gso_segment(skb, features);
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return segs;
+}
+EXPORT_SYMBOL(skb_eth_gso_segment);
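A hypothetical caller (a sketch): an encapsulation path segmenting an inner payload by its Ethernet protocol ID:

	struct sk_buff *segs;

	segs = skb_eth_gso_segment(skb, features, htons(ETH_P_IP));
	if (IS_ERR(segs))
		return PTR_ERR(segs);	/* -EPROTONOSUPPORT if no handler matched */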
+
+/**
* skb_mac_gso_segment - mac layer segmentation handler.
* @skb: buffer to segment
* @features: features for the output path (see dev->features)
@@ -135,6 +160,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
unsigned int gro_max_size;
unsigned int new_truesize;
struct sk_buff *lp;
+ int segs;
/* pairs with WRITE_ONCE() in netif_set_gro_max_size() */
gro_max_size = READ_ONCE(p->dev->gro_max_size);
@@ -142,6 +168,15 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush))
return -E2BIG;
+ if (unlikely(p->len + len >= GRO_LEGACY_MAX_SIZE)) {
+ if (p->protocol != htons(ETH_P_IPV6) ||
+ skb_headroom(p) < sizeof(struct hop_jumbo_hdr) ||
+ ipv6_hdr(p)->nexthdr != IPPROTO_TCP ||
+ p->encapsulation)
+ return -E2BIG;
+ }
+
+ segs = NAPI_GRO_CB(skb)->count;
lp = NAPI_GRO_CB(p)->last;
pinfo = skb_shinfo(lp);
@@ -232,7 +267,7 @@ merge:
lp = p;
done:
- NAPI_GRO_CB(p)->count++;
+ NAPI_GRO_CB(p)->count += segs;
p->data_len += len;
p->truesize += delta_truesize;
p->len += len;
@@ -459,29 +494,29 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
skb_set_network_header(skb, skb_gro_offset(skb));
skb_reset_mac_len(skb);
- NAPI_GRO_CB(skb)->same_flow = 0;
- NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
- NAPI_GRO_CB(skb)->free = 0;
- NAPI_GRO_CB(skb)->encap_mark = 0;
- NAPI_GRO_CB(skb)->recursion_counter = 0;
- NAPI_GRO_CB(skb)->is_fou = 0;
+ BUILD_BUG_ON(sizeof_field(struct napi_gro_cb, zeroed) != sizeof(u32));
+ BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed),
+ sizeof(u32))); /* Avoid slow unaligned access */
+ *(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0;
+ NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb);
NAPI_GRO_CB(skb)->is_atomic = 1;
- NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
+ NAPI_GRO_CB(skb)->count = 1;
+ if (unlikely(skb_is_gso(skb))) {
+ NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs;
+ /* Only support TCP at the moment. */
+ if (!skb_is_gso_tcp(skb))
+ NAPI_GRO_CB(skb)->flush = 1;
+ }
/* Setup for GRO checksum validation */
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
NAPI_GRO_CB(skb)->csum = skb->csum;
NAPI_GRO_CB(skb)->csum_valid = 1;
- NAPI_GRO_CB(skb)->csum_cnt = 0;
break;
case CHECKSUM_UNNECESSARY:
NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
- NAPI_GRO_CB(skb)->csum_valid = 0;
break;
- default:
- NAPI_GRO_CB(skb)->csum_cnt = 0;
- NAPI_GRO_CB(skb)->csum_valid = 0;
}
pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
@@ -519,10 +554,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
else
gro_list->count++;
- NAPI_GRO_CB(skb)->count = 1;
NAPI_GRO_CB(skb)->age = jiffies;
NAPI_GRO_CB(skb)->last = skb;
- skb_shinfo(skb)->gso_size = skb_gro_len(skb);
+ if (!skb_is_gso(skb))
+ skb_shinfo(skb)->gso_size = skb_gro_len(skb);
list_add(&skb->list, &gro_list->list);
ret = GRO_HELD;
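The BUILD_BUG_ON pair in the previous hunk guards the single-store reset: it assumes napi_gro_cb collects the per-packet flags (same_flow, free, encap_mark and friends) into a u32-sized, u32-aligned region named 'zeroed' (e.g. via struct_group()), so one aligned 32-bit write replaces the six per-field clears that were removed.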
@@ -634,7 +669,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->encapsulation = 0;
skb_shinfo(skb)->gso_type = 0;
- skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
+ skb_shinfo(skb)->gso_size = 0;
if (unlikely(skb->slow_gro)) {
skb_orphan(skb);
skb_ext_reset(skb);
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index 6eb2e5ec2c50..ed5ec5de47f6 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -26,9 +26,9 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
cell = this_cpu_ptr(gcells->cells);
- if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) {
+ if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(netdev_max_backlog)) {
drop:
- atomic_long_inc(&dev->rx_dropped);
+ dev_core_stats_rx_dropped_inc(dev);
kfree_skb(skb);
res = NET_RX_DROP;
goto unlock;
@@ -81,16 +81,30 @@ int gro_cells_init(struct gro_cells *gcells, struct net_device *dev)
set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state);
- netif_napi_add(dev, &cell->napi, gro_cell_poll,
- NAPI_POLL_WEIGHT);
+ netif_napi_add(dev, &cell->napi, gro_cell_poll);
napi_enable(&cell->napi);
}
return 0;
}
EXPORT_SYMBOL(gro_cells_init);
+struct percpu_free_defer {
+ struct rcu_head rcu;
+ void __percpu *ptr;
+};
+
+static void percpu_free_defer_callback(struct rcu_head *head)
+{
+ struct percpu_free_defer *defer;
+
+ defer = container_of(head, struct percpu_free_defer, rcu);
+ free_percpu(defer->ptr);
+ kfree(defer);
+}
+
void gro_cells_destroy(struct gro_cells *gcells)
{
+ struct percpu_free_defer *defer;
int i;
if (!gcells->cells)
@@ -102,12 +116,23 @@ void gro_cells_destroy(struct gro_cells *gcells)
__netif_napi_del(&cell->napi);
__skb_queue_purge(&cell->napi_skbs);
}
- /* This barrier is needed because netpoll could access dev->napi_list
- * under rcu protection.
+ /* We need to observe an rcu grace period before freeing ->cells,
+ * because netpoll could access dev->napi_list under rcu protection.
+ * Try hard using call_rcu() instead of synchronize_rcu(),
+ * because we might be called from cleanup_net(), and we
+ * definitely do not want to block this critical task.
*/
- synchronize_net();
-
- free_percpu(gcells->cells);
+ defer = kmalloc(sizeof(*defer), GFP_KERNEL | __GFP_NOWARN);
+ if (likely(defer)) {
+ defer->ptr = gcells->cells;
+ call_rcu(&defer->rcu, percpu_free_defer_callback);
+ } else {
+ /* We do not hold RTNL at this point, synchronize_net()
+ * would not be able to expedite this sync.
+ */
+ synchronize_rcu_expedited();
+ free_percpu(gcells->cells);
+ }
gcells->cells = NULL;
}
EXPORT_SYMBOL(gro_cells_destroy);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index b0f5344d1185..aa6cb1f90966 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -18,6 +18,7 @@
#include <linux/bitops.h>
#include <linux/types.h>
+#include "dev.h"
enum lw_bits {
LW_URGENT = 0,
@@ -109,7 +110,7 @@ static void linkwatch_add_event(struct net_device *dev)
spin_lock_irqsave(&lweventlist_lock, flags);
if (list_empty(&dev->link_watch_list)) {
list_add_tail(&dev->link_watch_list, &lweventlist);
- dev_hold_track(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC);
+ netdev_hold(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC);
}
spin_unlock_irqrestore(&lweventlist_lock, flags);
}
@@ -166,10 +167,10 @@ static void linkwatch_do_dev(struct net_device *dev)
netdev_state_change(dev);
}
- /* Note: our callers are responsible for
- * calling netdev_tracker_free().
+ /* Note: our callers are responsible for calling netdev_tracker_free().
+ * This is the reason we use __dev_put() instead of dev_put().
*/
- dev_put(dev);
+ __dev_put(dev);
}
static void __linkwatch_run_queue(int urgent_only)
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 349480ef68a5..8b6b5e72b217 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -159,10 +159,8 @@ static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
return dst->lwtstate->orig_output(net, sk, skb);
}
-static int xmit_check_hhlen(struct sk_buff *skb)
+static int xmit_check_hhlen(struct sk_buff *skb, int hh_len)
{
- int hh_len = skb_dst(skb)->dev->hard_header_len;
-
if (skb_headroom(skb) < hh_len) {
int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
@@ -274,6 +272,7 @@ static int bpf_xmit(struct sk_buff *skb)
bpf = bpf_lwt_lwtunnel(dst->lwtstate);
if (bpf->xmit.prog) {
+ int hh_len = dst->dev->hard_header_len;
__be16 proto = skb->protocol;
int ret;
@@ -291,7 +290,7 @@ static int bpf_xmit(struct sk_buff *skb)
/* If the header was expanded, headroom might be too
* small for L2 header to come, expand as needed.
*/
- ret = xmit_check_hhlen(skb);
+ ret = xmit_check_hhlen(skb, hh_len);
if (unlikely(ret))
return ret;
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 9ccd64e8a666..6fac2f0ef074 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -50,6 +50,7 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
return "IOAM6";
case LWTUNNEL_ENCAP_IP6:
case LWTUNNEL_ENCAP_IP:
+ case LWTUNNEL_ENCAP_XFRM:
case LWTUNNEL_ENCAP_NONE:
case __LWTUNNEL_ENCAP_MAX:
/* should not have got here */
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 213cb7b26b7a..a77a85e357e0 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -111,7 +111,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
unsigned long neigh_rand_reach_time(unsigned long base)
{
- return base ? (prandom_u32() % base) + (base >> 1) : 0;
+ return base ? prandom_u32_max(base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
@@ -307,11 +307,35 @@ static int neigh_del_timer(struct neighbour *n)
return 0;
}
-static void pneigh_queue_purge(struct sk_buff_head *list)
+static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net)
{
+ struct sk_buff_head tmp;
+ unsigned long flags;
struct sk_buff *skb;
- while ((skb = skb_dequeue(list)) != NULL) {
+ skb_queue_head_init(&tmp);
+ spin_lock_irqsave(&list->lock, flags);
+ skb = skb_peek(list);
+ while (skb != NULL) {
+ struct sk_buff *skb_next = skb_peek_next(skb, list);
+ struct net_device *dev = skb->dev;
+
+ if (net == NULL || net_eq(dev_net(dev), net)) {
+ struct in_device *in_dev;
+
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(dev);
+ if (in_dev)
+ in_dev->arp_parms->qlen--;
+ rcu_read_unlock();
+ __skb_unlink(skb, list);
+ __skb_queue_tail(&tmp, skb);
+ }
+ skb = skb_next;
+ }
+ spin_unlock_irqrestore(&list->lock, flags);
+
+ while ((skb = __skb_dequeue(&tmp))) {
dev_put(skb->dev);
kfree_skb(skb);
}
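The rewritten purge is deliberately two-phase: matching skbs are unlinked into a private list while list->lock is held with IRQs off, and dev_put()/kfree_skb() run only after the lock is dropped, keeping the freeing work out of the spinlock's critical section.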
@@ -385,9 +409,9 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
write_lock_bh(&tbl->lock);
neigh_flush_dev(tbl, dev, skip_perm);
pneigh_ifdown_and_unlock(tbl, dev);
-
- del_timer_sync(&tbl->proxy_timer);
- pneigh_queue_purge(&tbl->proxy_queue);
+ pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL);
+ if (skb_queue_empty_lockless(&tbl->proxy_queue))
+ del_timer_sync(&tbl->proxy_timer);
return 0;
}
@@ -624,7 +648,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
memcpy(n->primary_key, pkey, key_len);
n->dev = dev;
- dev_hold_track(dev, &n->dev_tracker, GFP_ATOMIC);
+ netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC);
/* Protocol specific setup. */
if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
@@ -770,10 +794,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
write_pnet(&n->net, net);
memcpy(n->key, pkey, key_len);
n->dev = dev;
- dev_hold_track(dev, &n->dev_tracker, GFP_KERNEL);
+ netdev_hold(dev, &n->dev_tracker, GFP_KERNEL);
if (tbl->pconstructor && tbl->pconstructor(n)) {
- dev_put_track(dev, &n->dev_tracker);
+ netdev_put(dev, &n->dev_tracker);
kfree(n);
n = NULL;
goto out;
@@ -805,7 +829,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
write_unlock_bh(&tbl->lock);
if (tbl->pdestructor)
tbl->pdestructor(n);
- dev_put_track(n->dev, &n->dev_tracker);
+ netdev_put(n->dev, &n->dev_tracker);
kfree(n);
return 0;
}
@@ -838,7 +862,7 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
n->next = NULL;
if (tbl->pdestructor)
tbl->pdestructor(n);
- dev_put_track(n->dev, &n->dev_tracker);
+ netdev_put(n->dev, &n->dev_tracker);
kfree(n);
}
return -ENOENT;
@@ -879,7 +903,7 @@ void neigh_destroy(struct neighbour *neigh)
if (dev->netdev_ops->ndo_neigh_destroy)
dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
- dev_put_track(dev, &neigh->dev_tracker);
+ netdev_put(dev, &neigh->dev_tracker);
neigh_parms_put(neigh->parms);
neigh_dbg(2, "neigh %p is destroyed\n", neigh);
@@ -1133,7 +1157,8 @@ out:
neigh_release(neigh);
}
-int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
+ const bool immediate_ok)
{
int rc;
bool immediate_probe = false;
@@ -1154,18 +1179,23 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
atomic_set(&neigh->probes,
NEIGH_VAR(neigh->parms, UCAST_PROBES));
neigh_del_timer(neigh);
- neigh->nud_state = NUD_INCOMPLETE;
+ neigh->nud_state = NUD_INCOMPLETE;
neigh->updated = now;
- next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
- HZ/100);
+ if (!immediate_ok) {
+ next = now + 1;
+ } else {
+ immediate_probe = true;
+ next = now + max(NEIGH_VAR(neigh->parms,
+ RETRANS_TIME),
+ HZ / 100);
+ }
neigh_add_timer(neigh, next);
- immediate_probe = true;
} else {
neigh->nud_state = NUD_FAILED;
neigh->updated = jiffies;
write_unlock_bh(&neigh->lock);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
return 1;
}
} else if (neigh->nud_state & NUD_STALE) {
@@ -1187,7 +1217,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
if (!buff)
break;
neigh->arp_queue_len_bytes -= buff->truesize;
- kfree_skb(buff);
+ kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
}
skb_dst_force(skb);
@@ -1209,7 +1239,7 @@ out_dead:
if (neigh->nud_state & NUD_STALE)
goto out_unlock_bh;
write_unlock_bh(&neigh->lock);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
trace_neigh_event_send_dead(neigh, 1);
return 1;
}
@@ -1571,9 +1601,9 @@ static void neigh_managed_work(struct work_struct *work)
write_lock_bh(&tbl->lock);
list_for_each_entry(neigh, &tbl->managed_list, managed_list)
- neigh_event_send(neigh, NULL);
+ neigh_event_send_probe(neigh, NULL, false);
queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
- NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME));
+ NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS));
write_unlock_bh(&tbl->lock);
}
@@ -1591,8 +1621,15 @@ static void neigh_proxy_process(struct timer_list *t)
if (tdif <= 0) {
struct net_device *dev = skb->dev;
+ struct in_device *in_dev;
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(dev);
+ if (in_dev)
+ in_dev->arp_parms->qlen--;
+ rcu_read_unlock();
__skb_unlink(skb, &tbl->proxy_queue);
+
if (tbl->proxy_redo && netif_running(dev)) {
rcu_read_lock();
tbl->proxy_redo(skb);
@@ -1617,7 +1654,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
unsigned long sched_next = jiffies +
prandom_u32_max(NEIGH_VAR(p, PROXY_DELAY));
- if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
+ if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) {
kfree_skb(skb);
return;
}
@@ -1633,6 +1670,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
skb_dst_drop(skb);
dev_hold(skb->dev);
__skb_queue_tail(&tbl->proxy_queue, skb);
+ p->qlen++;
mod_timer(&tbl->proxy_timer, sched_next);
spin_unlock(&tbl->proxy_queue.lock);
}
@@ -1665,13 +1703,14 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
refcount_set(&p->refcnt, 1);
p->reachable_time =
neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
- dev_hold_track(dev, &p->dev_tracker, GFP_KERNEL);
+ p->qlen = 0;
+ netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
p->dev = dev;
write_pnet(&p->net, net);
p->sysctl_table = NULL;
if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
- dev_put_track(dev, &p->dev_tracker);
+ netdev_put(dev, &p->dev_tracker);
kfree(p);
return NULL;
}
@@ -1702,7 +1741,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
list_del(&parms->list);
parms->dead = 1;
write_unlock_bh(&tbl->lock);
- dev_put_track(parms->dev, &parms->dev_tracker);
+ netdev_put(parms->dev, &parms->dev_tracker);
call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);
@@ -1730,6 +1769,7 @@ void neigh_table_init(int index, struct neigh_table *tbl)
refcount_set(&tbl->parms.refcnt, 1);
tbl->parms.reachable_time =
neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
+ tbl->parms.qlen = 0;
tbl->stats = alloc_percpu(struct neigh_statistics);
if (!tbl->stats)
@@ -1781,7 +1821,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl)
cancel_delayed_work_sync(&tbl->managed_work);
cancel_delayed_work_sync(&tbl->gc_work);
del_timer_sync(&tbl->proxy_timer);
- pneigh_queue_purge(&tbl->proxy_queue);
+ pneigh_queue_purge(&tbl->proxy_queue, NULL);
neigh_ifdown(tbl, NULL);
if (atomic_read(&tbl->entries))
pr_crit("neighbour leakage\n");
@@ -1813,9 +1853,6 @@ static struct neigh_table *neigh_find_table(int family)
case AF_INET6:
tbl = neigh_tables[NEIGH_ND_TABLE];
break;
- case AF_DECnet:
- tbl = neigh_tables[NEIGH_DN_TABLE];
- break;
}
return tbl;
@@ -2094,7 +2131,9 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
nla_put_msecs(skb, NDTPA_PROXY_DELAY,
NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
nla_put_msecs(skb, NDTPA_LOCKTIME,
- NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
+ NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) ||
+ nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS,
+ NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD))
goto nla_put_failure;
return nla_nest_end(skb, nest);
@@ -2249,6 +2288,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
[NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
[NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
[NDTPA_LOCKTIME] = { .type = NLA_U64 },
+ [NDTPA_INTERVAL_PROBE_TIME_MS] = { .type = NLA_U64, .min = 1 },
};
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2367,6 +2407,10 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
nla_get_msecs(tbp[i]));
call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
break;
+ case NDTPA_INTERVAL_PROBE_TIME_MS:
+ NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS,
+ nla_get_msecs(tbp[i]));
+ break;
case NDTPA_RETRANS_TIME:
NEIGH_VAR_SET(p, RETRANS_TIME,
nla_get_msecs(tbp[i]));
@@ -3364,7 +3408,7 @@ EXPORT_SYMBOL(neigh_seq_stop);
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
+ struct neigh_table *tbl = pde_data(file_inode(seq->file));
int cpu;
if (*pos == 0)
@@ -3381,7 +3425,7 @@ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
+ struct neigh_table *tbl = pde_data(file_inode(seq->file));
int cpu;
for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
@@ -3401,7 +3445,7 @@ static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
- struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
+ struct neigh_table *tbl = pde_data(file_inode(seq->file));
struct neigh_statistics *st = v;
if (v == SEQ_START_TOKEN) {
@@ -3556,6 +3600,22 @@ static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
return ret;
}
+static int neigh_proc_dointvec_ms_jiffies_positive(struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table tmp = *ctl;
+ int min = msecs_to_jiffies(1);
+ int ret;
+
+ tmp.extra1 = &min;
+ tmp.extra2 = NULL;
+
+ ret = proc_dointvec_ms_jiffies_minmax(&tmp, write, buffer, lenp, ppos);
+ neigh_proc_update(ctl, write);
+ return ret;
+}
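The handler clamps writes so interval_probe_time_ms can never round down to zero jiffies: the minimum accepted value is msecs_to_jiffies(1), i.e. one jiffy (1000/HZ ms), which keeps the periodic work in neigh_managed_work() from rearming at a zero interval.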
+
int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
@@ -3652,6 +3712,9 @@ static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
+#define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \
+ NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive)
+
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
@@ -3670,6 +3733,8 @@ static struct neigh_sysctl_table {
NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
+ NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS,
+ "interval_probe_time_ms"),
NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
@@ -3722,7 +3787,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
char *p_name;
- t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
+ t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
if (!t)
goto err;
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index d8b9dbabd4a4..1ec23bf8b05c 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -4,6 +4,8 @@
#include <linux/seq_file.h>
#include <net/wext.h>
+#include "dev.h"
+
#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
#define get_bucket(x) ((x) >> BUCKET_SPACE)
@@ -190,12 +192,23 @@ static const struct seq_operations softnet_seq_ops = {
.show = softnet_seq_show,
};
-static void *ptype_get_idx(loff_t pos)
+static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
{
+ struct list_head *ptype_list = NULL;
struct packet_type *pt = NULL;
+ struct net_device *dev;
loff_t i = 0;
int t;
+ for_each_netdev_rcu(seq_file_net(seq), dev) {
+ ptype_list = &dev->ptype_all;
+ list_for_each_entry_rcu(pt, ptype_list, list) {
+ if (i == pos)
+ return pt;
+ ++i;
+ }
+ }
+
list_for_each_entry_rcu(pt, &ptype_all, list) {
if (i == pos)
return pt;
@@ -216,22 +229,40 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
rcu_read_lock();
- return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
+ return *pos ? ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net_device *dev;
struct packet_type *pt;
struct list_head *nxt;
int hash;
++*pos;
if (v == SEQ_START_TOKEN)
- return ptype_get_idx(0);
+ return ptype_get_idx(seq, 0);
pt = v;
nxt = pt->list.next;
+ if (pt->dev) {
+ if (nxt != &pt->dev->ptype_all)
+ goto found;
+
+ dev = pt->dev;
+ for_each_netdev_continue_rcu(seq_file_net(seq), dev) {
+ if (!list_empty(&dev->ptype_all)) {
+ nxt = dev->ptype_all.next;
+ goto found;
+ }
+ }
+
+ nxt = ptype_all.next;
+ goto ptype_all;
+ }
+
if (pt->type == htons(ETH_P_ALL)) {
+ptype_all:
if (nxt != &ptype_all)
goto found;
hash = 0;
@@ -260,7 +291,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
if (v == SEQ_START_TOKEN)
seq_puts(seq, "Type Device Function\n");
- else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
+ else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) &&
+ (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) {
if (pt->type == htons(ETH_P_ALL))
seq_puts(seq, "ALL ");
else
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 53ea262ecafd..8409d41405df 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -24,6 +24,7 @@
#include <linux/of_net.h>
#include <linux/cpu.h>
+#include "dev.h"
#include "net-sysfs.h"
#ifdef CONFIG_SYSFS
@@ -32,6 +33,7 @@ static const char fmt_dec[] = "%d\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";
+/* Caller holds RTNL or dev_base_lock */
static inline int dev_isalive(const struct net_device *dev)
{
return dev->reg_state <= NETREG_REGISTERED;
@@ -57,7 +59,7 @@ static ssize_t netdev_show(const struct device *dev,
#define NETDEVICE_SHOW(field, format_string) \
static ssize_t format_##field(const struct net_device *dev, char *buf) \
{ \
- return sprintf(buf, format_string, dev->field); \
+ return sysfs_emit(buf, format_string, dev->field); \
} \
static ssize_t field##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
@@ -116,13 +118,13 @@ static ssize_t iflink_show(struct device *dev, struct device_attribute *attr,
{
struct net_device *ndev = to_net_dev(dev);
- return sprintf(buf, fmt_dec, dev_get_iflink(ndev));
+ return sysfs_emit(buf, fmt_dec, dev_get_iflink(ndev));
}
static DEVICE_ATTR_RO(iflink);
static ssize_t format_name_assign_type(const struct net_device *dev, char *buf)
{
- return sprintf(buf, fmt_dec, dev->name_assign_type);
+ return sysfs_emit(buf, fmt_dec, dev->name_assign_type);
}
static ssize_t name_assign_type_show(struct device *dev,
@@ -192,7 +194,7 @@ static ssize_t carrier_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
if (netif_running(netdev))
- return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev));
+ return sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev));
return -EINVAL;
}
@@ -213,11 +215,11 @@ static ssize_t speed_show(struct device *dev,
if (!rtnl_trylock())
return restart_syscall();
- if (netif_running(netdev)) {
+ if (netif_running(netdev) && netif_device_present(netdev)) {
struct ethtool_link_ksettings cmd;
if (!__ethtool_get_link_ksettings(netdev, &cmd))
- ret = sprintf(buf, fmt_dec, cmd.base.speed);
+ ret = sysfs_emit(buf, fmt_dec, cmd.base.speed);
}
rtnl_unlock();
return ret;
@@ -256,7 +258,7 @@ static ssize_t duplex_show(struct device *dev,
duplex = "unknown";
break;
}
- ret = sprintf(buf, "%s\n", duplex);
+ ret = sysfs_emit(buf, "%s\n", duplex);
}
}
rtnl_unlock();
@@ -270,7 +272,7 @@ static ssize_t testing_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
if (netif_running(netdev))
- return sprintf(buf, fmt_dec, !!netif_testing(netdev));
+ return sysfs_emit(buf, fmt_dec, !!netif_testing(netdev));
return -EINVAL;
}
@@ -282,7 +284,7 @@ static ssize_t dormant_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
if (netif_running(netdev))
- return sprintf(buf, fmt_dec, !!netif_dormant(netdev));
+ return sysfs_emit(buf, fmt_dec, !!netif_dormant(netdev));
return -EINVAL;
}
@@ -313,7 +315,7 @@ static ssize_t operstate_show(struct device *dev,
if (operstate >= ARRAY_SIZE(operstates))
return -EINVAL; /* should not happen */
- return sprintf(buf, "%s\n", operstates[operstate]);
+ return sysfs_emit(buf, "%s\n", operstates[operstate]);
}
static DEVICE_ATTR_RO(operstate);
@@ -323,9 +325,9 @@ static ssize_t carrier_changes_show(struct device *dev,
{
struct net_device *netdev = to_net_dev(dev);
- return sprintf(buf, fmt_dec,
- atomic_read(&netdev->carrier_up_count) +
- atomic_read(&netdev->carrier_down_count));
+ return sysfs_emit(buf, fmt_dec,
+ atomic_read(&netdev->carrier_up_count) +
+ atomic_read(&netdev->carrier_down_count));
}
static DEVICE_ATTR_RO(carrier_changes);
@@ -335,7 +337,7 @@ static ssize_t carrier_up_count_show(struct device *dev,
{
struct net_device *netdev = to_net_dev(dev);
- return sprintf(buf, fmt_dec, atomic_read(&netdev->carrier_up_count));
+ return sysfs_emit(buf, fmt_dec, atomic_read(&netdev->carrier_up_count));
}
static DEVICE_ATTR_RO(carrier_up_count);
@@ -345,7 +347,7 @@ static ssize_t carrier_down_count_show(struct device *dev,
{
struct net_device *netdev = to_net_dev(dev);
- return sprintf(buf, fmt_dec, atomic_read(&netdev->carrier_down_count));
+ return sysfs_emit(buf, fmt_dec, atomic_read(&netdev->carrier_down_count));
}
static DEVICE_ATTR_RO(carrier_down_count);
@@ -460,7 +462,7 @@ static ssize_t ifalias_show(struct device *dev,
ret = dev_get_alias(netdev, tmp, sizeof(tmp));
if (ret > 0)
- ret = sprintf(buf, "%s\n", tmp);
+ ret = sysfs_emit(buf, "%s\n", tmp);
return ret;
}
static DEVICE_ATTR_RW(ifalias);
@@ -512,7 +514,7 @@ static ssize_t phys_port_id_show(struct device *dev,
ret = dev_get_phys_port_id(netdev, &ppid);
if (!ret)
- ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id);
+ ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
}
rtnl_unlock();
@@ -541,7 +543,7 @@ static ssize_t phys_port_name_show(struct device *dev,
ret = dev_get_phys_port_name(netdev, name, sizeof(name));
if (!ret)
- ret = sprintf(buf, "%s\n", name);
+ ret = sysfs_emit(buf, "%s\n", name);
}
rtnl_unlock();
@@ -571,7 +573,7 @@ static ssize_t phys_switch_id_show(struct device *dev,
ret = dev_get_port_parent_id(netdev, &ppid, false);
if (!ret)
- ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id);
+ ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
}
rtnl_unlock();
@@ -589,7 +591,7 @@ static ssize_t threaded_show(struct device *dev,
return restart_syscall();
if (dev_isalive(netdev))
- ret = sprintf(buf, fmt_dec, netdev->threaded);
+ ret = sysfs_emit(buf, fmt_dec, netdev->threaded);
rtnl_unlock();
return ret;
@@ -671,7 +673,7 @@ static ssize_t netstat_show(const struct device *d,
struct rtnl_link_stats64 temp;
const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
- ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset));
+ ret = sysfs_emit(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset));
}
read_unlock(&dev_base_lock);
return ret;
@@ -745,7 +747,6 @@ static const struct attribute_group netstat_group = {
.attrs = netstat_attrs,
};
-#if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211)
static struct attribute *wireless_attrs[] = {
NULL
};
@@ -754,7 +755,19 @@ static const struct attribute_group wireless_group = {
.name = "wireless",
.attrs = wireless_attrs,
};
+
+static bool wireless_group_needed(struct net_device *ndev)
+{
+#if IS_ENABLED(CONFIG_CFG80211)
+ if (ndev->ieee80211_ptr)
+ return true;
+#endif
+#if IS_ENABLED(CONFIG_WIRELESS_EXT)
+ if (ndev->wireless_handlers)
+ return true;
#endif
+ return false;
+}
#else /* CONFIG_SYSFS */
#define net_class_groups NULL
@@ -811,7 +824,7 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue, char *buf)
for (i = 0; i < map->len; i++)
cpumask_set_cpu(map->cpus[i], mask);
- len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask));
+ len = sysfs_emit(buf, "%*pb\n", cpumask_pr_args(mask));
rcu_read_unlock();
free_cpumask_var(mask);
@@ -823,7 +836,7 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
{
struct rps_map *old_map, *map;
cpumask_var_t mask;
- int err, cpu, i, hk_flags;
+ int err, cpu, i;
static DEFINE_MUTEX(rps_map_mutex);
if (!capable(CAP_NET_ADMIN))
@@ -839,8 +852,8 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
}
if (!cpumask_empty(mask)) {
- hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
- cpumask_and(mask, mask, housekeeping_cpumask(hk_flags));
+ cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_DOMAIN));
+ cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_WQ));
if (cpumask_empty(mask)) {
free_cpumask_var(mask);
return -EINVAL;
@@ -897,7 +910,7 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
val = (unsigned long)flow_table->mask + 1;
rcu_read_unlock();
- return sprintf(buf, "%lu\n", val);
+ return sysfs_emit(buf, "%lu\n", val);
}
static void rps_dev_flow_table_release(struct rcu_head *rcu)
@@ -1004,7 +1017,7 @@ static void rx_queue_release(struct kobject *kobj)
#endif
memset(kobj, 0, sizeof(*kobj));
- dev_put_track(queue->dev, &queue->dev_tracker);
+ netdev_put(queue->dev, &queue->dev_tracker);
}
static const void *rx_queue_namespace(struct kobject *kobj)
@@ -1044,7 +1057,7 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
/* Kobject_put later will trigger rx_queue_release call which
* decreases dev refcount: Take that reference here
*/
- dev_hold_track(queue->dev, &queue->dev_tracker, GFP_KERNEL);
+ netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL);
kobj->kset = dev->queues_kset;
error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
@@ -1195,7 +1208,7 @@ static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf)
{
unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout);
- return sprintf(buf, fmt_ulong, trans_timeout);
+ return sysfs_emit(buf, fmt_ulong, trans_timeout);
}
static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
@@ -1242,15 +1255,15 @@ static ssize_t traffic_class_show(struct netdev_queue *queue,
* belongs to the root device it will be reported with just the
* traffic class, so just "0" for TC 0 for example.
*/
- return num_tc < 0 ? sprintf(buf, "%d%d\n", tc, num_tc) :
- sprintf(buf, "%d\n", tc);
+ return num_tc < 0 ? sysfs_emit(buf, "%d%d\n", tc, num_tc) :
+ sysfs_emit(buf, "%d\n", tc);
}
#ifdef CONFIG_XPS
static ssize_t tx_maxrate_show(struct netdev_queue *queue,
char *buf)
{
- return sprintf(buf, "%lu\n", queue->tx_maxrate);
+ return sysfs_emit(buf, "%lu\n", queue->tx_maxrate);
}
static ssize_t tx_maxrate_store(struct netdev_queue *queue,
@@ -1304,7 +1317,7 @@ static struct netdev_queue_attribute queue_traffic_class __ro_after_init
*/
static ssize_t bql_show(char *buf, unsigned int value)
{
- return sprintf(buf, "%u\n", value);
+ return sysfs_emit(buf, "%u\n", value);
}
static ssize_t bql_set(const char *buf, const size_t count,
@@ -1333,7 +1346,7 @@ static ssize_t bql_show_hold_time(struct netdev_queue *queue,
{
struct dql *dql = &queue->dql;
- return sprintf(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
+ return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
}
static ssize_t bql_set_hold_time(struct netdev_queue *queue,
@@ -1361,7 +1374,7 @@ static ssize_t bql_show_inflight(struct netdev_queue *queue,
{
struct dql *dql = &queue->dql;
- return sprintf(buf, "%u\n", dql->num_queued - dql->num_completed);
+ return sysfs_emit(buf, "%u\n", dql->num_queued - dql->num_completed);
}
static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
@@ -1607,7 +1620,7 @@ static void netdev_queue_release(struct kobject *kobj)
struct netdev_queue *queue = to_netdev_queue(kobj);
memset(kobj, 0, sizeof(*kobj));
- dev_put_track(queue->dev, &queue->dev_tracker);
+ netdev_put(queue->dev, &queue->dev_tracker);
}
static const void *netdev_queue_namespace(struct kobject *kobj)
@@ -1647,7 +1660,7 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
/* Kobject_put later will trigger netdev_queue_release call
* which decreases dev refcount: Take that reference here
*/
- dev_hold_track(queue->dev, &queue->dev_tracker, GFP_KERNEL);
+ netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL);
kobj->kset = dev->queues_kset;
error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
@@ -1995,14 +2008,8 @@ int netdev_register_kobject(struct net_device *ndev)
*groups++ = &netstat_group;
-#if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211)
- if (ndev->ieee80211_ptr)
- *groups++ = &wireless_group;
-#if IS_ENABLED(CONFIG_WIRELESS_EXT)
- else if (ndev->wireless_handlers)
+ if (wireless_group_needed(ndev))
*groups++ = &wireless_group;
-#endif
-#endif
#endif /* CONFIG_SYSFS */
error = device_add(dev);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 9b7171c40434..f64654df71a2 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -44,13 +44,7 @@ EXPORT_SYMBOL_GPL(net_rwsem);
static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) };
#endif
-struct net init_net = {
- .ns.count = REFCOUNT_INIT(1),
- .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
-#ifdef CONFIG_KEYS
- .key_domain = &init_net_key_domain,
-#endif
-};
+struct net init_net;
EXPORT_SYMBOL(init_net);
static bool init_net_initialized;
@@ -123,6 +117,7 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
static int ops_init(const struct pernet_operations *ops, struct net *net)
{
+ struct net_generic *ng;
int err = -ENOMEM;
void *data = NULL;
@@ -141,7 +136,13 @@ static int ops_init(const struct pernet_operations *ops, struct net *net)
if (!err)
return 0;
+ if (ops->id && ops->size) {
cleanup:
+ ng = rcu_dereference_protected(net->gen,
+ lockdep_is_held(&pernet_ops_rwsem));
+ ng->ptr[*ops->id] = NULL;
+ }
+
kfree(data);
out:
@@ -164,8 +165,10 @@ static void ops_exit_list(const struct pernet_operations *ops,
{
struct net *net;
if (ops->exit) {
- list_for_each_entry(net, net_exit_list, exit_list)
+ list_for_each_entry(net, net_exit_list, exit_list) {
ops->exit(net);
+ cond_resched();
+ }
}
if (ops->exit_batch)
ops->exit_batch(net_exit_list);
@@ -299,6 +302,7 @@ struct net *get_net_ns_by_id(const struct net *net, int id)
return peer;
}
+EXPORT_SYMBOL_GPL(get_net_ns_by_id);
/*
* setup_net runs the initializers for the network namespace object.
@@ -361,6 +365,8 @@ out_undo:
static int __net_init net_defaults_init_net(struct net *net)
{
net->core.sysctl_somaxconn = SOMAXCONN;
+ net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
+
return 0;
}
@@ -1082,7 +1088,7 @@ out:
rtnl_set_sk_err(net, RTNLGRP_NSID, err);
}
-static int __init net_ns_init(void)
+void __init net_ns_init(void)
{
struct net_generic *ng;
@@ -1103,6 +1109,9 @@ static int __init net_ns_init(void)
rcu_assign_pointer(init_net.gen, ng);
+#ifdef CONFIG_KEYS
+ init_net.key_domain = &init_net_key_domain;
+#endif
down_write(&pernet_ops_rwsem);
if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
@@ -1117,12 +1126,8 @@ static int __init net_ns_init(void)
RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
RTNL_FLAG_DOIT_UNLOCKED);
-
- return 0;
}
-pure_initcall(net_ns_init);
-
static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list)
{
ops_pre_exit_list(ops, net_exit_list);
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 1a6a86693b74..d6a70aeaa503 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -66,7 +66,7 @@ struct update_classid_context {
#define UPDATE_CLASSID_BATCH 1000
-static int update_classid_sock(const void *v, struct file *file, unsigned n)
+static int update_classid_sock(const void *v, struct file *file, unsigned int n)
{
struct update_classid_context *ctx = (void *)v;
struct socket *sock = sock_from_file(file);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index db724463e7cd..9be762e1d042 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -556,7 +556,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
if ((delim = strchr(cur, ',')) == NULL)
goto parse_failed;
*delim = 0;
- strlcpy(np->dev_name, cur, sizeof(np->dev_name));
+ strscpy(np->dev_name, cur, sizeof(np->dev_name));
cur = delim;
}
cur++;
@@ -610,7 +610,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
int err;
np->dev = ndev;
- strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
+ strscpy(np->dev_name, ndev->name, IFNAMSIZ);
if (ndev->priv_flags & IFF_DISABLE_NETPOLL) {
np_err(np, "%s doesn't support polling, aborting\n",
@@ -853,7 +853,7 @@ void netpoll_cleanup(struct netpoll *np)
if (!np->dev)
goto out;
__netpoll_cleanup(np);
- dev_put_track(np->dev, &np->dev_tracker);
+ netdev_put(np->dev, &np->dev_tracker);
np->dev = NULL;
out:
rtnl_unlock();
diff --git a/net/core/of_net.c b/net/core/of_net.c
index 95a64c813ae5..f1a9bf7578e7 100644
--- a/net/core/of_net.c
+++ b/net/core/of_net.c
@@ -61,7 +61,7 @@ static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr)
{
struct platform_device *pdev = of_find_device_by_node(np);
struct nvmem_cell *cell;
- const void *buf;
+ const void *mac;
size_t len;
int ret;
@@ -78,32 +78,21 @@ static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr)
if (IS_ERR(cell))
return PTR_ERR(cell);
- buf = nvmem_cell_read(cell, &len);
+ mac = nvmem_cell_read(cell, &len);
nvmem_cell_put(cell);
- if (IS_ERR(buf))
- return PTR_ERR(buf);
-
- ret = 0;
- if (len == ETH_ALEN) {
- if (is_valid_ether_addr(buf))
- memcpy(addr, buf, ETH_ALEN);
- else
- ret = -EINVAL;
- } else if (len == 3 * ETH_ALEN - 1) {
- u8 mac[ETH_ALEN];
-
- if (mac_pton(buf, mac))
- memcpy(addr, mac, ETH_ALEN);
- else
- ret = -EINVAL;
- } else {
- ret = -EINVAL;
+ if (IS_ERR(mac))
+ return PTR_ERR(mac);
+
+ if (len != ETH_ALEN || !is_valid_ether_addr(mac)) {
+ kfree(mac);
+ return -EINVAL;
}
- kfree(buf);
+ memcpy(addr, mac, ETH_ALEN);
+ kfree(mac);
- return ret;
+ return 0;
}
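Note the behavioural tightening: nvmem MAC cells are now accepted only as ETH_ALEN raw bytes forming a valid unicast address; the removed branch that parsed a 17-byte ASCII "xx:xx:xx:xx:xx:xx" cell via mac_pton() is gone.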
/**
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index bd62c01a2ec3..9b203d8660e4 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -16,8 +16,9 @@
#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
#include <linux/page-flags.h>
-#include <linux/mm.h> /* for __put_page() */
+#include <linux/mm.h> /* for put_page() */
#include <linux/poison.h>
+#include <linux/ethtool.h>
#include <trace/events/page_pool.h>
@@ -26,6 +27,112 @@
#define BIAS_MAX LONG_MAX
+#ifdef CONFIG_PAGE_POOL_STATS
+/* alloc_stat_inc is intended to be used in softirq context */
+#define alloc_stat_inc(pool, __stat) (pool->alloc_stats.__stat++)
+/* recycle_stat_inc is safe to use when preemption is possible. */
+#define recycle_stat_inc(pool, __stat) \
+ do { \
+ struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \
+ this_cpu_inc(s->__stat); \
+ } while (0)
+
+#define recycle_stat_add(pool, __stat, val) \
+ do { \
+ struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \
+ this_cpu_add(s->__stat, val); \
+ } while (0)
+
+static const char pp_stats[][ETH_GSTRING_LEN] = {
+ "rx_pp_alloc_fast",
+ "rx_pp_alloc_slow",
+ "rx_pp_alloc_slow_ho",
+ "rx_pp_alloc_empty",
+ "rx_pp_alloc_refill",
+ "rx_pp_alloc_waive",
+ "rx_pp_recycle_cached",
+ "rx_pp_recycle_cache_full",
+ "rx_pp_recycle_ring",
+ "rx_pp_recycle_ring_full",
+ "rx_pp_recycle_released_ref",
+};
+
+bool page_pool_get_stats(struct page_pool *pool,
+ struct page_pool_stats *stats)
+{
+ int cpu = 0;
+
+ if (!stats)
+ return false;
+
+ /* The caller is responsible for initializing stats. */
+ stats->alloc_stats.fast += pool->alloc_stats.fast;
+ stats->alloc_stats.slow += pool->alloc_stats.slow;
+ stats->alloc_stats.slow_high_order += pool->alloc_stats.slow_high_order;
+ stats->alloc_stats.empty += pool->alloc_stats.empty;
+ stats->alloc_stats.refill += pool->alloc_stats.refill;
+ stats->alloc_stats.waive += pool->alloc_stats.waive;
+
+ for_each_possible_cpu(cpu) {
+ const struct page_pool_recycle_stats *pcpu =
+ per_cpu_ptr(pool->recycle_stats, cpu);
+
+ stats->recycle_stats.cached += pcpu->cached;
+ stats->recycle_stats.cache_full += pcpu->cache_full;
+ stats->recycle_stats.ring += pcpu->ring;
+ stats->recycle_stats.ring_full += pcpu->ring_full;
+ stats->recycle_stats.released_refcnt += pcpu->released_refcnt;
+ }
+
+ return true;
+}
+EXPORT_SYMBOL(page_pool_get_stats);
+
+u8 *page_pool_ethtool_stats_get_strings(u8 *data)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(pp_stats); i++) {
+ memcpy(data, pp_stats[i], ETH_GSTRING_LEN);
+ data += ETH_GSTRING_LEN;
+ }
+
+ return data;
+}
+EXPORT_SYMBOL(page_pool_ethtool_stats_get_strings);
+
+int page_pool_ethtool_stats_get_count(void)
+{
+ return ARRAY_SIZE(pp_stats);
+}
+EXPORT_SYMBOL(page_pool_ethtool_stats_get_count);
+
+u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
+{
+ struct page_pool_stats *pool_stats = stats;
+
+ *data++ = pool_stats->alloc_stats.fast;
+ *data++ = pool_stats->alloc_stats.slow;
+ *data++ = pool_stats->alloc_stats.slow_high_order;
+ *data++ = pool_stats->alloc_stats.empty;
+ *data++ = pool_stats->alloc_stats.refill;
+ *data++ = pool_stats->alloc_stats.waive;
+ *data++ = pool_stats->recycle_stats.cached;
+ *data++ = pool_stats->recycle_stats.cache_full;
+ *data++ = pool_stats->recycle_stats.ring;
+ *data++ = pool_stats->recycle_stats.ring_full;
+ *data++ = pool_stats->recycle_stats.released_refcnt;
+
+ return data;
+}
+EXPORT_SYMBOL(page_pool_ethtool_stats_get);
+
+#else
+#define alloc_stat_inc(pool, __stat)
+#define recycle_stat_inc(pool, __stat)
+#define recycle_stat_add(pool, __stat, val)
+#endif
+
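The three ethtool helpers above are designed to be chained from a driver's ethtool ops, with driver-private counters appended after the page-pool block. A hedged wiring sketch; all my_* names are illustrative:

#include <linux/ethtool.h>
#include <net/page_pool.h>

static void my_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
	if (stringset == ETH_SS_STATS)
		data = page_pool_ethtool_stats_get_strings(data);
	/* driver-private strings would be appended here */
}

static int my_get_sset_count(struct net_device *dev, int sset)
{
	if (sset == ETH_SS_STATS)
		return page_pool_ethtool_stats_get_count();
	return -EOPNOTSUPP;
}

static void my_get_ethtool_stats(struct net_device *dev,
				 struct ethtool_stats *stats, u64 *data)
{
	struct page_pool_stats pp = {};

	/* sum per-queue pool stats into 'pp' via page_pool_get_stats() */
	data = page_pool_ethtool_stats_get(data, &pp);
}

Both the string and value helpers return the advanced data pointer, which is what lets driver entries follow the page-pool block.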
static int page_pool_init(struct page_pool *pool,
const struct page_pool_params *params)
{
@@ -73,6 +180,12 @@ static int page_pool_init(struct page_pool *pool,
pool->p.flags & PP_FLAG_PAGE_FRAG)
return -EINVAL;
+#ifdef CONFIG_PAGE_POOL_STATS
+ pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
+ if (!pool->recycle_stats)
+ return -ENOMEM;
+#endif
+
if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
return -ENOMEM;
@@ -117,8 +230,10 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
int pref_nid; /* preferred NUMA node */
/* Quicker fallback, avoid locks when ring is empty */
- if (__ptr_ring_empty(r))
+ if (__ptr_ring_empty(r)) {
+ alloc_stat_inc(pool, empty);
return NULL;
+ }
 /* Softirq guarantees that the CPU and thus the NUMA node are stable. This
 * assumes the CPU refilling the driver RX-ring will also run RX-NAPI.
@@ -145,14 +260,17 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
 * This limits stress on the page buddy allocator.
*/
page_pool_return_page(pool, page);
+ alloc_stat_inc(pool, waive);
page = NULL;
break;
}
} while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);
/* Return last page */
- if (likely(pool->alloc.count > 0))
+ if (likely(pool->alloc.count > 0)) {
page = pool->alloc.cache[--pool->alloc.count];
+ alloc_stat_inc(pool, refill);
+ }
return page;
}
@@ -166,6 +284,7 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
if (likely(pool->alloc.count)) {
/* Fast-path */
page = pool->alloc.cache[--pool->alloc.count];
+ alloc_stat_inc(pool, fast);
} else {
page = page_pool_refill_alloc_cache(pool);
}
@@ -239,6 +358,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
return NULL;
}
+ alloc_stat_inc(pool, slow_high_order);
page_pool_set_pp_info(pool, page);
/* Track how many pages are held 'in-flight' */
@@ -269,7 +389,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
/* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */
memset(&pool->alloc.cache, 0, sizeof(void *) * bulk);
- nr_pages = alloc_pages_bulk_array(gfp, bulk, pool->alloc.cache);
+ nr_pages = alloc_pages_bulk_array_node(gfp, pool->p.nid, bulk,
+ pool->alloc.cache);
if (unlikely(!nr_pages))
return NULL;
@@ -293,10 +414,12 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
}
/* Return last page */
- if (likely(pool->alloc.count > 0))
+ if (likely(pool->alloc.count > 0)) {
page = pool->alloc.cache[--pool->alloc.count];
- else
+ alloc_stat_inc(pool, slow);
+ } else {
page = NULL;
+ }
 /* A page that was just allocated should/must have refcnt 1. */
return page;
@@ -394,7 +517,12 @@ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
else
ret = ptr_ring_produce_bh(&pool->ring, page);
- return (ret == 0) ? true : false;
+ if (!ret) {
+ recycle_stat_inc(pool, ring);
+ return true;
+ }
+
+ return false;
}
/* Only allow direct recycling in special circumstances, into the
@@ -405,11 +533,14 @@ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
static bool page_pool_recycle_in_cache(struct page *page,
struct page_pool *pool)
{
- if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE))
+ if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) {
+ recycle_stat_inc(pool, cache_full);
return false;
+ }
/* Caller MUST have verified/know (page_ref_count(page) == 1) */
pool->alloc.cache[pool->alloc.count++] = page;
+ recycle_stat_inc(pool, cached);
return true;
}
@@ -423,11 +554,6 @@ static __always_inline struct page *
__page_pool_put_page(struct page_pool *pool, struct page *page,
unsigned int dma_sync_size, bool allow_direct)
{
- /* It is not the last user for the page frag case */
- if (pool->p.flags & PP_FLAG_PAGE_FRAG &&
- page_pool_atomic_sub_frag_count_return(page, 1))
- return NULL;
-
/* This allocator is optimized for the XDP mode that uses
 * one-frame-per-page, but has fallbacks that act like the
* regular page allocator APIs.
@@ -464,6 +590,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
* doing refcnt based recycle tricks, meaning another process
* will be invoking put_page.
*/
+ recycle_stat_inc(pool, released_refcnt);
/* Do not replace this with page_pool_return_page() */
page_pool_release_page(pool, page);
put_page(page);
@@ -471,16 +598,17 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
return NULL;
}
-void page_pool_put_page(struct page_pool *pool, struct page *page,
- unsigned int dma_sync_size, bool allow_direct)
+void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
+ unsigned int dma_sync_size, bool allow_direct)
{
page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
if (page && !page_pool_recycle_in_ring(pool, page)) {
/* Cache full, fallback to free pages */
+ recycle_stat_inc(pool, ring_full);
page_pool_return_page(pool, page);
}
}
-EXPORT_SYMBOL(page_pool_put_page);
+EXPORT_SYMBOL(page_pool_put_defragged_page);
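With the frag-count test dropped from __page_pool_put_page(), the "last user?" check presumably moves to an inline wrapper so that non-frag pools skip it entirely. A sketch of what such a page_pool_put_page() wrapper looks like (hedged; the real inline lives in include/net/page_pool.h and is additionally gated on CONFIG_PAGE_POOL):

static inline void page_pool_put_page(struct page_pool *pool,
				      struct page *page,
				      unsigned int dma_sync_size,
				      bool allow_direct)
{
	/* Not the last user of a fragmented page: nothing to recycle yet. */
	if (!page_pool_is_last_frag(pool, page))
		return;

	page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct);
}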
/* Caller must not use data area after call, as this function overwrites it */
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
@@ -491,6 +619,10 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
for (i = 0; i < count; i++) {
struct page *page = virt_to_head_page(data[i]);
+ /* It is not the last user for the page frag case */
+ if (!page_pool_is_last_frag(pool, page))
+ continue;
+
page = __page_pool_put_page(pool, page, -1, false);
/* Approved for bulk recycling in ptr_ring cache */
if (page)
@@ -503,9 +635,13 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
/* Bulk producer into ptr_ring page_pool cache */
page_pool_ring_lock(pool);
for (i = 0; i < bulk_len; i++) {
- if (__ptr_ring_produce(&pool->ring, data[i]))
- break; /* ring full */
+ if (__ptr_ring_produce(&pool->ring, data[i])) {
+ /* ring full */
+ recycle_stat_inc(pool, ring_full);
+ break;
+ }
}
+ recycle_stat_add(pool, ring, i);
page_pool_ring_unlock(pool);
 /* Hopefully all pages were returned into the ptr_ring */
@@ -526,8 +662,7 @@ static struct page *page_pool_drain_frag(struct page_pool *pool,
long drain_count = BIAS_MAX - pool->frag_users;
/* Some user is still using the page frag */
- if (likely(page_pool_atomic_sub_frag_count_return(page,
- drain_count)))
+ if (likely(page_pool_defrag_page(page, drain_count)))
return NULL;
if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
@@ -548,8 +683,7 @@ static void page_pool_free_frag(struct page_pool *pool)
pool->frag_page = NULL;
- if (!page ||
- page_pool_atomic_sub_frag_count_return(page, drain_count))
+ if (!page || page_pool_defrag_page(page, drain_count))
return;
page_pool_return_page(pool, page);
@@ -571,8 +705,10 @@ struct page *page_pool_alloc_frag(struct page_pool *pool,
if (page && *offset + size > max_size) {
page = page_pool_drain_frag(pool, page);
- if (page)
+ if (page) {
+ alloc_stat_inc(pool, fast);
goto frag_reset;
+ }
}
if (!page) {
@@ -588,12 +724,13 @@ frag_reset:
pool->frag_users = 1;
*offset = 0;
pool->frag_offset = size;
- page_pool_set_frag_count(page, BIAS_MAX);
+ page_pool_fragment_page(page, BIAS_MAX);
return page;
}
pool->frag_users++;
pool->frag_offset = *offset + size;
+ alloc_stat_inc(pool, fast);
return page;
}
EXPORT_SYMBOL(page_pool_alloc_frag);
@@ -623,6 +760,9 @@ static void page_pool_free(struct page_pool *pool)
if (pool->p.flags & PP_FLAG_DMA_MAP)
put_device(pool->p.dev);
+#ifdef CONFIG_PAGE_POOL_STATS
+ free_percpu(pool->recycle_stats);
+#endif
kfree(pool);
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 560a5e712dc3..c3763056c554 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -546,7 +546,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
static int pgctrl_open(struct inode *inode, struct file *file)
{
- return single_open(file, pgctrl_show, PDE_DATA(inode));
+ return single_open(file, pgctrl_show, pde_data(inode));
}
static const struct proc_ops pktgen_proc_ops = {
@@ -1811,7 +1811,7 @@ static ssize_t pktgen_if_write(struct file *file,
static int pktgen_if_open(struct inode *inode, struct file *file)
{
- return single_open(file, pktgen_if_show, PDE_DATA(inode));
+ return single_open(file, pktgen_if_show, pde_data(inode));
}
static const struct proc_ops pktgen_if_proc_ops = {
@@ -1948,7 +1948,7 @@ out:
static int pktgen_thread_open(struct inode *inode, struct file *file)
{
- return single_open(file, pktgen_thread_show, PDE_DATA(inode));
+ return single_open(file, pktgen_thread_show, pde_data(inode));
}
static const struct proc_ops pktgen_thread_proc_ops = {
@@ -2100,7 +2100,7 @@ static int pktgen_setup_dev(const struct pktgen_net *pn,
/* Clean old setups */
if (pkt_dev->odev) {
- dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker);
+ netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker);
pkt_dev->odev = NULL;
}
@@ -2324,7 +2324,7 @@ static inline int f_pick(struct pktgen_dev *pkt_dev)
pkt_dev->curfl = 0; /*reset */
}
} else {
- flow = prandom_u32() % pkt_dev->cflows;
+ flow = prandom_u32_max(pkt_dev->cflows);
pkt_dev->curfl = flow;
if (pkt_dev->flows[flow].count > pkt_dev->lflow) {
@@ -2380,10 +2380,9 @@ static void set_cur_queue_map(struct pktgen_dev *pkt_dev)
else if (pkt_dev->queue_map_min <= pkt_dev->queue_map_max) {
__u16 t;
if (pkt_dev->flags & F_QUEUE_MAP_RND) {
- t = prandom_u32() %
- (pkt_dev->queue_map_max -
- pkt_dev->queue_map_min + 1)
- + pkt_dev->queue_map_min;
+ t = prandom_u32_max(pkt_dev->queue_map_max -
+ pkt_dev->queue_map_min + 1) +
+ pkt_dev->queue_map_min;
} else {
t = pkt_dev->cur_queue_map + 1;
if (t > pkt_dev->queue_map_max)
@@ -2412,7 +2411,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
__u32 tmp;
if (pkt_dev->flags & F_MACSRC_RND)
- mc = prandom_u32() % pkt_dev->src_mac_count;
+ mc = prandom_u32_max(pkt_dev->src_mac_count);
else {
mc = pkt_dev->cur_src_mac_offset++;
if (pkt_dev->cur_src_mac_offset >=
@@ -2438,7 +2437,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
__u32 tmp;
if (pkt_dev->flags & F_MACDST_RND)
- mc = prandom_u32() % pkt_dev->dst_mac_count;
+ mc = prandom_u32_max(pkt_dev->dst_mac_count);
else {
mc = pkt_dev->cur_dst_mac_offset++;
@@ -2465,23 +2464,23 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
for (i = 0; i < pkt_dev->nr_labels; i++)
if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM)
pkt_dev->labels[i] = MPLS_STACK_BOTTOM |
- ((__force __be32)prandom_u32() &
+ ((__force __be32)get_random_u32() &
htonl(0x000fffff));
}
if ((pkt_dev->flags & F_VID_RND) && (pkt_dev->vlan_id != 0xffff)) {
- pkt_dev->vlan_id = prandom_u32() & (4096 - 1);
+ pkt_dev->vlan_id = prandom_u32_max(4096);
}
if ((pkt_dev->flags & F_SVID_RND) && (pkt_dev->svlan_id != 0xffff)) {
- pkt_dev->svlan_id = prandom_u32() & (4096 - 1);
+ pkt_dev->svlan_id = prandom_u32_max(4096);
}
if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) {
if (pkt_dev->flags & F_UDPSRC_RND)
- pkt_dev->cur_udp_src = prandom_u32() %
- (pkt_dev->udp_src_max - pkt_dev->udp_src_min)
- + pkt_dev->udp_src_min;
+ pkt_dev->cur_udp_src = prandom_u32_max(
+ pkt_dev->udp_src_max - pkt_dev->udp_src_min) +
+ pkt_dev->udp_src_min;
else {
pkt_dev->cur_udp_src++;
@@ -2492,9 +2491,9 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) {
if (pkt_dev->flags & F_UDPDST_RND) {
- pkt_dev->cur_udp_dst = prandom_u32() %
- (pkt_dev->udp_dst_max - pkt_dev->udp_dst_min)
- + pkt_dev->udp_dst_min;
+ pkt_dev->cur_udp_dst = prandom_u32_max(
+ pkt_dev->udp_dst_max - pkt_dev->udp_dst_min) +
+ pkt_dev->udp_dst_min;
} else {
pkt_dev->cur_udp_dst++;
if (pkt_dev->cur_udp_dst >= pkt_dev->udp_dst_max)
@@ -2509,7 +2508,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (imn < imx) {
__u32 t;
if (pkt_dev->flags & F_IPSRC_RND)
- t = prandom_u32() % (imx - imn) + imn;
+ t = prandom_u32_max(imx - imn) + imn;
else {
t = ntohl(pkt_dev->cur_saddr);
t++;
@@ -2531,8 +2530,8 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (pkt_dev->flags & F_IPDST_RND) {
do {
- t = prandom_u32() %
- (imx - imn) + imn;
+ t = prandom_u32_max(imx - imn) +
+ imn;
s = htonl(t);
} while (ipv4_is_loopback(s) ||
ipv4_is_multicast(s) ||
@@ -2569,7 +2568,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
for (i = 0; i < 4; i++) {
pkt_dev->cur_in6_daddr.s6_addr32[i] =
- (((__force __be32)prandom_u32() |
+ (((__force __be32)get_random_u32() |
pkt_dev->min_in6_daddr.s6_addr32[i]) &
pkt_dev->max_in6_daddr.s6_addr32[i]);
}
@@ -2579,9 +2578,9 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) {
__u32 t;
if (pkt_dev->flags & F_TXSIZE_RND) {
- t = prandom_u32() %
- (pkt_dev->max_pkt_size - pkt_dev->min_pkt_size)
- + pkt_dev->min_pkt_size;
+ t = prandom_u32_max(pkt_dev->max_pkt_size -
+ pkt_dev->min_pkt_size) +
+ pkt_dev->min_pkt_size;
} else {
t = pkt_dev->cur_pkt_size + 1;
if (t > pkt_dev->max_pkt_size)
@@ -2590,7 +2589,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
pkt_dev->cur_pkt_size = t;
} else if (pkt_dev->n_imix_entries > 0) {
struct imix_pkt *entry;
- __u32 t = prandom_u32() % IMIX_PRECISION;
+ __u32 t = prandom_u32_max(IMIX_PRECISION);
__u8 entry_index = pkt_dev->imix_distribution[t];
entry = &pkt_dev->imix_entries[entry_index];
@@ -3807,7 +3806,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
return add_dev_to_thread(t, pkt_dev);
out2:
- dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker);
+ netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker);
out1:
#ifdef CONFIG_XFRM
free_SAs(pkt_dev);
@@ -3901,7 +3900,7 @@ static int pktgen_remove_device(struct pktgen_thread *t,
/* Dis-associate from the interface */
if (pkt_dev->odev) {
- dev_put_track(pkt_dev->odev, &pkt_dev->dev_tracker);
+ netdev_put(pkt_dev->odev, &pkt_dev->dev_tracker);
pkt_dev->odev = NULL;
}
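The pktgen conversions above replace the "prandom_u32() % n" idiom with prandom_u32_max(n), which maps a full-range 32-bit random value into [0, n) with a multiply-and-shift instead of a division. The transform is believed to be equivalent to:

/* sketch of the helper's math; see include/linux/prandom.h */
static inline u32 sketch_prandom_u32_max(u32 ep_ro)
{
	return (u32)(((u64)prandom_u32() * ep_ro) >> 32);
}

For power-of-two bounds such as the 4096 VLAN cases, the old "& (4096 - 1)" masking and prandom_u32_max(4096) yield the same distribution; for other bounds the new helper is simply the cheaper way to scale.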
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index dd4cf01d1e0a..598041b0499e 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -137,6 +137,18 @@ struct ptp_header *ptp_parse_header(struct sk_buff *skb, unsigned int type)
}
EXPORT_SYMBOL_GPL(ptp_parse_header);
+bool ptp_msg_is_sync(struct sk_buff *skb, unsigned int type)
+{
+ struct ptp_header *hdr;
+
+ hdr = ptp_parse_header(skb, type);
+ if (!hdr)
+ return false;
+
+ return ptp_get_msgtype(hdr, type) == PTP_MSGTYPE_SYNC;
+}
+EXPORT_SYMBOL_GPL(ptp_msg_is_sync);
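ptp_msg_is_sync() gives drivers a single call for deciding whether a frame is a PTP Sync message, e.g. when reserving a hardware TX timestamp slot. A hedged usage sketch, assuming the classification comes from ptp_classify_raw():

#include <linux/ptp_classify.h>

static bool my_skb_is_ptp_sync(struct sk_buff *skb)
{
	unsigned int type = ptp_classify_raw(skb);

	if (type == PTP_CLASS_NONE)
		return false;

	return ptp_msg_is_sync(skb, type);
}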
+
void __init ptp_classifier_init(void)
{
static struct sock_filter ptp_filter[] __initdata = {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e476403231f0..74864dc46a7e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -54,6 +54,8 @@
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
+#include "dev.h"
+
#define RTNL_MAX_TYPE 50
#define RTNL_SLAVE_MAX_TYPE 40
@@ -95,6 +97,39 @@ void __rtnl_unlock(void)
defer_kfree_skb_list = NULL;
+ /* Ensure that we didn't actually add any TODO item when __rtnl_unlock()
+ * is used. In some places, e.g. in cfg80211, we have code that will do
+ * something like
+ * rtnl_lock()
+ * wiphy_lock()
+ * ...
+ * rtnl_unlock()
+ *
+ * and because netdev_run_todo() acquires the RTNL for items on the list
+ * we could cause a situation such as this:
+ * Thread 1 Thread 2
+ * rtnl_lock()
+ * unregister_netdevice()
+ * __rtnl_unlock()
+ * rtnl_lock()
+ * wiphy_lock()
+ * rtnl_unlock()
+ * netdev_run_todo()
+ * __rtnl_unlock()
+ *
+ * // list not empty now
+ * // because of thread 2
+ * rtnl_lock()
+ * while (!list_empty(...))
+ * rtnl_lock()
+ * wiphy_lock()
+ * **** DEADLOCK ****
+ *
+ * However, usage of __rtnl_unlock() is rare, and so we can ensure that
+ * it's not used in cases where something is added to the todo list.
+ */
+ WARN_ON(!list_empty(&net_todo_list));
+
mutex_unlock(&rtnl_mutex);
while (head) {
@@ -214,6 +249,8 @@ static int rtnl_register_internal(struct module *owner,
if (dumpit)
link->dumpit = dumpit;
+ WARN_ON(rtnl_msgtype_kind(msgtype) != RTNL_KIND_DEL &&
+ (flags & RTNL_FLAG_BULK_DEL_SUPPORTED));
link->flags |= flags;
/* publish protocol:msgtype */
@@ -459,7 +496,7 @@ static void rtnl_lock_unregistering_all(void)
* setup_net() and cleanup_net() are not possible.
*/
for_each_net(net) {
- if (net->dev_unreg_count > 0) {
+ if (atomic_read(&net->dev_unreg_count) > 0) {
unregistering = true;
break;
}
@@ -829,14 +866,12 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
break;
case IF_OPER_TESTING:
- if (operstate == IF_OPER_UP ||
- operstate == IF_OPER_UNKNOWN)
+ if (netif_oper_up(dev))
operstate = IF_OPER_TESTING;
break;
case IF_OPER_DORMANT:
- if (operstate == IF_OPER_UP ||
- operstate == IF_OPER_UNKNOWN)
+ if (netif_oper_up(dev))
operstate = IF_OPER_DORMANT;
break;
}
@@ -1022,11 +1057,14 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_MASTER */
+ nla_total_size(1) /* IFLA_CARRIER */
+ nla_total_size(4) /* IFLA_PROMISCUITY */
+ + nla_total_size(4) /* IFLA_ALLMULTI */
+ nla_total_size(4) /* IFLA_NUM_TX_QUEUES */
+ nla_total_size(4) /* IFLA_NUM_RX_QUEUES */
+ nla_total_size(4) /* IFLA_GSO_MAX_SEGS */
+ nla_total_size(4) /* IFLA_GSO_MAX_SIZE */
+ nla_total_size(4) /* IFLA_GRO_MAX_SIZE */
+ + nla_total_size(4) /* IFLA_TSO_MAX_SIZE */
+ + nla_total_size(4) /* IFLA_TSO_MAX_SEGS */
+ nla_total_size(1) /* IFLA_OPERSTATE */
+ nla_total_size(1) /* IFLA_LINKMODE */
+ nla_total_size(4) /* IFLA_CARRIER_CHANGES */
@@ -1699,6 +1737,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
{
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
+ struct Qdisc *qdisc;
ASSERT_RTNL();
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -1716,6 +1755,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid))
goto nla_put_failure;
+ qdisc = rtnl_dereference(dev->qdisc);
if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) ||
nla_put_u8(skb, IFLA_OPERSTATE,
@@ -1726,17 +1766,20 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
nla_put_u32(skb, IFLA_MAX_MTU, dev->max_mtu) ||
nla_put_u32(skb, IFLA_GROUP, dev->group) ||
nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) ||
+ nla_put_u32(skb, IFLA_ALLMULTI, dev->allmulti) ||
nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) ||
nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) ||
nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) ||
nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) ||
+ nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) ||
+ nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) ||
#ifdef CONFIG_RPS
nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
#endif
put_master_ifindex(skb, dev) ||
nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
- (dev->qdisc &&
- nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
+ (qdisc &&
+ nla_put_string(skb, IFLA_QDISC, qdisc->ops->id)) ||
nla_put_ifalias(skb, dev) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
atomic_read(&dev->carrier_up_count) +
@@ -1883,6 +1926,9 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_NEW_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1),
[IFLA_PARENT_DEV_NAME] = { .type = NLA_NUL_STRING },
[IFLA_GRO_MAX_SIZE] = { .type = NLA_U32 },
+ [IFLA_TSO_MAX_SIZE] = { .type = NLA_REJECT },
+ [IFLA_TSO_MAX_SEGS] = { .type = NLA_REJECT },
+ [IFLA_ALLMULTI] = { .type = NLA_REJECT },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2267,6 +2313,19 @@ invalid_attr:
return -EINVAL;
}
+static int rtnl_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
+ int max_tx_rate)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!ops->ndo_set_vf_rate)
+ return -EOPNOTSUPP;
+ if (max_tx_rate && max_tx_rate < min_tx_rate)
+ return -EINVAL;
+
+ return ops->ndo_set_vf_rate(dev, vf, min_tx_rate, max_tx_rate);
+}
+
static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
@@ -2302,14 +2361,6 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[],
}
}
- if (tb[IFLA_GRO_MAX_SIZE]) {
- u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_MAX_SIZE]);
-
- if (gro_max_size > GRO_MAX_SIZE) {
- NL_SET_ERR_MSG(extack, "too big gro_max_size");
- return -EINVAL;
- }
- }
return 0;
}
@@ -2404,11 +2455,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (err < 0)
return err;
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_rate)
- err = ops->ndo_set_vf_rate(dev, ivt->vf,
- ivf.min_tx_rate,
- ivt->rate);
+ err = rtnl_set_vf_rate(dev, ivt->vf,
+ ivf.min_tx_rate, ivt->rate);
if (err < 0)
return err;
}
@@ -2418,11 +2466,9 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (ivt->vf >= INT_MAX)
return -EINVAL;
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_rate)
- err = ops->ndo_set_vf_rate(dev, ivt->vf,
- ivt->min_tx_rate,
- ivt->max_tx_rate);
+
+ err = rtnl_set_vf_rate(dev, ivt->vf,
+ ivt->min_tx_rate, ivt->max_tx_rate);
if (err < 0)
return err;
}
@@ -2605,17 +2651,23 @@ static int do_set_proto_down(struct net_device *dev,
static int do_setlink(const struct sk_buff *skb,
struct net_device *dev, struct ifinfomsg *ifm,
struct netlink_ext_ack *extack,
- struct nlattr **tb, char *ifname, int status)
+ struct nlattr **tb, int status)
{
const struct net_device_ops *ops = dev->netdev_ops;
+ char ifname[IFNAMSIZ];
int err;
err = validate_linkmsg(dev, tb, extack);
if (err < 0)
return err;
+ if (tb[IFLA_IFNAME])
+ nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+ else
+ ifname[0] = '\0';
+
if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) {
- const char *pat = ifname && ifname[0] ? ifname : NULL;
+ const char *pat = ifname[0] ? ifname : NULL;
struct net *net;
int new_ifindex;
@@ -2725,13 +2777,6 @@ static int do_setlink(const struct sk_buff *skb,
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
}
- if (ifm->ifi_flags || ifm->ifi_change) {
- err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm),
- extack);
- if (err < 0)
- goto errout;
- }
-
if (tb[IFLA_MASTER]) {
err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
if (err)
@@ -2739,6 +2784,13 @@ static int do_setlink(const struct sk_buff *skb,
status |= DO_SETLINK_MODIFIED;
}
+ if (ifm->ifi_flags || ifm->ifi_change) {
+ err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm),
+ extack);
+ if (err < 0)
+ goto errout;
+ }
+
if (tb[IFLA_CARRIER]) {
err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER]));
if (err)
@@ -2758,7 +2810,7 @@ static int do_setlink(const struct sk_buff *skb,
if (tb[IFLA_GSO_MAX_SIZE]) {
u32 max_size = nla_get_u32(tb[IFLA_GSO_MAX_SIZE]);
- if (max_size > GSO_MAX_SIZE) {
+ if (max_size > dev->tso_max_size) {
err = -EINVAL;
goto errout;
}
@@ -2772,7 +2824,7 @@ static int do_setlink(const struct sk_buff *skb,
if (tb[IFLA_GSO_MAX_SEGS]) {
u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]);
- if (max_segs > GSO_MAX_SEGS) {
+ if (max_segs > GSO_MAX_SEGS || max_segs > dev->tso_max_segs) {
err = -EINVAL;
goto errout;
}
@@ -2971,21 +3023,16 @@ errout:
}
static struct net_device *rtnl_dev_get(struct net *net,
- struct nlattr *ifname_attr,
- struct nlattr *altifname_attr,
- char *ifname)
-{
- char buffer[ALTIFNAMSIZ];
-
- if (!ifname) {
- ifname = buffer;
- if (ifname_attr)
- nla_strscpy(ifname, ifname_attr, IFNAMSIZ);
- else if (altifname_attr)
- nla_strscpy(ifname, altifname_attr, ALTIFNAMSIZ);
- else
- return NULL;
- }
+ struct nlattr *tb[])
+{
+ char ifname[ALTIFNAMSIZ];
+
+ if (tb[IFLA_IFNAME])
+ nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+ else if (tb[IFLA_ALT_IFNAME])
+ nla_strscpy(ifname, tb[IFLA_ALT_IFNAME], ALTIFNAMSIZ);
+ else
+ return NULL;
return __dev_get_by_name(net, ifname);
}
@@ -2998,7 +3045,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net_device *dev;
int err;
struct nlattr *tb[IFLA_MAX+1];
- char ifname[IFNAMSIZ];
err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX,
ifla_policy, extack);
@@ -3009,17 +3055,12 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout;
- if (tb[IFLA_IFNAME])
- nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
- else
- ifname[0] = '\0';
-
err = -EINVAL;
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(net, ifm->ifi_index);
else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
- dev = rtnl_dev_get(net, NULL, tb[IFLA_ALT_IFNAME], ifname);
+ dev = rtnl_dev_get(net, tb);
else
goto errout;
@@ -3028,7 +3069,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
- err = do_setlink(skb, dev, ifm, extack, tb, ifname, 0);
+ err = do_setlink(skb, dev, ifm, extack, tb, 0);
errout:
return err;
}
@@ -3117,15 +3158,14 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
- dev = rtnl_dev_get(net, tb[IFLA_IFNAME],
- tb[IFLA_ALT_IFNAME], NULL);
+ dev = rtnl_dev_get(net, tb);
else if (tb[IFLA_GROUP])
err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP]));
else
goto out;
if (!dev) {
- if (tb[IFLA_IFNAME] || ifm->ifi_index > 0)
+ if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME] || ifm->ifi_index > 0)
err = -ENODEV;
goto out;
@@ -3260,7 +3300,7 @@ static int rtnl_group_changelink(const struct sk_buff *skb,
for_each_netdev_safe(net, dev, aux) {
if (dev->group == group) {
- err = do_setlink(skb, dev, ifm, extack, tb, NULL, 0);
+ err = do_setlink(skb, dev, ifm, extack, tb, 0);
if (err < 0)
return err;
}
@@ -3269,24 +3309,118 @@ static int rtnl_group_changelink(const struct sk_buff *skb,
return 0;
}
-static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attr, struct netlink_ext_ack *extack)
+static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm,
+ const struct rtnl_link_ops *ops,
+ struct nlattr **tb, struct nlattr **data,
+ struct netlink_ext_ack *extack)
{
- struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
unsigned char name_assign_type = NET_NAME_USER;
+ struct net *net = sock_net(skb->sk);
+ struct net *dest_net, *link_net;
+ struct net_device *dev;
+ char ifname[IFNAMSIZ];
+ int err;
+
+ if (!ops->alloc && !ops->setup)
+ return -EOPNOTSUPP;
+
+ if (tb[IFLA_IFNAME]) {
+ nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+ } else {
+ snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
+ name_assign_type = NET_NAME_ENUM;
+ }
+
+ dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
+ if (IS_ERR(dest_net))
+ return PTR_ERR(dest_net);
+
+ if (tb[IFLA_LINK_NETNSID]) {
+ int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
+
+ link_net = get_net_ns_by_id(dest_net, id);
+ if (!link_net) {
+ NL_SET_ERR_MSG(extack, "Unknown network namespace id");
+ err = -EINVAL;
+ goto out;
+ }
+ err = -EPERM;
+ if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN))
+ goto out;
+ } else {
+ link_net = NULL;
+ }
+
+ dev = rtnl_create_link(link_net ? : dest_net, ifname,
+ name_assign_type, ops, tb, extack);
+ if (IS_ERR(dev)) {
+ err = PTR_ERR(dev);
+ goto out;
+ }
+
+ dev->ifindex = ifm->ifi_index;
+
+ if (ops->newlink)
+ err = ops->newlink(link_net ? : net, dev, tb, data, extack);
+ else
+ err = register_netdevice(dev);
+ if (err < 0) {
+ free_netdev(dev);
+ goto out;
+ }
+
+ err = rtnl_configure_link(dev, ifm);
+ if (err < 0)
+ goto out_unregister;
+ if (link_net) {
+ err = dev_change_net_namespace(dev, dest_net, ifname);
+ if (err < 0)
+ goto out_unregister;
+ }
+ if (tb[IFLA_MASTER]) {
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
+ if (err)
+ goto out_unregister;
+ }
+out:
+ if (link_net)
+ put_net(link_net);
+ put_net(dest_net);
+ return err;
+out_unregister:
+ if (ops->newlink) {
+ LIST_HEAD(list_kill);
+
+ ops->dellink(dev, &list_kill);
+ unregister_netdevice_many(&list_kill);
+ } else {
+ unregister_netdevice(dev);
+ }
+ goto out;
+}
+
+struct rtnl_newlink_tbs {
+ struct nlattr *tb[IFLA_MAX + 1];
+ struct nlattr *attr[RTNL_MAX_TYPE + 1];
+ struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
+};
+
+static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct rtnl_newlink_tbs *tbs,
+ struct netlink_ext_ack *extack)
+{
struct nlattr *linkinfo[IFLA_INFO_MAX + 1];
- const struct rtnl_link_ops *m_ops = NULL;
- struct net_device *master_dev = NULL;
+ struct nlattr ** const tb = tbs->tb;
+ const struct rtnl_link_ops *m_ops;
+ struct net_device *master_dev;
struct net *net = sock_net(skb->sk);
const struct rtnl_link_ops *ops;
- struct nlattr *tb[IFLA_MAX + 1];
- struct net *dest_net, *link_net;
struct nlattr **slave_data;
char kind[MODULE_NAME_LEN];
struct net_device *dev;
struct ifinfomsg *ifm;
- char ifname[IFNAMSIZ];
struct nlattr **data;
+ bool link_specified;
int err;
#ifdef CONFIG_MODULES
@@ -3301,19 +3435,20 @@ replay:
if (err < 0)
return err;
- if (tb[IFLA_IFNAME])
- nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
- else
- ifname[0] = '\0';
-
ifm = nlmsg_data(nlh);
- if (ifm->ifi_index > 0)
+ if (ifm->ifi_index > 0) {
+ link_specified = true;
dev = __dev_get_by_index(net, ifm->ifi_index);
- else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
- dev = rtnl_dev_get(net, NULL, tb[IFLA_ALT_IFNAME], ifname);
- else
+ } else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) {
+ link_specified = true;
+ dev = rtnl_dev_get(net, tb);
+ } else {
+ link_specified = false;
dev = NULL;
+ }
+ master_dev = NULL;
+ m_ops = NULL;
if (dev) {
master_dev = netdev_master_upper_dev_get(dev);
if (master_dev)
@@ -3347,12 +3482,12 @@ replay:
return -EINVAL;
if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
- err = nla_parse_nested_deprecated(attr, ops->maxtype,
+ err = nla_parse_nested_deprecated(tbs->attr, ops->maxtype,
linkinfo[IFLA_INFO_DATA],
ops->policy, extack);
if (err < 0)
return err;
- data = attr;
+ data = tbs->attr;
}
if (ops->validate) {
err = ops->validate(tb, data, extack);
@@ -3368,14 +3503,14 @@ replay:
if (m_ops->slave_maxtype &&
linkinfo[IFLA_INFO_SLAVE_DATA]) {
- err = nla_parse_nested_deprecated(slave_attr,
+ err = nla_parse_nested_deprecated(tbs->slave_attr,
m_ops->slave_maxtype,
linkinfo[IFLA_INFO_SLAVE_DATA],
m_ops->slave_policy,
extack);
if (err < 0)
return err;
- slave_data = slave_attr;
+ slave_data = tbs->slave_attr;
}
}
@@ -3409,11 +3544,16 @@ replay:
status |= DO_SETLINK_NOTIFY;
}
- return do_setlink(skb, dev, ifm, extack, tb, ifname, status);
+ return do_setlink(skb, dev, ifm, extack, tb, status);
}
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
- if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
+ /* No dev found and NLM_F_CREATE not set. Requested dev does not exist,
+ * or it's for a group
+ */
+ if (link_specified)
+ return -ENODEV;
+ if (tb[IFLA_GROUP])
return rtnl_group_changelink(skb, net,
nla_get_u32(tb[IFLA_GROUP]),
ifm, extack, tb);
@@ -3438,94 +3578,21 @@ replay:
return -EOPNOTSUPP;
}
- if (!ops->alloc && !ops->setup)
- return -EOPNOTSUPP;
-
- if (!ifname[0]) {
- snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
- name_assign_type = NET_NAME_ENUM;
- }
-
- dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
- if (IS_ERR(dest_net))
- return PTR_ERR(dest_net);
-
- if (tb[IFLA_LINK_NETNSID]) {
- int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
-
- link_net = get_net_ns_by_id(dest_net, id);
- if (!link_net) {
- NL_SET_ERR_MSG(extack, "Unknown network namespace id");
- err = -EINVAL;
- goto out;
- }
- err = -EPERM;
- if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN))
- goto out;
- } else {
- link_net = NULL;
- }
-
- dev = rtnl_create_link(link_net ? : dest_net, ifname,
- name_assign_type, ops, tb, extack);
- if (IS_ERR(dev)) {
- err = PTR_ERR(dev);
- goto out;
- }
-
- dev->ifindex = ifm->ifi_index;
-
- if (ops->newlink)
- err = ops->newlink(link_net ? : net, dev, tb, data, extack);
- else
- err = register_netdevice(dev);
- if (err < 0) {
- free_netdev(dev);
- goto out;
- }
-
- err = rtnl_configure_link(dev, ifm);
- if (err < 0)
- goto out_unregister;
- if (link_net) {
- err = dev_change_net_namespace(dev, dest_net, ifname);
- if (err < 0)
- goto out_unregister;
- }
- if (tb[IFLA_MASTER]) {
- err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
- if (err)
- goto out_unregister;
- }
-out:
- if (link_net)
- put_net(link_net);
- put_net(dest_net);
- return err;
-out_unregister:
- if (ops->newlink) {
- LIST_HEAD(list_kill);
-
- ops->dellink(dev, &list_kill);
- unregister_netdevice_many(&list_kill);
- } else {
- unregister_netdevice(dev);
- }
- goto out;
+ return rtnl_newlink_create(skb, ifm, ops, tb, data, extack);
}
static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
- struct nlattr **attr;
+ struct rtnl_newlink_tbs *tbs;
int ret;
- attr = kmalloc_array(RTNL_MAX_TYPE + 1, sizeof(*attr), GFP_KERNEL);
- if (!attr)
+ tbs = kmalloc(sizeof(*tbs), GFP_KERNEL);
+ if (!tbs)
return -ENOMEM;
- ret = __rtnl_newlink(skb, nlh, attr, extack);
- kfree(attr);
+ ret = __rtnl_newlink(skb, nlh, tbs, extack);
+ kfree(tbs);
return ret;
}
@@ -3613,8 +3680,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
- dev = rtnl_dev_get(tgt_net, tb[IFLA_IFNAME],
- tb[IFLA_ALT_IFNAME], NULL);
+ dev = rtnl_dev_get(tgt_net, tb);
else
goto out;
@@ -3648,13 +3714,24 @@ static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr,
bool *changed, struct netlink_ext_ack *extack)
{
char *alt_ifname;
+ size_t size;
int err;
err = nla_validate(attr, attr->nla_len, IFLA_MAX, ifla_policy, extack);
if (err)
return err;
- alt_ifname = nla_strdup(attr, GFP_KERNEL);
+ if (cmd == RTM_NEWLINKPROP) {
+ size = rtnl_prop_list_size(dev);
+ size += nla_total_size(ALTIFNAMSIZ);
+ if (size >= U16_MAX) {
+ NL_SET_ERR_MSG(extack,
+ "effective property list too long");
+ return -EINVAL;
+ }
+ }
+
+ alt_ifname = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
if (!alt_ifname)
return -ENOMEM;
@@ -3698,8 +3775,7 @@ static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh,
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(net, ifm->ifi_index);
else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
- dev = rtnl_dev_get(net, tb[IFLA_IFNAME],
- tb[IFLA_ALT_IFNAME], NULL);
+ dev = rtnl_dev_get(net, tb);
else
return -EINVAL;
@@ -4117,22 +4193,36 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm,
}
EXPORT_SYMBOL(ndo_dflt_fdb_del);
+static const struct nla_policy fdb_del_bulk_policy[NDA_MAX + 1] = {
+ [NDA_VLAN] = { .type = NLA_U16 },
+ [NDA_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1),
+ [NDA_NDM_STATE_MASK] = { .type = NLA_U16 },
+ [NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 },
+};
+
static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ bool del_bulk = !!(nlh->nlmsg_flags & NLM_F_BULK);
struct net *net = sock_net(skb->sk);
+ const struct net_device_ops *ops;
struct ndmsg *ndm;
struct nlattr *tb[NDA_MAX+1];
struct net_device *dev;
- __u8 *addr;
+ __u8 *addr = NULL;
int err;
u16 vid;
if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL,
- extack);
+ if (!del_bulk) {
+ err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
+ NULL, extack);
+ } else {
+ err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX,
+ fdb_del_bulk_policy, extack);
+ }
if (err < 0)
return err;
@@ -4148,9 +4238,12 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
return -ENODEV;
}
- if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
- NL_SET_ERR_MSG(extack, "invalid address");
- return -EINVAL;
+ if (!del_bulk) {
+ if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
+ NL_SET_ERR_MSG(extack, "invalid address");
+ return -EINVAL;
+ }
+ addr = nla_data(tb[NDA_LLADDR]);
}
if (dev->type != ARPHRD_ETHER) {
@@ -4158,8 +4251,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
}
- addr = nla_data(tb[NDA_LLADDR]);
-
err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
if (err)
return err;
@@ -4170,10 +4261,16 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
netif_is_bridge_port(dev)) {
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
- const struct net_device_ops *ops = br_dev->netdev_ops;
- if (ops->ndo_fdb_del)
- err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid);
+ ops = br_dev->netdev_ops;
+ if (!del_bulk) {
+ if (ops->ndo_fdb_del)
+ err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack);
+ } else {
+ if (ops->ndo_fdb_del_bulk)
+ err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid,
+ extack);
+ }
if (err)
goto out;
@@ -4183,15 +4280,24 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
/* Embedded bridge, macvlan, and any other device support */
if (ndm->ndm_flags & NTF_SELF) {
- if (dev->netdev_ops->ndo_fdb_del)
- err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr,
- vid);
- else
- err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid);
+ ops = dev->netdev_ops;
+ if (!del_bulk) {
+ if (ops->ndo_fdb_del)
+ err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack);
+ else
+ err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid);
+ } else {
+ /* in case err was cleared by NTF_MASTER call */
+ err = -EOPNOTSUPP;
+ if (ops->ndo_fdb_del_bulk)
+ err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid,
+ extack);
+ }
if (!err) {
- rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH,
- ndm->ndm_state);
+ if (!del_bulk)
+ rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH,
+ ndm->ndm_state);
ndm->ndm_flags &= ~NTF_SELF;
}
}
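From userspace, the bulk path is selected purely by the NLM_F_BULK request flag; NDA_LLADDR is then not required, and fdb_del_bulk_policy above defines the permitted match attributes. A hedged sketch of the fixed part of such a request (assumes uapi headers new enough to define NLM_F_BULK; the ifindex is illustrative):

#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/neighbour.h>
#include <sys/socket.h>

/* Flush all FDB entries on one port: RTM_DELNEIGH + NLM_F_BULK, no
 * NDA_LLADDR; optional NDA_VLAN / state-mask attributes would be
 * appended after the ndmsg. */
static struct {
	struct nlmsghdr nlh;
	struct ndmsg ndm;
} req = {
	.nlh = {
		.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
		.nlmsg_type = RTM_DELNEIGH,
		.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_BULK,
	},
	.ndm = {
		.ndm_family = AF_BRIDGE,
		.ndm_ifindex = 7,	/* illustrative */
		.ndm_flags = NTF_SELF,	/* or NTF_MASTER for the bridge */
	},
};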
@@ -5044,82 +5150,259 @@ static bool stats_attr_valid(unsigned int mask, int attrid, int idxattr)
(!idxattr || idxattr == attrid);
}
-#define IFLA_OFFLOAD_XSTATS_FIRST (IFLA_OFFLOAD_XSTATS_UNSPEC + 1)
-static int rtnl_get_offload_stats_attr_size(int attr_id)
+static bool
+rtnl_offload_xstats_have_ndo(const struct net_device *dev, int attr_id)
{
- switch (attr_id) {
- case IFLA_OFFLOAD_XSTATS_CPU_HIT:
- return sizeof(struct rtnl_link_stats64);
- }
+ return dev->netdev_ops &&
+ dev->netdev_ops->ndo_has_offload_stats &&
+ dev->netdev_ops->ndo_get_offload_stats &&
+ dev->netdev_ops->ndo_has_offload_stats(dev, attr_id);
+}
- return 0;
+static unsigned int
+rtnl_offload_xstats_get_size_ndo(const struct net_device *dev, int attr_id)
+{
+ return rtnl_offload_xstats_have_ndo(dev, attr_id) ?
+ sizeof(struct rtnl_link_stats64) : 0;
}
-static int rtnl_get_offload_stats(struct sk_buff *skb, struct net_device *dev,
- int *prividx)
+static int
+rtnl_offload_xstats_fill_ndo(struct net_device *dev, int attr_id,
+ struct sk_buff *skb)
{
+ unsigned int size = rtnl_offload_xstats_get_size_ndo(dev, attr_id);
struct nlattr *attr = NULL;
- int attr_id, size;
void *attr_data;
int err;
- if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats &&
- dev->netdev_ops->ndo_get_offload_stats))
+ if (!size)
return -ENODATA;
- for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST;
- attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) {
- if (attr_id < *prividx)
- continue;
+ attr = nla_reserve_64bit(skb, attr_id, size,
+ IFLA_OFFLOAD_XSTATS_UNSPEC);
+ if (!attr)
+ return -EMSGSIZE;
- size = rtnl_get_offload_stats_attr_size(attr_id);
- if (!size)
- continue;
+ attr_data = nla_data(attr);
+ memset(attr_data, 0, size);
- if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id))
- continue;
+ err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev, attr_data);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static unsigned int
+rtnl_offload_xstats_get_size_stats(const struct net_device *dev,
+ enum netdev_offload_xstats_type type)
+{
+ bool enabled = netdev_offload_xstats_enabled(dev, type);
+
+ return enabled ? sizeof(struct rtnl_hw_stats64) : 0;
+}
+
+struct rtnl_offload_xstats_request_used {
+ bool request;
+ bool used;
+};
+
+static int
+rtnl_offload_xstats_get_stats(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ struct rtnl_offload_xstats_request_used *ru,
+ struct rtnl_hw_stats64 *stats,
+ struct netlink_ext_ack *extack)
+{
+ bool request;
+ bool used;
+ int err;
+
+ request = netdev_offload_xstats_enabled(dev, type);
+ if (!request) {
+ used = false;
+ goto out;
+ }
+
+ err = netdev_offload_xstats_get(dev, type, stats, &used, extack);
+ if (err)
+ return err;
+
+out:
+ if (ru) {
+ ru->request = request;
+ ru->used = used;
+ }
+ return 0;
+}
+
+static int
+rtnl_offload_xstats_fill_hw_s_info_one(struct sk_buff *skb, int attr_id,
+ struct rtnl_offload_xstats_request_used *ru)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, attr_id);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (nla_put_u8(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST, ru->request))
+ goto nla_put_failure;
+
+ if (nla_put_u8(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED, ru->used))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
- attr = nla_reserve_64bit(skb, attr_id, size,
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int
+rtnl_offload_xstats_fill_hw_s_info(struct sk_buff *skb, struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3;
+ struct rtnl_offload_xstats_request_used ru_l3;
+ struct nlattr *nest;
+ int err;
+
+ err = rtnl_offload_xstats_get_stats(dev, t_l3, &ru_l3, NULL, extack);
+ if (err)
+ return err;
+
+ nest = nla_nest_start(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (rtnl_offload_xstats_fill_hw_s_info_one(skb,
+ IFLA_OFFLOAD_XSTATS_L3_STATS,
+ &ru_l3))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int rtnl_offload_xstats_fill(struct sk_buff *skb, struct net_device *dev,
+ int *prividx, u32 off_filter_mask,
+ struct netlink_ext_ack *extack)
+{
+ enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3;
+ int attr_id_hw_s_info = IFLA_OFFLOAD_XSTATS_HW_S_INFO;
+ int attr_id_l3_stats = IFLA_OFFLOAD_XSTATS_L3_STATS;
+ int attr_id_cpu_hit = IFLA_OFFLOAD_XSTATS_CPU_HIT;
+ bool have_data = false;
+ int err;
+
+ if (*prividx <= attr_id_cpu_hit &&
+ (off_filter_mask &
+ IFLA_STATS_FILTER_BIT(attr_id_cpu_hit))) {
+ err = rtnl_offload_xstats_fill_ndo(dev, attr_id_cpu_hit, skb);
+ if (!err) {
+ have_data = true;
+ } else if (err != -ENODATA) {
+ *prividx = attr_id_cpu_hit;
+ return err;
+ }
+ }
+
+ if (*prividx <= attr_id_hw_s_info &&
+ (off_filter_mask & IFLA_STATS_FILTER_BIT(attr_id_hw_s_info))) {
+ *prividx = attr_id_hw_s_info;
+
+ err = rtnl_offload_xstats_fill_hw_s_info(skb, dev, extack);
+ if (err)
+ return err;
+
+ have_data = true;
+ *prividx = 0;
+ }
+
+ if (*prividx <= attr_id_l3_stats &&
+ (off_filter_mask & IFLA_STATS_FILTER_BIT(attr_id_l3_stats))) {
+ unsigned int size_l3;
+ struct nlattr *attr;
+
+ *prividx = attr_id_l3_stats;
+
+ size_l3 = rtnl_offload_xstats_get_size_stats(dev, t_l3);
+ if (!size_l3)
+ goto skip_l3_stats;
+ attr = nla_reserve_64bit(skb, attr_id_l3_stats, size_l3,
IFLA_OFFLOAD_XSTATS_UNSPEC);
if (!attr)
- goto nla_put_failure;
+ return -EMSGSIZE;
- attr_data = nla_data(attr);
- memset(attr_data, 0, size);
- err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev,
- attr_data);
+ err = rtnl_offload_xstats_get_stats(dev, t_l3, NULL,
+ nla_data(attr), extack);
if (err)
- goto get_offload_stats_failure;
+ return err;
+
+ have_data = true;
+skip_l3_stats:
+ *prividx = 0;
}
- if (!attr)
+ if (!have_data)
return -ENODATA;
*prividx = 0;
return 0;
+}
-nla_put_failure:
- err = -EMSGSIZE;
-get_offload_stats_failure:
- *prividx = attr_id;
- return err;
+static unsigned int
+rtnl_offload_xstats_get_size_hw_s_info_one(const struct net_device *dev,
+ enum netdev_offload_xstats_type type)
+{
+ bool enabled = netdev_offload_xstats_enabled(dev, type);
+
+ return nla_total_size(0) +
+ /* IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST */
+ nla_total_size(sizeof(u8)) +
+ /* IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED */
+ (enabled ? nla_total_size(sizeof(u8)) : 0) +
+ 0;
+}
+
+static unsigned int
+rtnl_offload_xstats_get_size_hw_s_info(const struct net_device *dev)
+{
+ enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3;
+
+ return nla_total_size(0) +
+ /* IFLA_OFFLOAD_XSTATS_L3_STATS */
+ rtnl_offload_xstats_get_size_hw_s_info_one(dev, t_l3) +
+ 0;
}
-static int rtnl_get_offload_stats_size(const struct net_device *dev)
+static int rtnl_offload_xstats_get_size(const struct net_device *dev,
+ u32 off_filter_mask)
{
+ enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3;
+ int attr_id_cpu_hit = IFLA_OFFLOAD_XSTATS_CPU_HIT;
int nla_size = 0;
- int attr_id;
int size;
- if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats &&
- dev->netdev_ops->ndo_get_offload_stats))
- return 0;
+ if (off_filter_mask &
+ IFLA_STATS_FILTER_BIT(attr_id_cpu_hit)) {
+ size = rtnl_offload_xstats_get_size_ndo(dev, attr_id_cpu_hit);
+ nla_size += nla_total_size_64bit(size);
+ }
- for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST;
- attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) {
- if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id))
- continue;
- size = rtnl_get_offload_stats_attr_size(attr_id);
+ if (off_filter_mask &
+ IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO))
+ nla_size += rtnl_offload_xstats_get_size_hw_s_info(dev);
+
+ if (off_filter_mask &
+ IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_L3_STATS)) {
+ size = rtnl_offload_xstats_get_size_stats(dev, t_l3);
nla_size += nla_total_size_64bit(size);
}
@@ -5129,11 +5412,21 @@ static int rtnl_get_offload_stats_size(const struct net_device *dev)
return nla_size;
}
+struct rtnl_stats_dump_filters {
+ /* mask[0] filters outer attributes. Then individual nests have their
+ * filtering mask at the index of the nested attribute.
+ */
+ u32 mask[IFLA_STATS_MAX + 1];
+};
+
static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
- unsigned int flags, unsigned int filter_mask,
- int *idxattr, int *prividx)
+ unsigned int flags,
+ const struct rtnl_stats_dump_filters *filters,
+ int *idxattr, int *prividx,
+ struct netlink_ext_ack *extack)
{
+ unsigned int filter_mask = filters->mask[0];
struct if_stats_msg *ifsm;
struct nlmsghdr *nlh;
struct nlattr *attr;
@@ -5159,8 +5452,10 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
attr = nla_reserve_64bit(skb, IFLA_STATS_LINK_64,
sizeof(struct rtnl_link_stats64),
IFLA_STATS_UNSPEC);
- if (!attr)
+ if (!attr) {
+ err = -EMSGSIZE;
goto nla_put_failure;
+ }
sp = nla_data(attr);
dev_get_stats(dev, sp);
@@ -5173,8 +5468,10 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
*idxattr = IFLA_STATS_LINK_XSTATS;
attr = nla_nest_start_noflag(skb,
IFLA_STATS_LINK_XSTATS);
- if (!attr)
+ if (!attr) {
+ err = -EMSGSIZE;
goto nla_put_failure;
+ }
err = ops->fill_linkxstats(skb, dev, prividx, *idxattr);
nla_nest_end(skb, attr);
@@ -5196,8 +5493,10 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
*idxattr = IFLA_STATS_LINK_XSTATS_SLAVE;
attr = nla_nest_start_noflag(skb,
IFLA_STATS_LINK_XSTATS_SLAVE);
- if (!attr)
+ if (!attr) {
+ err = -EMSGSIZE;
goto nla_put_failure;
+ }
err = ops->fill_linkxstats(skb, dev, prividx, *idxattr);
nla_nest_end(skb, attr);
@@ -5209,13 +5508,19 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS,
*idxattr)) {
+ u32 off_filter_mask;
+
+ off_filter_mask = filters->mask[IFLA_STATS_LINK_OFFLOAD_XSTATS];
*idxattr = IFLA_STATS_LINK_OFFLOAD_XSTATS;
attr = nla_nest_start_noflag(skb,
IFLA_STATS_LINK_OFFLOAD_XSTATS);
- if (!attr)
+ if (!attr) {
+ err = -EMSGSIZE;
goto nla_put_failure;
+ }
- err = rtnl_get_offload_stats(skb, dev, prividx);
+ err = rtnl_offload_xstats_fill(skb, dev, prividx,
+ off_filter_mask, extack);
if (err == -ENODATA)
nla_nest_cancel(skb, attr);
else
@@ -5231,19 +5536,21 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
*idxattr = IFLA_STATS_AF_SPEC;
attr = nla_nest_start_noflag(skb, IFLA_STATS_AF_SPEC);
- if (!attr)
+ if (!attr) {
+ err = -EMSGSIZE;
goto nla_put_failure;
+ }
rcu_read_lock();
list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
if (af_ops->fill_stats_af) {
struct nlattr *af;
- int err;
af = nla_nest_start_noflag(skb,
af_ops->family);
if (!af) {
rcu_read_unlock();
+ err = -EMSGSIZE;
goto nla_put_failure;
}
err = af_ops->fill_stats_af(skb, dev);
@@ -5276,13 +5583,14 @@ nla_put_failure:
else
nlmsg_end(skb, nlh);
- return -EMSGSIZE;
+ return err;
}
static size_t if_nlmsg_stats_size(const struct net_device *dev,
- u32 filter_mask)
+ const struct rtnl_stats_dump_filters *filters)
{
size_t size = NLMSG_ALIGN(sizeof(struct if_stats_msg));
+ unsigned int filter_mask = filters->mask[0];
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0))
size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64));
@@ -5318,8 +5626,12 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
}
}
- if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0))
- size += rtnl_get_offload_stats_size(dev);
+ if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0)) {
+ u32 off_filter_mask;
+
+ off_filter_mask = filters->mask[IFLA_STATS_LINK_OFFLOAD_XSTATS];
+ size += rtnl_offload_xstats_get_size(dev, off_filter_mask);
+ }
if (stats_attr_valid(filter_mask, IFLA_STATS_AF_SPEC, 0)) {
struct rtnl_af_ops *af_ops;
@@ -5343,6 +5655,79 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
return size;
}
+#define RTNL_STATS_OFFLOAD_XSTATS_VALID ((1 << __IFLA_OFFLOAD_XSTATS_MAX) - 1)
+
+static const struct nla_policy
+rtnl_stats_get_policy_filters[IFLA_STATS_MAX + 1] = {
+ [IFLA_STATS_LINK_OFFLOAD_XSTATS] =
+ NLA_POLICY_MASK(NLA_U32, RTNL_STATS_OFFLOAD_XSTATS_VALID),
+};
+
+static const struct nla_policy
+rtnl_stats_get_policy[IFLA_STATS_GETSET_MAX + 1] = {
+ [IFLA_STATS_GET_FILTERS] =
+ NLA_POLICY_NESTED(rtnl_stats_get_policy_filters),
+};
+
+static const struct nla_policy
+ifla_stats_set_policy[IFLA_STATS_GETSET_MAX + 1] = {
+ [IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS] = NLA_POLICY_MAX(NLA_U8, 1),
+};
+
+static int rtnl_stats_get_parse_filters(struct nlattr *ifla_filters,
+ struct rtnl_stats_dump_filters *filters,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_STATS_MAX + 1];
+ int err;
+ int at;
+
+ err = nla_parse_nested(tb, IFLA_STATS_MAX, ifla_filters,
+ rtnl_stats_get_policy_filters, extack);
+ if (err < 0)
+ return err;
+
+ for (at = 1; at <= IFLA_STATS_MAX; at++) {
+ if (tb[at]) {
+ if (!(filters->mask[0] & IFLA_STATS_FILTER_BIT(at))) {
+ NL_SET_ERR_MSG(extack, "Filtered attribute not enabled in filter_mask");
+ return -EINVAL;
+ }
+ filters->mask[at] = nla_get_u32(tb[at]);
+ }
+ }
+
+ return 0;
+}
+
+static int rtnl_stats_get_parse(const struct nlmsghdr *nlh,
+ u32 filter_mask,
+ struct rtnl_stats_dump_filters *filters,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_STATS_GETSET_MAX + 1];
+ int err;
+ int i;
+
+ filters->mask[0] = filter_mask;
+ for (i = 1; i < ARRAY_SIZE(filters->mask); i++)
+ filters->mask[i] = -1U;
+
+ err = nlmsg_parse(nlh, sizeof(struct if_stats_msg), tb,
+ IFLA_STATS_GETSET_MAX, rtnl_stats_get_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (tb[IFLA_STATS_GET_FILTERS]) {
+ err = rtnl_stats_get_parse_filters(tb[IFLA_STATS_GET_FILTERS],
+ filters, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
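On the request side, the new IFLA_STATS_GET_FILTERS nest lets RTM_GETSTATS narrow which offload-xstats sub-attributes are returned. A hedged sketch using libmnl helpers (attribute names are taken from the uapi used above):

#include <libmnl/libmnl.h>
#include <linux/if_link.h>

/* Narrow the offload-xstats nest to HW_S_INFO only. Note that the
 * outer ifsm->filter_mask must still have the OFFLOAD_XSTATS bit
 * set, or rtnl_stats_get_parse_filters() rejects the request. */
static void put_stats_filters(struct nlmsghdr *nlh)
{
	struct nlattr *nest;

	nest = mnl_attr_nest_start(nlh, IFLA_STATS_GET_FILTERS);
	mnl_attr_put_u32(nlh, IFLA_STATS_LINK_OFFLOAD_XSTATS,
			 IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO));
	mnl_attr_nest_end(nlh, nest);
}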
+
static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check,
bool is_dump, struct netlink_ext_ack *extack)
{
@@ -5365,10 +5750,6 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check,
NL_SET_ERR_MSG(extack, "Invalid values in header for stats dump request");
return -EINVAL;
}
- if (nlmsg_attrlen(nlh, sizeof(*ifsm))) {
- NL_SET_ERR_MSG(extack, "Invalid attributes after stats header");
- return -EINVAL;
- }
if (ifsm->filter_mask >= IFLA_STATS_FILTER_BIT(IFLA_STATS_MAX + 1)) {
NL_SET_ERR_MSG(extack, "Invalid stats requested through filter mask");
return -EINVAL;
@@ -5380,12 +5761,12 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check,
static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct rtnl_stats_dump_filters filters;
struct net *net = sock_net(skb->sk);
struct net_device *dev = NULL;
int idxattr = 0, prividx = 0;
struct if_stats_msg *ifsm;
struct sk_buff *nskb;
- u32 filter_mask;
int err;
err = rtnl_valid_stats_req(nlh, netlink_strict_get_check(skb),
@@ -5402,17 +5783,22 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!dev)
return -ENODEV;
- filter_mask = ifsm->filter_mask;
- if (!filter_mask)
+ if (!ifsm->filter_mask) {
+ NL_SET_ERR_MSG(extack, "Filter mask must be set for stats get");
return -EINVAL;
+ }
+
+ err = rtnl_stats_get_parse(nlh, ifsm->filter_mask, &filters, extack);
+ if (err)
+ return err;
- nskb = nlmsg_new(if_nlmsg_stats_size(dev, filter_mask), GFP_KERNEL);
+ nskb = nlmsg_new(if_nlmsg_stats_size(dev, &filters), GFP_KERNEL);
if (!nskb)
return -ENOBUFS;
err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS,
NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
- 0, filter_mask, &idxattr, &prividx);
+ 0, &filters, &idxattr, &prividx, extack);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_stats_size */
WARN_ON(err == -EMSGSIZE);
@@ -5428,12 +5814,12 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct netlink_ext_ack *extack = cb->extack;
int h, s_h, err, s_idx, s_idxattr, s_prividx;
+ struct rtnl_stats_dump_filters filters;
struct net *net = sock_net(skb->sk);
unsigned int flags = NLM_F_MULTI;
struct if_stats_msg *ifsm;
struct hlist_head *head;
struct net_device *dev;
- u32 filter_mask = 0;
int idx = 0;
s_h = cb->args[0];
@@ -5448,12 +5834,16 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
return err;
ifsm = nlmsg_data(cb->nlh);
- filter_mask = ifsm->filter_mask;
- if (!filter_mask) {
+ if (!ifsm->filter_mask) {
NL_SET_ERR_MSG(extack, "Filter mask must be set for stats dump");
return -EINVAL;
}
+ err = rtnl_stats_get_parse(cb->nlh, ifsm->filter_mask, &filters,
+ extack);
+ if (err)
+ return err;
+
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &net->dev_index_head[h];
@@ -5463,8 +5853,9 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0,
- flags, filter_mask,
- &s_idxattr, &s_prividx);
+ flags, &filters,
+ &s_idxattr, &s_prividx,
+ extack);
/* If we ran out of room on the first message,
* we're in trouble
*/
@@ -5488,6 +5879,107 @@ out:
return skb->len;
}
+void rtnl_offload_xstats_notify(struct net_device *dev)
+{
+ struct rtnl_stats_dump_filters response_filters = {};
+ struct net *net = dev_net(dev);
+ int idxattr = 0, prividx = 0;
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ ASSERT_RTNL();
+
+ response_filters.mask[0] |=
+ IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_OFFLOAD_XSTATS);
+ response_filters.mask[IFLA_STATS_LINK_OFFLOAD_XSTATS] |=
+ IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO);
+
+ skb = nlmsg_new(if_nlmsg_stats_size(dev, &response_filters),
+ GFP_KERNEL);
+ if (!skb)
+ goto errout;
+
+ err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, 0, 0, 0, 0,
+ &response_filters, &idxattr, &prividx, NULL);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto errout;
+ }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_STATS, NULL, GFP_KERNEL);
+ return;
+
+errout:
+ rtnl_set_sk_err(net, RTNLGRP_STATS, err);
+}
+EXPORT_SYMBOL(rtnl_offload_xstats_notify);
+
+static int rtnl_stats_set(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3;
+ struct rtnl_stats_dump_filters response_filters = {};
+ struct nlattr *tb[IFLA_STATS_GETSET_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev = NULL;
+ struct if_stats_msg *ifsm;
+ bool notify = false;
+ int err;
+
+ err = rtnl_valid_stats_req(nlh, netlink_strict_get_check(skb),
+ false, extack);
+ if (err)
+ return err;
+
+ ifsm = nlmsg_data(nlh);
+ if (ifsm->family != AF_UNSPEC) {
+ NL_SET_ERR_MSG(extack, "Address family should be AF_UNSPEC");
+ return -EINVAL;
+ }
+
+ if (ifsm->ifindex > 0)
+ dev = __dev_get_by_index(net, ifsm->ifindex);
+ else
+ return -EINVAL;
+
+ if (!dev)
+ return -ENODEV;
+
+ if (ifsm->filter_mask) {
+ NL_SET_ERR_MSG(extack, "Filter mask must be 0 for stats set");
+ return -EINVAL;
+ }
+
+ err = nlmsg_parse(nlh, sizeof(*ifsm), tb, IFLA_STATS_GETSET_MAX,
+ ifla_stats_set_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (tb[IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS]) {
+ u8 req = nla_get_u8(tb[IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS]);
+
+ if (req)
+ err = netdev_offload_xstats_enable(dev, t_l3, extack);
+ else
+ err = netdev_offload_xstats_disable(dev, t_l3);
+
+ if (!err)
+ notify = true;
+ else if (err != -EALREADY)
+ return err;
+
+ response_filters.mask[0] |=
+ IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_OFFLOAD_XSTATS);
+ response_filters.mask[IFLA_STATS_LINK_OFFLOAD_XSTATS] |=
+ IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO);
+ }
+
+ if (notify)
+ rtnl_offload_xstats_notify(dev);
+
+ return 0;
+}
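For reference, here is a hedged userspace sketch of the RTM_SETSTATS request that rtnl_stats_set() expects: AF_UNSPEC family, a nonzero ifindex, filter_mask of 0, plus one u8 IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS attribute. The fallback constants below are assumptions; verify them against your uapi headers before relying on them.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/if_link.h>

#ifndef RTM_SETSTATS
#define RTM_SETSTATS 95				/* assumed value */
#endif
#ifndef IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS
#define IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS 2 /* assumed value */
#endif

int main(void)
{
	struct sockaddr_nl kernel_addr = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr nlh;
		struct if_stats_msg ifsm;
		struct nlattr attr;
		unsigned char on;
		unsigned char pad[3];		/* NLA_ALIGN padding */
	} req;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0)
		return 1;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = sizeof(req);
	req.nlh.nlmsg_type = RTM_SETSTATS;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
	req.ifsm.family = AF_UNSPEC;		/* anything else is rejected */
	req.ifsm.ifindex = 1;			/* illustrative target device */
	req.ifsm.filter_mask = 0;		/* must be 0 for stats set */
	req.attr.nla_type = IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS;
	req.attr.nla_len = NLA_HDRLEN + sizeof(req.on);
	req.on = 1;				/* enable L3 offload xstats */

	if (sendto(fd, &req, sizeof(req), 0,
		   (struct sockaddr *)&kernel_addr, sizeof(kernel_addr)) < 0)
		perror("sendto");
	close(fd);
	return 0;
}

Recent iproute2 releases appear to expose the same operation as "ip stats set dev DEV l3_stats on"; treat that command as an assumption and check your iproute2 version.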
+
/* Process one rtnetlink message. */
static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -5495,11 +5987,11 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct rtnl_link *link;
+ enum rtnl_kinds kind;
struct module *owner;
int err = -EOPNOTSUPP;
rtnl_doit_func doit;
unsigned int flags;
- int kind;
int family;
int type;
@@ -5514,13 +6006,13 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
- kind = type&3;
+ kind = rtnl_msgtype_kind(type);
- if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN))
+ if (kind != RTNL_KIND_GET && !netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM;
rcu_read_lock();
- if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
+ if (kind == RTNL_KIND_GET && (nlh->nlmsg_flags & NLM_F_DUMP)) {
struct sock *rtnl;
rtnl_dumpit_func dumpit;
u32 min_dump_alloc = 0;
@@ -5576,6 +6068,13 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
}
flags = link->flags;
+ if (kind == RTNL_KIND_DEL && (nlh->nlmsg_flags & NLM_F_BULK) &&
+ !(flags & RTNL_FLAG_BULK_DEL_SUPPORTED)) {
+ NL_SET_ERR_MSG(extack, "Bulk delete is not supported");
+ module_put(owner);
+ goto err_unlock;
+ }
+
if (flags & RTNL_FLAG_DOIT_UNLOCKED) {
doit = link->doit;
rcu_read_unlock();
@@ -5704,7 +6203,8 @@ void __init rtnetlink_init(void)
rtnl_register(PF_UNSPEC, RTM_DELLINKPROP, rtnl_dellinkprop, NULL, 0);
rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0);
- rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0);
+ rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL,
+ RTNL_FLAG_BULK_DEL_SUPPORTED);
rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0);
rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0);
@@ -5713,4 +6213,5 @@ void __init rtnetlink_init(void)
rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump,
0);
+ rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0);
}
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 9b8443774449..b0ff6153be62 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -22,6 +22,8 @@
static siphash_aligned_key_t net_secret;
static siphash_aligned_key_t ts_secret;
+#define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ)
+
static __always_inline void net_secret_init(void)
{
net_get_random_once(&net_secret, sizeof(net_secret));
@@ -62,7 +64,7 @@ u32 secure_tcpv6_ts_off(const struct net *net,
.daddr = *(struct in6_addr *)daddr,
};
- if (net->ipv4.sysctl_tcp_timestamps != 1)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
return 0;
ts_secret_init();
@@ -94,17 +96,19 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
}
EXPORT_SYMBOL(secure_tcpv6_seq);
-u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
+u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport)
{
const struct {
struct in6_addr saddr;
struct in6_addr daddr;
+ unsigned int timeseed;
__be16 dport;
} __aligned(SIPHASH_ALIGNMENT) combined = {
.saddr = *(struct in6_addr *)saddr,
.daddr = *(struct in6_addr *)daddr,
- .dport = dport
+ .timeseed = jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD,
+ .dport = dport,
};
net_secret_init();
return siphash(&combined, offsetofend(typeof(combined), dport),
@@ -116,7 +120,7 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#ifdef CONFIG_INET
u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr)
{
- if (net->ipv4.sysctl_tcp_timestamps != 1)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
return 0;
ts_secret_init();
@@ -142,11 +146,13 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
}
EXPORT_SYMBOL_GPL(secure_tcp_seq);
-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
+u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
net_secret_init();
- return siphash_3u32((__force u32)saddr, (__force u32)daddr,
- (__force u16)dport, &net_secret);
+ return siphash_4u32((__force u32)saddr, (__force u32)daddr,
+ (__force u16)dport,
+ jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD,
+ &net_secret);
}
EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
#endif
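The effect of the new timeseed input: the per-destination hash that seeds ephemeral port selection now rotates every EPHEMERAL_PORT_SHUFFLE_PERIOD (10 seconds at the configured HZ) instead of staying constant for the whole uptime, so an observer of one connection cannot predict source ports indefinitely. A toy model follows, with a stand-in mixing function in place of the kernel's siphash.

#include <stdio.h>
#include <stdint.h>

#define HZ 100
#define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ)

/* stand-in for siphash(): any keyed mix works for illustration */
static uint64_t toy_hash(uint32_t saddr, uint32_t daddr, uint16_t dport,
			 uint32_t timeseed)
{
	uint64_t h = 0x9e3779b97f4a7c15ull;

	h ^= saddr; h *= 0xff51afd7ed558ccdull;
	h ^= daddr; h *= 0xc4ceb9fe1a85ec53ull;
	h ^= dport; h ^= timeseed; h *= 0xff51afd7ed558ccdull;
	return h;
}

int main(void)
{
	uint64_t jiffies;

	/* same 4-tuple, sampled 10 seconds apart: the hash changes */
	for (jiffies = 0; jiffies <= 2 * EPHEMERAL_PORT_SHUFFLE_PERIOD;
	     jiffies += EPHEMERAL_PORT_SHUFFLE_PERIOD)
		printf("t=%3llus hash=%016llx\n",
		       (unsigned long long)(jiffies / HZ),
		       (unsigned long long)toy_hash(0x0a000001, 0x0a000002,
				443, jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD));
	return 0;
}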
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0118f0afaa4f..88fa40571d0c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -80,7 +80,7 @@
#include <linux/user_namespace.h>
#include <linux/indirect_call_wrapper.h>
-#include "datagram.h"
+#include "dev.h"
#include "sock_destructor.h"
struct kmem_cache *skbuff_head_cache __ro_after_init;
@@ -91,6 +91,13 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init;
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
EXPORT_SYMBOL(sysctl_max_skb_frags);
+#undef FN
+#define FN(reason) [SKB_DROP_REASON_##reason] = #reason,
+const char * const drop_reasons[] = {
+ DEFINE_DROP_REASON(FN, FN)
+};
+EXPORT_SYMBOL(drop_reasons);
+
/**
* skb_panic - private function for out-of-line support
* @skb: buffer
@@ -127,8 +134,66 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
#define NAPI_SKB_CACHE_BULK 16
#define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2)
+#if PAGE_SIZE == SZ_4K
+
+#define NAPI_HAS_SMALL_PAGE_FRAG 1
+#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc)
+
+/* specialized page frag allocator using a single order 0 page
+ * and slicing it into 1K sized fragments. Restricted to systems
+ * where only a small number of 1K fragments fit in a single
+ * page - to avoid excessive truesize underestimation
+ */
+
+struct page_frag_1k {
+ void *va;
+ u16 offset;
+ bool pfmemalloc;
+};
+
+static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp)
+{
+ struct page *page;
+ int offset;
+
+ offset = nc->offset - SZ_1K;
+ if (likely(offset >= 0))
+ goto use_frag;
+
+ page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
+ if (!page)
+ return NULL;
+
+ nc->va = page_address(page);
+ nc->pfmemalloc = page_is_pfmemalloc(page);
+ offset = PAGE_SIZE - SZ_1K;
+ page_ref_add(page, offset / SZ_1K);
+
+use_frag:
+ nc->offset = offset;
+ return nc->va + offset;
+}
+#else
+
+/* the small page is actually unused in this build; add dummy helpers
+ * to please the compiler and avoid later preprocessor conditionals
+ */
+#define NAPI_HAS_SMALL_PAGE_FRAG 0
+#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false
+
+struct page_frag_1k {
+};
+
+static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask)
+{
+ return NULL;
+}
+
+#endif
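The refcounting in page_frag_alloc_1k() is the subtle part: alloc_pages_node() hands back the page with refcount 1, and page_ref_add(page, offset / SZ_1K) adds one reference per remaining slice (3 on a 4K page), so each of the four fragments owns exactly one reference and the page is freed when the last consumer drops its fragment. A self-contained userspace model of that arithmetic, under the assumption of a 4K page sliced four ways:

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SZ 4096
#define FRAG_SZ 1024

struct model_page { void *va; int refcount; };
struct frag_1k { struct model_page *page; int offset; };

/* mirrors page_frag_alloc_1k(): hand out 1K slices from the top down */
static void *frag_alloc_1k(struct frag_1k *nc)
{
	int offset = nc->offset - FRAG_SZ;

	if (offset < 0) {	/* current page exhausted, grab a new one */
		struct model_page *page = malloc(sizeof(*page));

		page->va = malloc(PAGE_SZ);
		page->refcount = 1;			/* allocator's ref */
		offset = PAGE_SZ - FRAG_SZ;
		page->refcount += offset / FRAG_SZ;	/* +3 on a 4K page */
		nc->page = page;
	}
	nc->offset = offset;
	return (char *)nc->page->va + offset;
}

static void frag_put(struct model_page *page)
{
	if (--page->refcount == 0) {	/* last fragment released */
		free(page->va);
		free(page);
	}
}

int main(void)
{
	struct frag_1k nc = { NULL, 0 };
	struct model_page *page;
	int i;

	for (i = 0; i < 4; i++)
		frag_alloc_1k(&nc);	/* slices at 3K, 2K, 1K and 0 */
	page = nc.page;
	printf("refcount after page is fully sliced: %d\n", page->refcount);
	for (i = 0; i < 4; i++)
		frag_put(page);		/* page is freed on the last put */
	return 0;
}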
+
struct napi_alloc_cache {
struct page_frag_cache page;
+ struct page_frag_1k page_small;
unsigned int skb_count;
void *skb_cache[NAPI_SKB_CACHE_SIZE];
};
@@ -136,6 +201,23 @@ struct napi_alloc_cache {
static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
+/* Double check that napi_get_frags() allocates skbs with
+ * skb->head being backed by slab, not a page fragment.
+ * This is to make sure the bug fixed in 3226b158e67c
+ * ("net: avoid 32 x truesize under-estimation for tiny skbs")
+ * does not accidentally come back.
+ */
+void napi_get_frags_check(struct napi_struct *napi)
+{
+ struct sk_buff *skb;
+
+ local_bh_disable();
+ skb = napi_get_frags(napi);
+ WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag);
+ napi_free_frags(napi);
+ local_bh_enable();
+}
+
void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
@@ -172,13 +254,14 @@ static struct sk_buff *napi_skb_cache_get(void)
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
struct sk_buff *skb;
- if (unlikely(!nc->skb_count))
+ if (unlikely(!nc->skb_count)) {
nc->skb_count = kmem_cache_alloc_bulk(skbuff_head_cache,
GFP_ATOMIC,
NAPI_SKB_CACHE_BULK,
nc->skb_cache);
- if (unlikely(!nc->skb_count))
- return NULL;
+ if (unlikely(!nc->skb_count))
+ return NULL;
+ }
skb = nc->skb_cache[--nc->skb_count];
kasan_unpoison_object_data(skbuff_head_cache, skb);
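The pattern being restructured here is refill-on-empty: when the per-cpu stash runs dry, one bulk call repopulates it from the slab layer, and subsequent gets are served from the stash. A minimal model, with kmem_cache_alloc_bulk() stood in by a loop of plain allocations:

#include <stdio.h>
#include <stdlib.h>

#define CACHE_BULK 16
#define CACHE_SIZE 64

static void *skb_cache[CACHE_SIZE];
static unsigned int skb_count;

static unsigned int bulk_alloc(unsigned int n, void **out)
{
	unsigned int i;

	for (i = 0; i < n; i++)		/* stand-in for slab bulk alloc */
		out[i] = malloc(256);
	return n;
}

static void *cache_get(void)
{
	if (!skb_count) {		/* stash empty: refill in one shot */
		skb_count = bulk_alloc(CACHE_BULK, skb_cache);
		if (!skb_count)
			return NULL;
	}
	return skb_cache[--skb_count];	/* serve from the stash */
}

int main(void)
{
	void *a = cache_get(), *b = cache_get();

	printf("got %p and %p, %u left in cache\n", a, b, skb_count);
	return 0;
}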
@@ -201,10 +284,10 @@ static void __build_skb_around(struct sk_buff *skb, void *data,
skb->head = data;
skb->data = data;
skb_reset_tail_pointer(skb);
- skb->end = skb->tail + size;
+ skb_set_end_offset(skb, size);
skb->mac_header = (typeof(skb->mac_header))~0U;
skb->transport_header = (typeof(skb->transport_header))~0U;
-
+ skb->alloc_cpu = raw_smp_processor_id();
/* make sure we initialize shinfo sequentially */
shinfo = skb_shinfo(skb);
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
@@ -450,8 +533,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
skb->fclone = SKB_FCLONE_ORIG;
refcount_set(&fclones->fclone_ref, 1);
-
- fclones->skb2.fclone = SKB_FCLONE_CLONE;
}
return skb;
@@ -555,14 +636,18 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
{
struct napi_alloc_cache *nc;
struct sk_buff *skb;
+ bool pfmemalloc;
void *data;
+ DEBUG_NET_WARN_ON_ONCE(!in_softirq());
len += NET_SKB_PAD + NET_IP_ALIGN;
/* If requested length is either too small or too big,
* we use kmalloc() for skb->head allocation.
+ * When the small frag allocator is available, prefer it over kmalloc
+ * for small fragments.
*/
- if (len <= SKB_WITH_OVERHEAD(1024) ||
+ if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) ||
len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
@@ -573,13 +658,33 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
}
nc = this_cpu_ptr(&napi_alloc_cache);
- len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- len = SKB_DATA_ALIGN(len);
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
- data = page_frag_alloc(&nc->page, len, gfp_mask);
+ if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) {
+ /* we are artificially inflating the allocation size, but
+ * that is not as bad as it may look, as:
+ * - 'len' less than GRO_MAX_HEAD makes little sense
+ * - On most systems, larger 'len' values lead to fragment
+ * size above 512 bytes
+ * - kmalloc would use the kmalloc-1k slab for such values
+ * - Builds with smaller GRO_MAX_HEAD will very likely do
+ * little networking, as that implies no WiFi and no
+ * tunnel support, and 32-bit arches.
+ */
+ len = SZ_1K;
+
+ data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
+ pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
+ } else {
+ len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ len = SKB_DATA_ALIGN(len);
+
+ data = page_frag_alloc(&nc->page, len, gfp_mask);
+ pfmemalloc = nc->page.pfmemalloc;
+ }
+
if (unlikely(!data))
return NULL;
@@ -589,7 +694,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
return NULL;
}
- if (nc->page.pfmemalloc)
+ if (pfmemalloc)
skb->pfmemalloc = 1;
skb->head_frag = 1;
@@ -666,11 +771,18 @@ static void skb_release_data(struct sk_buff *skb)
&shinfo->dataref))
goto exit;
- skb_zcopy_clear(skb, true);
+ if (skb_zcopy(skb)) {
+ bool skip_unref = shinfo->flags & SKBFL_MANAGED_FRAG_REFS;
+
+ skb_zcopy_clear(skb, true);
+ if (skip_unref)
+ goto free_head;
+ }
for (i = 0; i < shinfo->nr_frags; i++)
__skb_frag_unref(&shinfo->frags[i], skb->pp_recycle);
+free_head:
if (shinfo->frag_list)
kfree_skb_list(shinfo->frag_list);
@@ -681,7 +793,7 @@ exit:
* while trying to recycle fragments on __skb_frag_unref() we need
* to make one SKB responsible for triggering the recycle path.
* So disable the recycling bit if an SKB is cloned and we have
- * additional references to to the fragmented part of the SKB.
+ * additional references to the fragmented part of the SKB.
* Eventually the last SKB will have the recycling bit set and it's
* dataref set to 0, which will trigger the recycling
*/
@@ -725,7 +837,7 @@ void skb_release_head_state(struct sk_buff *skb)
{
skb_dst_drop(skb);
if (skb->destructor) {
- WARN_ON(in_hardirq());
+ DEBUG_NET_WARN_ON_ONCE(in_hardirq());
skb->destructor(skb);
}
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
@@ -767,26 +879,30 @@ EXPORT_SYMBOL(__kfree_skb);
* hit zero. Meanwhile, pass the drop reason to 'kfree_skb'
* tracepoint.
*/
-void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
+void __fix_address
+kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
{
- if (!skb_unref(skb))
+ if (unlikely(!skb_unref(skb)))
return;
+ DEBUG_NET_WARN_ON_ONCE(reason <= 0 || reason >= SKB_DROP_REASON_MAX);
+
trace_kfree_skb(skb, __builtin_return_address(0), reason);
__kfree_skb(skb);
}
EXPORT_SYMBOL(kfree_skb_reason);
-void kfree_skb_list(struct sk_buff *segs)
+void kfree_skb_list_reason(struct sk_buff *segs,
+ enum skb_drop_reason reason)
{
while (segs) {
struct sk_buff *next = segs->next;
- kfree_skb(segs);
+ kfree_skb_reason(segs, reason);
segs = next;
}
}
-EXPORT_SYMBOL(kfree_skb_list);
+EXPORT_SYMBOL(kfree_skb_list_reason);
/* Dump skb information and contents.
*
@@ -892,7 +1008,10 @@ EXPORT_SYMBOL(skb_dump);
*/
void skb_tx_error(struct sk_buff *skb)
{
- skb_zcopy_clear(skb, true);
+ if (skb) {
+ skb_zcopy_downgrade_managed(skb);
+ skb_zcopy_clear(skb, true);
+ }
}
EXPORT_SYMBOL(skb_tx_error);
@@ -975,7 +1094,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget)
return;
}
- lockdep_assert_in_softirq();
+ DEBUG_NET_WARN_ON_ONCE(!in_softirq());
if (!skb_unref(skb))
return;
@@ -1036,6 +1155,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#ifdef CONFIG_NET_RX_BUSY_POLL
CHECK_SKB_FIELD(napi_id);
#endif
+ CHECK_SKB_FIELD(alloc_cpu);
#ifdef CONFIG_XPS
CHECK_SKB_FIELD(sender_cpu);
#endif
@@ -1164,9 +1284,9 @@ void mm_unaccount_pinned_pages(struct mmpin *mmp)
}
EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
-struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
+static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
{
- struct ubuf_info *uarg;
+ struct ubuf_info_msgzc *uarg;
struct sk_buff *skb;
WARN_ON_ONCE(!in_task());
@@ -1184,20 +1304,19 @@ struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
return NULL;
}
- uarg->callback = msg_zerocopy_callback;
+ uarg->ubuf.callback = msg_zerocopy_callback;
uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1;
uarg->len = 1;
uarg->bytelen = size;
uarg->zerocopy = 1;
- uarg->flags = SKBFL_ZEROCOPY_FRAG;
- refcount_set(&uarg->refcnt, 1);
+ uarg->ubuf.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
+ refcount_set(&uarg->ubuf.refcnt, 1);
sock_hold(sk);
- return uarg;
+ return &uarg->ubuf;
}
-EXPORT_SYMBOL_GPL(msg_zerocopy_alloc);
-static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg)
+static inline struct sk_buff *skb_from_uarg(struct ubuf_info_msgzc *uarg)
{
return container_of((void *)uarg, struct sk_buff, cb);
}
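The refactor above is the classic embedded-base-struct pattern: the generic struct ubuf_info stays small, MSG_ZEROCOPY-specific state moves into a wrapper, and uarg_to_msgzc() recovers the wrapper via container_of(); note how the realloc path below first checks that the callback really is msg_zerocopy_callback before downcasting, since other ubuf_info users exist. A self-contained illustration of the same pattern, with simplified field layouts:

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct ubuf_info {			/* generic completion handle */
	void (*callback)(struct ubuf_info *uarg);
};

struct ubuf_info_msgzc {		/* MSG_ZEROCOPY-only state */
	struct ubuf_info ubuf;
	unsigned int id, len, bytelen;
};

static struct ubuf_info_msgzc *uarg_to_msgzc(struct ubuf_info *uarg)
{
	return container_of(uarg, struct ubuf_info_msgzc, ubuf);
}

static void msgzc_callback(struct ubuf_info *uarg)
{
	struct ubuf_info_msgzc *zc = uarg_to_msgzc(uarg);

	printf("completing zerocopy id=%u len=%u bytelen=%u\n",
	       zc->id, zc->len, zc->bytelen);
}

int main(void)
{
	struct ubuf_info_msgzc zc = {
		.ubuf.callback = msgzc_callback,
		.id = 7, .len = 1, .bytelen = 4096,
	};
	struct ubuf_info *generic = &zc.ubuf;	/* what callers see */

	generic->callback(generic);		/* downcast happens inside */
	return 0;
}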
@@ -1206,9 +1325,14 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
struct ubuf_info *uarg)
{
if (uarg) {
+ struct ubuf_info_msgzc *uarg_zc;
const u32 byte_limit = 1 << 19; /* limit to a few TSO */
u32 bytelen, next;
+ /* there might be non-MSG_ZEROCOPY users */
+ if (uarg->callback != msg_zerocopy_callback)
+ return NULL;
+
/* realloc only when socket is locked (TCP, UDP cork),
* so uarg->len and sk_zckey access is serialized
*/
@@ -1217,8 +1341,9 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
return NULL;
}
- bytelen = uarg->bytelen + size;
- if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) {
+ uarg_zc = uarg_to_msgzc(uarg);
+ bytelen = uarg_zc->bytelen + size;
+ if (uarg_zc->len == USHRT_MAX - 1 || bytelen > byte_limit) {
/* TCP can create new skb to attach new uarg */
if (sk->sk_type == SOCK_STREAM)
goto new_alloc;
@@ -1226,11 +1351,11 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
}
next = (u32)atomic_read(&sk->sk_zckey);
- if ((u32)(uarg->id + uarg->len) == next) {
- if (mm_account_pinned_pages(&uarg->mmp, size))
+ if ((u32)(uarg_zc->id + uarg_zc->len) == next) {
+ if (mm_account_pinned_pages(&uarg_zc->mmp, size))
return NULL;
- uarg->len++;
- uarg->bytelen = bytelen;
+ uarg_zc->len++;
+ uarg_zc->bytelen = bytelen;
atomic_set(&sk->sk_zckey, ++next);
/* no extra ref when appending to datagram (MSG_MORE) */
@@ -1266,7 +1391,7 @@ static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len)
return true;
}
-static void __msg_zerocopy_callback(struct ubuf_info *uarg)
+static void __msg_zerocopy_callback(struct ubuf_info_msgzc *uarg)
{
struct sk_buff *tail, *skb = skb_from_uarg(uarg);
struct sock_exterr_skb *serr;
@@ -1319,37 +1444,32 @@ release:
void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg,
bool success)
{
- uarg->zerocopy = uarg->zerocopy & success;
+ struct ubuf_info_msgzc *uarg_zc = uarg_to_msgzc(uarg);
+
+ uarg_zc->zerocopy = uarg_zc->zerocopy & success;
if (refcount_dec_and_test(&uarg->refcnt))
- __msg_zerocopy_callback(uarg);
+ __msg_zerocopy_callback(uarg_zc);
}
EXPORT_SYMBOL_GPL(msg_zerocopy_callback);
void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
{
- struct sock *sk = skb_from_uarg(uarg)->sk;
+ struct sock *sk = skb_from_uarg(uarg_to_msgzc(uarg))->sk;
atomic_dec(&sk->sk_zckey);
- uarg->len--;
+ uarg_to_msgzc(uarg)->len--;
if (have_uref)
msg_zerocopy_callback(NULL, uarg, true);
}
EXPORT_SYMBOL_GPL(msg_zerocopy_put_abort);
-int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len)
-{
- return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len);
-}
-EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram);
-
int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
struct msghdr *msg, int len,
struct ubuf_info *uarg)
{
struct ubuf_info *orig_uarg = skb_zcopy(skb);
- struct iov_iter orig_iter = msg->msg_iter;
int err, orig_len = skb->len;
/* An skb can only point to one uarg. This edge case happens when
@@ -1358,12 +1478,12 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
if (orig_uarg && uarg != orig_uarg)
return -EEXIST;
- err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
+ err = __zerocopy_sg_from_iter(msg, sk, skb, &msg->msg_iter, len);
if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
struct sock *save_sk = skb->sk;
/* Streams do not free skb on error. Reset to prev state. */
- msg->msg_iter = orig_iter;
+ iov_iter_revert(&msg->msg_iter, skb->len - orig_len);
skb->sk = sk;
___pskb_trim(skb, orig_len);
skb->sk = save_sk;
@@ -1375,6 +1495,16 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
}
EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
+void __skb_zcopy_downgrade_managed(struct sk_buff *skb)
+{
+ int i;
+
+ skb_shinfo(skb)->flags &= ~SKBFL_MANAGED_FRAG_REFS;
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ skb_frag_ref(skb, i);
+}
+EXPORT_SYMBOL_GPL(__skb_zcopy_downgrade_managed);
+
static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig,
gfp_t gfp_mask)
{
@@ -1512,6 +1642,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
refcount_read(&fclones->fclone_ref) == 1) {
n = &fclones->skb2;
refcount_set(&fclones->fclone_ref, 2);
+ n->fclone = SKB_FCLONE_CLONE;
} else {
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
@@ -1692,6 +1823,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
BUG_ON(skb_shared(skb));
+ skb_zcopy_downgrade_managed(skb);
+
size = SKB_DATA_ALIGN(size);
if (skb_pfmemalloc(skb))
@@ -1736,11 +1869,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
skb->head = data;
skb->head_frag = 0;
skb->data += off;
+
+ skb_set_end_offset(skb, size);
#ifdef NET_SKBUFF_DATA_USES_OFFSET
- skb->end = size;
off = nhead;
-#else
- skb->end = skb->head + size;
#endif
skb->tail += off;
skb_headers_offset_update(skb, nhead);
@@ -1788,6 +1920,38 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
}
EXPORT_SYMBOL(skb_realloc_headroom);
+int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
+{
+ unsigned int saved_end_offset, saved_truesize;
+ struct skb_shared_info *shinfo;
+ int res;
+
+ saved_end_offset = skb_end_offset(skb);
+ saved_truesize = skb->truesize;
+
+ res = pskb_expand_head(skb, 0, 0, pri);
+ if (res)
+ return res;
+
+ skb->truesize = saved_truesize;
+
+ if (likely(skb_end_offset(skb) == saved_end_offset))
+ return 0;
+
+ shinfo = skb_shinfo(skb);
+
+ /* We are about to change skb->end back, so we need to move
+ * skb_shinfo() to its new location.
+ */
+ memmove(skb->head + saved_end_offset,
+ shinfo,
+ offsetof(struct skb_shared_info, frags[shinfo->nr_frags]));
+
+ skb_set_end_offset(skb, saved_end_offset);
+
+ return 0;
+}
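The memmove here covers the case where pskb_expand_head() lands the head in a larger allocation (e.g. a bigger kmalloc bucket), leaving the shared info at a new, larger end offset; to keep truesize and the end offset stable, only the in-use prefix of the metadata is moved back. A toy model of that fixup, with skb_shared_info replaced by a small trailer struct:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>

struct trailer { int nr_frags; int frags[4]; };

int main(void)
{
	size_t saved_end = 256, new_end = 384;	/* allocator rounded up */
	char *buf = calloc(1, new_end + sizeof(struct trailer));
	struct trailer *t;

	/* the "expand" step left the metadata at the new, larger end */
	t = (struct trailer *)(buf + new_end);
	t->nr_frags = 2;
	t->frags[0] = 11;
	t->frags[1] = 22;

	/* move only the in-use part back to the saved end offset, as
	 * __skb_unclone_keeptruesize() does for skb_shared_info
	 */
	memmove(buf + saved_end, t,
		offsetof(struct trailer, frags) +
		t->nr_frags * sizeof(int));
	t = (struct trailer *)(buf + saved_end);
	printf("nr_frags=%d frag0=%d frag1=%d\n",
	       t->nr_frags, t->frags[0], t->frags[1]);
	free(buf);
	return 0;
}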
+
/**
* skb_expand_head - reallocate header of &sk_buff
* @skb: buffer to reallocate
@@ -2276,7 +2440,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
/* Free pulled out fragments. */
while ((list = skb_shinfo(skb)->frag_list) != insp) {
skb_shinfo(skb)->frag_list = list->next;
- kfree_skb(list);
+ consume_skb(list);
}
/* And insert new clone at head. */
if (clone) {
@@ -3163,9 +3327,7 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
}
}
- to->truesize += len + plen;
- to->len += len + plen;
- to->data_len += len + plen;
+ skb_len_add(to, len + plen);
if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) {
skb_tx_error(from);
@@ -3457,6 +3619,8 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
int pos = skb_headlen(skb);
const int zc_flags = SKBFL_SHARED_FRAG | SKBFL_PURE_ZEROCOPY;
+ skb_zcopy_downgrade_managed(skb);
+
skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & zc_flags;
skb_zerocopy_clone(skb1, skb, 0);
if (len < pos) /* Split line is inside header. */
@@ -3602,13 +3766,8 @@ onlymerged:
tgt->ip_summed = CHECKSUM_PARTIAL;
skb->ip_summed = CHECKSUM_PARTIAL;
- /* Yak, is it really working this way? Some helper please? */
- skb->len -= shiftlen;
- skb->data_len -= shiftlen;
- skb->truesize -= shiftlen;
- tgt->len += shiftlen;
- tgt->data_len += shiftlen;
- tgt->truesize += shiftlen;
+ skb_len_add(skb, -shiftlen);
+ skb_len_add(tgt, shiftlen);
return shiftlen;
}
@@ -3810,8 +3969,9 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
} else if (i < MAX_SKB_FRAGS) {
+ skb_zcopy_downgrade_managed(skb);
get_page(page);
- skb_fill_page_desc(skb, i, page, offset, size);
+ skb_fill_page_desc_noacc(skb, i, page, offset, size);
} else {
return -EMSGSIZE;
}
@@ -3865,7 +4025,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
unsigned int delta_len = 0;
struct sk_buff *tail = NULL;
struct sk_buff *nskb, *tmp;
- int err;
+ int len_diff, err;
skb_push(skb, -skb_network_offset(skb) + offset);
@@ -3876,6 +4036,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
list_skb = list_skb->next;
err = 0;
+ delta_truesize += nskb->truesize;
if (skb_shared(nskb)) {
tmp = skb_clone(nskb, GFP_ATOMIC);
if (tmp) {
@@ -3900,14 +4061,15 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
tail = nskb;
delta_len += nskb->len;
- delta_truesize += nskb->truesize;
skb_push(nskb, -skb_network_offset(nskb) + offset);
skb_release_head_state(nskb);
+ len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb);
__copy_skb_header(nskb, skb);
skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
+ nskb->transport_header += len_diff;
skb_copy_from_linear_data_offset(skb, -tnl_hlen,
nskb->data - tnl_hlen,
offset + tnl_hlen);
@@ -3972,23 +4134,25 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
int i = 0;
int pos;
- if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) &&
- (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) {
- /* gso_size is untrusted, and we have a frag_list with a linear
- * non head_frag head.
- *
- * (we assume checking the first list_skb member suffices;
- * i.e if either of the list_skb members have non head_frag
- * head, then the first one has too).
- *
- * If head_skb's headlen does not fit requested gso_size, it
- * means that the frag_list members do NOT terminate on exact
- * gso_size boundaries. Hence we cannot perform skb_frag_t page
- * sharing. Therefore we must fallback to copying the frag_list
- * skbs; we do so by disabling SG.
- */
- if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb))
- features &= ~NETIF_F_SG;
+ if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
+ mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
+ struct sk_buff *check_skb;
+
+ for (check_skb = list_skb; check_skb; check_skb = check_skb->next) {
+ if (skb_headlen(check_skb) && !check_skb->head_frag) {
+ /* gso_size is untrusted, and we have a frag_list with
+ * a linear non head_frag item.
+ *
+ * If head_skb's headlen does not fit requested gso_size,
+ * it means that the frag_list members do NOT terminate
+ * on exact gso_size boundaries. Hence we cannot perform
+ * skb_frag_t page sharing. Therefore we must fall back to
+ * copying the frag_list skbs; we do so by disabling SG.
+ */
+ features &= ~NETIF_F_SG;
+ break;
+ }
+ }
}
__skb_push(head_skb, doffset);
@@ -4150,9 +4314,8 @@ normal:
SKB_GSO_CB(nskb)->csum_start =
skb_headroom(nskb) + doffset;
} else {
- skb_copy_bits(head_skb, offset,
- skb_put(nskb, len),
- len);
+ if (skb_copy_bits(head_skb, offset, skb_put(nskb, len), len))
+ goto err;
}
continue;
}
@@ -4730,7 +4893,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
if (sk_is_tcp(sk))
- serr->ee.ee_data -= sk->sk_tskey;
+ serr->ee.ee_data -= atomic_read(&sk->sk_tskey);
}
err = sock_queue_err_skb(sk, skb);
@@ -4743,7 +4906,7 @@ static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
{
bool ret;
- if (likely(sysctl_tstamp_allow_data || tsonly))
+ if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly))
return true;
read_lock_bh(&sk->sk_callback_lock);
@@ -4820,7 +4983,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
if (hwtstamps)
*skb_hwtstamps(skb) = *hwtstamps;
else
- skb->tstamp = ktime_get_real();
+ __net_timestamp(skb);
__skb_complete_tx_timestamp(skb, sk, tstype, opt_stats);
}
@@ -5244,11 +5407,18 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
if (skb_cloned(to))
return false;
- /* The page pool signature of struct page will eventually figure out
- * which pages can be recycled or not but for now let's prohibit slab
- * allocated and page_pool allocated SKBs from being coalesced.
+ /* In general, avoid mixing slab allocated and page_pool allocated
+ * pages within the same SKB. However when @to is not pp_recycle and
+ * @from is cloned, we can transition frag pages from page_pool to
+ * reference counted.
+ *
+ * On the other hand, don't allow coalescing two pp_recycle SKBs if
+ * @from is cloned, in case the SKB is using page_pool fragment
+ * references (PP_FLAG_PAGE_FRAG). Since we only take full page
+ * references for cloned SKBs at the moment that would result in
+ * inconsistent reference counts.
*/
- if (to->pp_recycle != from->pp_recycle)
+ if (to->pp_recycle != (from->pp_recycle && !skb_cloned(from)))
return false;
if (len <= skb_tailroom(to)) {
@@ -5350,7 +5520,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
ipvs_reset(skb);
skb->mark = 0;
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
}
EXPORT_SYMBOL_GPL(skb_scrub_packet);
@@ -5562,7 +5732,7 @@ err_free:
}
EXPORT_SYMBOL(skb_vlan_untag);
-int skb_ensure_writable(struct sk_buff *skb, int write_len)
+int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len)
{
if (!pskb_may_pull(skb, write_len))
return -ENOMEM;
@@ -6044,11 +6214,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
skb->head = data;
skb->data = data;
skb->head_frag = 0;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- skb->end = size;
-#else
- skb->end = skb->head + size;
-#endif
+ skb_set_end_offset(skb, size);
skb_set_tail_pointer(skb, skb_headlen(skb));
skb_headers_offset_update(skb, 0);
skb->cloned = 0;
@@ -6105,7 +6271,7 @@ static int pskb_carve_frag_list(struct sk_buff *skb,
/* Free pulled out fragments. */
while ((list = shinfo->frag_list) != insp) {
shinfo->frag_list = list->next;
- kfree_skb(list);
+ consume_skb(list);
}
/* And insert new clone at head. */
if (clone) {
@@ -6186,11 +6352,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
skb->head = data;
skb->head_frag = 0;
skb->data = data;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- skb->end = size;
-#else
- skb->end = skb->head + size;
-#endif
+ skb_set_end_offset(skb, size);
skb_reset_tail_pointer(skb);
skb_headers_offset_update(skb, 0);
skb->cloned = 0;
@@ -6455,3 +6617,49 @@ free_now:
}
EXPORT_SYMBOL(__skb_ext_put);
#endif /* CONFIG_SKB_EXTENSIONS */
+
+/**
+ * skb_attempt_defer_free - queue skb for remote freeing
+ * @skb: buffer
+ *
+ * Put @skb in a per-cpu list, using the cpu which
+ * allocated the skb/pages to reduce false sharing
+ * and memory zone spinlock contention.
+ */
+void skb_attempt_defer_free(struct sk_buff *skb)
+{
+ int cpu = skb->alloc_cpu;
+ struct softnet_data *sd;
+ unsigned long flags;
+ unsigned int defer_max;
+ bool kick;
+
+ if (WARN_ON_ONCE(cpu >= nr_cpu_ids) ||
+ !cpu_online(cpu) ||
+ cpu == raw_smp_processor_id()) {
+nodefer: __kfree_skb(skb);
+ return;
+ }
+
+ sd = &per_cpu(softnet_data, cpu);
+ defer_max = READ_ONCE(sysctl_skb_defer_max);
+ if (READ_ONCE(sd->defer_count) >= defer_max)
+ goto nodefer;
+
+ spin_lock_irqsave(&sd->defer_lock, flags);
+ /* Send an IPI every time the queue reaches half capacity. */
+ kick = sd->defer_count == (defer_max >> 1);
+ /* Paired with the READ_ONCE() a few lines above */
+ WRITE_ONCE(sd->defer_count, sd->defer_count + 1);
+
+ skb->next = sd->defer_list;
+ /* Paired with READ_ONCE() in skb_defer_free_flush() */
+ WRITE_ONCE(sd->defer_list, skb);
+ spin_unlock_irqrestore(&sd->defer_lock, flags);
+
+ /* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
+ * if we are unlucky enough (this seems very unlikely).
+ */
+ if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1))
+ smp_call_function_single_async(cpu, &sd->defer_csd);
+}
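The deferral policy above is easy to lose in the locking details: the per-cpu queue is bounded by sysctl_skb_defer_max, a full queue falls back to freeing inline, and the remote cpu is kicked exactly once when the queue crosses half capacity so it drains before producers start hitting the fallback. A single-threaded sketch of just that policy, with the kernel machinery (spinlock, IPI, softirq) reduced to prints:

#include <stdio.h>
#include <stdbool.h>

#define DEFER_MAX 8

static unsigned int defer_count;
static bool ipi_scheduled;

static void try_defer_free(int obj)
{
	bool kick;

	if (defer_count >= DEFER_MAX) {		/* queue full: free inline */
		printf("obj %d freed inline\n", obj);
		return;
	}
	kick = (defer_count == DEFER_MAX / 2);	/* checked pre-increment */
	defer_count++;
	printf("obj %d deferred (count=%u)\n", obj, defer_count);
	if (kick && !ipi_scheduled) {		/* at most one kick in flight */
		ipi_scheduled = true;
		printf("-> kick remote cpu to drain\n");
	}
}

int main(void)
{
	int i;

	for (i = 0; i < 12; i++)
		try_defer_free(i);
	return 0;
}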
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 8eb671c827f9..e6b9ced3eda8 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -27,6 +27,7 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
int elem_first_coalesce)
{
struct page_frag *pfrag = sk_page_frag(sk);
+ u32 osize = msg->sg.size;
int ret = 0;
len -= msg->sg.size;
@@ -35,13 +36,17 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
u32 orig_offset;
int use, i;
- if (!sk_page_frag_refill(sk, pfrag))
- return -ENOMEM;
+ if (!sk_page_frag_refill(sk, pfrag)) {
+ ret = -ENOMEM;
+ goto msg_trim;
+ }
orig_offset = pfrag->offset;
use = min_t(int, len, pfrag->size - orig_offset);
- if (!sk_wmem_schedule(sk, use))
- return -ENOMEM;
+ if (!sk_wmem_schedule(sk, use)) {
+ ret = -ENOMEM;
+ goto msg_trim;
+ }
i = msg->sg.end;
sk_msg_iter_var_prev(i);
@@ -71,6 +76,10 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
}
return ret;
+
+msg_trim:
+ sk_msg_trim(sk, msg, osize);
+ return ret;
}
EXPORT_SYMBOL_GPL(sk_msg_alloc);
@@ -315,14 +324,13 @@ int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
goto out;
}
- copied = iov_iter_get_pages(from, pages, bytes, maxpages,
+ copied = iov_iter_get_pages2(from, pages, bytes, maxpages,
&offset);
if (copied <= 0) {
ret = -EFAULT;
goto out;
}
- iov_iter_advance(from, copied);
bytes -= copied;
msg->sg.size += copied;
@@ -426,8 +434,10 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
if (copied + copy > len)
copy = len - copied;
copy = copy_page_to_iter(page, sge->offset, copy, iter);
- if (!copy)
- return copied ? copied : -EFAULT;
+ if (!copy) {
+ copied = copied ? copied : -EFAULT;
+ goto out;
+ }
copied += copy;
if (likely(!peek)) {
@@ -447,13 +457,13 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
* didn't copy the entire length lets just break.
*/
if (copy != sge->length)
- return copied;
+ goto out;
sk_msg_iter_var_next(i);
}
if (copied == len)
break;
- } while (i != msg_rx->sg.end);
+ } while ((i != msg_rx->sg.end) && !sg_is_last(sge));
if (unlikely(peek)) {
msg_rx = sk_psock_next_msg(psock, msg_rx);
@@ -463,13 +473,15 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
}
msg_rx->sg.start = i;
- if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
+ if (!sge->length && (i == msg_rx->sg.end || sg_is_last(sge))) {
msg_rx = sk_psock_dequeue_msg(psock);
kfree_sk_msg(msg_rx);
}
msg_rx = sk_psock_peek_msg(psock);
}
-
+out:
+ if (psock->work_state.skb && copied > 0)
+ schedule_work(&psock->work);
return copied;
}
EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
@@ -488,23 +500,27 @@ bool sk_msg_is_readable(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_msg_is_readable);
-static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
- struct sk_buff *skb)
+static struct sk_msg *alloc_sk_msg(gfp_t gfp)
{
struct sk_msg *msg;
- if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+ msg = kzalloc(sizeof(*msg), gfp | __GFP_NOWARN);
+ if (unlikely(!msg))
return NULL;
+ sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS);
+ return msg;
+}
- if (!sk_rmem_schedule(sk, skb, skb->truesize))
+static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
+ struct sk_buff *skb)
+{
+ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
return NULL;
- msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL);
- if (unlikely(!msg))
+ if (!sk_rmem_schedule(sk, skb, skb->truesize))
return NULL;
- sk_msg_init(msg);
- return msg;
+ return alloc_sk_msg(GFP_KERNEL);
}
static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
@@ -515,16 +531,20 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
{
int num_sge, copied;
- /* skb linearize may fail with ENOMEM, but lets simply try again
- * later if this happens. Under memory pressure we don't want to
- * drop the skb. We need to linearize the skb so that the mapping
- * in skb_to_sgvec can not error.
- */
- if (skb_linearize(skb))
- return -EAGAIN;
num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
- if (unlikely(num_sge < 0))
- return num_sge;
+ if (num_sge < 0) {
+ /* skb linearize may fail with ENOMEM, but let's simply try again
+ * later if this happens. Under memory pressure we don't want to
+ * drop the skb. We need to linearize the skb so that the mapping
+ * in skb_to_sgvec cannot error.
+ */
+ if (skb_linearize(skb))
+ return -EAGAIN;
+
+ num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
+ if (unlikely(num_sge < 0))
+ return num_sge;
+ }
copied = len;
msg->sg.start = 0;
@@ -577,13 +597,12 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len)
{
- struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
+ struct sk_msg *msg = alloc_sk_msg(GFP_ATOMIC);
struct sock *sk = psock->sk;
int err;
if (unlikely(!msg))
return -EAGAIN;
- sk_msg_init(msg);
skb_set_owner_r(skb, sk);
err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
if (err < 0)
@@ -686,6 +705,11 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
write_lock_bh(&sk->sk_callback_lock);
+ if (sk_is_inet(sk) && inet_csk_has_ulp(sk)) {
+ psock = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
if (sk->sk_user_data) {
psock = ERR_PTR(-EBUSY);
goto out;
@@ -702,6 +726,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
psock->eval = __SK_NONE;
psock->sk_proto = prot;
psock->saved_unhash = prot->unhash;
+ psock->saved_destroy = prot->destroy;
psock->saved_close = prot->close;
psock->saved_write_space = sk->sk_write_space;
@@ -717,7 +742,9 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
refcount_set(&psock->refcnt, 1);
- rcu_assign_sk_user_data_nocopy(sk, psock);
+ __rcu_assign_sk_user_data_with_flags(sk, psock,
+ SK_USER_DATA_NOCOPY |
+ SK_USER_DATA_PSOCK);
sock_hold(sk);
out:
@@ -776,16 +803,13 @@ static void sk_psock_link_destroy(struct sk_psock *psock)
}
}
-void sk_psock_stop(struct sk_psock *psock, bool wait)
+void sk_psock_stop(struct sk_psock *psock)
{
spin_lock_bh(&psock->ingress_lock);
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
sk_psock_cork_free(psock);
__sk_psock_zap_ingress(psock);
spin_unlock_bh(&psock->ingress_lock);
-
- if (wait)
- cancel_work_sync(&psock->work);
}
static void sk_psock_done_strp(struct sk_psock *psock);
@@ -823,7 +847,7 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
sk_psock_stop_verdict(sk, psock);
write_unlock_bh(&sk->sk_callback_lock);
- sk_psock_stop(psock, false);
+ sk_psock_stop(psock);
INIT_RCU_WORK(&psock->rwork, sk_psock_destroy);
queue_rcu_work(system_wq, &psock->rwork);
@@ -1146,21 +1170,14 @@ static void sk_psock_done_strp(struct sk_psock *psock)
}
#endif /* CONFIG_BPF_STREAM_PARSER */
-static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
- unsigned int offset, size_t orig_len)
+static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
{
- struct sock *sk = (struct sock *)desc->arg.data;
struct sk_psock *psock;
struct bpf_prog *prog;
int ret = __SK_DROP;
int len = skb->len;
- /* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */
- skb = skb_clone(skb, GFP_ATOMIC);
- if (!skb) {
- desc->error = -ENOMEM;
- return 0;
- }
+ skb_get(skb);
rcu_read_lock();
psock = sk_psock(sk);
@@ -1173,15 +1190,14 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
if (!prog)
prog = READ_ONCE(psock->progs.skb_verdict);
if (likely(prog)) {
- skb->sk = sk;
skb_dst_drop(skb);
skb_bpf_redirect_clear(skb);
ret = bpf_prog_run_pin_on_cpu(prog, skb);
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
- skb->sk = NULL;
}
- if (sk_psock_verdict_apply(psock, skb, ret) < 0)
- len = 0;
+ ret = sk_psock_verdict_apply(psock, skb, ret);
+ if (ret < 0)
+ len = ret;
out:
rcu_read_unlock();
return len;
@@ -1190,16 +1206,10 @@ out:
static void sk_psock_verdict_data_ready(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
- read_descriptor_t desc;
- if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
+ if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
return;
-
- desc.arg.data = sk;
- desc.error = 0;
- desc.count = 1;
-
- sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
+ sock->ops->read_skb(sk, sk_psock_verdict_recv);
}
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
diff --git a/net/core/sock.c b/net/core/sock.c
index e21485ab285d..a3ba0358c77c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -141,9 +141,14 @@
#include <linux/ethtool.h>
+#include "dev.h"
+
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
+static void sock_def_write_space_wfree(struct sock *sk);
+static void sock_def_write_space(struct sock *sk);
+
/**
* sk_ns_capable - General socket capability test
* @sk: Socket to use a capability on or through
@@ -503,17 +508,35 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(__sock_queue_rcv_skb);
-int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
+ enum skb_drop_reason *reason)
{
+ enum skb_drop_reason drop_reason;
int err;
err = sk_filter(sk, skb);
- if (err)
- return err;
-
- return __sock_queue_rcv_skb(sk, skb);
+ if (err) {
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
+ goto out;
+ }
+ err = __sock_queue_rcv_skb(sk, skb);
+ switch (err) {
+ case -ENOMEM:
+ drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
+ break;
+ case -ENOBUFS:
+ drop_reason = SKB_DROP_REASON_PROTO_MEM;
+ break;
+ default:
+ drop_reason = SKB_NOT_DROPPED_YET;
+ break;
+ }
+out:
+ if (reason)
+ *reason = drop_reason;
+ return err;
}
-EXPORT_SYMBOL(sock_queue_rcv_skb);
+EXPORT_SYMBOL(sock_queue_rcv_skb_reason);
int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
const int nested, unsigned int trim_cap, bool refcounted)
@@ -612,7 +635,9 @@ static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
if (ifindex < 0)
goto out;
- sk->sk_bound_dev_if = ifindex;
+ /* Paired with all READ_ONCE() done locklessly. */
+ WRITE_ONCE(sk->sk_bound_dev_if, ifindex);
+
if (sk->sk_prot->rehash)
sk->sk_prot->rehash(sk);
sk_dst_reset(sk);
@@ -678,22 +703,25 @@ static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
goto out;
}
- return sock_bindtoindex(sk, index, true);
+ sockopt_lock_sock(sk);
+ ret = sock_bindtoindex_locked(sk, index);
+ sockopt_release_sock(sk);
out:
#endif
return ret;
}
-static int sock_getbindtodevice(struct sock *sk, char __user *optval,
- int __user *optlen, int len)
+static int sock_getbindtodevice(struct sock *sk, sockptr_t optval,
+ sockptr_t optlen, int len)
{
int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
+ int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
struct net *net = sock_net(sk);
char devname[IFNAMSIZ];
- if (sk->sk_bound_dev_if == 0) {
+ if (bound_dev_if == 0) {
len = 0;
goto zero;
}
@@ -702,19 +730,19 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
if (len < IFNAMSIZ)
goto out;
- ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
+ ret = netdev_get_name(net, devname, bound_dev_if);
if (ret)
goto out;
len = strlen(devname) + 1;
ret = -EFAULT;
- if (copy_to_user(optval, devname, len))
+ if (copy_to_sockptr(optval, devname, len))
goto out;
zero:
ret = -EFAULT;
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
goto out;
ret = 0;
@@ -844,6 +872,8 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
}
num = ethtool_get_phc_vclocks(dev, &vclock_index);
+ dev_put(dev);
+
for (i = 0; i < num; i++) {
if (*(vclock_index + i) == phc_index) {
match = true;
@@ -877,9 +907,9 @@ int sock_set_timestamping(struct sock *sk, int optname,
if ((1 << sk->sk_state) &
(TCPF_CLOSE | TCPF_LISTEN))
return -EINVAL;
- sk->sk_tskey = tcp_sk(sk)->snd_una;
+ atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una);
} else {
- sk->sk_tskey = 0;
+ atomic_set(&sk->sk_tskey, 0);
}
}
@@ -963,7 +993,7 @@ EXPORT_SYMBOL(sock_set_mark);
static void sock_release_reserved_memory(struct sock *sk, int bytes)
{
/* Round down bytes to multiple of pages */
- bytes &= ~(SK_MEM_QUANTUM - 1);
+ bytes = round_down(bytes, PAGE_SIZE);
WARN_ON(bytes > sk->sk_reserved_mem);
sk->sk_reserved_mem -= bytes;
@@ -991,7 +1021,8 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
return -ENOMEM;
/* pre-charge to forward_alloc */
- allocated = sk_memory_allocated_add(sk, pages);
+ sk_memory_allocated_add(sk, pages);
+ allocated = sk_memory_allocated(sk);
/* If the system goes into memory pressure with this
* precharge, give up and return error.
*/
@@ -1000,24 +1031,58 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
return -ENOMEM;
}
- sk->sk_forward_alloc += pages << SK_MEM_QUANTUM_SHIFT;
+ sk->sk_forward_alloc += pages << PAGE_SHIFT;
- sk->sk_reserved_mem += pages << SK_MEM_QUANTUM_SHIFT;
+ sk->sk_reserved_mem += pages << PAGE_SHIFT;
return 0;
}
+void sockopt_lock_sock(struct sock *sk)
+{
+ /* When current->bpf_ctx is set, setsockopt() is being called
+ * from a bpf prog, and bpf has already acquired the sk lock
+ * before calling setsockopt().
+ */
+ if (has_current_bpf_ctx())
+ return;
+
+ lock_sock(sk);
+}
+EXPORT_SYMBOL(sockopt_lock_sock);
+
+void sockopt_release_sock(struct sock *sk)
+{
+ if (has_current_bpf_ctx())
+ return;
+
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sockopt_release_sock);
+
+bool sockopt_ns_capable(struct user_namespace *ns, int cap)
+{
+ return has_current_bpf_ctx() || ns_capable(ns, cap);
+}
+EXPORT_SYMBOL(sockopt_ns_capable);
+
+bool sockopt_capable(int cap)
+{
+ return has_current_bpf_ctx() || capable(cap);
+}
+EXPORT_SYMBOL(sockopt_capable);
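The idea behind these helpers is context-sensitive locking: when the caller is a BPF program, the lock is already held (and the capability check was already done), so the helpers become no-ops instead of deadlocking. A minimal userspace model of the same shape, with current->bpf_ctx stood in by a thread-local flag:

#include <stdio.h>
#include <stdbool.h>
#include <pthread.h>

static pthread_mutex_t sk_lock = PTHREAD_MUTEX_INITIALIZER;
static _Thread_local bool in_bpf_ctx;	/* stand-in for current->bpf_ctx */

static void sockopt_lock(void)
{
	if (in_bpf_ctx)			/* BPF caller already holds it */
		return;
	pthread_mutex_lock(&sk_lock);
}

static void sockopt_release(void)
{
	if (in_bpf_ctx)
		return;
	pthread_mutex_unlock(&sk_lock);
}

static void set_option(void)
{
	sockopt_lock();
	puts("option updated under lock");
	sockopt_release();
}

int main(void)
{
	set_option();			/* syscall path: takes the lock */

	pthread_mutex_lock(&sk_lock);	/* BPF path: caller took the lock */
	in_bpf_ctx = true;
	set_option();			/* helpers are no-ops here */
	in_bpf_ctx = false;
	pthread_mutex_unlock(&sk_lock);
	return 0;
}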
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
*/
-int sock_setsockopt(struct socket *sock, int level, int optname,
- sockptr_t optval, unsigned int optlen)
+int sk_setsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct so_timestamping timestamping;
+ struct socket *sock = sk->sk_socket;
struct sock_txtime sk_txtime;
- struct sock *sk = sock->sk;
int val;
int valbool;
struct linger ling;
@@ -1038,11 +1103,11 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
valbool = val ? 1 : 0;
- lock_sock(sk);
+ sockopt_lock_sock(sk);
switch (optname) {
case SO_DEBUG:
- if (val && !capable(CAP_NET_ADMIN))
+ if (val && !sockopt_capable(CAP_NET_ADMIN))
ret = -EACCES;
else
sock_valbool_flag(sk, SOCK_DBG, valbool);
@@ -1072,7 +1137,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
* play 'guess the biggest size' games. RCVBUF/SNDBUF
* are treated in BSD as hints
*/
- val = min_t(u32, val, sysctl_wmem_max);
+ val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
set_sndbuf:
/* Ensure val * 2 fits into an int, to prevent max_t()
* from treating it as a negative value.
@@ -1086,7 +1151,7 @@ set_sndbuf:
break;
case SO_SNDBUFFORCE:
- if (!capable(CAP_NET_ADMIN)) {
+ if (!sockopt_capable(CAP_NET_ADMIN)) {
ret = -EPERM;
break;
}
@@ -1104,11 +1169,11 @@ set_sndbuf:
* play 'guess the biggest size' games. RCVBUF/SNDBUF
* are treated in BSD as hints
*/
- __sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max));
+ __sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
break;
case SO_RCVBUFFORCE:
- if (!capable(CAP_NET_ADMIN)) {
+ if (!sockopt_capable(CAP_NET_ADMIN)) {
ret = -EPERM;
break;
}
@@ -1135,8 +1200,8 @@ set_sndbuf:
case SO_PRIORITY:
if ((val >= 0 && val <= 6) ||
- ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
- ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
+ sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
sk->sk_priority = val;
else
ret = -EPERM;
@@ -1199,7 +1264,7 @@ set_sndbuf:
case SO_RCVLOWAT:
if (val < 0)
val = INT_MAX;
- if (sock->ops->set_rcvlowat)
+ if (sock && sock->ops->set_rcvlowat)
ret = sock->ops->set_rcvlowat(sk, val);
else
WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
@@ -1281,14 +1346,23 @@ set_sndbuf:
clear_bit(SOCK_PASSSEC, &sock->flags);
break;
case SO_MARK:
- if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
- !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+ if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
+ !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
ret = -EPERM;
break;
}
__sock_set_mark(sk, val);
break;
+ case SO_RCVMARK:
+ if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
+ !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+ ret = -EPERM;
+ break;
+ }
+
+ sock_valbool_flag(sk, SOCK_RCVMARK, valbool);
+ break;
case SO_RXQ_OVFL:
sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
@@ -1316,7 +1390,7 @@ set_sndbuf:
#ifdef CONFIG_NET_RX_BUSY_POLL
case SO_BUSY_POLL:
/* allow unprivileged users to decrease the value */
- if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
+ if ((val > sk->sk_ll_usec) && !sockopt_capable(CAP_NET_ADMIN))
ret = -EPERM;
else {
if (val < 0)
@@ -1326,13 +1400,13 @@ set_sndbuf:
}
break;
case SO_PREFER_BUSY_POLL:
- if (valbool && !capable(CAP_NET_ADMIN))
+ if (valbool && !sockopt_capable(CAP_NET_ADMIN))
ret = -EPERM;
else
WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
break;
case SO_BUSY_POLL_BUDGET:
- if (val > READ_ONCE(sk->sk_busy_poll_budget) && !capable(CAP_NET_ADMIN)) {
+ if (val > READ_ONCE(sk->sk_busy_poll_budget) && !sockopt_capable(CAP_NET_ADMIN)) {
ret = -EPERM;
} else {
if (val < 0 || val > U16_MAX)
@@ -1375,9 +1449,9 @@ set_sndbuf:
if (!(sk_is_tcp(sk) ||
(sk->sk_type == SOCK_DGRAM &&
sk->sk_protocol == IPPROTO_UDP)))
- ret = -ENOTSUPP;
+ ret = -EOPNOTSUPP;
} else if (sk->sk_family != PF_RDS) {
- ret = -ENOTSUPP;
+ ret = -EOPNOTSUPP;
}
if (!ret) {
if (val < 0 || val > 1)
@@ -1403,7 +1477,7 @@ set_sndbuf:
* scheduler has enough safe guards.
*/
if (sk_txtime.clockid != CLOCK_MONOTONIC &&
- !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+ !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
ret = -EPERM;
break;
}
@@ -1445,13 +1519,29 @@ set_sndbuf:
break;
}
+ case SO_TXREHASH:
+ if (val < -1 || val > 1) {
+ ret = -EINVAL;
+ break;
+ }
+ /* Paired with READ_ONCE() in tcp_rtx_synack() */
+ WRITE_ONCE(sk->sk_txrehash, (u8)val);
+ break;
+
default:
ret = -ENOPROTOOPT;
break;
}
- release_sock(sk);
+ sockopt_release_sock(sk);
return ret;
}
+
+int sock_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
+{
+ return sk_setsockopt(sock->sk, level, optname,
+ optval, optlen);
+}
EXPORT_SYMBOL(sock_setsockopt);
static const struct cred *sk_get_peer_cred(struct sock *sk)
@@ -1478,22 +1568,25 @@ static void cred_to_ucred(struct pid *pid, const struct cred *cred,
}
}
-static int groups_to_user(gid_t __user *dst, const struct group_info *src)
+static int groups_to_user(sockptr_t dst, const struct group_info *src)
{
struct user_namespace *user_ns = current_user_ns();
int i;
- for (i = 0; i < src->ngroups; i++)
- if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i))
+ for (i = 0; i < src->ngroups; i++) {
+ gid_t gid = from_kgid_munged(user_ns, src->gid[i]);
+
+ if (copy_to_sockptr_offset(dst, i * sizeof(gid), &gid, sizeof(gid)))
return -EFAULT;
+ }
return 0;
}
-int sock_getsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int __user *optlen)
+int sk_getsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, sockptr_t optlen)
{
- struct sock *sk = sock->sk;
+ struct socket *sock = sk->sk_socket;
union {
int val;
@@ -1510,7 +1603,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
int lv = sizeof(int);
int len;
- if (get_user(len, optlen))
+ if (copy_from_sockptr(&len, optlen, sizeof(int)))
return -EFAULT;
if (len < 0)
return -EINVAL;
@@ -1645,7 +1738,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
spin_unlock(&sk->sk_peer_lock);
- if (copy_to_user(optval, &peercred, len))
+ if (copy_to_sockptr(optval, &peercred, len))
return -EFAULT;
goto lenout;
}
@@ -1663,11 +1756,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
if (len < n * sizeof(gid_t)) {
len = n * sizeof(gid_t);
put_cred(cred);
- return put_user(len, optlen) ? -EFAULT : -ERANGE;
+ return copy_to_sockptr(optlen, &len, sizeof(int)) ? -EFAULT : -ERANGE;
}
len = n * sizeof(gid_t);
- ret = groups_to_user((gid_t __user *)optval, cred->group_info);
+ ret = groups_to_user(optval, cred->group_info);
put_cred(cred);
if (ret)
return ret;
@@ -1683,7 +1776,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
return -ENOTCONN;
if (lv < len)
return -EINVAL;
- if (copy_to_user(optval, address, len))
+ if (copy_to_sockptr(optval, address, len))
return -EFAULT;
goto lenout;
}
@@ -1700,12 +1793,16 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_PEERSEC:
- return security_socket_getpeersec_stream(sock, optval, optlen, len);
+ return security_socket_getpeersec_stream(sock, optval.user, optlen.user, len);
case SO_MARK:
v.val = sk->sk_mark;
break;
+ case SO_RCVMARK:
+ v.val = sock_flag(sk, SOCK_RCVMARK);
+ break;
+
case SO_RXQ_OVFL:
v.val = sock_flag(sk, SOCK_RXQ_OVFL);
break;
@@ -1728,7 +1825,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
return sock_getbindtodevice(sk, optval, optlen, len);
case SO_GET_FILTER:
- len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
+ len = sk_get_filter(sk, optval, len);
if (len < 0)
return len;
@@ -1776,7 +1873,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
sk_get_meminfo(sk, meminfo);
len = min_t(unsigned int, len, sizeof(meminfo));
- if (copy_to_user(optval, &meminfo, len))
+ if (copy_to_sockptr(optval, &meminfo, len))
return -EFAULT;
goto lenout;
@@ -1814,7 +1911,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_BINDTOIFINDEX:
- v.val = sk->sk_bound_dev_if;
+ v.val = READ_ONCE(sk->sk_bound_dev_if);
break;
case SO_NETNS_COOKIE:
@@ -1832,6 +1929,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_reserved_mem;
break;
+ case SO_TXREHASH:
+ v.val = sk->sk_txrehash;
+ break;
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -1841,14 +1942,22 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
if (len > lv)
len = lv;
- if (copy_to_user(optval, &v, len))
+ if (copy_to_sockptr(optval, &v, len))
return -EFAULT;
lenout:
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
return 0;
}
+int sock_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ return sk_getsockopt(sock->sk, level, optname,
+ USER_SOCKPTR(optval),
+ USER_SOCKPTR(optlen));
+}
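What makes this split work is the sockptr_t abstraction: a tagged pointer that can reference user or kernel memory, so sk_getsockopt() can serve both the syscall path (USER_SOCKPTR above) and in-kernel/BPF callers through one body. A hedged userspace sketch of the idea, where both arms degrade to memcpy since there is no user/kernel boundary to cross:

#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <stddef.h>

typedef struct {
	void *p;
	bool is_kernel;		/* selects the copy primitive */
} sockptr_t;

static int copy_to_sockptr(sockptr_t dst, const void *src, size_t n)
{
	/* the kernel would use copy_to_user() for the user arm */
	memcpy(dst.p, src, n);
	return 0;
}

static int getsockopt_model(sockptr_t optval, sockptr_t optlen)
{
	int v = 42, len = (int)sizeof(v);

	if (copy_to_sockptr(optval, &v, len))
		return -1;
	return copy_to_sockptr(optlen, &len, sizeof(len));
}

int main(void)
{
	int val = 0, len = 0;
	sockptr_t optval = { &val, true }, optlen = { &len, true };

	getsockopt_model(optval, optlen);
	printf("val=%d len=%d\n", val, len);
	return 0;
}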
+
/*
* Initialize an sk_lock.
*
@@ -2242,6 +2351,19 @@ void sk_free_unlock_clone(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
+static void sk_trim_gso_size(struct sock *sk)
+{
+ if (sk->sk_gso_max_size <= GSO_LEGACY_MAX_SIZE)
+ return;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6 &&
+ sk_is_tcp(sk) &&
+ !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
+ return;
+#endif
+ sk->sk_gso_max_size = GSO_LEGACY_MAX_SIZE;
+}
+
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
u32 max_segs = 1;
@@ -2261,6 +2383,8 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
/* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */
sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size);
+ sk_trim_gso_size(sk);
+ sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1);
/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
}
@@ -2281,8 +2405,20 @@ void sock_wfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
unsigned int len = skb->truesize;
+ bool free;
if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
+ if (sock_flag(sk, SOCK_RCU_FREE) &&
+ sk->sk_write_space == sock_def_write_space) {
+ rcu_read_lock();
+ free = refcount_sub_and_test(len, &sk->sk_wmem_alloc);
+ sock_def_write_space_wfree(sk);
+ rcu_read_unlock();
+ if (unlikely(free))
+ __sk_free(sk);
+ return;
+ }
+
/*
* Keep a reference on sk_wmem_alloc, this will be released
* after sk_write_space() call
@@ -2454,7 +2590,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
- sysctl_optmem_max)
+ READ_ONCE(sysctl_optmem_max))
return NULL;
skb = alloc_skb(size, priority);
@@ -2472,8 +2608,10 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
*/
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
- if ((unsigned int)size <= sysctl_optmem_max &&
- atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+ int optmem_max = READ_ONCE(sysctl_optmem_max);
+
+ if ((unsigned int)size <= optmem_max &&
+ atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
void *mem;
/* First do the add, to avoid the race if kmalloc
* might sleep.
@@ -2592,13 +2730,6 @@ failure:
}
EXPORT_SYMBOL(sock_alloc_send_pskb);
-struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
- int noblock, int *errcode)
-{
- return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
-}
-EXPORT_SYMBOL(sock_alloc_send_skb);
-
int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
struct sockcm_cookie *sockc)
{
@@ -2606,7 +2737,8 @@ int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
switch (cmsg->cmsg_type) {
case SO_MARK:
- if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
+ !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
return -EINVAL;
@@ -2769,7 +2901,7 @@ void __release_sock(struct sock *sk)
do {
next = skb->next;
prefetch(next);
- WARN_ON_ONCE(skb_dst_is_noref(skb));
+ DEBUG_NET_WARN_ON_ONCE(skb_dst_is_noref(skb));
skb_mark_not_on_list(skb);
sk_backlog_rcv(sk, skb);
@@ -2794,6 +2926,7 @@ void __sk_flush_backlog(struct sock *sk)
__release_sock(sk);
spin_unlock_bh(&sk->sk_lock.slock);
}
+EXPORT_SYMBOL_GPL(__sk_flush_backlog);
/**
* sk_wait_data - wait for data to arrive at sk_receive_queue
@@ -2831,11 +2964,13 @@ EXPORT_SYMBOL(sk_wait_data);
*/
int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
- struct proto *prot = sk->sk_prot;
- long allocated = sk_memory_allocated_add(sk, amt);
bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg;
+ struct proto *prot = sk->sk_prot;
bool charged = true;
+ long allocated;
+ sk_memory_allocated_add(sk, amt);
+ allocated = sk_memory_allocated(sk);
if (memcg_charge &&
!(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt,
gfp_memcg_charge())))
@@ -2912,7 +3047,6 @@ suppress_allocation:
return 0;
}
-EXPORT_SYMBOL(__sk_mem_raise_allocated);
/**
* __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
@@ -2928,10 +3062,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
int ret, amt = sk_mem_pages(size);
- sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
+ sk->sk_forward_alloc += amt << PAGE_SHIFT;
ret = __sk_mem_raise_allocated(sk, size, amt, kind);
if (!ret)
- sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
+ sk->sk_forward_alloc -= amt << PAGE_SHIFT;
return ret;
}
EXPORT_SYMBOL(__sk_mem_schedule);
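With SK_MEM_QUANTUM gone, forward allocations are accounted in whole pages. A worked example, assuming 4 KiB pages: sk_mem_pages() rounds the request up, so scheduling 3000 bytes reserves one page (4096 bytes) and 9000 bytes reserves three (12288 bytes):

	#include <stdio.h>

	#define EX_PAGE_SHIFT	12
	#define EX_PAGE_SIZE	(1 << EX_PAGE_SHIFT)

	/* Illustrative stand-in for the kernel's sk_mem_pages() */
	static int pages_for(int size)
	{
		return (size + EX_PAGE_SIZE - 1) >> EX_PAGE_SHIFT;
	}

	int main(void)
	{
		printf("3000 bytes -> %d page(s), %d bytes\n",
		       pages_for(3000), pages_for(3000) << EX_PAGE_SHIFT);
		printf("9000 bytes -> %d page(s), %d bytes\n",
		       pages_for(9000), pages_for(9000) << EX_PAGE_SHIFT);
		return 0;
	}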
@@ -2954,17 +3088,16 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount)
(sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
sk_leave_memory_pressure(sk);
}
-EXPORT_SYMBOL(__sk_mem_reduce_allocated);
/**
* __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
* @sk: socket
- * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
+ * @amount: number of bytes (rounded down to a PAGE_SIZE multiple)
*/
void __sk_mem_reclaim(struct sock *sk, int amount)
{
- amount >>= SK_MEM_QUANTUM_SHIFT;
- sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
+ amount >>= PAGE_SHIFT;
+ sk->sk_forward_alloc -= amount << PAGE_SHIFT;
__sk_mem_reduce_allocated(sk, amount);
}
EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -3154,20 +3287,42 @@ static void sock_def_write_space(struct sock *sk)
/* Do not wake up a writer until he can make "significant"
* progress. --DaveM
*/
- if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) {
+ if (sock_writeable(sk)) {
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
EPOLLWRNORM | EPOLLWRBAND);
/* Should agree with poll, otherwise some programs break */
- if (sock_writeable(sk))
- sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
rcu_read_unlock();
}
+/* An optimised version of sock_def_write_space(); it should only be called
+ * for SOCK_RCU_FREE sockets under the RCU read section and after putting
+ * ->sk_wmem_alloc.
+ */
+static void sock_def_write_space_wfree(struct sock *sk)
+{
+ /* Do not wake up a writer until he can make "significant"
+ * progress. --DaveM
+ */
+ if (sock_writeable(sk)) {
+ struct socket_wq *wq = rcu_dereference(sk->sk_wq);
+
+ /* rely on refcount_sub from sock_wfree() */
+ smp_mb__after_atomic();
+ if (wq && waitqueue_active(&wq->wait))
+ wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
+ EPOLLWRNORM | EPOLLWRBAND);
+
+ /* Should agree with poll, otherwise some programs break */
+ sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ }
+}
+
static void sock_def_destruct(struct sock *sk)
{
}
@@ -3210,8 +3365,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
timer_setup(&sk->sk_timer, NULL, 0);
sk->sk_allocation = GFP_KERNEL;
- sk->sk_rcvbuf = sysctl_rmem_default;
- sk->sk_sndbuf = sysctl_wmem_default;
+ sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default);
+ sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default);
sk->sk_state = TCP_CLOSE;
sk_set_socket(sk, sock);
@@ -3266,13 +3421,14 @@ void sock_init_data(struct socket *sock, struct sock *sk)
#ifdef CONFIG_NET_RX_BUSY_POLL
sk->sk_napi_id = 0;
- sk->sk_ll_usec = sysctl_net_busy_read;
+ sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read);
#endif
sk->sk_max_pacing_rate = ~0UL;
sk->sk_pacing_rate = ~0UL;
WRITE_ONCE(sk->sk_pacing_shift, 10);
sk->sk_incoming_cpu = -1;
+ sk->sk_txrehash = SOCK_TXREHASH_DEFAULT;
sk_rx_queue_clear(sk);
/*
@@ -3454,7 +3610,8 @@ int sock_common_getsockopt(struct socket *sock, int level, int optname,
{
struct sock *sk = sock->sk;
- return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
+ /* IPV6_ADDRFORM can change sk->sk_prot under us. */
+ return READ_ONCE(sk->sk_prot)->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_getsockopt);
@@ -3465,8 +3622,7 @@ int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int addr_len = 0;
int err;
- err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
- flags & ~MSG_DONTWAIT, &addr_len);
+ err = sk->sk_prot->recvmsg(sk, msg, size, flags, &addr_len);
if (err >= 0)
msg->msg_namelen = addr_len;
return err;
@@ -3481,7 +3637,8 @@ int sock_common_setsockopt(struct socket *sock, int level, int optname,
{
struct sock *sk = sock->sk;
- return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
+ /* IPV6_ADDRFORM can change sk->sk_prot under us. */
+ return READ_ONCE(sk->sk_prot)->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_setsockopt);
@@ -3697,6 +3854,14 @@ int proto_register(struct proto *prot, int alloc_slab)
{
int ret = -ENOBUFS;
+ if (prot->memory_allocated && !prot->sysctl_mem) {
+ pr_err("%s: missing sysctl_mem\n", prot->name);
+ return -EINVAL;
+ }
+ if (prot->memory_allocated && !prot->per_cpu_fw_alloc) {
+ pr_err("%s: missing per_cpu_fw_alloc\n", prot->name);
+ return -EINVAL;
+ }
if (alloc_slab) {
prot->slab = kmem_cache_create_usercopy(prot->name,
prot->obj_size, 0,
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 1827669eedd6..81beb16ab1eb 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -41,7 +41,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
return ERR_PTR(-EINVAL);
- stab = kzalloc(sizeof(*stab), GFP_USER | __GFP_ACCOUNT);
+ stab = bpf_map_area_alloc(sizeof(*stab), NUMA_NO_NODE);
if (!stab)
return ERR_PTR(-ENOMEM);
@@ -52,7 +52,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
sizeof(struct sock *),
stab->map.numa_node);
if (!stab->sks) {
- kfree(stab);
+ bpf_map_area_free(stab);
return ERR_PTR(-ENOMEM);
}
@@ -361,7 +361,7 @@ static void sock_map_free(struct bpf_map *map)
synchronize_rcu();
bpf_map_area_free(stab->sks);
- kfree(stab);
+ bpf_map_area_free(stab);
}
static void sock_map_release_progs(struct bpf_map *map)
@@ -783,17 +783,26 @@ static int sock_map_init_seq_private(void *priv_data,
{
struct sock_map_seq_info *info = priv_data;
+ bpf_map_inc_with_uref(aux->map);
info->map = aux->map;
return 0;
}
+static void sock_map_fini_seq_private(void *priv_data)
+{
+ struct sock_map_seq_info *info = priv_data;
+
+ bpf_map_put_with_uref(info->map);
+}
+
static const struct bpf_iter_seq_info sock_map_iter_seq_info = {
.seq_ops = &sock_map_seq_ops,
.init_seq_private = sock_map_init_seq_private,
+ .fini_seq_private = sock_map_fini_seq_private,
.seq_priv_size = sizeof(struct sock_map_seq_info),
};
-static int sock_map_btf_id;
+BTF_ID_LIST_SINGLE(sock_map_btf_ids, struct, bpf_stab)
const struct bpf_map_ops sock_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc = sock_map_alloc,
@@ -805,8 +814,7 @@ const struct bpf_map_ops sock_map_ops = {
.map_lookup_elem = sock_map_lookup,
.map_release_uref = sock_map_release_progs,
.map_check_btf = map_check_no_btf,
- .map_btf_name = "bpf_stab",
- .map_btf_id = &sock_map_btf_id,
+ .map_btf_id = &sock_map_btf_ids[0],
.iter_seq_info = &sock_map_iter_seq_info,
};
@@ -1077,7 +1085,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
if (attr->key_size > MAX_BPF_STACK)
return ERR_PTR(-E2BIG);
- htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_ACCOUNT);
+ htab = bpf_map_area_alloc(sizeof(*htab), NUMA_NO_NODE);
if (!htab)
return ERR_PTR(-ENOMEM);
@@ -1107,7 +1115,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
return &htab->map;
free_htab:
- kfree(htab);
+ bpf_map_area_free(htab);
return ERR_PTR(err);
}
@@ -1160,7 +1168,7 @@ static void sock_hash_free(struct bpf_map *map)
synchronize_rcu();
bpf_map_area_free(htab->buckets);
- kfree(htab);
+ bpf_map_area_free(htab);
}
static void *sock_hash_lookup_sys(struct bpf_map *map, void *key)
@@ -1370,22 +1378,31 @@ static const struct seq_operations sock_hash_seq_ops = {
};
static int sock_hash_init_seq_private(void *priv_data,
- struct bpf_iter_aux_info *aux)
+ struct bpf_iter_aux_info *aux)
{
struct sock_hash_seq_info *info = priv_data;
+ bpf_map_inc_with_uref(aux->map);
info->map = aux->map;
info->htab = container_of(aux->map, struct bpf_shtab, map);
return 0;
}
+static void sock_hash_fini_seq_private(void *priv_data)
+{
+ struct sock_hash_seq_info *info = priv_data;
+
+ bpf_map_put_with_uref(info->map);
+}
+
static const struct bpf_iter_seq_info sock_hash_iter_seq_info = {
.seq_ops = &sock_hash_seq_ops,
.init_seq_private = sock_hash_init_seq_private,
+ .fini_seq_private = sock_hash_fini_seq_private,
.seq_priv_size = sizeof(struct sock_hash_seq_info),
};
-static int sock_hash_map_btf_id;
+BTF_ID_LIST_SINGLE(sock_hash_map_btf_ids, struct, bpf_shtab)
const struct bpf_map_ops sock_hash_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc = sock_hash_alloc,
@@ -1397,8 +1414,7 @@ const struct bpf_map_ops sock_hash_ops = {
.map_lookup_elem_sys_only = sock_hash_lookup_sys,
.map_release_uref = sock_hash_release_progs,
.map_check_btf = map_check_no_btf,
- .map_btf_name = "bpf_shtab",
- .map_btf_id = &sock_hash_map_btf_id,
+ .map_btf_id = &sock_hash_map_btf_ids[0],
.iter_seq_info = &sock_hash_iter_seq_info,
};
@@ -1416,38 +1432,50 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
return NULL;
}
-static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
- struct bpf_prog *old, u32 which)
+static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
+ u32 which)
{
struct sk_psock_progs *progs = sock_map_progs(map);
- struct bpf_prog **pprog;
if (!progs)
return -EOPNOTSUPP;
switch (which) {
case BPF_SK_MSG_VERDICT:
- pprog = &progs->msg_parser;
+ *pprog = &progs->msg_parser;
break;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
case BPF_SK_SKB_STREAM_PARSER:
- pprog = &progs->stream_parser;
+ *pprog = &progs->stream_parser;
break;
#endif
case BPF_SK_SKB_STREAM_VERDICT:
if (progs->skb_verdict)
return -EBUSY;
- pprog = &progs->stream_verdict;
+ *pprog = &progs->stream_verdict;
break;
case BPF_SK_SKB_VERDICT:
if (progs->stream_verdict)
return -EBUSY;
- pprog = &progs->skb_verdict;
+ *pprog = &progs->skb_verdict;
break;
default:
return -EOPNOTSUPP;
}
+ return 0;
+}
+
+static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
+ struct bpf_prog *old, u32 which)
+{
+ struct bpf_prog **pprog;
+ int ret;
+
+ ret = sock_map_prog_lookup(map, &pprog, which);
+ if (ret)
+ return ret;
+
if (old)
return psock_replace_prog(pprog, prog, old);
@@ -1455,6 +1483,57 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
return 0;
}
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
+ u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
+ struct bpf_prog **pprog;
+ struct bpf_prog *prog;
+ struct bpf_map *map;
+ struct fd f;
+ u32 id = 0;
+ int ret;
+
+ if (attr->query.query_flags)
+ return -EINVAL;
+
+ f = fdget(ufd);
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ rcu_read_lock();
+
+ ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
+ if (ret)
+ goto end;
+
+ prog = *pprog;
+ prog_cnt = !prog ? 0 : 1;
+
+ if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
+ goto end;
+
+ /* We do not hold the refcnt; the bpf prog may be released
+ * asynchronously, in which case its id reads as 0.
+ */
+ id = data_race(prog->aux->id);
+ if (id == 0)
+ prog_cnt = 0;
+
+end:
+ rcu_read_unlock();
+
+ if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
+ (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
+ copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
+ ret = -EFAULT;
+
+ fdput(f);
+ return ret;
+}
+
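sock_map_bpf_prog_query() wires sockmap/sockhash into the generic BPF_PROG_QUERY command, so userspace can ask which program, if any, sits at a given attach point. A hypothetical libbpf-based caller, assuming map_fd refers to a sockmap and returning the program id or 0 when nothing is attached:

	#include <bpf/bpf.h>

	static int query_stream_verdict(int map_fd)
	{
		__u32 prog_ids[1], prog_cnt = 1, attach_flags = 0;
		int err;

		err = bpf_prog_query(map_fd, BPF_SK_SKB_STREAM_VERDICT,
				     0 /* query_flags must be 0 */, &attach_flags,
				     prog_ids, &prog_cnt);
		if (err)
			return err;

		return prog_cnt ? (int)prog_ids[0] : 0;
	}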
static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
{
switch (link->map->map_type) {
@@ -1500,6 +1579,29 @@ void sock_map_unhash(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sock_map_unhash);
+void sock_map_destroy(struct sock *sk)
+{
+ void (*saved_destroy)(struct sock *sk);
+ struct sk_psock *psock;
+
+ rcu_read_lock();
+ psock = sk_psock_get(sk);
+ if (unlikely(!psock)) {
+ rcu_read_unlock();
+ if (sk->sk_prot->destroy)
+ sk->sk_prot->destroy(sk);
+ return;
+ }
+
+ saved_destroy = psock->saved_destroy;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
+ sk_psock_stop(psock);
+ sk_psock_put(sk, psock);
+ saved_destroy(sk);
+}
+EXPORT_SYMBOL_GPL(sock_map_destroy);
+
void sock_map_close(struct sock *sk, long timeout)
{
void (*saved_close)(struct sock *sk, long timeout);
@@ -1517,9 +1619,10 @@ void sock_map_close(struct sock *sk, long timeout)
saved_close = psock->saved_close;
sock_map_remove_links(sk, psock);
rcu_read_unlock();
- sk_psock_stop(psock, true);
- sk_psock_put(sk, psock);
+ sk_psock_stop(psock);
release_sock(sk);
+ cancel_work_sync(&psock->work);
+ sk_psock_put(sk, psock);
saved_close(sk, timeout);
}
EXPORT_SYMBOL_GPL(sock_map_close);
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 3f00a28fe762..fb90e1e00773 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -21,6 +21,22 @@ static DEFINE_IDA(reuseport_ida);
static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
struct sock_reuseport *reuse, bool bind_inany);
+void reuseport_has_conns_set(struct sock *sk)
+{
+ struct sock_reuseport *reuse;
+
+ if (!rcu_access_pointer(sk->sk_reuseport_cb))
+ return;
+
+ spin_lock_bh(&reuseport_lock);
+ reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock));
+ if (likely(reuse))
+ reuse->has_conns = 1;
+ spin_unlock_bh(&reuseport_lock);
+}
+EXPORT_SYMBOL(reuseport_has_conns_set);
+
static int reuseport_sock_index(struct sock *sk,
const struct sock_reuseport *reuse,
bool closed)
@@ -387,7 +403,7 @@ void reuseport_stop_listen_sock(struct sock *sk)
prog = rcu_dereference_protected(reuse->prog,
lockdep_is_held(&reuseport_lock));
- if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req ||
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req) ||
(prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) {
/* Migration capable, move sk from the listening section
* to the closed section.
@@ -545,7 +561,7 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
hash = migrating_sk->sk_hash;
prog = rcu_dereference(reuse->prog);
if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) {
- if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req))
goto select_by_hash;
goto failure;
}
diff --git a/net/core/stream.c b/net/core/stream.c
index 06b36c730ce8..75fded8495f5 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -123,7 +123,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
DEFINE_WAIT_FUNC(wait, woken_wake_function);
if (sk_stream_memory_free(sk))
- current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2;
+ current_timeo = vm_wait = prandom_u32_max(HZ / 5) + 2;
add_wait_queue(sk_sleep(sk), &wait);
@@ -159,7 +159,8 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
*timeo_p = current_timeo;
}
out:
- remove_wait_queue(sk_sleep(sk), &wait);
+ if (!sock_flag(sk, SOCK_DEAD))
+ remove_wait_queue(sk_sleep(sk), &wait);
return err;
do_error:
@@ -196,13 +197,13 @@ void sk_stream_kill_queues(struct sock *sk)
__skb_queue_purge(&sk->sk_receive_queue);
/* Next, the write queue. */
- WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
+ WARN_ON_ONCE(!skb_queue_empty(&sk->sk_write_queue));
/* Account for returned memory. */
sk_mem_reclaim_final(sk);
- WARN_ON(sk->sk_wmem_queued);
- WARN_ON(sk->sk_forward_alloc);
+ WARN_ON_ONCE(sk->sk_wmem_queued);
+ WARN_ON_ONCE(sk->sk_forward_alloc);
/* It is _impossible_ for the backlog to contain anything
* when we get here. All user references to this socket
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 7b4d485aac7a..5b1ce656baa1 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -23,14 +23,12 @@
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
-static int two = 2;
-static int three = 3;
+#include "dev.h"
+
static int int_3600 = 3600;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
static int max_skb_frags = MAX_SKB_FRAGS;
-static long long_one __maybe_unused = 1;
-static long long_max __maybe_unused = LONG_MAX;
static int net_msg_warn; /* Unused, but still a sysctl */
@@ -103,8 +101,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
if (orig_sock_table) {
static_branch_dec(&rps_needed);
static_branch_dec(&rfs_needed);
- synchronize_rcu();
- vfree(orig_sock_table);
+ kvfree_rcu(orig_sock_table);
}
}
}
@@ -142,8 +139,7 @@ static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
lockdep_is_held(&flow_limit_update_mutex));
if (cur && !cpumask_test_cpu(i, mask)) {
RCU_INIT_POINTER(sd->flow_limit, NULL);
- synchronize_rcu();
- kfree(cur);
+ kfree_rcu(cur);
} else if (!cur && cpumask_test_cpu(i, mask)) {
cur = kzalloc_node(len, GFP_KERNEL,
cpu_to_node(i));
@@ -237,14 +233,17 @@ static int set_default_qdisc(struct ctl_table *table, int write,
static int proc_do_dev_weight(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- int ret;
+ static DEFINE_MUTEX(dev_weight_mutex);
+ int ret, weight;
+ mutex_lock(&dev_weight_mutex);
ret = proc_dointvec(table, write, buffer, lenp, ppos);
- if (ret != 0)
- return ret;
-
- dev_rx_weight = weight_p * dev_weight_rx_bias;
- dev_tx_weight = weight_p * dev_weight_tx_bias;
+ if (!ret && write) {
+ weight = READ_ONCE(weight_p);
+ WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias);
+ WRITE_ONCE(dev_tx_weight, weight * dev_weight_tx_bias);
+ }
+ mutex_unlock(&dev_weight_mutex);
return ret;
}
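proc_do_dev_weight() now serializes writers behind a local mutex and publishes the derived weights with WRITE_ONCE(), so lockless readers on the packet-processing path see each value torn-free while concurrent writers can no longer interleave their updates. A compressed sketch of the writer side, assuming pthread primitives and relaxed atomic stores in place of the kernel's mutex and WRITE_ONCE():

	#include <pthread.h>

	static pthread_mutex_t weight_lock = PTHREAD_MUTEX_INITIALIZER;
	static int rx_bias = 1, tx_bias = 1;
	static int dev_rx_weight, dev_tx_weight;

	static void set_weight(int new_weight)
	{
		pthread_mutex_lock(&weight_lock);
		/* publish derived values for annotation-only readers */
		__atomic_store_n(&dev_rx_weight, new_weight * rx_bias,
				 __ATOMIC_RELAXED);
		__atomic_store_n(&dev_tx_weight, new_weight * tx_bias,
				 __ATOMIC_RELAXED);
		pthread_mutex_unlock(&weight_lock);
	}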
@@ -267,6 +266,8 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
loff_t *ppos)
{
int ret, jit_enable = *(int *)table->data;
+ int min = *(int *)table->extra1;
+ int max = *(int *)table->extra2;
struct ctl_table tmp = *table;
if (write && !capable(CAP_SYS_ADMIN))
@@ -284,6 +285,10 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
ret = -EPERM;
}
}
+
+ if (write && ret && min == max)
+ pr_info_once("CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1.\n");
+
return ret;
}
@@ -390,7 +395,7 @@ static struct ctl_table net_core_table[] = {
.extra2 = SYSCTL_ONE,
# else
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
# endif
},
# ifdef CONFIG_HAVE_EBPF_JIT
@@ -401,7 +406,7 @@ static struct ctl_table net_core_table[] = {
.mode = 0600,
.proc_handler = proc_dointvec_minmax_bpf_restricted,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "bpf_jit_kallsyms",
@@ -419,7 +424,7 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(long),
.mode = 0600,
.proc_handler = proc_dolongvec_minmax_bpf_restricted,
- .extra1 = &long_one,
+ .extra1 = SYSCTL_LONG_ONE,
.extra2 = &bpf_jit_limit_max,
},
#endif
@@ -546,7 +551,7 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "devconf_inherit_init_net",
@@ -555,7 +560,7 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &three,
+ .extra2 = SYSCTL_THREE,
},
{
.procname = "high_order_alloc_disable",
@@ -581,6 +586,14 @@ static struct ctl_table net_core_table[] = {
.extra1 = SYSCTL_ONE,
.extra2 = &int_3600,
},
+ {
+ .procname = "skb_defer_max",
+ .data = &sysctl_skb_defer_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
{ }
};
@@ -593,6 +606,15 @@ static struct ctl_table netns_core_table[] = {
.extra1 = SYSCTL_ZERO,
.proc_handler = proc_dointvec_minmax
},
+ {
+ .procname = "txrehash",
+ .data = &init_net.core.sysctl_txrehash,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ .proc_handler = proc_dou8vec_minmax,
+ },
{ }
};
@@ -611,7 +633,7 @@ __setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup);
static __net_init int sysctl_core_net_init(struct net *net)
{
- struct ctl_table *tbl;
+ struct ctl_table *tbl, *tmp;
tbl = netns_core_table;
if (!net_eq(net, &init_net)) {
@@ -619,7 +641,8 @@ static __net_init int sysctl_core_net_init(struct net *net)
if (tbl == NULL)
goto err_dup;
- tbl[0].data = &net->core.sysctl_somaxconn;
+ for (tmp = tbl; tmp->procname; tmp++)
+ tmp->data += (char *)net - (char *)&init_net;
/* Don't export any sysctls to unprivileged users */
if (net->user_ns != &init_user_ns) {
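The loop added above replaces per-entry fixups: the duplicated table's .data pointers still target fields inside init_net, and adding the byte distance between the new struct net and init_net rebases each pointer onto the matching field of the new namespace, no matter how many entries netns_core_table grows. A self-contained sketch of the idiom, with a made-up two-field struct standing in for struct net:

	#include <assert.h>

	struct fake_net {
		int somaxconn;
		int txrehash;
	};

	static void *rebase(void *data, struct fake_net *net,
			    struct fake_net *ref)
	{
		return (char *)data + ((char *)net - (char *)ref);
	}

	int main(void)
	{
		struct fake_net init_ns = { 128, 1 }, other_ns = { 0, 0 };
		int *p = rebase(&init_ns.txrehash, &other_ns, &init_ns);

		assert(p == &other_ns.txrehash);
		return 0;
	}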
diff --git a/net/core/utils.c b/net/core/utils.c
index 1f31a39236d5..938495bc1d34 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -476,9 +476,9 @@ void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
__wsum diff, bool pseudohdr)
{
if (skb->ip_summed != CHECKSUM_PARTIAL) {
- *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum)));
+ csum_replace_by_diff(sum, diff);
if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
- skb->csum = ~csum_add(diff, ~skb->csum);
+ skb->csum = ~csum_sub(diff, skb->csum);
} else if (pseudohdr) {
*sum = ~csum_fold(csum_add(diff, csum_unfold(*sum)));
}
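Both branches manipulate the 16-bit one's-complement Internet checksum, where an update can be applied as a 32-bit difference and folded back down. A minimal sketch of the fold step only, assuming a host-order 32-bit running sum in place of the kernel's __wsum:

	#include <stdint.h>

	/* Fold a 32-bit one's-complement sum to 16 bits and invert it */
	static uint16_t csum_fold32(uint32_t sum)
	{
		sum = (sum & 0xffff) + (sum >> 16);
		sum = (sum & 0xffff) + (sum >> 16);
		return (uint16_t)~sum;
	}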
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 7aba35504986..844c9d99dc0e 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -162,8 +162,9 @@ static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
}
/* Returns 0 on success, negative on failure */
-int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
- struct net_device *dev, u32 queue_index, unsigned int napi_id)
+int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+ struct net_device *dev, u32 queue_index,
+ unsigned int napi_id, u32 frag_size)
{
if (!dev) {
WARN(1, "Missing net_device from driver");
@@ -185,11 +186,12 @@ int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
xdp_rxq->dev = dev;
xdp_rxq->queue_index = queue_index;
xdp_rxq->napi_id = napi_id;
+ xdp_rxq->frag_size = frag_size;
xdp_rxq->reg_state = REG_STATE_REGISTERED;
return 0;
}
-EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);
+EXPORT_SYMBOL_GPL(__xdp_rxq_info_reg);
void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
{
@@ -357,7 +359,8 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
if (IS_ERR(xdp_alloc))
return PTR_ERR(xdp_alloc);
- trace_mem_connect(xdp_alloc, xdp_rxq);
+ if (trace_mem_connect_enabled() && xdp_alloc)
+ trace_mem_connect(xdp_alloc, xdp_rxq);
return 0;
}
@@ -369,22 +372,20 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
* is used for those call sites, allowing for faster recycling
* of xdp_frames/pages in those cases.
*/
-static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
- struct xdp_buff *xdp)
+void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
+ struct xdp_buff *xdp)
{
- struct xdp_mem_allocator *xa;
struct page *page;
switch (mem->type) {
case MEM_TYPE_PAGE_POOL:
- rcu_read_lock();
- /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
- xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
page = virt_to_head_page(data);
if (napi_direct && xdp_return_frame_no_direct())
napi_direct = false;
- page_pool_put_full_page(xa->page_pool, page, napi_direct);
- rcu_read_unlock();
+ /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE)
+ * as mem->type knows this is a page_pool page
+ */
+ page_pool_put_full_page(page->pp, page, napi_direct);
break;
case MEM_TYPE_PAGE_SHARED:
page_frag_free(data);
@@ -406,12 +407,38 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
void xdp_return_frame(struct xdp_frame *xdpf)
{
+ struct skb_shared_info *sinfo;
+ int i;
+
+ if (likely(!xdp_frame_has_frags(xdpf)))
+ goto out;
+
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+ __xdp_return(page_address(page), &xdpf->mem, false, NULL);
+ }
+out:
__xdp_return(xdpf->data, &xdpf->mem, false, NULL);
}
EXPORT_SYMBOL_GPL(xdp_return_frame);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
+ struct skb_shared_info *sinfo;
+ int i;
+
+ if (likely(!xdp_frame_has_frags(xdpf)))
+ goto out;
+
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+ __xdp_return(page_address(page), &xdpf->mem, true, NULL);
+ }
+out:
__xdp_return(xdpf->data, &xdpf->mem, true, NULL);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
@@ -447,7 +474,7 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf,
struct xdp_mem_allocator *xa;
if (mem->type != MEM_TYPE_PAGE_POOL) {
- __xdp_return(xdpf->data, &xdpf->mem, false, NULL);
+ xdp_return_frame(xdpf);
return;
}
@@ -466,14 +493,41 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf,
bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
}
+ if (unlikely(xdp_frame_has_frags(xdpf))) {
+ struct skb_shared_info *sinfo;
+ int i;
+
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+
+ bq->q[bq->count++] = skb_frag_address(frag);
+ if (bq->count == XDP_BULK_QUEUE_SIZE)
+ xdp_flush_frame_bulk(bq);
+ }
+ }
bq->q[bq->count++] = xdpf->data;
}
EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
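xdp_return_frame_bulk() now routes non-page_pool frames with fragments through xdp_return_frame() and queues every page of a page_pool frame, flushing whenever the bulk array fills. A hedged driver-side sketch of the intended calling pattern, assuming the frames come from a Tx completion ring and noting that the bulk API resolves mem->id under RCU:

	#include <linux/rcupdate.h>
	#include <net/xdp.h>

	static void complete_tx_frames(struct xdp_frame **frames, int n)
	{
		struct xdp_frame_bulk bq;
		int i;

		xdp_frame_bulk_init(&bq);

		rcu_read_lock();	/* covers the mem->id lookup */
		for (i = 0; i < n; i++)
			xdp_return_frame_bulk(frames[i], &bq);
		xdp_flush_frame_bulk(&bq);
		rcu_read_unlock();
	}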
void xdp_return_buff(struct xdp_buff *xdp)
{
+ struct skb_shared_info *sinfo;
+ int i;
+
+ if (likely(!xdp_buff_has_frags(xdp)))
+ goto out;
+
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ struct page *page = skb_frag_page(&sinfo->frags[i]);
+
+ __xdp_return(page_address(page), &xdp->rxq->mem, true, xdp);
+ }
+out:
__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
}
+EXPORT_SYMBOL_GPL(xdp_return_buff);
/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
@@ -561,8 +615,14 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
struct sk_buff *skb,
struct net_device *dev)
{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
unsigned int headroom, frame_size;
void *hard_start;
+ u8 nr_frags;
+
+ /* xdp frags frame */
+ if (unlikely(xdp_frame_has_frags(xdpf)))
+ nr_frags = sinfo->nr_frags;
/* Part of headroom was reserved to xdpf */
headroom = sizeof(*xdpf) + xdpf->headroom;
@@ -582,6 +642,12 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
if (xdpf->metasize)
skb_metadata_set(skb, xdpf->metasize);
+ if (unlikely(xdp_frame_has_frags(xdpf)))
+ xdp_update_skb_shared_info(skb, nr_frags,
+ sinfo->xdp_frags_size,
+ nr_frags * xdpf->frame_sz,
+ xdp_frame_is_frag_pfmemalloc(xdpf));
+
/* Essential SKB info: protocol and skb->dev */
skb->protocol = eth_type_trans(skb, dev);
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index b441ab330fd3..dc4fb699b56c 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -2073,8 +2073,52 @@ u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev)
}
EXPORT_SYMBOL(dcb_ieee_getapp_default_prio_mask);
+static void dcbnl_flush_dev(struct net_device *dev)
+{
+ struct dcb_app_type *itr, *tmp;
+
+ spin_lock_bh(&dcb_lock);
+
+ list_for_each_entry_safe(itr, tmp, &dcb_app_list, list) {
+ if (itr->ifindex == dev->ifindex) {
+ list_del(&itr->list);
+ kfree(itr);
+ }
+ }
+
+ spin_unlock_bh(&dcb_lock);
+}
+
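dcbnl now drops a device's APP-table entries when the device unregisters, instead of leaking them. The list_for_each_entry_safe() form matters because nodes are freed mid-walk; a standalone sketch of the same delete-while-iterating pattern over a hand-rolled singly linked list:

	#include <stdlib.h>

	struct app_node {
		int ifindex;
		struct app_node *next;
	};

	/* Safe because the link is rewired before the node is freed */
	static void flush_ifindex(struct app_node **head, int ifindex)
	{
		struct app_node **pp = head, *n;

		while ((n = *pp) != NULL) {
			if (n->ifindex == ifindex) {
				*pp = n->next;
				free(n);
			} else {
				pp = &n->next;
			}
		}
	}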
+static int dcbnl_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ if (!dev->dcbnl_ops)
+ return NOTIFY_DONE;
+
+ dcbnl_flush_dev(dev);
+
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+static struct notifier_block dcbnl_nb __read_mostly = {
+ .notifier_call = dcbnl_netdevice_event,
+};
+
static int __init dcbnl_init(void)
{
+ int err;
+
+ err = register_netdevice_notifier(&dcbnl_nb);
+ if (err)
+ return err;
+
rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 5183e627468d..7dfc00c9fb32 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -136,11 +136,6 @@ static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16);
}
-static inline u64 max48(const u64 seq1, const u64 seq2)
-{
- return after48(seq1, seq2) ? seq1 : seq2;
-}
-
/**
* dccp_loss_count - Approximate the number of lost data packets in a burst loss
* @s1: last known sequence number before the loss ('hole')
@@ -298,8 +293,8 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen);
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
-int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
- int flags, int *addr_len);
+int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len);
void dccp_shutdown(struct sock *sk, int how);
int inet_dccp_listen(struct socket *sock, int backlog);
__poll_t dccp_poll(struct file *file, struct socket *sock,
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 0ea29270d7e5..713b7b8dad7e 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -45,10 +45,11 @@ static unsigned int dccp_v4_pernet_id __read_mostly;
int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
+ struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
+ __be32 daddr, nexthop, prev_sk_rcv_saddr;
struct inet_sock *inet = inet_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
__be16 orig_sport, orig_dport;
- __be32 daddr, nexthop;
struct flowi4 *fl4;
struct rtable *rt;
int err;
@@ -76,9 +77,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
orig_dport = usin->sin_port;
fl4 = &inet->cork.fl.u.ip4;
rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
- RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
- IPPROTO_DCCP,
- orig_sport, orig_dport, sk);
+ sk->sk_bound_dev_if, IPPROTO_DCCP, orig_sport,
+ orig_dport, sk);
if (IS_ERR(rt))
return PTR_ERR(rt);
@@ -90,9 +90,29 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (inet_opt == NULL || !inet_opt->opt.srr)
daddr = fl4->daddr;
- if (inet->inet_saddr == 0)
+ if (inet->inet_saddr == 0) {
+ if (inet_csk(sk)->icsk_bind2_hash) {
+ prev_addr_hashbucket =
+ inet_bhashfn_portaddr(&dccp_hashinfo, sk,
+ sock_net(sk),
+ inet->inet_num);
+ prev_sk_rcv_saddr = sk->sk_rcv_saddr;
+ }
inet->inet_saddr = fl4->saddr;
+ }
+
sk_rcv_saddr_set(sk, inet->inet_saddr);
+
+ if (prev_addr_hashbucket) {
+ err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
+ if (err) {
+ inet->inet_saddr = 0;
+ sk_rcv_saddr_set(sk, prev_sk_rcv_saddr);
+ ip_rt_put(rt);
+ return err;
+ }
+ }
+
inet->inet_dport = usin->sin_port;
sk_daddr_set(sk, daddr);
@@ -124,7 +144,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_daddr,
inet->inet_sport,
inet->inet_dport);
- inet->inet_id = prandom_u32();
+ inet->inet_id = get_random_u16();
err = dccp_connect(sk);
rt = NULL;
@@ -423,7 +443,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
- newinet->inet_id = prandom_u32();
+ newinet->inet_id = get_random_u16();
if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
goto put_and_exit;
@@ -629,7 +649,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
ireq->ir_mark = inet_request_mark(sk, skb);
ireq->ireq_family = AF_INET;
- ireq->ir_iif = sk->sk_bound_dev_if;
+ ireq->ir_iif = READ_ONCE(sk->sk_bound_dev_if);
/*
* Step 3: Process LISTEN state
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index fa663518fa0e..e57b43006074 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -374,10 +374,10 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
refcount_inc(&skb->users);
ireq->pktopts = skb;
}
- ireq->ir_iif = sk->sk_bound_dev_if;
+ ireq->ir_iif = READ_ONCE(sk->sk_bound_dev_if);
/* So that link locals have meaning */
- if (!sk->sk_bound_dev_if &&
+ if (!ireq->ir_iif &&
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = inet6_iif(skb);
@@ -892,7 +892,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
- if (__ipv6_only_sock(sk))
+ if (ipv6_only_sock(sk))
return -ENETUNREACH;
sin.sin_family = AF_INET;
@@ -934,8 +934,26 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
}
if (saddr == NULL) {
+ struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
+ struct in6_addr prev_v6_rcv_saddr;
+
+ if (icsk->icsk_bind2_hash) {
+ prev_addr_hashbucket = inet_bhashfn_portaddr(&dccp_hashinfo,
+ sk, sock_net(sk),
+ inet->inet_num);
+ prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
+ }
+
saddr = &fl6.saddr;
sk->sk_v6_rcv_saddr = *saddr;
+
+ if (prev_addr_hashbucket) {
+ err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
+ if (err) {
+ sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr;
+ goto failure;
+ }
+ }
}
/* set the source address */
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 91e7a2202697..64d805b27add 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -22,6 +22,7 @@
#include "feat.h"
struct inet_timewait_death_row dccp_death_row = {
+ .tw_refcount = REFCOUNT_INIT(1),
.sysctl_max_tw_buckets = NR_FILE * 2,
.hashinfo = &dccp_hashinfo,
};
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index a976b4d29892..c548ca3e9b0e 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -736,11 +736,6 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
lock_sock(sk);
- if (dccp_qpolicy_full(sk)) {
- rc = -EAGAIN;
- goto out_release;
- }
-
timeo = sock_sndtimeo(sk, noblock);
/*
@@ -759,6 +754,11 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (skb == NULL)
goto out_release;
+ if (dccp_qpolicy_full(sk)) {
+ rc = -EAGAIN;
+ goto out_discard;
+ }
+
if (sk->sk_state == DCCP_CLOSED) {
rc = -ENOTCONN;
goto out_discard;
@@ -791,8 +791,8 @@ out_discard:
EXPORT_SYMBOL_GPL(dccp_sendmsg);
-int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
- int flags, int *addr_len)
+int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len)
{
const struct dccp_hdr *dh;
long timeo;
@@ -804,7 +804,7 @@ int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
goto out;
}
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
do {
struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
@@ -1110,7 +1110,6 @@ static int __init dccp_init(void)
BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
sizeof_field(struct sk_buff, cb));
- inet_hashinfo_init(&dccp_hashinfo);
rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
if (rc)
goto out_fail;
@@ -1121,6 +1120,12 @@ static int __init dccp_init(void)
SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
goto out_free_hashinfo2;
+ dccp_hashinfo.bind2_bucket_cachep =
+ kmem_cache_create("dccp_bind2_bucket",
+ sizeof(struct inet_bind2_bucket), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
+ if (!dccp_hashinfo.bind2_bucket_cachep)
+ goto out_free_bind_bucket_cachep;
/*
* Size and allocate the main established and bind bucket
@@ -1151,7 +1156,7 @@ static int __init dccp_init(void)
if (!dccp_hashinfo.ehash) {
DCCP_CRIT("Failed to allocate DCCP established hash table");
- goto out_free_bind_bucket_cachep;
+ goto out_free_bind2_bucket_cachep;
}
for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
@@ -1177,14 +1182,26 @@ static int __init dccp_init(void)
goto out_free_dccp_locks;
}
+ dccp_hashinfo.bhash2 = (struct inet_bind_hashbucket *)
+ __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order);
+
+ if (!dccp_hashinfo.bhash2) {
+ DCCP_CRIT("Failed to allocate DCCP bind2 hash table");
+ goto out_free_dccp_bhash;
+ }
+
for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
spin_lock_init(&dccp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
+ spin_lock_init(&dccp_hashinfo.bhash2[i].lock);
+ INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);
}
+ dccp_hashinfo.pernet = false;
+
rc = dccp_mib_init();
if (rc)
- goto out_free_dccp_bhash;
+ goto out_free_dccp_bhash2;
rc = dccp_ackvec_init();
if (rc)
@@ -1208,30 +1225,38 @@ out_ackvec_exit:
dccp_ackvec_exit();
out_free_dccp_mib:
dccp_mib_exit();
+out_free_dccp_bhash2:
+ free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
out_free_dccp_bhash:
free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
+out_free_bind2_bucket_cachep:
+ kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep);
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_hashinfo2:
inet_hashinfo2_free_mod(&dccp_hashinfo);
out_fail:
dccp_hashinfo.bhash = NULL;
+ dccp_hashinfo.bhash2 = NULL;
dccp_hashinfo.ehash = NULL;
dccp_hashinfo.bind_bucket_cachep = NULL;
+ dccp_hashinfo.bind2_bucket_cachep = NULL;
return rc;
}
static void __exit dccp_fini(void)
{
+ int bhash_order = get_order(dccp_hashinfo.bhash_size *
+ sizeof(struct inet_bind_hashbucket));
+
ccid_cleanup_builtins();
dccp_mib_exit();
- free_pages((unsigned long)dccp_hashinfo.bhash,
- get_order(dccp_hashinfo.bhash_size *
- sizeof(struct inet_bind_hashbucket)));
+ free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
+ free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
free_pages((unsigned long)dccp_hashinfo.ehash,
get_order((dccp_hashinfo.ehash_mask + 1) *
sizeof(struct inet_ehash_bucket)));
diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig
deleted file mode 100644
index 24336bdb1054..000000000000
--- a/net/decnet/Kconfig
+++ /dev/null
@@ -1,43 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# DECnet configuration
-#
-config DECNET
- tristate "DECnet Support"
- help
- The DECnet networking protocol was used in many products made by
- Digital (now Compaq). It provides reliable stream and sequenced
- packet communications over which run a variety of services similar
- to those which run over TCP/IP.
-
- To find some tools to use with the kernel layer support, please
- look at Patrick Caulfield's web site:
- <http://linux-decnet.sourceforge.net/>.
-
- More detailed documentation is available in
- <file:Documentation/networking/decnet.rst>.
-
- Be sure to say Y to "/proc file system support" and "Sysctl support"
- below when using DECnet, since you will need sysctl support to aid
- in configuration at run time.
-
- The DECnet code is also available as a module ( = code which can be
- inserted in and removed from the running kernel whenever you want).
- The module is called decnet.
-
-config DECNET_ROUTER
- bool "DECnet: router support"
- depends on DECNET
- select FIB_RULES
- help
- Add support for turning your DECnet Endnode into a level 1 or 2
- router. This is an experimental, but functional option. If you
- do say Y here, then make sure that you also say Y to "Kernel/User
- network link driver", "Routing messages" and "Network packet
- filtering". The first two are required to allow configuration via
- rtnetlink (you will need Alexey Kuznetsov's iproute2 package
- from <ftp://ftp.tux.org/pub/net/ip-routing/>). The "Network packet
- filtering" option will be required for the forthcoming routing daemon
- to work.
-
- See <file:Documentation/networking/decnet.rst> for more information.
diff --git a/net/decnet/Makefile b/net/decnet/Makefile
deleted file mode 100644
index 07b38e441b2d..000000000000
--- a/net/decnet/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-obj-$(CONFIG_DECNET) += decnet.o
-
-decnet-y := af_decnet.o dn_nsp_in.o dn_nsp_out.o \
- dn_route.o dn_dev.o dn_neigh.o dn_timer.o
-decnet-$(CONFIG_DECNET_ROUTER) += dn_fib.o dn_rules.o dn_table.o
-decnet-y += sysctl_net_decnet.o
-
-obj-$(CONFIG_NETFILTER) += netfilter/
diff --git a/net/decnet/README b/net/decnet/README
deleted file mode 100644
index 60e7ec88c81f..000000000000
--- a/net/decnet/README
+++ /dev/null
@@ -1,8 +0,0 @@
- Linux DECnet Project
- ======================
-
-The documentation for this kernel subsystem is available in the
-Documentation/networking subdirectory of this distribution and also
-on line at http://www.chygwyn.com/DECnet/
-
-Steve Whitehouse <SteveW@ACM.org>
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
deleted file mode 100644
index dc92a67baea3..000000000000
--- a/net/decnet/af_decnet.c
+++ /dev/null
@@ -1,2400 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Socket Layer Interface
- *
- * Authors: Eduardo Marcelo Serrat <emserrat@geocities.com>
- * Patrick Caulfield <patrick@pandh.demon.co.uk>
- *
- * Changes:
- * Steve Whitehouse: Copied from Eduardo Serrat and Patrick Caulfield's
- * version of the code. Original copyright preserved
- * below.
- * Steve Whitehouse: Some bug fixes, cleaning up some code to make it
- * compatible with my routing layer.
- * Steve Whitehouse: Merging changes from Eduardo Serrat and Patrick
- * Caulfield.
- * Steve Whitehouse: Further bug fixes, checking module code still works
- * with new routing layer.
- * Steve Whitehouse: Additional set/get_sockopt() calls.
- * Steve Whitehouse: Fixed TIOCINQ ioctl to be same as Eduardo's new
- * code.
- * Steve Whitehouse: recvmsg() changed to try and behave in a POSIX like
- * way. Didn't manage it entirely, but its better.
- * Steve Whitehouse: ditto for sendmsg().
- * Steve Whitehouse: A selection of bug fixes to various things.
- * Steve Whitehouse: Added TIOCOUTQ ioctl.
- * Steve Whitehouse: Fixes to username2sockaddr & sockaddr2username.
- * Steve Whitehouse: Fixes to connect() error returns.
- * Patrick Caulfield: Fixes to delayed acceptance logic.
- * David S. Miller: New socket locking
- * Steve Whitehouse: Socket list hashing/locking
- * Arnaldo C. Melo: use capable, not suser
- * Steve Whitehouse: Removed unused code. Fix to use sk->allocation
- * when required.
- * Patrick Caulfield: /proc/net/decnet now has object name/number
- * Steve Whitehouse: Fixed local port allocation, hashed sk list
- * Matthew Wilcox: Fixes for dn_ioctl()
- * Steve Whitehouse: New connect/accept logic to allow timeouts and
- * prepare for sendpage etc.
- */
-
-
-/******************************************************************************
- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
-
-
-HISTORY:
-
-Version Kernel Date Author/Comments
-------- ------ ---- ---------------
-Version 0.0.1 2.0.30 01-dic-97 Eduardo Marcelo Serrat
- (emserrat@geocities.com)
-
- First Development of DECnet Socket La-
- yer for Linux. Only supports outgoing
- connections.
-
-Version 0.0.2 2.1.105 20-jun-98 Patrick J. Caulfield
- (patrick@pandh.demon.co.uk)
-
- Port to new kernel development version.
-
-Version 0.0.3 2.1.106 25-jun-98 Eduardo Marcelo Serrat
- (emserrat@geocities.com)
- _
- Added support for incoming connections
- so we can start developing server apps
- on Linux.
- -
- Module Support
-Version 0.0.4 2.1.109 21-jul-98 Eduardo Marcelo Serrat
- (emserrat@geocities.com)
- _
- Added support for X11R6.4. Now we can
- use DECnet transport for X on Linux!!!
- -
-Version 0.0.5 2.1.110 01-aug-98 Eduardo Marcelo Serrat
- (emserrat@geocities.com)
- Removed bugs on flow control
- Removed bugs on incoming accessdata
- order
- -
-Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat
- dn_recvmsg fixes
-
- Patrick J. Caulfield
- dn_bind fixes
-*******************************************************************************/
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/sched/signal.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#include <linux/inet.h>
-#include <linux/route.h>
-#include <linux/netfilter.h>
-#include <linux/seq_file.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <net/flow.h>
-#include <asm/ioctls.h>
-#include <linux/capability.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
-#include <linux/init.h>
-#include <linux/poll.h>
-#include <linux/jiffies.h>
-#include <net/net_namespace.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/fib_rules.h>
-#include <net/tcp.h>
-#include <net/dn.h>
-#include <net/dn_nsp.h>
-#include <net/dn_dev.h>
-#include <net/dn_route.h>
-#include <net/dn_fib.h>
-#include <net/dn_neigh.h>
-
-struct dn_sock {
- struct sock sk;
- struct dn_scp scp;
-};
-
-static void dn_keepalive(struct sock *sk);
-
-#define DN_SK_HASH_SHIFT 8
-#define DN_SK_HASH_SIZE (1 << DN_SK_HASH_SHIFT)
-#define DN_SK_HASH_MASK (DN_SK_HASH_SIZE - 1)
-
-
-static const struct proto_ops dn_proto_ops;
-static DEFINE_RWLOCK(dn_hash_lock);
-static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
-static struct hlist_head dn_wild_sk;
-static atomic_long_t decnet_memory_allocated;
-
-static int __dn_setsockopt(struct socket *sock, int level, int optname,
- sockptr_t optval, unsigned int optlen, int flags);
-static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
-
-static struct hlist_head *dn_find_list(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->addr.sdn_flags & SDF_WILD)
- return hlist_empty(&dn_wild_sk) ? &dn_wild_sk : NULL;
-
- return &dn_sk_hash[le16_to_cpu(scp->addrloc) & DN_SK_HASH_MASK];
-}
-
-/*
- * Valid ports are those greater than zero and not already in use.
- */
-static int check_port(__le16 port)
-{
- struct sock *sk;
-
- if (port == 0)
- return -1;
-
- sk_for_each(sk, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) {
- struct dn_scp *scp = DN_SK(sk);
- if (scp->addrloc == port)
- return -1;
- }
- return 0;
-}
-
-static unsigned short port_alloc(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
- static unsigned short port = 0x2000;
- unsigned short i_port = port;
-
- while(check_port(cpu_to_le16(++port)) != 0) {
- if (port == i_port)
- return 0;
- }
-
- scp->addrloc = cpu_to_le16(port);
-
- return 1;
-}
-
-/*
- * Since this is only ever called from user
- * level, we don't need a write_lock() version
- * of this.
- */
-static int dn_hash_sock(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct hlist_head *list;
- int rv = -EUSERS;
-
- BUG_ON(sk_hashed(sk));
-
- write_lock_bh(&dn_hash_lock);
-
- if (!scp->addrloc && !port_alloc(sk))
- goto out;
-
- rv = -EADDRINUSE;
- if ((list = dn_find_list(sk)) == NULL)
- goto out;
-
- sk_add_node(sk, list);
- rv = 0;
-out:
- write_unlock_bh(&dn_hash_lock);
- return rv;
-}
-
-static void dn_unhash_sock(struct sock *sk)
-{
- write_lock(&dn_hash_lock);
- sk_del_node_init(sk);
- write_unlock(&dn_hash_lock);
-}
-
-static void dn_unhash_sock_bh(struct sock *sk)
-{
- write_lock_bh(&dn_hash_lock);
- sk_del_node_init(sk);
- write_unlock_bh(&dn_hash_lock);
-}
-
-static struct hlist_head *listen_hash(struct sockaddr_dn *addr)
-{
- int i;
- unsigned int hash = addr->sdn_objnum;
-
- if (hash == 0) {
- hash = addr->sdn_objnamel;
- for(i = 0; i < le16_to_cpu(addr->sdn_objnamel); i++) {
- hash ^= addr->sdn_objname[i];
- hash ^= (hash << 3);
- }
- }
-
- return &dn_sk_hash[hash & DN_SK_HASH_MASK];
-}
-
-/*
- * Called to transform a socket from bound (i.e. with a local address)
- * into a listening socket (doesn't need a local port number) and rehashes
- * based upon the object name/number.
- */
-static void dn_rehash_sock(struct sock *sk)
-{
- struct hlist_head *list;
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->addr.sdn_flags & SDF_WILD)
- return;
-
- write_lock_bh(&dn_hash_lock);
- sk_del_node_init(sk);
- DN_SK(sk)->addrloc = 0;
- list = listen_hash(&DN_SK(sk)->addr);
- sk_add_node(sk, list);
- write_unlock_bh(&dn_hash_lock);
-}
-
-int dn_sockaddr2username(struct sockaddr_dn *sdn, unsigned char *buf, unsigned char type)
-{
- int len = 2;
-
- *buf++ = type;
-
- switch (type) {
- case 0:
- *buf++ = sdn->sdn_objnum;
- break;
- case 1:
- *buf++ = 0;
- *buf++ = le16_to_cpu(sdn->sdn_objnamel);
- memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel));
- len = 3 + le16_to_cpu(sdn->sdn_objnamel);
- break;
- case 2:
- memset(buf, 0, 5);
- buf += 5;
- *buf++ = le16_to_cpu(sdn->sdn_objnamel);
- memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel));
- len = 7 + le16_to_cpu(sdn->sdn_objnamel);
- break;
- }
-
- return len;
-}
-
-/*
- * On reception of usernames, we handle types 1 and 0 for destination
- * addresses only. Types 2 and 4 are used for source addresses, but the
- * UIC, GIC are ignored and they are both treated the same way. Type 3
- * is never used as I've no idea what its purpose might be or what its
- * format is.
- */
-int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn, unsigned char *fmt)
-{
- unsigned char type;
- int size = len;
- int namel = 12;
-
- sdn->sdn_objnum = 0;
- sdn->sdn_objnamel = cpu_to_le16(0);
- memset(sdn->sdn_objname, 0, DN_MAXOBJL);
-
- if (len < 2)
- return -1;
-
- len -= 2;
- *fmt = *data++;
- type = *data++;
-
- switch (*fmt) {
- case 0:
- sdn->sdn_objnum = type;
- return 2;
- case 1:
- namel = 16;
- break;
- case 2:
- len -= 4;
- data += 4;
- break;
- case 4:
- len -= 8;
- data += 8;
- break;
- default:
- return -1;
- }
-
- len -= 1;
-
- if (len < 0)
- return -1;
-
- sdn->sdn_objnamel = cpu_to_le16(*data++);
- len -= le16_to_cpu(sdn->sdn_objnamel);
-
- if ((len < 0) || (le16_to_cpu(sdn->sdn_objnamel) > namel))
- return -1;
-
- memcpy(sdn->sdn_objname, data, le16_to_cpu(sdn->sdn_objnamel));
-
- return size - len;
-}
-
-struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr)
-{
- struct hlist_head *list = listen_hash(addr);
- struct sock *sk;
-
- read_lock(&dn_hash_lock);
- sk_for_each(sk, list) {
- struct dn_scp *scp = DN_SK(sk);
- if (sk->sk_state != TCP_LISTEN)
- continue;
- if (scp->addr.sdn_objnum) {
- if (scp->addr.sdn_objnum != addr->sdn_objnum)
- continue;
- } else {
- if (addr->sdn_objnum)
- continue;
- if (scp->addr.sdn_objnamel != addr->sdn_objnamel)
- continue;
- if (memcmp(scp->addr.sdn_objname, addr->sdn_objname, le16_to_cpu(addr->sdn_objnamel)) != 0)
- continue;
- }
- sock_hold(sk);
- read_unlock(&dn_hash_lock);
- return sk;
- }
-
- sk = sk_head(&dn_wild_sk);
- if (sk) {
- if (sk->sk_state == TCP_LISTEN)
- sock_hold(sk);
- else
- sk = NULL;
- }
-
- read_unlock(&dn_hash_lock);
- return sk;
-}
-
-struct sock *dn_find_by_skb(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct sock *sk;
- struct dn_scp *scp;
-
- read_lock(&dn_hash_lock);
- sk_for_each(sk, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) {
- scp = DN_SK(sk);
- if (cb->src != dn_saddr2dn(&scp->peer))
- continue;
- if (cb->dst_port != scp->addrloc)
- continue;
- if (scp->addrrem && (cb->src_port != scp->addrrem))
- continue;
- sock_hold(sk);
- goto found;
- }
- sk = NULL;
-found:
- read_unlock(&dn_hash_lock);
- return sk;
-}
-
-
-
-static void dn_destruct(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- skb_queue_purge(&scp->data_xmit_queue);
- skb_queue_purge(&scp->other_xmit_queue);
- skb_queue_purge(&scp->other_receive_queue);
-
- dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
-}
-
-static unsigned long dn_memory_pressure;
-
-static void dn_enter_memory_pressure(struct sock *sk)
-{
- if (!dn_memory_pressure) {
- dn_memory_pressure = 1;
- }
-}
-
-static struct proto dn_proto = {
- .name = "NSP",
- .owner = THIS_MODULE,
- .enter_memory_pressure = dn_enter_memory_pressure,
- .memory_pressure = &dn_memory_pressure,
- .memory_allocated = &decnet_memory_allocated,
- .sysctl_mem = sysctl_decnet_mem,
- .sysctl_wmem = sysctl_decnet_wmem,
- .sysctl_rmem = sysctl_decnet_rmem,
- .max_header = DN_MAX_NSP_DATA_HEADER + 64,
- .obj_size = sizeof(struct dn_sock),
-};
-
-static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp, int kern)
-{
- struct dn_scp *scp;
- struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, kern);
-
- if (!sk)
- goto out;
-
- if (sock)
- sock->ops = &dn_proto_ops;
- sock_init_data(sock, sk);
-
- sk->sk_backlog_rcv = dn_nsp_backlog_rcv;
- sk->sk_destruct = dn_destruct;
- sk->sk_no_check_tx = 1;
- sk->sk_family = PF_DECnet;
- sk->sk_protocol = 0;
- sk->sk_allocation = gfp;
- sk->sk_sndbuf = sysctl_decnet_wmem[1];
- sk->sk_rcvbuf = sysctl_decnet_rmem[1];
-
- /* Initialization of DECnet Session Control Port */
- scp = DN_SK(sk);
- scp->state = DN_O; /* Open */
- scp->numdat = 1; /* Next data seg to tx */
- scp->numoth = 1; /* Next oth data to tx */
- scp->ackxmt_dat = 0; /* Last data seg ack'ed */
- scp->ackxmt_oth = 0; /* Last oth data ack'ed */
- scp->ackrcv_dat = 0; /* Highest data ack recv*/
- scp->ackrcv_oth = 0; /* Last oth data ack rec*/
- scp->flowrem_sw = DN_SEND;
- scp->flowloc_sw = DN_SEND;
- scp->flowrem_dat = 0;
- scp->flowrem_oth = 1;
- scp->flowloc_dat = 0;
- scp->flowloc_oth = 1;
- scp->services_rem = 0;
- scp->services_loc = 1 | NSP_FC_NONE;
- scp->info_rem = 0;
- scp->info_loc = 0x03; /* NSP version 4.1 */
- scp->segsize_rem = 230 - DN_MAX_NSP_DATA_HEADER; /* Default: Updated by remote segsize */
- scp->nonagle = 0;
- scp->multi_ireq = 1;
- scp->accept_mode = ACC_IMMED;
- scp->addr.sdn_family = AF_DECnet;
- scp->peer.sdn_family = AF_DECnet;
- scp->accessdata.acc_accl = 5;
- memcpy(scp->accessdata.acc_acc, "LINUX", 5);
-
- scp->max_window = NSP_MAX_WINDOW;
- scp->snd_window = NSP_MIN_WINDOW;
- scp->nsp_srtt = NSP_INITIAL_SRTT;
- scp->nsp_rttvar = NSP_INITIAL_RTTVAR;
- scp->nsp_rxtshift = 0;
-
- skb_queue_head_init(&scp->data_xmit_queue);
- skb_queue_head_init(&scp->other_xmit_queue);
- skb_queue_head_init(&scp->other_receive_queue);
-
- scp->persist = 0;
- scp->persist_fxn = NULL;
- scp->keepalive = 10 * HZ;
- scp->keepalive_fxn = dn_keepalive;
-
- dn_start_slow_timer(sk);
-out:
- return sk;
-}
-
-/*
- * Keepalive timer.
- * FIXME: Should respond to SO_KEEPALIVE etc.
- */
-static void dn_keepalive(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- /*
- * By checking the other_data transmit queue is empty
- * we are double checking that we are not sending too
- * many of these keepalive frames.
- */
- if (skb_queue_empty(&scp->other_xmit_queue))
- dn_nsp_send_link(sk, DN_NOCHANGE, 0);
-}
-
-
-/*
- * Timer for shutdown/destroyed sockets.
- * When socket is dead & no packets have been sent for a
- * certain amount of time, they are removed by this
- * routine. Also takes care of sending out DI & DC
- * frames at correct times.
- */
-int dn_destroy_timer(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- scp->persist = dn_nsp_persist(sk);
-
- switch (scp->state) {
- case DN_DI:
- dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC);
- if (scp->nsp_rxtshift >= decnet_di_count)
- scp->state = DN_CN;
- return 0;
-
- case DN_DR:
- dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC);
- if (scp->nsp_rxtshift >= decnet_dr_count)
- scp->state = DN_DRC;
- return 0;
-
- case DN_DN:
- if (scp->nsp_rxtshift < decnet_dn_count) {
- /* printk(KERN_DEBUG "dn_destroy_timer: DN\n"); */
- dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC,
- GFP_ATOMIC);
- return 0;
- }
- }
-
- scp->persist = (HZ * decnet_time_wait);
-
- if (sk->sk_socket)
- return 0;
-
- if (time_after_eq(jiffies, scp->stamp + HZ * decnet_time_wait)) {
- dn_unhash_sock(sk);
- sock_put(sk);
- return 1;
- }
-
- return 0;
-}
-
-static void dn_destroy_sock(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- scp->nsp_rxtshift = 0; /* reset back off */
-
- if (sk->sk_socket) {
- if (sk->sk_socket->state != SS_UNCONNECTED)
- sk->sk_socket->state = SS_DISCONNECTING;
- }
-
- sk->sk_state = TCP_CLOSE;
-
- switch (scp->state) {
- case DN_DN:
- dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC,
- sk->sk_allocation);
- scp->persist_fxn = dn_destroy_timer;
- scp->persist = dn_nsp_persist(sk);
- break;
- case DN_CR:
- scp->state = DN_DR;
- goto disc_reject;
- case DN_RUN:
- scp->state = DN_DI;
- fallthrough;
- case DN_DI:
- case DN_DR:
-disc_reject:
- dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation);
- fallthrough;
- case DN_NC:
- case DN_NR:
- case DN_RJ:
- case DN_DIC:
- case DN_CN:
- case DN_DRC:
- case DN_CI:
- case DN_CD:
- scp->persist_fxn = dn_destroy_timer;
- scp->persist = dn_nsp_persist(sk);
- break;
- default:
- printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n");
- fallthrough;
- case DN_O:
- dn_stop_slow_timer(sk);
-
- dn_unhash_sock_bh(sk);
- sock_put(sk);
-
- break;
- }
-}
-
-char *dn_addr2asc(__u16 addr, char *buf)
-{
- unsigned short node, area;
-
- node = addr & 0x03ff;
- area = addr >> 10;
- sprintf(buf, "%hd.%hd", area, node);
-
- return buf;
-}
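
Editor's note: the mask and shift above are the whole of DECnet addressing: a 16-bit address holds a 6-bit area in the high bits and a 10-bit node number in the low bits. A minimal user-space sketch of the same conversion (the helper name and demo values are illustrative, not kernel API):

#include <stdio.h>

/* Format a 16-bit DECnet address as "area.node": the top 6 bits are
 * the area, the low 10 bits the node. */
static void addr2asc(unsigned short addr, char *buf, size_t len)
{
	unsigned short node = addr & 0x03ff;
	unsigned short area = addr >> 10;

	snprintf(buf, len, "%hu.%hu", area, node);
}

int main(void)
{
	char buf[12];

	addr2asc((1 << 10) | 13, buf, sizeof(buf));	/* area 1, node 13 */
	printf("%s\n", buf);				/* prints "1.13" */
	return 0;
}
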
-
-
-
-static int dn_create(struct net *net, struct socket *sock, int protocol,
- int kern)
-{
- struct sock *sk;
-
- if (protocol < 0 || protocol > U8_MAX)
- return -EINVAL;
-
- if (!net_eq(net, &init_net))
- return -EAFNOSUPPORT;
-
- switch (sock->type) {
- case SOCK_SEQPACKET:
- if (protocol != DNPROTO_NSP)
- return -EPROTONOSUPPORT;
- break;
- case SOCK_STREAM:
- break;
- default:
- return -ESOCKTNOSUPPORT;
- }
-
-
- if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL, kern)) == NULL)
- return -ENOBUFS;
-
- sk->sk_protocol = protocol;
-
- return 0;
-}
-
-
-static int
-dn_release(struct socket *sock)
-{
- struct sock *sk = sock->sk;
-
- if (sk) {
- sock_orphan(sk);
- sock_hold(sk);
- lock_sock(sk);
- dn_destroy_sock(sk);
- release_sock(sk);
- sock_put(sk);
- }
-
- return 0;
-}
-
-static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- struct sockaddr_dn *saddr = (struct sockaddr_dn *)uaddr;
- struct net_device *dev, *ldev;
- int rv;
-
- if (addr_len != sizeof(struct sockaddr_dn))
- return -EINVAL;
-
- if (saddr->sdn_family != AF_DECnet)
- return -EINVAL;
-
- if (le16_to_cpu(saddr->sdn_nodeaddrl) && (le16_to_cpu(saddr->sdn_nodeaddrl) != 2))
- return -EINVAL;
-
- if (le16_to_cpu(saddr->sdn_objnamel) > DN_MAXOBJL)
- return -EINVAL;
-
- if (saddr->sdn_flags & ~SDF_WILD)
- return -EINVAL;
-
- if (!capable(CAP_NET_BIND_SERVICE) && (saddr->sdn_objnum ||
- (saddr->sdn_flags & SDF_WILD)))
- return -EACCES;
-
- if (!(saddr->sdn_flags & SDF_WILD)) {
- if (le16_to_cpu(saddr->sdn_nodeaddrl)) {
- rcu_read_lock();
- ldev = NULL;
- for_each_netdev_rcu(&init_net, dev) {
- if (!dev->dn_ptr)
- continue;
- if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) {
- ldev = dev;
- break;
- }
- }
- rcu_read_unlock();
- if (ldev == NULL)
- return -EADDRNOTAVAIL;
- }
- }
-
- rv = -EINVAL;
- lock_sock(sk);
- if (sock_flag(sk, SOCK_ZAPPED)) {
- memcpy(&scp->addr, saddr, addr_len);
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- rv = dn_hash_sock(sk);
- if (rv)
- sock_set_flag(sk, SOCK_ZAPPED);
- }
- release_sock(sk);
-
- return rv;
-}
-
-
-static int dn_auto_bind(struct socket *sock)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- int rv;
-
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- scp->addr.sdn_flags = 0;
- scp->addr.sdn_objnum = 0;
-
- /*
- * This stuff is to keep compatibility with Eduardo's
- * patch. I hope I can dispense with it shortly...
- */
- if ((scp->accessdata.acc_accl != 0) &&
- (scp->accessdata.acc_accl <= 12)) {
-
- scp->addr.sdn_objnamel = cpu_to_le16(scp->accessdata.acc_accl);
- memcpy(scp->addr.sdn_objname, scp->accessdata.acc_acc, le16_to_cpu(scp->addr.sdn_objnamel));
-
- scp->accessdata.acc_accl = 0;
- memset(scp->accessdata.acc_acc, 0, 40);
- }
- /* End of compatibility stuff */
-
- scp->addr.sdn_add.a_len = cpu_to_le16(2);
- rv = dn_dev_bind_default((__le16 *)scp->addr.sdn_add.a_addr);
- if (rv == 0) {
- rv = dn_hash_sock(sk);
- if (rv)
- sock_set_flag(sk, SOCK_ZAPPED);
- }
-
- return rv;
-}
-
-static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
-{
- struct dn_scp *scp = DN_SK(sk);
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
- int err;
-
- if (scp->state != DN_CR)
- return -EINVAL;
-
- scp->state = DN_CC;
- scp->segsize_loc = dst_metric_advmss(__sk_dst_get(sk));
- dn_send_conn_conf(sk, allocation);
-
- add_wait_queue(sk_sleep(sk), &wait);
- for(;;) {
- release_sock(sk);
- if (scp->state == DN_CC)
- *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo);
- lock_sock(sk);
- err = 0;
- if (scp->state == DN_RUN)
- break;
- err = sock_error(sk);
- if (err)
- break;
- err = sock_intr_errno(*timeo);
- if (signal_pending(current))
- break;
- err = -EAGAIN;
- if (!*timeo)
- break;
- }
- remove_wait_queue(sk_sleep(sk), &wait);
- if (err == 0) {
- sk->sk_socket->state = SS_CONNECTED;
- } else if (scp->state != DN_CC) {
- sk->sk_socket->state = SS_UNCONNECTED;
- }
- return err;
-}
-
-static int dn_wait_run(struct sock *sk, long *timeo)
-{
- struct dn_scp *scp = DN_SK(sk);
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
- int err = 0;
-
- if (scp->state == DN_RUN)
- goto out;
-
- if (!*timeo)
- return -EALREADY;
-
- add_wait_queue(sk_sleep(sk), &wait);
- for(;;) {
- release_sock(sk);
- if (scp->state == DN_CI || scp->state == DN_CC)
- *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo);
- lock_sock(sk);
- err = 0;
- if (scp->state == DN_RUN)
- break;
- err = sock_error(sk);
- if (err)
- break;
- err = sock_intr_errno(*timeo);
- if (signal_pending(current))
- break;
- err = -ETIMEDOUT;
- if (!*timeo)
- break;
- }
- remove_wait_queue(sk_sleep(sk), &wait);
-out:
- if (err == 0) {
- sk->sk_socket->state = SS_CONNECTED;
- } else if (scp->state != DN_CI && scp->state != DN_CC) {
- sk->sk_socket->state = SS_UNCONNECTED;
- }
- return err;
-}
-
-static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, long *timeo, int flags)
-{
- struct socket *sock = sk->sk_socket;
- struct dn_scp *scp = DN_SK(sk);
- int err = -EISCONN;
- struct flowidn fld;
- struct dst_entry *dst;
-
- if (sock->state == SS_CONNECTED)
- goto out;
-
- if (sock->state == SS_CONNECTING) {
- err = 0;
- if (scp->state == DN_RUN) {
- sock->state = SS_CONNECTED;
- goto out;
- }
- err = -ECONNREFUSED;
- if (scp->state != DN_CI && scp->state != DN_CC) {
- sock->state = SS_UNCONNECTED;
- goto out;
- }
- return dn_wait_run(sk, timeo);
- }
-
- err = -EINVAL;
- if (scp->state != DN_O)
- goto out;
-
- if (addr == NULL || addrlen != sizeof(struct sockaddr_dn))
- goto out;
- if (addr->sdn_family != AF_DECnet)
- goto out;
- if (addr->sdn_flags & SDF_WILD)
- goto out;
-
- if (sock_flag(sk, SOCK_ZAPPED)) {
- err = dn_auto_bind(sk->sk_socket);
- if (err)
- goto out;
- }
-
- memcpy(&scp->peer, addr, sizeof(struct sockaddr_dn));
-
- err = -EHOSTUNREACH;
- memset(&fld, 0, sizeof(fld));
- fld.flowidn_oif = sk->sk_bound_dev_if;
- fld.daddr = dn_saddr2dn(&scp->peer);
- fld.saddr = dn_saddr2dn(&scp->addr);
- dn_sk_ports_copy(&fld, scp);
- fld.flowidn_proto = DNPROTO_NSP;
- if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, flags) < 0)
- goto out;
- dst = __sk_dst_get(sk);
- sk->sk_route_caps = dst->dev->features;
- sock->state = SS_CONNECTING;
- scp->state = DN_CI;
- scp->segsize_loc = dst_metric_advmss(dst);
-
- dn_nsp_send_conninit(sk, NSP_CI);
- err = -EINPROGRESS;
- if (*timeo) {
- err = dn_wait_run(sk, timeo);
- }
-out:
- return err;
-}
-
-static int dn_connect(struct socket *sock, struct sockaddr *uaddr, int addrlen, int flags)
-{
- struct sockaddr_dn *addr = (struct sockaddr_dn *)uaddr;
- struct sock *sk = sock->sk;
- int err;
- long timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
-
- lock_sock(sk);
- err = __dn_connect(sk, addr, addrlen, &timeo, 0);
- release_sock(sk);
-
- return err;
-}
-
-static inline int dn_check_state(struct sock *sk, struct sockaddr_dn *addr, int addrlen, long *timeo, int flags)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- switch (scp->state) {
- case DN_RUN:
- return 0;
- case DN_CR:
- return dn_confirm_accept(sk, timeo, sk->sk_allocation);
- case DN_CI:
- case DN_CC:
- return dn_wait_run(sk, timeo);
- case DN_O:
- return __dn_connect(sk, addr, addrlen, timeo, flags);
- }
-
- return -EINVAL;
-}
-
-
-static void dn_access_copy(struct sk_buff *skb, struct accessdata_dn *acc)
-{
- unsigned char *ptr = skb->data;
-
- acc->acc_userl = *ptr++;
- memcpy(&acc->acc_user, ptr, acc->acc_userl);
- ptr += acc->acc_userl;
-
- acc->acc_passl = *ptr++;
- memcpy(&acc->acc_pass, ptr, acc->acc_passl);
- ptr += acc->acc_passl;
-
- acc->acc_accl = *ptr++;
- memcpy(&acc->acc_acc, ptr, acc->acc_accl);
-
- skb_pull(skb, acc->acc_accl + acc->acc_passl + acc->acc_userl + 3);
-
-}
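
Editor's note: the access data is carried on the wire as three counted strings back to back, a one-byte length followed by that many bytes, for username, password and account in turn. The kernel copy above relies on earlier validation for bounds; a self-contained user-space sketch of the same parse with the bounds checks made explicit (struct layout and buffer sizes are illustrative):

#include <stdio.h>
#include <string.h>

/* One counted string: a length byte followed by that many bytes.
 * Returns bytes consumed, or -1 if the input is short or oversized. */
static int pull_counted(const unsigned char *p, size_t avail,
			unsigned char *len, char *dst, size_t max)
{
	if (avail < 1 || p[0] > max || (size_t)p[0] + 1 > avail)
		return -1;
	*len = p[0];
	memcpy(dst, p + 1, *len);
	return *len + 1;
}

int main(void)
{
	/* username "TED", empty password, account "X" */
	const unsigned char wire[] = { 3, 'T', 'E', 'D', 0, 1, 'X' };
	char user[39], pass[39], acc[39];
	unsigned char ul, pl, al;
	size_t off = 0;
	int n;

	if ((n = pull_counted(wire, sizeof(wire), &ul, user, sizeof(user))) < 0)
		return 1;
	off += n;
	if ((n = pull_counted(wire + off, sizeof(wire) - off, &pl, pass, sizeof(pass))) < 0)
		return 1;
	off += n;
	if ((n = pull_counted(wire + off, sizeof(wire) - off, &al, acc, sizeof(acc))) < 0)
		return 1;
	printf("user \"%.*s\", account \"%.*s\"\n", ul, user, al, acc);
	return 0;
}
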
-
-static void dn_user_copy(struct sk_buff *skb, struct optdata_dn *opt)
-{
- unsigned char *ptr = skb->data;
-	u16 len = *ptr++; /* yes, it's 8 bits on the wire */
-
- BUG_ON(len > 16); /* we've checked the contents earlier */
- opt->opt_optl = cpu_to_le16(len);
- opt->opt_status = 0;
- memcpy(opt->opt_data, ptr, len);
- skb_pull(skb, len + 1);
-}
-
-static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
-{
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
- struct sk_buff *skb = NULL;
- int err = 0;
-
- add_wait_queue(sk_sleep(sk), &wait);
- for(;;) {
- release_sock(sk);
- skb = skb_dequeue(&sk->sk_receive_queue);
- if (skb == NULL) {
- *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo);
- skb = skb_dequeue(&sk->sk_receive_queue);
- }
- lock_sock(sk);
- if (skb != NULL)
- break;
- err = -EINVAL;
- if (sk->sk_state != TCP_LISTEN)
- break;
- err = sock_intr_errno(*timeo);
- if (signal_pending(current))
- break;
- err = -EAGAIN;
- if (!*timeo)
- break;
- }
- remove_wait_queue(sk_sleep(sk), &wait);
-
- return skb == NULL ? ERR_PTR(err) : skb;
-}
-
-static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
-{
- struct sock *sk = sock->sk, *newsk;
- struct sk_buff *skb = NULL;
- struct dn_skb_cb *cb;
- unsigned char menuver;
- int err = 0;
- unsigned char type;
- long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
- struct dst_entry *dst;
-
- lock_sock(sk);
-
- if (sk->sk_state != TCP_LISTEN || DN_SK(sk)->state != DN_O) {
- release_sock(sk);
- return -EINVAL;
- }
-
- skb = skb_dequeue(&sk->sk_receive_queue);
- if (skb == NULL) {
- skb = dn_wait_for_connect(sk, &timeo);
- if (IS_ERR(skb)) {
- release_sock(sk);
- return PTR_ERR(skb);
- }
- }
-
- cb = DN_SKB_CB(skb);
- sk_acceptq_removed(sk);
- newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, kern);
- if (newsk == NULL) {
- release_sock(sk);
- kfree_skb(skb);
- return -ENOBUFS;
- }
- release_sock(sk);
-
- dst = skb_dst(skb);
- sk_dst_set(newsk, dst);
- skb_dst_set(skb, NULL);
-
- DN_SK(newsk)->state = DN_CR;
- DN_SK(newsk)->addrrem = cb->src_port;
- DN_SK(newsk)->services_rem = cb->services;
- DN_SK(newsk)->info_rem = cb->info;
- DN_SK(newsk)->segsize_rem = cb->segsize;
- DN_SK(newsk)->accept_mode = DN_SK(sk)->accept_mode;
-
- if (DN_SK(newsk)->segsize_rem < 230)
- DN_SK(newsk)->segsize_rem = 230;
-
- if ((DN_SK(newsk)->services_rem & NSP_FC_MASK) == NSP_FC_NONE)
- DN_SK(newsk)->max_window = decnet_no_fc_max_cwnd;
-
- newsk->sk_state = TCP_LISTEN;
- memcpy(&(DN_SK(newsk)->addr), &(DN_SK(sk)->addr), sizeof(struct sockaddr_dn));
-
- /*
- * If we are listening on a wild socket, we don't want
- * the newly created socket on the wrong hash queue.
- */
- DN_SK(newsk)->addr.sdn_flags &= ~SDF_WILD;
-
- skb_pull(skb, dn_username2sockaddr(skb->data, skb->len, &(DN_SK(newsk)->addr), &type));
- skb_pull(skb, dn_username2sockaddr(skb->data, skb->len, &(DN_SK(newsk)->peer), &type));
- *(__le16 *)(DN_SK(newsk)->peer.sdn_add.a_addr) = cb->src;
- *(__le16 *)(DN_SK(newsk)->addr.sdn_add.a_addr) = cb->dst;
-
- menuver = *skb->data;
- skb_pull(skb, 1);
-
- if (menuver & DN_MENUVER_ACC)
- dn_access_copy(skb, &(DN_SK(newsk)->accessdata));
-
- if (menuver & DN_MENUVER_USR)
- dn_user_copy(skb, &(DN_SK(newsk)->conndata_in));
-
- if (menuver & DN_MENUVER_PRX)
- DN_SK(newsk)->peer.sdn_flags |= SDF_PROXY;
-
- if (menuver & DN_MENUVER_UIC)
- DN_SK(newsk)->peer.sdn_flags |= SDF_UICPROXY;
-
- kfree_skb(skb);
-
- memcpy(&(DN_SK(newsk)->conndata_out), &(DN_SK(sk)->conndata_out),
- sizeof(struct optdata_dn));
- memcpy(&(DN_SK(newsk)->discdata_out), &(DN_SK(sk)->discdata_out),
- sizeof(struct optdata_dn));
-
- lock_sock(newsk);
- err = dn_hash_sock(newsk);
- if (err == 0) {
- sock_reset_flag(newsk, SOCK_ZAPPED);
- dn_send_conn_ack(newsk);
-
-		/*
-		 * Here we use sk->sk_allocation since, although the connect
-		 * confirm is for newsk, the allocation context is that of
-		 * the old socket.
-		 */
- if (DN_SK(newsk)->accept_mode == ACC_IMMED)
- err = dn_confirm_accept(newsk, &timeo,
- sk->sk_allocation);
- }
- release_sock(newsk);
- return err;
-}
-
-
-static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer)
-{
- struct sockaddr_dn *sa = (struct sockaddr_dn *)uaddr;
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
-
- lock_sock(sk);
-
- if (peer) {
- if ((sock->state != SS_CONNECTED &&
- sock->state != SS_CONNECTING) &&
- scp->accept_mode == ACC_IMMED) {
- release_sock(sk);
- return -ENOTCONN;
- }
-
- memcpy(sa, &scp->peer, sizeof(struct sockaddr_dn));
- } else {
- memcpy(sa, &scp->addr, sizeof(struct sockaddr_dn));
- }
-
- release_sock(sk);
-
- return sizeof(struct sockaddr_dn);
-}
-
-
-static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- __poll_t mask = datagram_poll(file, sock, wait);
-
- if (!skb_queue_empty_lockless(&scp->other_receive_queue))
- mask |= EPOLLRDBAND;
-
- return mask;
-}
-
-static int dn_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- int err = -EOPNOTSUPP;
- long amount = 0;
- struct sk_buff *skb;
- int val;
-
-	switch (cmd) {
- case SIOCGIFADDR:
- case SIOCSIFADDR:
- return dn_dev_ioctl(cmd, (void __user *)arg);
-
- case SIOCATMARK:
- lock_sock(sk);
- val = !skb_queue_empty(&scp->other_receive_queue);
- if (scp->state != DN_RUN)
- val = -ENOTCONN;
- release_sock(sk);
- return val;
-
- case TIOCOUTQ:
- amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
- if (amount < 0)
- amount = 0;
- err = put_user(amount, (int __user *)arg);
- break;
-
- case TIOCINQ:
- lock_sock(sk);
- skb = skb_peek(&scp->other_receive_queue);
- if (skb) {
- amount = skb->len;
- } else {
- skb_queue_walk(&sk->sk_receive_queue, skb)
- amount += skb->len;
- }
- release_sock(sk);
- err = put_user(amount, (int __user *)arg);
- break;
-
- default:
- err = -ENOIOCTLCMD;
- break;
- }
-
- return err;
-}
-
-static int dn_listen(struct socket *sock, int backlog)
-{
- struct sock *sk = sock->sk;
- int err = -EINVAL;
-
- lock_sock(sk);
-
- if (sock_flag(sk, SOCK_ZAPPED))
- goto out;
-
- if ((DN_SK(sk)->state != DN_O) || (sk->sk_state == TCP_LISTEN))
- goto out;
-
- sk->sk_max_ack_backlog = backlog;
- sk->sk_ack_backlog = 0;
- sk->sk_state = TCP_LISTEN;
- err = 0;
- dn_rehash_sock(sk);
-
-out:
- release_sock(sk);
-
- return err;
-}
-
-
-static int dn_shutdown(struct socket *sock, int how)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- int err = -ENOTCONN;
-
- lock_sock(sk);
-
- if (sock->state == SS_UNCONNECTED)
- goto out;
-
- err = 0;
- if (sock->state == SS_DISCONNECTING)
- goto out;
-
- err = -EINVAL;
- if (scp->state == DN_O)
- goto out;
-
- if (how != SHUT_RDWR)
- goto out;
-
- sk->sk_shutdown = SHUTDOWN_MASK;
- dn_destroy_sock(sk);
- err = 0;
-
-out:
- release_sock(sk);
-
- return err;
-}
-
-static int dn_setsockopt(struct socket *sock, int level, int optname,
- sockptr_t optval, unsigned int optlen)
-{
- struct sock *sk = sock->sk;
- int err;
-
- lock_sock(sk);
- err = __dn_setsockopt(sock, level, optname, optval, optlen, 0);
- release_sock(sk);
-#ifdef CONFIG_NETFILTER
- /* we need to exclude all possible ENOPROTOOPTs except default case */
- if (err == -ENOPROTOOPT && optname != DSO_LINKINFO &&
- optname != DSO_STREAM && optname != DSO_SEQPACKET)
- err = nf_setsockopt(sk, PF_DECnet, optname, optval, optlen);
-#endif
-
- return err;
-}
-
-static int __dn_setsockopt(struct socket *sock, int level, int optname,
- sockptr_t optval, unsigned int optlen, int flags)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- long timeo;
- union {
- struct optdata_dn opt;
- struct accessdata_dn acc;
- int mode;
- unsigned long win;
- int val;
- unsigned char services;
- unsigned char info;
- } u;
- int err;
-
- if (optlen && sockptr_is_null(optval))
- return -EINVAL;
-
- if (optlen > sizeof(u))
- return -EINVAL;
-
- if (copy_from_sockptr(&u, optval, optlen))
- return -EFAULT;
-
- switch (optname) {
- case DSO_CONDATA:
- if (sock->state == SS_CONNECTED)
- return -EISCONN;
- if ((scp->state != DN_O) && (scp->state != DN_CR))
- return -EINVAL;
-
- if (optlen != sizeof(struct optdata_dn))
- return -EINVAL;
-
- if (le16_to_cpu(u.opt.opt_optl) > 16)
- return -EINVAL;
-
- memcpy(&scp->conndata_out, &u.opt, optlen);
- break;
-
- case DSO_DISDATA:
- if (sock->state != SS_CONNECTED &&
- scp->accept_mode == ACC_IMMED)
- return -ENOTCONN;
-
- if (optlen != sizeof(struct optdata_dn))
- return -EINVAL;
-
- if (le16_to_cpu(u.opt.opt_optl) > 16)
- return -EINVAL;
-
- memcpy(&scp->discdata_out, &u.opt, optlen);
- break;
-
- case DSO_CONACCESS:
- if (sock->state == SS_CONNECTED)
- return -EISCONN;
- if (scp->state != DN_O)
- return -EINVAL;
-
- if (optlen != sizeof(struct accessdata_dn))
- return -EINVAL;
-
- if ((u.acc.acc_accl > DN_MAXACCL) ||
- (u.acc.acc_passl > DN_MAXACCL) ||
- (u.acc.acc_userl > DN_MAXACCL))
- return -EINVAL;
-
- memcpy(&scp->accessdata, &u.acc, optlen);
- break;
-
- case DSO_ACCEPTMODE:
- if (sock->state == SS_CONNECTED)
- return -EISCONN;
- if (scp->state != DN_O)
- return -EINVAL;
-
- if (optlen != sizeof(int))
- return -EINVAL;
-
- if ((u.mode != ACC_IMMED) && (u.mode != ACC_DEFER))
- return -EINVAL;
-
- scp->accept_mode = (unsigned char)u.mode;
- break;
-
- case DSO_CONACCEPT:
- if (scp->state != DN_CR)
- return -EINVAL;
- timeo = sock_rcvtimeo(sk, 0);
- err = dn_confirm_accept(sk, &timeo, sk->sk_allocation);
- return err;
-
- case DSO_CONREJECT:
- if (scp->state != DN_CR)
- return -EINVAL;
-
- scp->state = DN_DR;
- sk->sk_shutdown = SHUTDOWN_MASK;
- dn_nsp_send_disc(sk, 0x38, 0, sk->sk_allocation);
- break;
-
- case DSO_MAXWINDOW:
- if (optlen != sizeof(unsigned long))
- return -EINVAL;
- if (u.win > NSP_MAX_WINDOW)
- u.win = NSP_MAX_WINDOW;
- if (u.win == 0)
- return -EINVAL;
- scp->max_window = u.win;
- if (scp->snd_window > u.win)
- scp->snd_window = u.win;
- break;
-
- case DSO_NODELAY:
- if (optlen != sizeof(int))
- return -EINVAL;
- if (scp->nonagle == TCP_NAGLE_CORK)
- return -EINVAL;
- scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_OFF;
- /* if (scp->nonagle == 1) { Push pending frames } */
- break;
-
- case DSO_CORK:
- if (optlen != sizeof(int))
- return -EINVAL;
- if (scp->nonagle == TCP_NAGLE_OFF)
- return -EINVAL;
- scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_CORK;
- /* if (scp->nonagle == 0) { Push pending frames } */
- break;
-
- case DSO_SERVICES:
- if (optlen != sizeof(unsigned char))
- return -EINVAL;
- if ((u.services & ~NSP_FC_MASK) != 0x01)
- return -EINVAL;
- if ((u.services & NSP_FC_MASK) == NSP_FC_MASK)
- return -EINVAL;
- scp->services_loc = u.services;
- break;
-
- case DSO_INFO:
- if (optlen != sizeof(unsigned char))
- return -EINVAL;
- if (u.info & 0xfc)
- return -EINVAL;
- scp->info_loc = u.info;
- break;
-
- case DSO_LINKINFO:
- case DSO_STREAM:
- case DSO_SEQPACKET:
- default:
- return -ENOPROTOOPT;
- }
-
- return 0;
-}
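
Editor's note: DSO_NODELAY and DSO_CORK mirror TCP_NODELAY and TCP_CORK and are mutually exclusive, so setting one while the other is active fails with EINVAL rather than silently switching modes. A hedged usage sketch, assuming headers from a DECnet-era system (<linux/dn.h> for the DSO_* constants and DNPROTO_NSP; SOL_DECNET and AF_DECnet from <sys/socket.h>):

#include <stdio.h>
#include <sys/socket.h>
#include <linux/dn.h>	/* DSO_* and DNPROTO_NSP, while DECnet headers shipped */

static int dn_set_nodelay(int fd, int on)
{
	return setsockopt(fd, SOL_DECNET, DSO_NODELAY, &on, sizeof(on));
}

static int dn_set_cork(int fd, int on)
{
	return setsockopt(fd, SOL_DECNET, DSO_CORK, &on, sizeof(on));
}

int main(void)
{
	int fd = socket(AF_DECnet, SOCK_SEQPACKET, DNPROTO_NSP);

	if (fd < 0)
		return 1;	/* kernel built without DECnet */
	if (dn_set_cork(fd, 1) == 0 && dn_set_nodelay(fd, 1) < 0)
		perror("DSO_NODELAY while corked");	/* expect EINVAL */
	return 0;
}
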
-
-static int dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
-{
- struct sock *sk = sock->sk;
- int err;
-
- lock_sock(sk);
- err = __dn_getsockopt(sock, level, optname, optval, optlen, 0);
- release_sock(sk);
-#ifdef CONFIG_NETFILTER
- if (err == -ENOPROTOOPT && optname != DSO_STREAM &&
- optname != DSO_SEQPACKET && optname != DSO_CONACCEPT &&
- optname != DSO_CONREJECT) {
- int len;
-
- if (get_user(len, optlen))
- return -EFAULT;
-
- err = nf_getsockopt(sk, PF_DECnet, optname, optval, &len);
- if (err >= 0)
- err = put_user(len, optlen);
- }
-#endif
-
- return err;
-}
-
-static int __dn_getsockopt(struct socket *sock, int level,int optname, char __user *optval,int __user *optlen, int flags)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- struct linkinfo_dn link;
- unsigned int r_len;
- void *r_data = NULL;
- unsigned int val;
-
-	if (get_user(r_len, optlen))
- return -EFAULT;
-
- switch (optname) {
- case DSO_CONDATA:
- if (r_len > sizeof(struct optdata_dn))
- r_len = sizeof(struct optdata_dn);
- r_data = &scp->conndata_in;
- break;
-
- case DSO_DISDATA:
- if (r_len > sizeof(struct optdata_dn))
- r_len = sizeof(struct optdata_dn);
- r_data = &scp->discdata_in;
- break;
-
- case DSO_CONACCESS:
- if (r_len > sizeof(struct accessdata_dn))
- r_len = sizeof(struct accessdata_dn);
- r_data = &scp->accessdata;
- break;
-
- case DSO_ACCEPTMODE:
- if (r_len > sizeof(unsigned char))
- r_len = sizeof(unsigned char);
- r_data = &scp->accept_mode;
- break;
-
- case DSO_LINKINFO:
- if (r_len > sizeof(struct linkinfo_dn))
- r_len = sizeof(struct linkinfo_dn);
-
- memset(&link, 0, sizeof(link));
-
- switch (sock->state) {
- case SS_CONNECTING:
- link.idn_linkstate = LL_CONNECTING;
- break;
- case SS_DISCONNECTING:
- link.idn_linkstate = LL_DISCONNECTING;
- break;
- case SS_CONNECTED:
- link.idn_linkstate = LL_RUNNING;
- break;
- default:
- link.idn_linkstate = LL_INACTIVE;
- }
-
- link.idn_segsize = scp->segsize_rem;
- r_data = &link;
- break;
-
- case DSO_MAXWINDOW:
- if (r_len > sizeof(unsigned long))
- r_len = sizeof(unsigned long);
- r_data = &scp->max_window;
- break;
-
- case DSO_NODELAY:
- if (r_len > sizeof(int))
- r_len = sizeof(int);
- val = (scp->nonagle == TCP_NAGLE_OFF);
- r_data = &val;
- break;
-
- case DSO_CORK:
- if (r_len > sizeof(int))
- r_len = sizeof(int);
- val = (scp->nonagle == TCP_NAGLE_CORK);
- r_data = &val;
- break;
-
- case DSO_SERVICES:
- if (r_len > sizeof(unsigned char))
- r_len = sizeof(unsigned char);
- r_data = &scp->services_rem;
- break;
-
- case DSO_INFO:
- if (r_len > sizeof(unsigned char))
- r_len = sizeof(unsigned char);
- r_data = &scp->info_rem;
- break;
-
- case DSO_STREAM:
- case DSO_SEQPACKET:
- case DSO_CONACCEPT:
- case DSO_CONREJECT:
- default:
- return -ENOPROTOOPT;
- }
-
- if (r_data) {
- if (copy_to_user(optval, r_data, r_len))
- return -EFAULT;
- if (put_user(r_len, optlen))
- return -EFAULT;
- }
-
- return 0;
-}
-
-
-static int dn_data_ready(struct sock *sk, struct sk_buff_head *q, int flags, int target)
-{
- struct sk_buff *skb;
- int len = 0;
-
- if (flags & MSG_OOB)
- return !skb_queue_empty(q) ? 1 : 0;
-
- skb_queue_walk(q, skb) {
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- len += skb->len;
-
- if (cb->nsp_flags & 0x40) {
- /* SOCK_SEQPACKET reads to EOM */
- if (sk->sk_type == SOCK_SEQPACKET)
- return 1;
- /* so does SOCK_STREAM unless WAITALL is specified */
- if (!(flags & MSG_WAITALL))
- return 1;
- }
-
- /* minimum data length for read exceeded */
- if (len >= target)
- return 1;
- }
-
- return 0;
-}
-
-
-static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
- int flags)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- struct sk_buff_head *queue = &sk->sk_receive_queue;
- size_t target = size > 1 ? 1 : 0;
- size_t copied = 0;
- int rv = 0;
- struct sk_buff *skb, *n;
- struct dn_skb_cb *cb = NULL;
- unsigned char eor = 0;
- long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
-
- lock_sock(sk);
-
- if (sock_flag(sk, SOCK_ZAPPED)) {
- rv = -EADDRNOTAVAIL;
- goto out;
- }
-
- if (sk->sk_shutdown & RCV_SHUTDOWN) {
- rv = 0;
- goto out;
- }
-
- rv = dn_check_state(sk, NULL, 0, &timeo, flags);
- if (rv)
- goto out;
-
- if (flags & ~(MSG_CMSG_COMPAT|MSG_PEEK|MSG_OOB|MSG_WAITALL|MSG_DONTWAIT|MSG_NOSIGNAL)) {
- rv = -EOPNOTSUPP;
- goto out;
- }
-
- if (flags & MSG_OOB)
- queue = &scp->other_receive_queue;
-
- if (flags & MSG_WAITALL)
- target = size;
-
-
- /*
- * See if there is data ready to read, sleep if there isn't
- */
- for(;;) {
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
-
- if (sk->sk_err)
- goto out;
-
- if (!skb_queue_empty(&scp->other_receive_queue)) {
- if (!(flags & MSG_OOB)) {
- msg->msg_flags |= MSG_OOB;
- if (!scp->other_report) {
- scp->other_report = 1;
- goto out;
- }
- }
- }
-
- if (scp->state != DN_RUN)
- goto out;
-
- if (signal_pending(current)) {
- rv = sock_intr_errno(timeo);
- goto out;
- }
-
- if (dn_data_ready(sk, queue, flags, target))
- break;
-
- if (flags & MSG_DONTWAIT) {
- rv = -EWOULDBLOCK;
- goto out;
- }
-
- add_wait_queue(sk_sleep(sk), &wait);
- sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target), &wait);
- sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- remove_wait_queue(sk_sleep(sk), &wait);
- }
-
- skb_queue_walk_safe(queue, skb, n) {
- unsigned int chunk = skb->len;
- cb = DN_SKB_CB(skb);
-
- if ((chunk + copied) > size)
- chunk = size - copied;
-
- if (memcpy_to_msg(msg, skb->data, chunk)) {
- rv = -EFAULT;
- break;
- }
- copied += chunk;
-
- if (!(flags & MSG_PEEK))
- skb_pull(skb, chunk);
-
- eor = cb->nsp_flags & 0x40;
-
- if (skb->len == 0) {
- skb_unlink(skb, queue);
- kfree_skb(skb);
- /*
- * N.B. Don't refer to skb or cb after this point
- * in loop.
- */
- if ((scp->flowloc_sw == DN_DONTSEND) && !dn_congested(sk)) {
- scp->flowloc_sw = DN_SEND;
- dn_nsp_send_link(sk, DN_SEND, 0);
- }
- }
-
- if (eor) {
- if (sk->sk_type == SOCK_SEQPACKET)
- break;
- if (!(flags & MSG_WAITALL))
- break;
- }
-
- if (flags & MSG_OOB)
- break;
-
- if (copied >= target)
- break;
- }
-
- rv = copied;
-
-
- if (eor && (sk->sk_type == SOCK_SEQPACKET))
- msg->msg_flags |= MSG_EOR;
-
-out:
- if (rv == 0)
- rv = (flags & MSG_PEEK) ? -sk->sk_err : sock_error(sk);
-
- if ((rv >= 0) && msg->msg_name) {
- __sockaddr_check_size(sizeof(struct sockaddr_dn));
- memcpy(msg->msg_name, &scp->peer, sizeof(struct sockaddr_dn));
- msg->msg_namelen = sizeof(struct sockaddr_dn);
- }
-
- release_sock(sk);
-
- return rv;
-}
-
-
-static inline int dn_queue_too_long(struct dn_scp *scp, struct sk_buff_head *queue, int flags)
-{
- unsigned char fctype = scp->services_rem & NSP_FC_MASK;
- if (skb_queue_len(queue) >= scp->snd_window)
- return 1;
- if (fctype != NSP_FC_NONE) {
- if (flags & MSG_OOB) {
- if (scp->flowrem_oth == 0)
- return 1;
- } else {
- if (scp->flowrem_dat == 0)
- return 1;
- }
- }
- return 0;
-}
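
Editor's note: transmission is throttled by two mechanisms. The send window always applies; in addition, when the remote requested flow control (the NSP_FC bits of services_rem), a credit counter must be non-zero. As the send path below shows, segment flow control (NSP_FC_SRC) spends one credit per segment, while message flow control (NSP_FC_SCMC) spends one per complete message. A toy model of the predicate (names are illustrative, not kernel API):

#include <stdio.h>

/* Toy model of the two NSP flow-control schemes. */
enum fc { FC_NONE, FC_SEGMENT, FC_MESSAGE };

struct tx {
	unsigned int queued;	/* segments on the xmit queue */
	unsigned int window;	/* snd_window */
	unsigned int credits;	/* flowrem_dat / flowrem_oth */
	enum fc fc;
};

static int queue_too_long(const struct tx *t)
{
	if (t->queued >= t->window)
		return 1;			/* send window exhausted */
	if (t->fc != FC_NONE && t->credits == 0)
		return 1;			/* no remote credit left */
	return 0;
}

int main(void)
{
	struct tx t = { .queued = 2, .window = 4, .credits = 0, .fc = FC_SEGMENT };

	printf("%d\n", queue_too_long(&t));	/* 1: window open, but no credit */
	return 0;
}
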
-
-/*
- * The DECnet spec requires that the "routing layer" accepts packets which
- * are at least 230 bytes in size. This excludes any headers which the NSP
- * layer might add, so we always assume that we'll be using the maximal
- * length header on data packets. The variation in length is due to the
- * inclusion (or not) of the two 16-bit acknowledgement fields, so it doesn't
- * make much practical difference.
- */
-unsigned int dn_mss_from_pmtu(struct net_device *dev, int mtu)
-{
- unsigned int mss = 230 - DN_MAX_NSP_DATA_HEADER;
- if (dev) {
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
- mtu -= LL_RESERVED_SPACE(dev);
- if (dn_db->use_long)
- mtu -= 21;
- else
- mtu -= 6;
- mtu -= DN_MAX_NSP_DATA_HEADER;
- } else {
- /*
- * 21 = long header, 16 = guess at MAC header length
- */
- mtu -= (21 + DN_MAX_NSP_DATA_HEADER + 16);
- }
- if (mtu > mss)
- mss = mtu;
- return mss;
-}
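
Editor's note: in other words, clamp the path MTU down by the link-layer reserve, the DECnet routing header (21 bytes in the long form, 6 in the short form) and the maximal NSP data header, and never go below the 230-byte routing-layer floor. A user-space re-derivation, assuming DN_MAX_NSP_DATA_HEADER is 11 bytes (flag byte, two port words, two optional ack words, segment number):

#include <stdio.h>

#define NSP_DATA_HDR	11	/* assumed DN_MAX_NSP_DATA_HEADER, see note above */

static unsigned int dn_mss(int mtu, int ll_reserve, int long_header)
{
	int mss = 230 - NSP_DATA_HDR;	/* the routing-layer floor */

	mtu -= ll_reserve;
	mtu -= long_header ? 21 : 6;	/* DECnet routing header */
	mtu -= NSP_DATA_HDR;
	return mtu > mss ? (unsigned int)mtu : (unsigned int)mss;
}

int main(void)
{
	printf("%u\n", dn_mss(1500, 0, 1));	/* 1468 on plain Ethernet */
	return 0;
}
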
-
-static inline unsigned int dn_current_mss(struct sock *sk, int flags)
-{
- struct dst_entry *dst = __sk_dst_get(sk);
- struct dn_scp *scp = DN_SK(sk);
- int mss_now = min_t(int, scp->segsize_loc, scp->segsize_rem);
-
- /* Other data messages are limited to 16 bytes per packet */
- if (flags & MSG_OOB)
- return 16;
-
- /* This works out the maximum size of segment we can send out */
- if (dst) {
- u32 mtu = dst_mtu(dst);
- mss_now = min_t(int, dn_mss_from_pmtu(dst->dev, mtu), mss_now);
- }
-
- return mss_now;
-}
-
-/*
- * N.B. We get the timeout wrong here, but then we always did get it
- * wrong before and this is another step along the road to correcting
- * it. It ought to get updated each time we pass through the routine,
- * but in practice it probably doesn't matter too much for now.
- */
-static inline struct sk_buff *dn_alloc_send_pskb(struct sock *sk,
- unsigned long datalen, int noblock,
- int *errcode)
-{
- struct sk_buff *skb = sock_alloc_send_skb(sk, datalen,
- noblock, errcode);
- if (skb) {
- skb->protocol = htons(ETH_P_DNA_RT);
- skb->pkt_type = PACKET_OUTGOING;
- }
- return skb;
-}
-
-static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- size_t mss;
- struct sk_buff_head *queue = &scp->data_xmit_queue;
- int flags = msg->msg_flags;
- int err = 0;
- size_t sent = 0;
- int addr_len = msg->msg_namelen;
- DECLARE_SOCKADDR(struct sockaddr_dn *, addr, msg->msg_name);
- struct sk_buff *skb = NULL;
- struct dn_skb_cb *cb;
- size_t len;
- unsigned char fctype;
- long timeo;
-
- if (flags & ~(MSG_TRYHARD|MSG_OOB|MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL|MSG_MORE|MSG_CMSG_COMPAT))
- return -EOPNOTSUPP;
-
- if (addr_len && (addr_len != sizeof(struct sockaddr_dn)))
- return -EINVAL;
-
- lock_sock(sk);
- timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
- /*
- * The only difference between stream sockets and sequenced packet
- * sockets is that the stream sockets always behave as if MSG_EOR
- * has been set.
- */
- if (sock->type == SOCK_STREAM) {
- if (flags & MSG_EOR) {
- err = -EINVAL;
- goto out;
- }
- flags |= MSG_EOR;
- }
-
-
- err = dn_check_state(sk, addr, addr_len, &timeo, flags);
- if (err)
- goto out_err;
-
- if (sk->sk_shutdown & SEND_SHUTDOWN) {
- err = -EPIPE;
- if (!(flags & MSG_NOSIGNAL))
- send_sig(SIGPIPE, current, 0);
- goto out_err;
- }
-
- if ((flags & MSG_TRYHARD) && sk->sk_dst_cache)
- dst_negative_advice(sk);
-
- mss = scp->segsize_rem;
- fctype = scp->services_rem & NSP_FC_MASK;
-
- mss = dn_current_mss(sk, flags);
-
- if (flags & MSG_OOB) {
- queue = &scp->other_xmit_queue;
- if (size > mss) {
- err = -EMSGSIZE;
- goto out;
- }
- }
-
- scp->persist_fxn = dn_nsp_xmit_timeout;
-
- while(sent < size) {
- err = sock_error(sk);
- if (err)
- goto out;
-
- if (signal_pending(current)) {
- err = sock_intr_errno(timeo);
- goto out;
- }
-
- /*
- * Calculate size that we wish to send.
- */
- len = size - sent;
-
- if (len > mss)
- len = mss;
-
- /*
- * Wait for queue size to go down below the window
- * size.
- */
- if (dn_queue_too_long(scp, queue, flags)) {
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
-
- if (flags & MSG_DONTWAIT) {
- err = -EWOULDBLOCK;
- goto out;
- }
-
- add_wait_queue(sk_sleep(sk), &wait);
- sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- sk_wait_event(sk, &timeo,
- !dn_queue_too_long(scp, queue, flags), &wait);
- sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- remove_wait_queue(sk_sleep(sk), &wait);
- continue;
- }
-
- /*
- * Get a suitably sized skb.
-		 * 64 is a bit of a hack really, but it's larger than any
- * link-layer headers and has served us well as a good
- * guess as to their real length.
- */
- skb = dn_alloc_send_pskb(sk, len + 64 + DN_MAX_NSP_DATA_HEADER,
- flags & MSG_DONTWAIT, &err);
-
- if (err)
- break;
-
- if (!skb)
- continue;
-
- cb = DN_SKB_CB(skb);
-
- skb_reserve(skb, 64 + DN_MAX_NSP_DATA_HEADER);
-
- if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
- err = -EFAULT;
- goto out;
- }
-
- if (flags & MSG_OOB) {
- cb->nsp_flags = 0x30;
- if (fctype != NSP_FC_NONE)
- scp->flowrem_oth--;
- } else {
- cb->nsp_flags = 0x00;
- if (scp->seg_total == 0)
- cb->nsp_flags |= 0x20;
-
- scp->seg_total += len;
-
- if (((sent + len) == size) && (flags & MSG_EOR)) {
- cb->nsp_flags |= 0x40;
- scp->seg_total = 0;
- if (fctype == NSP_FC_SCMC)
- scp->flowrem_dat--;
- }
- if (fctype == NSP_FC_SRC)
- scp->flowrem_dat--;
- }
-
- sent += len;
- dn_nsp_queue_xmit(sk, skb, sk->sk_allocation, flags & MSG_OOB);
- skb = NULL;
-
- scp->persist = dn_nsp_persist(sk);
-
- }
-out:
-
- kfree_skb(skb);
-
- release_sock(sk);
-
- return sent ? sent : err;
-
-out_err:
- err = sk_stream_error(sk, flags, err);
- release_sock(sk);
- return err;
-}
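
Editor's note: each data segment's NSP flags encode its position in the message: 0x20 marks beginning-of-message, 0x40 end-of-message, and out-of-band ("other") data uses 0x30. Since stream sockets force MSG_EOR, every write becomes exactly one message. A sketch of the per-chunk flag assignment (the kernel tracks the boundary across calls via scp->seg_total; this simplification assumes one message per buffer):

#include <stdio.h>

/* First chunk of a message carries BOM (0x20), the last carries
 * EOM (0x40); a message that fits one segment carries both. */
static unsigned char seg_flags(size_t off, size_t chunk, size_t total)
{
	unsigned char f = 0x00;

	if (off == 0)
		f |= 0x20;		/* beginning of message */
	if (off + chunk == total)
		f |= 0x40;		/* end of message */
	return f;
}

int main(void)
{
	size_t total = 1000, mss = 400, off = 0;

	while (off < total) {
		size_t chunk = total - off > mss ? mss : total - off;

		printf("seg @%zu len %zu flags 0x%02x\n",
		       off, chunk, (unsigned int)seg_flags(off, chunk, total));
		off += chunk;
	}
	return 0;
}
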
-
-static int dn_device_event(struct notifier_block *this, unsigned long event,
- void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
-
- switch (event) {
- case NETDEV_UP:
- dn_dev_up(dev);
- break;
- case NETDEV_DOWN:
- dn_dev_down(dev);
- break;
- default:
- break;
- }
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block dn_dev_notifier = {
- .notifier_call = dn_device_event,
-};
-
-static struct packet_type dn_dix_packet_type __read_mostly = {
- .type = cpu_to_be16(ETH_P_DNA_RT),
- .func = dn_route_rcv,
-};
-
-#ifdef CONFIG_PROC_FS
-struct dn_iter_state {
- int bucket;
-};
-
-static struct sock *dn_socket_get_first(struct seq_file *seq)
-{
- struct dn_iter_state *state = seq->private;
- struct sock *n = NULL;
-
- for(state->bucket = 0;
- state->bucket < DN_SK_HASH_SIZE;
- ++state->bucket) {
- n = sk_head(&dn_sk_hash[state->bucket]);
- if (n)
- break;
- }
-
- return n;
-}
-
-static struct sock *dn_socket_get_next(struct seq_file *seq,
- struct sock *n)
-{
- struct dn_iter_state *state = seq->private;
-
- n = sk_next(n);
- while (!n) {
- if (++state->bucket >= DN_SK_HASH_SIZE)
- break;
- n = sk_head(&dn_sk_hash[state->bucket]);
- }
- return n;
-}
-
-static struct sock *socket_get_idx(struct seq_file *seq, loff_t *pos)
-{
- struct sock *sk = dn_socket_get_first(seq);
-
- if (sk) {
- while(*pos && (sk = dn_socket_get_next(seq, sk)))
- --*pos;
- }
- return *pos ? NULL : sk;
-}
-
-static void *dn_socket_get_idx(struct seq_file *seq, loff_t pos)
-{
- void *rc;
- read_lock_bh(&dn_hash_lock);
- rc = socket_get_idx(seq, &pos);
- if (!rc) {
- read_unlock_bh(&dn_hash_lock);
- }
- return rc;
-}
-
-static void *dn_socket_seq_start(struct seq_file *seq, loff_t *pos)
-{
- return *pos ? dn_socket_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
-}
-
-static void *dn_socket_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- void *rc;
-
- if (v == SEQ_START_TOKEN) {
- rc = dn_socket_get_idx(seq, 0);
- goto out;
- }
-
- rc = dn_socket_get_next(seq, v);
- if (rc)
- goto out;
- read_unlock_bh(&dn_hash_lock);
-out:
- ++*pos;
- return rc;
-}
-
-static void dn_socket_seq_stop(struct seq_file *seq, void *v)
-{
- if (v && v != SEQ_START_TOKEN)
- read_unlock_bh(&dn_hash_lock);
-}
-
-#define IS_NOT_PRINTABLE(x) ((x) < 32 || (x) > 126)
-
-static void dn_printable_object(struct sockaddr_dn *dn, unsigned char *buf)
-{
- int i;
-
- switch (le16_to_cpu(dn->sdn_objnamel)) {
- case 0:
- sprintf(buf, "%d", dn->sdn_objnum);
- break;
- default:
- for (i = 0; i < le16_to_cpu(dn->sdn_objnamel); i++) {
- buf[i] = dn->sdn_objname[i];
- if (IS_NOT_PRINTABLE(buf[i]))
- buf[i] = '.';
- }
- buf[i] = 0;
- }
-}
-
-static char *dn_state2asc(unsigned char state)
-{
- switch (state) {
- case DN_O:
- return "OPEN";
- case DN_CR:
- return " CR";
- case DN_DR:
- return " DR";
- case DN_DRC:
- return " DRC";
- case DN_CC:
- return " CC";
- case DN_CI:
- return " CI";
- case DN_NR:
- return " NR";
- case DN_NC:
- return " NC";
- case DN_CD:
- return " CD";
- case DN_RJ:
- return " RJ";
- case DN_RUN:
- return " RUN";
- case DN_DI:
- return " DI";
- case DN_DIC:
- return " DIC";
- case DN_DN:
- return " DN";
- case DN_CL:
- return " CL";
- case DN_CN:
- return " CN";
- }
-
- return "????";
-}
-
-static inline void dn_socket_format_entry(struct seq_file *seq, struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
- char buf1[DN_ASCBUF_LEN];
- char buf2[DN_ASCBUF_LEN];
- char local_object[DN_MAXOBJL+3];
- char remote_object[DN_MAXOBJL+3];
-
- dn_printable_object(&scp->addr, local_object);
- dn_printable_object(&scp->peer, remote_object);
-
- seq_printf(seq,
- "%6s/%04X %04d:%04d %04d:%04d %01d %-16s "
- "%6s/%04X %04d:%04d %04d:%04d %01d %-16s %4s %s\n",
- dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->addr)), buf1),
- scp->addrloc,
- scp->numdat,
- scp->numoth,
- scp->ackxmt_dat,
- scp->ackxmt_oth,
- scp->flowloc_sw,
- local_object,
- dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->peer)), buf2),
- scp->addrrem,
- scp->numdat_rcv,
- scp->numoth_rcv,
- scp->ackrcv_dat,
- scp->ackrcv_oth,
- scp->flowrem_sw,
- remote_object,
- dn_state2asc(scp->state),
- ((scp->accept_mode == ACC_IMMED) ? "IMMED" : "DEFER"));
-}
-
-static int dn_socket_seq_show(struct seq_file *seq, void *v)
-{
- if (v == SEQ_START_TOKEN) {
- seq_puts(seq, "Local Remote\n");
- } else {
- dn_socket_format_entry(seq, v);
- }
- return 0;
-}
-
-static const struct seq_operations dn_socket_seq_ops = {
- .start = dn_socket_seq_start,
- .next = dn_socket_seq_next,
- .stop = dn_socket_seq_stop,
- .show = dn_socket_seq_show,
-};
-#endif
-
-static const struct net_proto_family dn_family_ops = {
- .family = AF_DECnet,
- .create = dn_create,
- .owner = THIS_MODULE,
-};
-
-static const struct proto_ops dn_proto_ops = {
- .family = AF_DECnet,
- .owner = THIS_MODULE,
- .release = dn_release,
- .bind = dn_bind,
- .connect = dn_connect,
- .socketpair = sock_no_socketpair,
- .accept = dn_accept,
- .getname = dn_getname,
- .poll = dn_poll,
- .ioctl = dn_ioctl,
- .listen = dn_listen,
- .shutdown = dn_shutdown,
- .setsockopt = dn_setsockopt,
- .getsockopt = dn_getsockopt,
- .sendmsg = dn_sendmsg,
- .recvmsg = dn_recvmsg,
- .mmap = sock_no_mmap,
- .sendpage = sock_no_sendpage,
-};
-
-MODULE_DESCRIPTION("The Linux DECnet Network Protocol");
-MODULE_AUTHOR("Linux DECnet Project Team");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NETPROTO(PF_DECnet);
-
-static const char banner[] __initconst = KERN_INFO
-"NET4: DECnet for Linux: V.2.5.68s (C) 1995-2003 Linux DECnet Project Team\n";
-
-static int __init decnet_init(void)
-{
- int rc;
-
- printk(banner);
-
- rc = proto_register(&dn_proto, 1);
- if (rc != 0)
- goto out;
-
- dn_neigh_init();
- dn_dev_init();
- dn_route_init();
- dn_fib_init();
-
- sock_register(&dn_family_ops);
- dev_add_pack(&dn_dix_packet_type);
- register_netdevice_notifier(&dn_dev_notifier);
-
- proc_create_seq_private("decnet", 0444, init_net.proc_net,
- &dn_socket_seq_ops, sizeof(struct dn_iter_state),
- NULL);
- dn_register_sysctl();
-out:
- return rc;
-
-}
-module_init(decnet_init);
-
-/*
- * Prevent DECnet module unloading until it's fixed properly.
- * Requires an audit of the code to check for memory leaks and
- * initialisation problems etc.
- */
-#if 0
-static void __exit decnet_exit(void)
-{
- sock_unregister(AF_DECnet);
- rtnl_unregister_all(PF_DECnet);
- dev_remove_pack(&dn_dix_packet_type);
-
- dn_unregister_sysctl();
-
- unregister_netdevice_notifier(&dn_dev_notifier);
-
- dn_route_cleanup();
- dn_dev_cleanup();
- dn_neigh_cleanup();
- dn_fib_cleanup();
-
- remove_proc_entry("decnet", init_net.proc_net);
-
- proto_unregister(&dn_proto);
-
- rcu_barrier(); /* Wait for completion of call_rcu()'s */
-}
-module_exit(decnet_exit);
-#endif
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
deleted file mode 100644
index 0ee7d4c0c955..000000000000
--- a/net/decnet/dn_dev.c
+++ /dev/null
@@ -1,1433 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Device Layer
- *
- * Authors: Steve Whitehouse <SteveW@ACM.org>
- * Eduardo Marcelo Serrat <emserrat@geocities.com>
- *
- * Changes:
- * Steve Whitehouse : Devices now see incoming frames so they
- * can mark on who it came from.
- * Steve Whitehouse : Fixed bug in creating neighbours. Each neighbour
- * can now have a device specific setup func.
- * Steve Whitehouse : Added /proc/sys/net/decnet/conf/<dev>/
- * Steve Whitehouse : Fixed bug which sometimes killed timer
- * Steve Whitehouse : Multiple ifaddr support
- * Steve Whitehouse : SIOCGIFCONF is now a compile time option
- * Steve Whitehouse : /proc/sys/net/decnet/conf/<sys>/forwarding
- * Steve Whitehouse : Removed timer1 - it's a user space issue now
- * Patrick Caulfield : Fixed router hello message format
- * Steve Whitehouse : Got rid of constant sizes for blksize for
- * devices. All mtu based now.
- */
-
-#include <linux/capability.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/if_addr.h>
-#include <linux/if_arp.h>
-#include <linux/if_ether.h>
-#include <linux/skbuff.h>
-#include <linux/sysctl.h>
-#include <linux/notifier.h>
-#include <linux/slab.h>
-#include <linux/jiffies.h>
-#include <linux/uaccess.h>
-#include <net/net_namespace.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/fib_rules.h>
-#include <net/netlink.h>
-#include <net/dn.h>
-#include <net/dn_dev.h>
-#include <net/dn_route.h>
-#include <net/dn_neigh.h>
-#include <net/dn_fib.h>
-
-#define DN_IFREQ_SIZE (offsetof(struct ifreq, ifr_ifru) + sizeof(struct sockaddr_dn))
-
-static char dn_rt_all_end_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x04,0x00,0x00};
-static char dn_rt_all_rt_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x03,0x00,0x00};
-static char dn_hiord[ETH_ALEN] = {0xAA,0x00,0x04,0x00,0x00,0x00};
-static unsigned char dn_eco_version[3] = {0x02,0x00,0x00};
-
-extern struct neigh_table dn_neigh_table;
-
-/*
- * decnet_address is kept in network order.
- */
-__le16 decnet_address = 0;
-
-static DEFINE_SPINLOCK(dndev_lock);
-static struct net_device *decnet_default_device;
-static BLOCKING_NOTIFIER_HEAD(dnaddr_chain);
-
-static struct dn_dev *dn_dev_create(struct net_device *dev, int *err);
-static void dn_dev_delete(struct net_device *dev);
-static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa);
-
-static int dn_eth_up(struct net_device *);
-static void dn_eth_down(struct net_device *);
-static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa);
-static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa);
-
-static struct dn_dev_parms dn_dev_list[] = {
-{
- .type = ARPHRD_ETHER, /* Ethernet */
- .mode = DN_DEV_BCAST,
- .state = DN_DEV_S_RU,
- .t2 = 1,
- .t3 = 10,
- .name = "ethernet",
- .up = dn_eth_up,
- .down = dn_eth_down,
- .timer3 = dn_send_brd_hello,
-},
-{
- .type = ARPHRD_IPGRE, /* DECnet tunneled over GRE in IP */
- .mode = DN_DEV_BCAST,
- .state = DN_DEV_S_RU,
- .t2 = 1,
- .t3 = 10,
- .name = "ipgre",
- .timer3 = dn_send_brd_hello,
-},
-#if 0
-{
- .type = ARPHRD_X25, /* Bog standard X.25 */
- .mode = DN_DEV_UCAST,
- .state = DN_DEV_S_DS,
- .t2 = 1,
- .t3 = 120,
- .name = "x25",
- .timer3 = dn_send_ptp_hello,
-},
-#endif
-#if 0
-{
- .type = ARPHRD_PPP, /* DECnet over PPP */
- .mode = DN_DEV_BCAST,
- .state = DN_DEV_S_RU,
- .t2 = 1,
- .t3 = 10,
- .name = "ppp",
- .timer3 = dn_send_brd_hello,
-},
-#endif
-{
- .type = ARPHRD_DDCMP, /* DECnet over DDCMP */
- .mode = DN_DEV_UCAST,
- .state = DN_DEV_S_DS,
- .t2 = 1,
- .t3 = 120,
- .name = "ddcmp",
- .timer3 = dn_send_ptp_hello,
-},
-{
- .type = ARPHRD_LOOPBACK, /* Loopback interface - always last */
- .mode = DN_DEV_BCAST,
- .state = DN_DEV_S_RU,
- .t2 = 1,
- .t3 = 10,
- .name = "loopback",
- .timer3 = dn_send_brd_hello,
-}
-};
-
-#define DN_DEV_LIST_SIZE ARRAY_SIZE(dn_dev_list)
-
-#define DN_DEV_PARMS_OFFSET(x) offsetof(struct dn_dev_parms, x)
-
-#ifdef CONFIG_SYSCTL
-
-static int min_t2[] = { 1 };
-static int max_t2[] = { 60 }; /* No max specified, but this seems sensible */
-static int min_t3[] = { 1 };
-static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MULT or T3MULT */
-
-static int min_priority[1];
-static int max_priority[] = { 127 }; /* From DECnet spec */
-
-static int dn_forwarding_proc(struct ctl_table *, int, void *, size_t *,
- loff_t *);
-static struct dn_dev_sysctl_table {
- struct ctl_table_header *sysctl_header;
- struct ctl_table dn_dev_vars[5];
-} dn_dev_sysctl = {
- NULL,
- {
- {
- .procname = "forwarding",
- .data = (void *)DN_DEV_PARMS_OFFSET(forwarding),
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = dn_forwarding_proc,
- },
- {
- .procname = "priority",
- .data = (void *)DN_DEV_PARMS_OFFSET(priority),
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_priority,
- .extra2 = &max_priority
- },
- {
- .procname = "t2",
- .data = (void *)DN_DEV_PARMS_OFFSET(t2),
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_t2,
- .extra2 = &max_t2
- },
- {
- .procname = "t3",
- .data = (void *)DN_DEV_PARMS_OFFSET(t3),
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_t3,
- .extra2 = &max_t3
- },
- { }
- },
-};
-
-static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms)
-{
- struct dn_dev_sysctl_table *t;
- int i;
-
- char path[sizeof("net/decnet/conf/") + IFNAMSIZ];
-
- t = kmemdup(&dn_dev_sysctl, sizeof(*t), GFP_KERNEL);
- if (t == NULL)
- return;
-
- for(i = 0; i < ARRAY_SIZE(t->dn_dev_vars) - 1; i++) {
- long offset = (long)t->dn_dev_vars[i].data;
- t->dn_dev_vars[i].data = ((char *)parms) + offset;
- }
-
- snprintf(path, sizeof(path), "net/decnet/conf/%s",
-		 dev ? dev->name : parms->name);
-
- t->dn_dev_vars[0].extra1 = (void *)dev;
-
- t->sysctl_header = register_net_sysctl(&init_net, path, t->dn_dev_vars);
- if (t->sysctl_header == NULL)
- kfree(t);
- else
- parms->sysctl = t;
-}
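
Editor's note: the sysctl table trick above is worth spelling out. The template stores offsetof() values in the .data pointers; each registration kmemdup()s the template and rebases those offsets onto the per-device parms structure, giving one table per device from a single static definition. A self-contained sketch of the pattern (struct and field names are illustrative):

#include <stddef.h>
#include <stdio.h>

struct parms {
	int forwarding;
	int priority;
};

/* Template rows hold field offsets in .data; each instance rebases
 * them onto its own parms struct. */
struct var {
	const char *name;
	size_t off;
	int *data;
};

static const struct var template[] = {
	{ "forwarding", offsetof(struct parms, forwarding), NULL },
	{ "priority",   offsetof(struct parms, priority),   NULL },
};

int main(void)
{
	struct parms eth0 = { .forwarding = 1, .priority = 64 };
	struct var vars[2] = { template[0], template[1] };
	size_t i;

	for (i = 0; i < 2; i++)		/* the rebasing loop */
		vars[i].data = (int *)((char *)&eth0 + vars[i].off);

	printf("%s=%d %s=%d\n", vars[0].name, *vars[0].data,
	       vars[1].name, *vars[1].data);
	return 0;
}
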
-
-static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms)
-{
- if (parms->sysctl) {
- struct dn_dev_sysctl_table *t = parms->sysctl;
- parms->sysctl = NULL;
- unregister_net_sysctl_table(t->sysctl_header);
- kfree(t);
- }
-}
-
-static int dn_forwarding_proc(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
-#ifdef CONFIG_DECNET_ROUTER
- struct net_device *dev = table->extra1;
- struct dn_dev *dn_db;
- int err;
- int tmp, old;
-
- if (table->extra1 == NULL)
- return -EINVAL;
-
- dn_db = rcu_dereference_raw(dev->dn_ptr);
- old = dn_db->parms.forwarding;
-
- err = proc_dointvec(table, write, buffer, lenp, ppos);
-
- if ((err >= 0) && write) {
- if (dn_db->parms.forwarding < 0)
- dn_db->parms.forwarding = 0;
- if (dn_db->parms.forwarding > 2)
- dn_db->parms.forwarding = 2;
- /*
-		 * What an ugly hack this is... it works, just. It
- * would be nice if sysctl/proc were just that little
- * bit more flexible so I don't have to write a special
- * routine, or suffer hacks like this - SJW
- */
- tmp = dn_db->parms.forwarding;
- dn_db->parms.forwarding = old;
- if (dn_db->parms.down)
- dn_db->parms.down(dev);
- dn_db->parms.forwarding = tmp;
- if (dn_db->parms.up)
- dn_db->parms.up(dev);
- }
-
- return err;
-#else
- return -EINVAL;
-#endif
-}
-
-#else /* CONFIG_SYSCTL */
-static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms)
-{
-}
-static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms)
-{
-}
-
-#endif /* CONFIG_SYSCTL */
-
-static inline __u16 mtu2blksize(struct net_device *dev)
-{
- u32 blksize = dev->mtu;
- if (blksize > 0xffff)
- blksize = 0xffff;
-
- if (dev->type == ARPHRD_ETHER ||
- dev->type == ARPHRD_PPP ||
- dev->type == ARPHRD_IPGRE ||
- dev->type == ARPHRD_LOOPBACK)
- blksize -= 2;
-
- return (__u16)blksize;
-}
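
Editor's note: so the advertised block size is simply the device MTU clamped to 16 bits, minus the 2-byte length field that Ethernet-style encapsulations prepend. A user-space equivalent:

#include <stdio.h>

static unsigned short mtu2blksize(unsigned int mtu, int eth_style)
{
	if (mtu > 0xffff)
		mtu = 0xffff;		/* blksize is a 16-bit field */
	if (eth_style)
		mtu -= 2;		/* length word prepended by the framing */
	return (unsigned short)mtu;
}

int main(void)
{
	printf("%u\n", mtu2blksize(1500, 1));	/* 1498 */
	return 0;
}
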
-
-static struct dn_ifaddr *dn_dev_alloc_ifa(void)
-{
- struct dn_ifaddr *ifa;
-
- ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
-
- return ifa;
-}
-
-static void dn_dev_free_ifa(struct dn_ifaddr *ifa)
-{
- kfree_rcu(ifa, rcu);
-}
-
-static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr __rcu **ifap, int destroy)
-{
- struct dn_ifaddr *ifa1 = rtnl_dereference(*ifap);
- unsigned char mac_addr[6];
- struct net_device *dev = dn_db->dev;
-
- ASSERT_RTNL();
-
- *ifap = ifa1->ifa_next;
-
- if (dn_db->dev->type == ARPHRD_ETHER) {
- if (ifa1->ifa_local != dn_eth2dn(dev->dev_addr)) {
- dn_dn2eth(mac_addr, ifa1->ifa_local);
- dev_mc_del(dev, mac_addr);
- }
- }
-
- dn_ifaddr_notify(RTM_DELADDR, ifa1);
- blocking_notifier_call_chain(&dnaddr_chain, NETDEV_DOWN, ifa1);
- if (destroy) {
- dn_dev_free_ifa(ifa1);
-
- if (dn_db->ifa_list == NULL)
- dn_dev_delete(dn_db->dev);
- }
-}
-
-static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
-{
- struct net_device *dev = dn_db->dev;
- struct dn_ifaddr *ifa1;
- unsigned char mac_addr[6];
-
- ASSERT_RTNL();
-
- /* Check for duplicates */
- for (ifa1 = rtnl_dereference(dn_db->ifa_list);
- ifa1 != NULL;
- ifa1 = rtnl_dereference(ifa1->ifa_next)) {
- if (ifa1->ifa_local == ifa->ifa_local)
- return -EEXIST;
- }
-
- if (dev->type == ARPHRD_ETHER) {
- if (ifa->ifa_local != dn_eth2dn(dev->dev_addr)) {
- dn_dn2eth(mac_addr, ifa->ifa_local);
- dev_mc_add(dev, mac_addr);
- }
- }
-
- ifa->ifa_next = dn_db->ifa_list;
- rcu_assign_pointer(dn_db->ifa_list, ifa);
-
- dn_ifaddr_notify(RTM_NEWADDR, ifa);
- blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa);
-
- return 0;
-}
-
-static int dn_dev_set_ifa(struct net_device *dev, struct dn_ifaddr *ifa)
-{
- struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
- int rv;
-
- if (dn_db == NULL) {
- int err;
- dn_db = dn_dev_create(dev, &err);
- if (dn_db == NULL)
- return err;
- }
-
- ifa->ifa_dev = dn_db;
-
- if (dev->flags & IFF_LOOPBACK)
- ifa->ifa_scope = RT_SCOPE_HOST;
-
- rv = dn_dev_insert_ifa(dn_db, ifa);
- if (rv)
- dn_dev_free_ifa(ifa);
- return rv;
-}
-
-
-int dn_dev_ioctl(unsigned int cmd, void __user *arg)
-{
- char buffer[DN_IFREQ_SIZE];
- struct ifreq *ifr = (struct ifreq *)buffer;
- struct sockaddr_dn *sdn = (struct sockaddr_dn *)&ifr->ifr_addr;
- struct dn_dev *dn_db;
- struct net_device *dev;
- struct dn_ifaddr *ifa = NULL;
- struct dn_ifaddr __rcu **ifap = NULL;
- int ret = 0;
-
- if (copy_from_user(ifr, arg, DN_IFREQ_SIZE))
- return -EFAULT;
- ifr->ifr_name[IFNAMSIZ-1] = 0;
-
- dev_load(&init_net, ifr->ifr_name);
-
- switch (cmd) {
- case SIOCGIFADDR:
- break;
- case SIOCSIFADDR:
- if (!capable(CAP_NET_ADMIN))
- return -EACCES;
- if (sdn->sdn_family != AF_DECnet)
- return -EINVAL;
- break;
- default:
- return -EINVAL;
- }
-
- rtnl_lock();
-
- if ((dev = __dev_get_by_name(&init_net, ifr->ifr_name)) == NULL) {
- ret = -ENODEV;
- goto done;
- }
-
- if ((dn_db = rtnl_dereference(dev->dn_ptr)) != NULL) {
- for (ifap = &dn_db->ifa_list;
- (ifa = rtnl_dereference(*ifap)) != NULL;
- ifap = &ifa->ifa_next)
- if (strcmp(ifr->ifr_name, ifa->ifa_label) == 0)
- break;
- }
-
- if (ifa == NULL && cmd != SIOCSIFADDR) {
- ret = -EADDRNOTAVAIL;
- goto done;
- }
-
- switch (cmd) {
- case SIOCGIFADDR:
- *((__le16 *)sdn->sdn_nodeaddr) = ifa->ifa_local;
- if (copy_to_user(arg, ifr, DN_IFREQ_SIZE))
- ret = -EFAULT;
- break;
-
- case SIOCSIFADDR:
- if (!ifa) {
- if ((ifa = dn_dev_alloc_ifa()) == NULL) {
- ret = -ENOBUFS;
- break;
- }
- memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
- } else {
- if (ifa->ifa_local == dn_saddr2dn(sdn))
- break;
- dn_dev_del_ifa(dn_db, ifap, 0);
- }
-
- ifa->ifa_local = ifa->ifa_address = dn_saddr2dn(sdn);
-
- ret = dn_dev_set_ifa(dev, ifa);
- }
-done:
- rtnl_unlock();
-
- return ret;
-}
-
-struct net_device *dn_dev_get_default(void)
-{
- struct net_device *dev;
-
- spin_lock(&dndev_lock);
- dev = decnet_default_device;
- if (dev) {
- if (dev->dn_ptr)
- dev_hold(dev);
- else
- dev = NULL;
- }
- spin_unlock(&dndev_lock);
-
- return dev;
-}
-
-int dn_dev_set_default(struct net_device *dev, int force)
-{
- struct net_device *old = NULL;
- int rv = -EBUSY;
- if (!dev->dn_ptr)
- return -ENODEV;
-
- spin_lock(&dndev_lock);
- if (force || decnet_default_device == NULL) {
- old = decnet_default_device;
- decnet_default_device = dev;
- rv = 0;
- }
- spin_unlock(&dndev_lock);
-
- dev_put(old);
- return rv;
-}
-
-static void dn_dev_check_default(struct net_device *dev)
-{
- spin_lock(&dndev_lock);
- if (dev == decnet_default_device) {
- decnet_default_device = NULL;
- } else {
- dev = NULL;
- }
- spin_unlock(&dndev_lock);
-
- dev_put(dev);
-}
-
-/*
- * Called with RTNL
- */
-static struct dn_dev *dn_dev_by_index(int ifindex)
-{
- struct net_device *dev;
- struct dn_dev *dn_dev = NULL;
-
- dev = __dev_get_by_index(&init_net, ifindex);
- if (dev)
- dn_dev = rtnl_dereference(dev->dn_ptr);
-
- return dn_dev;
-}
-
-static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = {
- [IFA_ADDRESS] = { .type = NLA_U16 },
- [IFA_LOCAL] = { .type = NLA_U16 },
- [IFA_LABEL] = { .type = NLA_STRING,
- .len = IFNAMSIZ - 1 },
- [IFA_FLAGS] = { .type = NLA_U32 },
-};
-
-static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(skb->sk);
- struct nlattr *tb[IFA_MAX+1];
- struct dn_dev *dn_db;
- struct ifaddrmsg *ifm;
- struct dn_ifaddr *ifa;
- struct dn_ifaddr __rcu **ifap;
- int err = -EINVAL;
-
- if (!netlink_capable(skb, CAP_NET_ADMIN))
- return -EPERM;
-
- if (!net_eq(net, &init_net))
- goto errout;
-
- err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
- dn_ifa_policy, extack);
- if (err < 0)
- goto errout;
-
- err = -ENODEV;
- ifm = nlmsg_data(nlh);
- if ((dn_db = dn_dev_by_index(ifm->ifa_index)) == NULL)
- goto errout;
-
- err = -EADDRNOTAVAIL;
- for (ifap = &dn_db->ifa_list;
- (ifa = rtnl_dereference(*ifap)) != NULL;
- ifap = &ifa->ifa_next) {
- if (tb[IFA_LOCAL] &&
- nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2))
- continue;
-
- if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
- continue;
-
- dn_dev_del_ifa(dn_db, ifap, 1);
- return 0;
- }
-
-errout:
- return err;
-}
-
-static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(skb->sk);
- struct nlattr *tb[IFA_MAX+1];
- struct net_device *dev;
- struct dn_dev *dn_db;
- struct ifaddrmsg *ifm;
- struct dn_ifaddr *ifa;
- int err;
-
- if (!netlink_capable(skb, CAP_NET_ADMIN))
- return -EPERM;
-
- if (!net_eq(net, &init_net))
- return -EINVAL;
-
- err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
- dn_ifa_policy, extack);
- if (err < 0)
- return err;
-
- if (tb[IFA_LOCAL] == NULL)
- return -EINVAL;
-
- ifm = nlmsg_data(nlh);
- if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL)
- return -ENODEV;
-
- if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) {
- dn_db = dn_dev_create(dev, &err);
- if (!dn_db)
- return err;
- }
-
- if ((ifa = dn_dev_alloc_ifa()) == NULL)
- return -ENOBUFS;
-
- if (tb[IFA_ADDRESS] == NULL)
- tb[IFA_ADDRESS] = tb[IFA_LOCAL];
-
- ifa->ifa_local = nla_get_le16(tb[IFA_LOCAL]);
- ifa->ifa_address = nla_get_le16(tb[IFA_ADDRESS]);
- ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
- ifm->ifa_flags;
- ifa->ifa_scope = ifm->ifa_scope;
- ifa->ifa_dev = dn_db;
-
- if (tb[IFA_LABEL])
- nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
- else
- memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
-
- err = dn_dev_insert_ifa(dn_db, ifa);
- if (err)
- dn_dev_free_ifa(ifa);
-
- return err;
-}
-
-static inline size_t dn_ifaddr_nlmsg_size(void)
-{
- return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
- + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
- + nla_total_size(2) /* IFA_ADDRESS */
- + nla_total_size(2) /* IFA_LOCAL */
- + nla_total_size(4); /* IFA_FLAGS */
-}
-
-static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa,
- u32 portid, u32 seq, int event, unsigned int flags)
-{
- struct ifaddrmsg *ifm;
- struct nlmsghdr *nlh;
- u32 ifa_flags = ifa->ifa_flags | IFA_F_PERMANENT;
-
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
- if (nlh == NULL)
- return -EMSGSIZE;
-
- ifm = nlmsg_data(nlh);
- ifm->ifa_family = AF_DECnet;
- ifm->ifa_prefixlen = 16;
- ifm->ifa_flags = ifa_flags;
- ifm->ifa_scope = ifa->ifa_scope;
- ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
-
- if ((ifa->ifa_address &&
- nla_put_le16(skb, IFA_ADDRESS, ifa->ifa_address)) ||
- (ifa->ifa_local &&
- nla_put_le16(skb, IFA_LOCAL, ifa->ifa_local)) ||
- (ifa->ifa_label[0] &&
- nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
- nla_put_u32(skb, IFA_FLAGS, ifa_flags))
- goto nla_put_failure;
- nlmsg_end(skb, nlh);
- return 0;
-
-nla_put_failure:
- nlmsg_cancel(skb, nlh);
- return -EMSGSIZE;
-}
-
-static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa)
-{
- struct sk_buff *skb;
- int err = -ENOBUFS;
-
- skb = alloc_skb(dn_ifaddr_nlmsg_size(), GFP_KERNEL);
- if (skb == NULL)
- goto errout;
-
- err = dn_nl_fill_ifaddr(skb, ifa, 0, 0, event, 0);
- if (err < 0) {
- /* -EMSGSIZE implies BUG in dn_ifaddr_nlmsg_size() */
- WARN_ON(err == -EMSGSIZE);
- kfree_skb(skb);
- goto errout;
- }
- rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
- return;
-errout:
- if (err < 0)
- rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err);
-}
-
-static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct net *net = sock_net(skb->sk);
- int idx, dn_idx = 0, skip_ndevs, skip_naddr;
- struct net_device *dev;
- struct dn_dev *dn_db;
- struct dn_ifaddr *ifa;
-
- if (!net_eq(net, &init_net))
- return 0;
-
- skip_ndevs = cb->args[0];
- skip_naddr = cb->args[1];
-
- idx = 0;
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, dev) {
- if (idx < skip_ndevs)
- goto cont;
- else if (idx > skip_ndevs) {
- /* Only skip over addresses for first dev dumped
- * in this iteration (idx == skip_ndevs) */
- skip_naddr = 0;
- }
-
- if ((dn_db = rcu_dereference(dev->dn_ptr)) == NULL)
- goto cont;
-
- for (ifa = rcu_dereference(dn_db->ifa_list), dn_idx = 0; ifa;
- ifa = rcu_dereference(ifa->ifa_next), dn_idx++) {
- if (dn_idx < skip_naddr)
- continue;
-
- if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, RTM_NEWADDR,
- NLM_F_MULTI) < 0)
- goto done;
- }
-cont:
- idx++;
- }
-done:
- rcu_read_unlock();
- cb->args[0] = idx;
- cb->args[1] = dn_idx;
-
- return skb->len;
-}
-
-static int dn_dev_get_first(struct net_device *dev, __le16 *addr)
-{
- struct dn_dev *dn_db;
- struct dn_ifaddr *ifa;
- int rv = -ENODEV;
-
- rcu_read_lock();
- dn_db = rcu_dereference(dev->dn_ptr);
- if (dn_db == NULL)
- goto out;
-
- ifa = rcu_dereference(dn_db->ifa_list);
- if (ifa != NULL) {
- *addr = ifa->ifa_local;
- rv = 0;
- }
-out:
- rcu_read_unlock();
- return rv;
-}
-
-/*
- * Find a default address to bind to.
- *
- * This is one of those areas where the initial VMS concepts don't really
- * map onto the Linux concepts, and since we introduced multiple addresses
- * per interface we have to cope with slightly odd ways of finding out what
- * "our address" really is. Mostly it's not a problem; for this we just guess
- * a sensible default. Eventually the routing code will take care of all the
- * nasties for us I hope.
- */
-int dn_dev_bind_default(__le16 *addr)
-{
- struct net_device *dev;
- int rv;
- dev = dn_dev_get_default();
-last_chance:
- if (dev) {
- rv = dn_dev_get_first(dev, addr);
- dev_put(dev);
- if (rv == 0 || dev == init_net.loopback_dev)
- return rv;
- }
- dev = init_net.loopback_dev;
- dev_hold(dev);
- goto last_chance;
-}
-
-static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
-{
- struct endnode_hello_message *msg;
- struct sk_buff *skb = NULL;
- __le16 *pktlen;
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
-
- if ((skb = dn_alloc_skb(NULL, sizeof(*msg), GFP_ATOMIC)) == NULL)
- return;
-
- skb->dev = dev;
-
- msg = skb_put(skb, sizeof(*msg));
-
- msg->msgflg = 0x0D;
- memcpy(msg->tiver, dn_eco_version, 3);
- dn_dn2eth(msg->id, ifa->ifa_local);
- msg->iinfo = DN_RT_INFO_ENDN;
- msg->blksize = cpu_to_le16(mtu2blksize(dev));
- msg->area = 0x00;
- memset(msg->seed, 0, 8);
- memcpy(msg->neighbor, dn_hiord, ETH_ALEN);
-
- if (dn_db->router) {
- struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
- dn_dn2eth(msg->neighbor, dn->addr);
- }
-
- msg->timer = cpu_to_le16((unsigned short)dn_db->parms.t3);
- msg->mpd = 0x00;
- msg->datalen = 0x02;
- memset(msg->data, 0xAA, 2);
-
- pktlen = skb_push(skb, 2);
- *pktlen = cpu_to_le16(skb->len - 2);
-
- skb_reset_network_header(skb);
-
- dn_rt_finish_output(skb, dn_rt_all_rt_mcast, msg->id);
-}
-
-
-#define DRDELAY (5 * HZ)
-
-static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn_ifaddr *ifa)
-{
- /* First check time since device went up */
- if (time_before(jiffies, dn_db->uptime + DRDELAY))
- return 0;
-
- /* If there is no router, then yes... */
- if (!dn_db->router)
- return 1;
-
- /* otherwise only if we have a higher priority or.. */
- if (dn->priority < dn_db->parms.priority)
- return 1;
-
- /* if we have equal priority and a higher node number */
- if (dn->priority != dn_db->parms.priority)
- return 0;
-
- if (le16_to_cpu(dn->addr) < le16_to_cpu(ifa->ifa_local))
- return 1;
-
- return 0;
-}
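
A minimal user-space sketch of the election rule dn_am_i_a_router() applies (the names here are illustrative, not kernel API): after the DRDELAY holdoff, a node claims designated-router status when it beats the current router on priority, with equal priorities broken in favour of the higher node address.

#include <stdio.h>

static int wins_election(int my_prio, int rtr_prio,
			 unsigned my_addr, unsigned rtr_addr)
{
	if (rtr_prio < my_prio)		/* higher priority wins outright */
		return 1;
	if (rtr_prio != my_prio)	/* lower priority always loses */
		return 0;
	return rtr_addr < my_addr;	/* tie broken by node address */
}

int main(void)
{
	/* equal priority, our address 1.2 beats the router's 1.1 */
	printf("%d\n", wins_election(64, 64, 0x0402, 0x0401));
	return 0;
}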
-
-static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
-{
- int n;
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
- struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
- struct sk_buff *skb;
- size_t size;
- unsigned char *ptr;
- unsigned char *i1, *i2;
- __le16 *pktlen;
- char *src;
-
- if (mtu2blksize(dev) < (26 + 7))
- return;
-
- n = mtu2blksize(dev) - 26;
- n /= 7;
-
- if (n > 32)
- n = 32;
-
- size = 2 + 26 + 7 * n;
-
- if ((skb = dn_alloc_skb(NULL, size, GFP_ATOMIC)) == NULL)
- return;
-
- skb->dev = dev;
- ptr = skb_put(skb, size);
-
- *ptr++ = DN_RT_PKT_CNTL | DN_RT_PKT_ERTH;
- *ptr++ = 2; /* ECO */
- *ptr++ = 0;
- *ptr++ = 0;
- dn_dn2eth(ptr, ifa->ifa_local);
- src = ptr;
- ptr += ETH_ALEN;
- *ptr++ = dn_db->parms.forwarding == 1 ?
- DN_RT_INFO_L1RT : DN_RT_INFO_L2RT;
- *((__le16 *)ptr) = cpu_to_le16(mtu2blksize(dev));
- ptr += 2;
- *ptr++ = dn_db->parms.priority; /* Priority */
- *ptr++ = 0; /* Area: Reserved */
- *((__le16 *)ptr) = cpu_to_le16((unsigned short)dn_db->parms.t3);
- ptr += 2;
- *ptr++ = 0; /* MPD: Reserved */
- i1 = ptr++;
- memset(ptr, 0, 7); /* Name: Reserved */
- ptr += 7;
- i2 = ptr++;
-
- n = dn_neigh_elist(dev, ptr, n);
-
- *i2 = 7 * n;
- *i1 = 8 + *i2;
-
- skb_trim(skb, (27 + *i2));
-
- pktlen = skb_push(skb, 2);
- *pktlen = cpu_to_le16(skb->len - 2);
-
- skb_reset_network_header(skb);
-
- if (dn_am_i_a_router(dn, dn_db, ifa)) {
- struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
- if (skb2) {
- dn_rt_finish_output(skb2, dn_rt_all_end_mcast, src);
- }
- }
-
- dn_rt_finish_output(skb, dn_rt_all_rt_mcast, src);
-}
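
Worked numbers for the sizing above (illustrative arithmetic only): each advertised router entry occupies 7 bytes — a 6-byte id plus a state/priority byte, as built by dn_neigh_elist() — after the 26-byte fixed header, capped at 32 entries. An Ethernet blksize of 1498 would allow (1498 - 26) / 7 = 210 entries, clamped to 32, giving a 2 + 26 + 7 * 32 = 252 byte hello.

#include <stdio.h>

int main(void)
{
	unsigned blksize = 1498;	/* mtu 1500 minus the length word */
	unsigned n = (blksize - 26) / 7;

	if (n > 32)
		n = 32;
	printf("router hello size = %u\n", 2 + 26 + 7 * n);	/* 252 */
	return 0;
}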
-
-static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa)
-{
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
-
- if (dn_db->parms.forwarding == 0)
- dn_send_endnode_hello(dev, ifa);
- else
- dn_send_router_hello(dev, ifa);
-}
-
-static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa)
-{
- int tdlen = 16;
- int size = dev->hard_header_len + 2 + 4 + tdlen;
- struct sk_buff *skb = dn_alloc_skb(NULL, size, GFP_ATOMIC);
- int i;
- unsigned char *ptr;
- char src[ETH_ALEN];
-
- if (skb == NULL)
-		return;
-
- skb->dev = dev;
- skb_push(skb, dev->hard_header_len);
- ptr = skb_put(skb, 2 + 4 + tdlen);
-
- *ptr++ = DN_RT_PKT_HELO;
- *((__le16 *)ptr) = ifa->ifa_local;
- ptr += 2;
- *ptr++ = tdlen;
-
- for(i = 0; i < tdlen; i++)
- *ptr++ = 0252;
-
- dn_dn2eth(src, ifa->ifa_local);
- dn_rt_finish_output(skb, dn_rt_all_rt_mcast, src);
-}
-
-static int dn_eth_up(struct net_device *dev)
-{
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
-
- if (dn_db->parms.forwarding == 0)
- dev_mc_add(dev, dn_rt_all_end_mcast);
- else
- dev_mc_add(dev, dn_rt_all_rt_mcast);
-
- dn_db->use_long = 1;
-
- return 0;
-}
-
-static void dn_eth_down(struct net_device *dev)
-{
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
-
- if (dn_db->parms.forwarding == 0)
- dev_mc_del(dev, dn_rt_all_end_mcast);
- else
- dev_mc_del(dev, dn_rt_all_rt_mcast);
-}
-
-static void dn_dev_set_timer(struct net_device *dev);
-
-static void dn_dev_timer_func(struct timer_list *t)
-{
- struct dn_dev *dn_db = from_timer(dn_db, t, timer);
- struct net_device *dev;
- struct dn_ifaddr *ifa;
-
- rcu_read_lock();
- dev = dn_db->dev;
- if (dn_db->t3 <= dn_db->parms.t2) {
- if (dn_db->parms.timer3) {
- for (ifa = rcu_dereference(dn_db->ifa_list);
- ifa;
- ifa = rcu_dereference(ifa->ifa_next)) {
- if (!(ifa->ifa_flags & IFA_F_SECONDARY))
- dn_db->parms.timer3(dev, ifa);
- }
- }
- dn_db->t3 = dn_db->parms.t3;
- } else {
- dn_db->t3 -= dn_db->parms.t2;
- }
- rcu_read_unlock();
- dn_dev_set_timer(dev);
-}
-
-static void dn_dev_set_timer(struct net_device *dev)
-{
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
-
- if (dn_db->parms.t2 > dn_db->parms.t3)
- dn_db->parms.t2 = dn_db->parms.t3;
-
- dn_db->timer.expires = jiffies + (dn_db->parms.t2 * HZ);
-
- add_timer(&dn_db->timer);
-}
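
A standalone sketch of the two-rate timer above: the timer ticks every t2 seconds, and the hello routine runs whenever the remaining t3 budget falls to one tick or less, after which t3 is reloaded. The values below assume the Ethernet defaults of t2 = 1 and t3 = 10 from dn_dev_list.

#include <stdio.h>

int main(void)
{
	unsigned t2 = 1, t3_parm = 10, t3 = t3_parm;

	for (int tick = 1; tick <= 25; tick++) {
		if (t3 <= t2) {
			printf("hello at tick %d\n", tick);	/* 10, 20 */
			t3 = t3_parm;
		} else {
			t3 -= t2;
		}
	}
	return 0;
}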
-
-static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
-{
- int i;
- struct dn_dev_parms *p = dn_dev_list;
- struct dn_dev *dn_db;
-
- for(i = 0; i < DN_DEV_LIST_SIZE; i++, p++) {
- if (p->type == dev->type)
- break;
- }
-
- *err = -ENODEV;
- if (i == DN_DEV_LIST_SIZE)
- return NULL;
-
- *err = -ENOBUFS;
- if ((dn_db = kzalloc(sizeof(struct dn_dev), GFP_ATOMIC)) == NULL)
- return NULL;
-
- memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms));
-
- rcu_assign_pointer(dev->dn_ptr, dn_db);
- dn_db->dev = dev;
- timer_setup(&dn_db->timer, dn_dev_timer_func, 0);
-
- dn_db->uptime = jiffies;
-
- dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table);
- if (!dn_db->neigh_parms) {
- RCU_INIT_POINTER(dev->dn_ptr, NULL);
- kfree(dn_db);
- return NULL;
- }
-
- if (dn_db->parms.up) {
- if (dn_db->parms.up(dev) < 0) {
- neigh_parms_release(&dn_neigh_table, dn_db->neigh_parms);
- dev->dn_ptr = NULL;
- kfree(dn_db);
- return NULL;
- }
- }
-
- dn_dev_sysctl_register(dev, &dn_db->parms);
-
- dn_dev_set_timer(dev);
-
- *err = 0;
- return dn_db;
-}
-
-
-/*
- * This processes a device up event. We only start up
- * the loopback device & ethernet devices with correct
- * MAC addresses automatically. Others must be started
- * specifically.
- *
- * FIXME: How should we configure the loopback address? If we could dispense
- * with using decnet_address here and for autobind, it would be one less thing
- * for users to worry about setting up.
- */
-
-void dn_dev_up(struct net_device *dev)
-{
- struct dn_ifaddr *ifa;
- __le16 addr = decnet_address;
- int maybe_default = 0;
- struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
-
- if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK))
- return;
-
- /*
- * Need to ensure that loopback device has a dn_db attached to it
- * to allow creation of neighbours against it, even though it might
- * not have a local address of its own. Might as well do the same for
- * all autoconfigured interfaces.
- */
- if (dn_db == NULL) {
- int err;
- dn_db = dn_dev_create(dev, &err);
- if (dn_db == NULL)
- return;
- }
-
- if (dev->type == ARPHRD_ETHER) {
- if (memcmp(dev->dev_addr, dn_hiord, 4) != 0)
- return;
- addr = dn_eth2dn(dev->dev_addr);
- maybe_default = 1;
- }
-
- if (addr == 0)
- return;
-
- if ((ifa = dn_dev_alloc_ifa()) == NULL)
- return;
-
- ifa->ifa_local = ifa->ifa_address = addr;
- ifa->ifa_flags = 0;
- ifa->ifa_scope = RT_SCOPE_UNIVERSE;
- strcpy(ifa->ifa_label, dev->name);
-
- dn_dev_set_ifa(dev, ifa);
-
- /*
- * Automagically set the default device to the first automatically
- * configured ethernet card in the system.
- */
- if (maybe_default) {
- dev_hold(dev);
- if (dn_dev_set_default(dev, 0))
- dev_put(dev);
- }
-}
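
The autoconfiguration test above relies on the conventional DECnet HIORD prefix: an Ethernet device qualifies only if its MAC begins with AA:00:04:00, with the 16-bit node address stored little-endian in the last two octets. A standalone illustration of the mapping dn_eth2dn() performs, assuming that prefix is what dn_hiord holds:

#include <stdio.h>

int main(void)
{
	unsigned char mac[6] = { 0xAA, 0x00, 0x04, 0x00, 0x02, 0x04 };
	unsigned short addr = mac[4] | (mac[5] << 8);

	/* prints "MAC maps to DECnet 1.2" */
	printf("MAC maps to DECnet %u.%u\n", addr >> 10, addr & 0x3ff);
	return 0;
}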
-
-static void dn_dev_delete(struct net_device *dev)
-{
- struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
-
- if (dn_db == NULL)
- return;
-
- del_timer_sync(&dn_db->timer);
- dn_dev_sysctl_unregister(&dn_db->parms);
- dn_dev_check_default(dev);
- neigh_ifdown(&dn_neigh_table, dev);
-
- if (dn_db->parms.down)
- dn_db->parms.down(dev);
-
- dev->dn_ptr = NULL;
-
- neigh_parms_release(&dn_neigh_table, dn_db->neigh_parms);
- neigh_ifdown(&dn_neigh_table, dev);
-
- if (dn_db->router)
- neigh_release(dn_db->router);
- if (dn_db->peer)
- neigh_release(dn_db->peer);
-
- kfree(dn_db);
-}
-
-void dn_dev_down(struct net_device *dev)
-{
- struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
- struct dn_ifaddr *ifa;
-
- if (dn_db == NULL)
- return;
-
- while ((ifa = rtnl_dereference(dn_db->ifa_list)) != NULL) {
- dn_dev_del_ifa(dn_db, &dn_db->ifa_list, 0);
- dn_dev_free_ifa(ifa);
- }
-
- dn_dev_delete(dev);
-}
-
-void dn_dev_init_pkt(struct sk_buff *skb)
-{
-}
-
-void dn_dev_veri_pkt(struct sk_buff *skb)
-{
-}
-
-void dn_dev_hello(struct sk_buff *skb)
-{
-}
-
-void dn_dev_devices_off(void)
-{
- struct net_device *dev;
-
- rtnl_lock();
- for_each_netdev(&init_net, dev)
- dn_dev_down(dev);
- rtnl_unlock();
-
-}
-
-void dn_dev_devices_on(void)
-{
- struct net_device *dev;
-
- rtnl_lock();
- for_each_netdev(&init_net, dev) {
- if (dev->flags & IFF_UP)
- dn_dev_up(dev);
- }
- rtnl_unlock();
-}
-
-int register_dnaddr_notifier(struct notifier_block *nb)
-{
- return blocking_notifier_chain_register(&dnaddr_chain, nb);
-}
-
-int unregister_dnaddr_notifier(struct notifier_block *nb)
-{
- return blocking_notifier_chain_unregister(&dnaddr_chain, nb);
-}
-
-#ifdef CONFIG_PROC_FS
-static inline int is_dn_dev(struct net_device *dev)
-{
- return dev->dn_ptr != NULL;
-}
-
-static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(RCU)
-{
- int i;
- struct net_device *dev;
-
- rcu_read_lock();
-
- if (*pos == 0)
- return SEQ_START_TOKEN;
-
- i = 1;
- for_each_netdev_rcu(&init_net, dev) {
- if (!is_dn_dev(dev))
- continue;
-
- if (i++ == *pos)
- return dev;
- }
-
- return NULL;
-}
-
-static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct net_device *dev;
-
- ++*pos;
-
- dev = v;
- if (v == SEQ_START_TOKEN)
- dev = net_device_entry(&init_net.dev_base_head);
-
- for_each_netdev_continue_rcu(&init_net, dev) {
- if (!is_dn_dev(dev))
- continue;
-
- return dev;
- }
-
- return NULL;
-}
-
-static void dn_dev_seq_stop(struct seq_file *seq, void *v)
- __releases(RCU)
-{
- rcu_read_unlock();
-}
-
-static char *dn_type2asc(char type)
-{
- switch (type) {
- case DN_DEV_BCAST:
- return "B";
- case DN_DEV_UCAST:
- return "U";
- case DN_DEV_MPOINT:
- return "M";
- }
-
- return "?";
-}
-
-static int dn_dev_seq_show(struct seq_file *seq, void *v)
-{
- if (v == SEQ_START_TOKEN)
- seq_puts(seq, "Name Flags T1 Timer1 T3 Timer3 BlkSize Pri State DevType Router Peer\n");
- else {
- struct net_device *dev = v;
- char peer_buf[DN_ASCBUF_LEN];
- char router_buf[DN_ASCBUF_LEN];
- struct dn_dev *dn_db = rcu_dereference(dev->dn_ptr);
-
- seq_printf(seq, "%-8s %1s %04u %04u %04lu %04lu"
- " %04hu %03d %02x %-10s %-7s %-7s\n",
- dev->name,
- dn_type2asc(dn_db->parms.mode),
- 0, 0,
- dn_db->t3, dn_db->parms.t3,
- mtu2blksize(dev),
- dn_db->parms.priority,
- dn_db->parms.state, dn_db->parms.name,
- dn_db->router ? dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->router->primary_key), router_buf) : "",
- dn_db->peer ? dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->peer->primary_key), peer_buf) : "");
- }
- return 0;
-}
-
-static const struct seq_operations dn_dev_seq_ops = {
- .start = dn_dev_seq_start,
- .next = dn_dev_seq_next,
- .stop = dn_dev_seq_stop,
- .show = dn_dev_seq_show,
-};
-#endif /* CONFIG_PROC_FS */
-
-static int addr[2];
-module_param_array(addr, int, NULL, 0444);
-MODULE_PARM_DESC(addr, "The DECnet address of this machine: area,node");
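
An illustrative user-space check of the area/node packing that dn_dev_init() below applies to this parameter: addr=6,9 on the module command line becomes the 16-bit node address (6 << 10) | 9 = 0x1809, which is then stored little-endian as decnet_address.

#include <stdio.h>

int main(void)
{
	unsigned area = 6, node = 9;		/* addr=6,9 */
	unsigned short dn_addr = (area << 10) | node;

	printf("DECnet address %u.%u -> 0x%04x\n", area, node, dn_addr);
	return 0;
}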
-
-void __init dn_dev_init(void)
-{
- if (addr[0] > 63 || addr[0] < 0) {
-		printk(KERN_ERR "DECnet: Area must be between 0 and 63\n");
- return;
- }
-
- if (addr[1] > 1023 || addr[1] < 0) {
-		printk(KERN_ERR "DECnet: Node must be between 0 and 1023\n");
- return;
- }
-
- decnet_address = cpu_to_le16((addr[0] << 10) | addr[1]);
-
- dn_dev_devices_on();
-
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWADDR,
- dn_nl_newaddr, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELADDR,
- dn_nl_deladdr, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR,
- NULL, dn_nl_dump_ifaddr, 0);
-
- proc_create_seq("decnet_dev", 0444, init_net.proc_net, &dn_dev_seq_ops);
-
-#ifdef CONFIG_SYSCTL
- {
- int i;
- for(i = 0; i < DN_DEV_LIST_SIZE; i++)
- dn_dev_sysctl_register(NULL, &dn_dev_list[i]);
- }
-#endif /* CONFIG_SYSCTL */
-}
-
-void __exit dn_dev_cleanup(void)
-{
-#ifdef CONFIG_SYSCTL
- {
- int i;
- for(i = 0; i < DN_DEV_LIST_SIZE; i++)
- dn_dev_sysctl_unregister(&dn_dev_list[i]);
- }
-#endif /* CONFIG_SYSCTL */
-
- remove_proc_entry("decnet_dev", init_net.proc_net);
-
- dn_dev_devices_off();
-}
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
deleted file mode 100644
index 269c029ad74f..000000000000
--- a/net/decnet/dn_fib.c
+++ /dev/null
@@ -1,798 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Routing Forwarding Information Base (Glue/Info List)
- *
- * Author: Steve Whitehouse <SteveW@ACM.org>
- *
- *
- * Changes:
- * Alexey Kuznetsov : SMP locking changes
- * Steve Whitehouse : Rewrote it... Well to be more correct, I
- * copied most of it from the ipv4 fib code.
- * Steve Whitehouse : Updated it in style and fixed a few bugs
- * which were fixed in the ipv4 code since
- * this code was copied from it.
- *
- */
-#include <linux/string.h>
-#include <linux/net.h>
-#include <linux/socket.h>
-#include <linux/slab.h>
-#include <linux/sockios.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
-#include <linux/proc_fs.h>
-#include <linux/netdevice.h>
-#include <linux/timer.h>
-#include <linux/spinlock.h>
-#include <linux/atomic.h>
-#include <linux/uaccess.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/fib_rules.h>
-#include <net/dn.h>
-#include <net/dn_route.h>
-#include <net/dn_fib.h>
-#include <net/dn_neigh.h>
-#include <net/dn_dev.h>
-#include <net/rtnh.h>
-
-#define RT_MIN_TABLE 1
-
-#define for_fib_info() { struct dn_fib_info *fi;\
- for(fi = dn_fib_info_list; fi; fi = fi->fib_next)
-#define endfor_fib_info() }
-
-#define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\
- for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
-
-#define change_nexthops(fi) { int nhsel; struct dn_fib_nh *nh;\
- for(nhsel = 0, nh = (struct dn_fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
-
-#define endfor_nexthops(fi) }
-
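These iterator macros open a block that declares the loop cursor, so every use must be closed with the matching endfor_*() macro. A minimal standalone illustration of the pattern (simplified types, not the DECnet structures):

#include <stdio.h>

struct item { int v; struct item *next; };

#define for_items(head) { struct item *it; \
	for (it = (head); it; it = it->next)
#define endfor_items() }

int main(void)
{
	struct item c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };

	for_items(&a) {
		printf("%d\n", it->v);	/* 1 2 3 */
	} endfor_items();
	return 0;
}
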
-static DEFINE_SPINLOCK(dn_fib_multipath_lock);
-static struct dn_fib_info *dn_fib_info_list;
-static DEFINE_SPINLOCK(dn_fib_info_lock);
-
-static struct
-{
- int error;
- u8 scope;
-} dn_fib_props[RTN_MAX+1] = {
- [RTN_UNSPEC] = { .error = 0, .scope = RT_SCOPE_NOWHERE },
- [RTN_UNICAST] = { .error = 0, .scope = RT_SCOPE_UNIVERSE },
- [RTN_LOCAL] = { .error = 0, .scope = RT_SCOPE_HOST },
- [RTN_BROADCAST] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
- [RTN_ANYCAST] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
- [RTN_MULTICAST] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
- [RTN_BLACKHOLE] = { .error = -EINVAL, .scope = RT_SCOPE_UNIVERSE },
- [RTN_UNREACHABLE] = { .error = -EHOSTUNREACH, .scope = RT_SCOPE_UNIVERSE },
- [RTN_PROHIBIT] = { .error = -EACCES, .scope = RT_SCOPE_UNIVERSE },
- [RTN_THROW] = { .error = -EAGAIN, .scope = RT_SCOPE_UNIVERSE },
- [RTN_NAT] = { .error = 0, .scope = RT_SCOPE_NOWHERE },
- [RTN_XRESOLVE] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
-};
-
-static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force);
-static int dn_fib_sync_up(struct net_device *dev);
-
-void dn_fib_free_info(struct dn_fib_info *fi)
-{
- if (fi->fib_dead == 0) {
- printk(KERN_DEBUG "DECnet: BUG! Attempt to free alive dn_fib_info\n");
- return;
- }
-
- change_nexthops(fi) {
- dev_put(nh->nh_dev);
- nh->nh_dev = NULL;
- } endfor_nexthops(fi);
- kfree(fi);
-}
-
-void dn_fib_release_info(struct dn_fib_info *fi)
-{
- spin_lock(&dn_fib_info_lock);
- if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
- if (fi->fib_next)
- fi->fib_next->fib_prev = fi->fib_prev;
- if (fi->fib_prev)
- fi->fib_prev->fib_next = fi->fib_next;
- if (fi == dn_fib_info_list)
- dn_fib_info_list = fi->fib_next;
- fi->fib_dead = 1;
- dn_fib_info_put(fi);
- }
- spin_unlock(&dn_fib_info_lock);
-}
-
-static inline int dn_fib_nh_comp(const struct dn_fib_info *fi, const struct dn_fib_info *ofi)
-{
- const struct dn_fib_nh *onh = ofi->fib_nh;
-
- for_nexthops(fi) {
- if (nh->nh_oif != onh->nh_oif ||
- nh->nh_gw != onh->nh_gw ||
- nh->nh_scope != onh->nh_scope ||
- nh->nh_weight != onh->nh_weight ||
- ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
- return -1;
- onh++;
- } endfor_nexthops(fi);
- return 0;
-}
-
-static inline struct dn_fib_info *dn_fib_find_info(const struct dn_fib_info *nfi)
-{
- for_fib_info() {
- if (fi->fib_nhs != nfi->fib_nhs)
- continue;
- if (nfi->fib_protocol == fi->fib_protocol &&
- nfi->fib_prefsrc == fi->fib_prefsrc &&
- nfi->fib_priority == fi->fib_priority &&
- memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 &&
- ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
- (nfi->fib_nhs == 0 || dn_fib_nh_comp(fi, nfi) == 0))
- return fi;
- } endfor_fib_info();
- return NULL;
-}
-
-static int dn_fib_count_nhs(const struct nlattr *attr)
-{
- struct rtnexthop *nhp = nla_data(attr);
- int nhs = 0, nhlen = nla_len(attr);
-
- while (rtnh_ok(nhp, nhlen)) {
- nhs++;
- nhp = rtnh_next(nhp, &nhlen);
- }
-
- /* leftover implies invalid nexthop configuration, discard it */
- return nhlen > 0 ? 0 : nhs;
-}
-
-static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr,
- const struct rtmsg *r)
-{
- struct rtnexthop *nhp = nla_data(attr);
- int nhlen = nla_len(attr);
-
- change_nexthops(fi) {
- int attrlen;
-
- if (!rtnh_ok(nhp, nhlen))
- return -EINVAL;
-
- nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
- nh->nh_oif = nhp->rtnh_ifindex;
- nh->nh_weight = nhp->rtnh_hops + 1;
-
- attrlen = rtnh_attrlen(nhp);
- if (attrlen > 0) {
- struct nlattr *gw_attr;
-
- gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY);
- nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0;
- }
-
- nhp = rtnh_next(nhp, &nhlen);
- } endfor_nexthops(fi);
-
- return 0;
-}
-
-
-static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct dn_fib_nh *nh)
-{
- int err;
-
- if (nh->nh_gw) {
- struct flowidn fld;
- struct dn_fib_res res;
-
- if (nh->nh_flags&RTNH_F_ONLINK) {
- struct net_device *dev;
-
- if (r->rtm_scope >= RT_SCOPE_LINK)
- return -EINVAL;
- if (dnet_addr_type(nh->nh_gw) != RTN_UNICAST)
- return -EINVAL;
- if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL)
- return -ENODEV;
- if (!(dev->flags&IFF_UP))
- return -ENETDOWN;
- nh->nh_dev = dev;
- dev_hold(dev);
- nh->nh_scope = RT_SCOPE_LINK;
- return 0;
- }
-
- memset(&fld, 0, sizeof(fld));
- fld.daddr = nh->nh_gw;
- fld.flowidn_oif = nh->nh_oif;
- fld.flowidn_scope = r->rtm_scope + 1;
-
- if (fld.flowidn_scope < RT_SCOPE_LINK)
- fld.flowidn_scope = RT_SCOPE_LINK;
-
- if ((err = dn_fib_lookup(&fld, &res)) != 0)
- return err;
-
- err = -EINVAL;
- if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
- goto out;
- nh->nh_scope = res.scope;
- nh->nh_oif = DN_FIB_RES_OIF(res);
- nh->nh_dev = DN_FIB_RES_DEV(res);
- if (nh->nh_dev == NULL)
- goto out;
- dev_hold(nh->nh_dev);
- err = -ENETDOWN;
- if (!(nh->nh_dev->flags & IFF_UP))
- goto out;
- err = 0;
-out:
- dn_fib_res_put(&res);
- return err;
- } else {
- struct net_device *dev;
-
- if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
- return -EINVAL;
-
- dev = __dev_get_by_index(&init_net, nh->nh_oif);
- if (dev == NULL || dev->dn_ptr == NULL)
- return -ENODEV;
- if (!(dev->flags&IFF_UP))
- return -ENETDOWN;
- nh->nh_dev = dev;
- dev_hold(nh->nh_dev);
- nh->nh_scope = RT_SCOPE_HOST;
- }
-
- return 0;
-}
-
-
-struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *attrs[],
- const struct nlmsghdr *nlh, int *errp)
-{
- int err;
- struct dn_fib_info *fi = NULL;
- struct dn_fib_info *ofi;
- int nhs = 1;
-
- if (r->rtm_type > RTN_MAX)
- goto err_inval;
-
- if (dn_fib_props[r->rtm_type].scope > r->rtm_scope)
- goto err_inval;
-
- if (attrs[RTA_MULTIPATH] &&
- (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0)
- goto err_inval;
-
- fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL);
- err = -ENOBUFS;
- if (fi == NULL)
- goto failure;
-
- fi->fib_protocol = r->rtm_protocol;
- fi->fib_nhs = nhs;
- fi->fib_flags = r->rtm_flags;
-
- if (attrs[RTA_PRIORITY])
- fi->fib_priority = nla_get_u32(attrs[RTA_PRIORITY]);
-
- if (attrs[RTA_METRICS]) {
- struct nlattr *attr;
- int rem;
-
- nla_for_each_nested(attr, attrs[RTA_METRICS], rem) {
- int type = nla_type(attr);
-
- if (type) {
- if (type > RTAX_MAX || type == RTAX_CC_ALGO ||
- nla_len(attr) < 4)
- goto err_inval;
-
- fi->fib_metrics[type-1] = nla_get_u32(attr);
- }
- }
- }
-
- if (attrs[RTA_PREFSRC])
- fi->fib_prefsrc = nla_get_le16(attrs[RTA_PREFSRC]);
-
- if (attrs[RTA_MULTIPATH]) {
- if ((err = dn_fib_get_nhs(fi, attrs[RTA_MULTIPATH], r)) != 0)
- goto failure;
-
- if (attrs[RTA_OIF] &&
- fi->fib_nh->nh_oif != nla_get_u32(attrs[RTA_OIF]))
- goto err_inval;
-
- if (attrs[RTA_GATEWAY] &&
- fi->fib_nh->nh_gw != nla_get_le16(attrs[RTA_GATEWAY]))
- goto err_inval;
- } else {
- struct dn_fib_nh *nh = fi->fib_nh;
-
- if (attrs[RTA_OIF])
- nh->nh_oif = nla_get_u32(attrs[RTA_OIF]);
-
- if (attrs[RTA_GATEWAY])
- nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]);
-
- nh->nh_flags = r->rtm_flags;
- nh->nh_weight = 1;
- }
-
- if (r->rtm_type == RTN_NAT) {
- if (!attrs[RTA_GATEWAY] || nhs != 1 || attrs[RTA_OIF])
- goto err_inval;
-
- fi->fib_nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]);
- goto link_it;
- }
-
- if (dn_fib_props[r->rtm_type].error) {
- if (attrs[RTA_GATEWAY] || attrs[RTA_OIF] || attrs[RTA_MULTIPATH])
- goto err_inval;
-
- goto link_it;
- }
-
- if (r->rtm_scope > RT_SCOPE_HOST)
- goto err_inval;
-
- if (r->rtm_scope == RT_SCOPE_HOST) {
- struct dn_fib_nh *nh = fi->fib_nh;
-
- /* Local address is added */
- if (nhs != 1 || nh->nh_gw)
- goto err_inval;
- nh->nh_scope = RT_SCOPE_NOWHERE;
- nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif);
- err = -ENODEV;
- if (nh->nh_dev == NULL)
- goto failure;
- } else {
- change_nexthops(fi) {
- if ((err = dn_fib_check_nh(r, fi, nh)) != 0)
- goto failure;
- } endfor_nexthops(fi)
- }
-
- if (fi->fib_prefsrc) {
- if (r->rtm_type != RTN_LOCAL || !attrs[RTA_DST] ||
- fi->fib_prefsrc != nla_get_le16(attrs[RTA_DST]))
- if (dnet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
- goto err_inval;
- }
-
-link_it:
- if ((ofi = dn_fib_find_info(fi)) != NULL) {
- fi->fib_dead = 1;
- dn_fib_free_info(fi);
- refcount_inc(&ofi->fib_treeref);
- return ofi;
- }
-
- refcount_set(&fi->fib_treeref, 1);
- refcount_set(&fi->fib_clntref, 1);
- spin_lock(&dn_fib_info_lock);
- fi->fib_next = dn_fib_info_list;
- fi->fib_prev = NULL;
- if (dn_fib_info_list)
- dn_fib_info_list->fib_prev = fi;
- dn_fib_info_list = fi;
- spin_unlock(&dn_fib_info_lock);
- return fi;
-
-err_inval:
- err = -EINVAL;
-
-failure:
- *errp = err;
- if (fi) {
- fi->fib_dead = 1;
- dn_fib_free_info(fi);
- }
-
- return NULL;
-}
-
-int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowidn *fld, struct dn_fib_res *res)
-{
- int err = dn_fib_props[type].error;
-
- if (err == 0) {
- if (fi->fib_flags & RTNH_F_DEAD)
- return 1;
-
- res->fi = fi;
-
- switch (type) {
- case RTN_NAT:
- DN_FIB_RES_RESET(*res);
- refcount_inc(&fi->fib_clntref);
- return 0;
- case RTN_UNICAST:
- case RTN_LOCAL:
- for_nexthops(fi) {
- if (nh->nh_flags & RTNH_F_DEAD)
- continue;
- if (!fld->flowidn_oif ||
- fld->flowidn_oif == nh->nh_oif)
- break;
- }
- if (nhsel < fi->fib_nhs) {
- res->nh_sel = nhsel;
- refcount_inc(&fi->fib_clntref);
- return 0;
- }
- endfor_nexthops(fi);
- res->fi = NULL;
- return 1;
- default:
- net_err_ratelimited("DECnet: impossible routing event : dn_fib_semantic_match type=%d\n",
- type);
- res->fi = NULL;
- return -EINVAL;
- }
- }
- return err;
-}
-
-void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res)
-{
- struct dn_fib_info *fi = res->fi;
- int w;
-
- spin_lock_bh(&dn_fib_multipath_lock);
- if (fi->fib_power <= 0) {
- int power = 0;
- change_nexthops(fi) {
- if (!(nh->nh_flags&RTNH_F_DEAD)) {
- power += nh->nh_weight;
- nh->nh_power = nh->nh_weight;
- }
- } endfor_nexthops(fi);
- fi->fib_power = power;
- if (power < 0) {
- spin_unlock_bh(&dn_fib_multipath_lock);
- res->nh_sel = 0;
- return;
- }
- }
-
- w = jiffies % fi->fib_power;
-
- change_nexthops(fi) {
- if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
- if ((w -= nh->nh_power) <= 0) {
- nh->nh_power--;
- fi->fib_power--;
- res->nh_sel = nhsel;
- spin_unlock_bh(&dn_fib_multipath_lock);
- return;
- }
- }
- } endfor_nexthops(fi);
- res->nh_sel = 0;
- spin_unlock_bh(&dn_fib_multipath_lock);
-}
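
A user-space sketch of the credit-based weighted round robin above: each live nexthop holds power equal to its weight, a pseudo-random index in [0, fib_power) picks a hop by cumulative subtraction, and the chosen hop pays one credit, so over a full refill cycle hop i is selected weight[i] times. The two-hop weights here are illustrative.

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	int weight[2] = { 3, 1 }, power[2] = { 0, 0 };
	int total = 0, hits[2] = { 0, 0 };

	for (int round = 0; round < 8; round++) {
		if (total <= 0) {			/* refill credits */
			for (int i = 0; i < 2; i++) {
				power[i] = weight[i];
				total += weight[i];
			}
		}
		int w = rand() % total;
		for (int i = 0; i < 2; i++) {
			if (power[i] && (w -= power[i]) <= 0) {
				power[i]--;
				total--;
				hits[i]++;
				break;
			}
		}
	}
	printf("hop0=%d hop1=%d\n", hits[0], hits[1]);	/* 6 and 2 */
	return 0;
}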
-
-static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table)
-{
- if (attrs[RTA_TABLE])
- table = nla_get_u32(attrs[RTA_TABLE]);
-
- return table;
-}
-
-static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(skb->sk);
- struct dn_fib_table *tb;
- struct rtmsg *r = nlmsg_data(nlh);
- struct nlattr *attrs[RTA_MAX+1];
- int err;
-
- if (!netlink_capable(skb, CAP_NET_ADMIN))
- return -EPERM;
-
- if (!net_eq(net, &init_net))
- return -EINVAL;
-
- err = nlmsg_parse_deprecated(nlh, sizeof(*r), attrs, RTA_MAX,
- rtm_dn_policy, extack);
- if (err < 0)
- return err;
-
- tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 0);
- if (!tb)
- return -ESRCH;
-
- return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb));
-}
-
-static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(skb->sk);
- struct dn_fib_table *tb;
- struct rtmsg *r = nlmsg_data(nlh);
- struct nlattr *attrs[RTA_MAX+1];
- int err;
-
- if (!netlink_capable(skb, CAP_NET_ADMIN))
- return -EPERM;
-
- if (!net_eq(net, &init_net))
- return -EINVAL;
-
- err = nlmsg_parse_deprecated(nlh, sizeof(*r), attrs, RTA_MAX,
- rtm_dn_policy, extack);
- if (err < 0)
- return err;
-
- tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 1);
- if (!tb)
- return -ENOBUFS;
-
- return tb->insert(tb, r, attrs, nlh, &NETLINK_CB(skb));
-}
-
-static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa)
-{
- struct dn_fib_table *tb;
- struct {
- struct nlmsghdr nlh;
- struct rtmsg rtm;
- } req;
- struct {
- struct nlattr hdr;
- __le16 dst;
- } dst_attr = {
- .dst = dst,
- };
- struct {
- struct nlattr hdr;
- __le16 prefsrc;
- } prefsrc_attr = {
- .prefsrc = ifa->ifa_local,
- };
- struct {
- struct nlattr hdr;
- u32 oif;
- } oif_attr = {
- .oif = ifa->ifa_dev->dev->ifindex,
- };
- struct nlattr *attrs[RTA_MAX+1] = {
- [RTA_DST] = (struct nlattr *) &dst_attr,
- [RTA_PREFSRC] = (struct nlattr * ) &prefsrc_attr,
- [RTA_OIF] = (struct nlattr *) &oif_attr,
- };
-
- memset(&req.rtm, 0, sizeof(req.rtm));
-
- if (type == RTN_UNICAST)
- tb = dn_fib_get_table(RT_MIN_TABLE, 1);
- else
- tb = dn_fib_get_table(RT_TABLE_LOCAL, 1);
-
- if (tb == NULL)
- return;
-
- req.nlh.nlmsg_len = sizeof(req);
- req.nlh.nlmsg_type = cmd;
- req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
- req.nlh.nlmsg_pid = 0;
- req.nlh.nlmsg_seq = 0;
-
- req.rtm.rtm_dst_len = dst_len;
- req.rtm.rtm_table = tb->n;
- req.rtm.rtm_protocol = RTPROT_KERNEL;
- req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
- req.rtm.rtm_type = type;
-
- if (cmd == RTM_NEWROUTE)
- tb->insert(tb, &req.rtm, attrs, &req.nlh, NULL);
- else
- tb->delete(tb, &req.rtm, attrs, &req.nlh, NULL);
-}
-
-static void dn_fib_add_ifaddr(struct dn_ifaddr *ifa)
-{
-
- fib_magic(RTM_NEWROUTE, RTN_LOCAL, ifa->ifa_local, 16, ifa);
-
-#if 0
- if (!(dev->flags&IFF_UP))
- return;
- /* In the future, we will want to add default routes here */
-
-#endif
-}
-
-static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa)
-{
- int found_it = 0;
- struct net_device *dev;
- struct dn_dev *dn_db;
- struct dn_ifaddr *ifa2;
-
- ASSERT_RTNL();
-
- /* Scan device list */
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, dev) {
- dn_db = rcu_dereference(dev->dn_ptr);
- if (dn_db == NULL)
- continue;
- for (ifa2 = rcu_dereference(dn_db->ifa_list);
- ifa2 != NULL;
- ifa2 = rcu_dereference(ifa2->ifa_next)) {
- if (ifa2->ifa_local == ifa->ifa_local) {
- found_it = 1;
- break;
- }
- }
- }
- rcu_read_unlock();
-
- if (found_it == 0) {
- fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 16, ifa);
-
- if (dnet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
- if (dn_fib_sync_down(ifa->ifa_local, NULL, 0))
- dn_fib_flush();
- }
- }
-}
-
-static void dn_fib_disable_addr(struct net_device *dev, int force)
-{
- if (dn_fib_sync_down(0, dev, force))
- dn_fib_flush();
- dn_rt_cache_flush(0);
- neigh_ifdown(&dn_neigh_table, dev);
-}
-
-static int dn_fib_dnaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
- struct dn_ifaddr *ifa = (struct dn_ifaddr *)ptr;
-
- switch (event) {
- case NETDEV_UP:
- dn_fib_add_ifaddr(ifa);
- dn_fib_sync_up(ifa->ifa_dev->dev);
- dn_rt_cache_flush(-1);
- break;
- case NETDEV_DOWN:
- dn_fib_del_ifaddr(ifa);
- if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
- dn_fib_disable_addr(ifa->ifa_dev->dev, 1);
- } else {
- dn_rt_cache_flush(-1);
- }
- break;
- }
- return NOTIFY_DONE;
-}
-
-static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force)
-{
- int ret = 0;
- int scope = RT_SCOPE_NOWHERE;
-
- if (force)
- scope = -1;
-
- for_fib_info() {
- /*
-		 * This makes no sense for DECnet... we will almost
-		 * certainly have the same local address configured on
-		 * more than one of our interfaces. It needs some more
-		 * thought.
- */
- if (local && fi->fib_prefsrc == local) {
- fi->fib_flags |= RTNH_F_DEAD;
- ret++;
- } else if (dev && fi->fib_nhs) {
- int dead = 0;
-
- change_nexthops(fi) {
- if (nh->nh_flags&RTNH_F_DEAD)
- dead++;
- else if (nh->nh_dev == dev &&
- nh->nh_scope != scope) {
- spin_lock_bh(&dn_fib_multipath_lock);
- nh->nh_flags |= RTNH_F_DEAD;
- fi->fib_power -= nh->nh_power;
- nh->nh_power = 0;
- spin_unlock_bh(&dn_fib_multipath_lock);
- dead++;
- }
- } endfor_nexthops(fi)
- if (dead == fi->fib_nhs) {
- fi->fib_flags |= RTNH_F_DEAD;
- ret++;
- }
- }
- } endfor_fib_info();
- return ret;
-}
-
-
-static int dn_fib_sync_up(struct net_device *dev)
-{
- int ret = 0;
-
- if (!(dev->flags&IFF_UP))
- return 0;
-
- for_fib_info() {
- int alive = 0;
-
- change_nexthops(fi) {
- if (!(nh->nh_flags&RTNH_F_DEAD)) {
- alive++;
- continue;
- }
- if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
- continue;
- if (nh->nh_dev != dev || dev->dn_ptr == NULL)
- continue;
- alive++;
- spin_lock_bh(&dn_fib_multipath_lock);
- nh->nh_power = 0;
- nh->nh_flags &= ~RTNH_F_DEAD;
- spin_unlock_bh(&dn_fib_multipath_lock);
- } endfor_nexthops(fi);
-
- if (alive > 0) {
- fi->fib_flags &= ~RTNH_F_DEAD;
- ret++;
- }
- } endfor_fib_info();
- return ret;
-}
-
-static struct notifier_block dn_fib_dnaddr_notifier = {
- .notifier_call = dn_fib_dnaddr_event,
-};
-
-void __exit dn_fib_cleanup(void)
-{
- dn_fib_table_cleanup();
- dn_fib_rules_cleanup();
-
- unregister_dnaddr_notifier(&dn_fib_dnaddr_notifier);
-}
-
-
-void __init dn_fib_init(void)
-{
- dn_fib_table_init();
- dn_fib_rules_init();
-
- register_dnaddr_notifier(&dn_fib_dnaddr_notifier);
-
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWROUTE,
- dn_fib_rtm_newroute, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELROUTE,
- dn_fib_rtm_delroute, NULL, 0);
-}
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
deleted file mode 100644
index 94b306f6d551..000000000000
--- a/net/decnet/dn_neigh.c
+++ /dev/null
@@ -1,605 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Neighbour Functions (Adjacency Database and
- * On-Ethernet Cache)
- *
- * Author: Steve Whitehouse <SteveW@ACM.org>
- *
- *
- * Changes:
- * Steve Whitehouse : Fixed router listing routine
- * Steve Whitehouse : Added error_report functions
- * Steve Whitehouse : Added default router detection
- * Steve Whitehouse : Hop counts in outgoing messages
- * Steve Whitehouse : Fixed src/dst in outgoing messages so
- * forwarding now stands a good chance of
- * working.
- * Steve Whitehouse : Fixed neighbour states (for now anyway).
- * Steve Whitehouse : Made error_report functions dummies. This
- * is not the right place to return skbs.
- * Steve Whitehouse : Convert to seq_file
- *
- */
-
-#include <linux/net.h>
-#include <linux/module.h>
-#include <linux/socket.h>
-#include <linux/if_arp.h>
-#include <linux/slab.h>
-#include <linux/if_ether.h>
-#include <linux/init.h>
-#include <linux/proc_fs.h>
-#include <linux/string.h>
-#include <linux/netfilter_decnet.h>
-#include <linux/spinlock.h>
-#include <linux/seq_file.h>
-#include <linux/rcupdate.h>
-#include <linux/jhash.h>
-#include <linux/atomic.h>
-#include <net/net_namespace.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/dn.h>
-#include <net/dn_dev.h>
-#include <net/dn_neigh.h>
-#include <net/dn_route.h>
-
-static int dn_neigh_construct(struct neighbour *);
-static void dn_neigh_error_report(struct neighbour *, struct sk_buff *);
-static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb);
-
-/*
- * Operations for adding the link layer header.
- */
-static const struct neigh_ops dn_neigh_ops = {
- .family = AF_DECnet,
- .error_report = dn_neigh_error_report,
- .output = dn_neigh_output,
- .connected_output = dn_neigh_output,
-};
-
-static u32 dn_neigh_hash(const void *pkey,
- const struct net_device *dev,
- __u32 *hash_rnd)
-{
- return jhash_2words(*(__u16 *)pkey, 0, hash_rnd[0]);
-}
-
-static bool dn_key_eq(const struct neighbour *neigh, const void *pkey)
-{
- return neigh_key_eq16(neigh, pkey);
-}
-
-struct neigh_table dn_neigh_table = {
- .family = PF_DECnet,
- .entry_size = NEIGH_ENTRY_SIZE(sizeof(struct dn_neigh)),
- .key_len = sizeof(__le16),
- .protocol = cpu_to_be16(ETH_P_DNA_RT),
- .hash = dn_neigh_hash,
- .key_eq = dn_key_eq,
- .constructor = dn_neigh_construct,
- .id = "dn_neigh_cache",
- .parms ={
- .tbl = &dn_neigh_table,
- .reachable_time = 30 * HZ,
- .data = {
- [NEIGH_VAR_MCAST_PROBES] = 0,
- [NEIGH_VAR_UCAST_PROBES] = 0,
- [NEIGH_VAR_APP_PROBES] = 0,
- [NEIGH_VAR_RETRANS_TIME] = 1 * HZ,
- [NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,
- [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
- [NEIGH_VAR_GC_STALETIME] = 60 * HZ,
- [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
- [NEIGH_VAR_PROXY_QLEN] = 0,
- [NEIGH_VAR_ANYCAST_DELAY] = 0,
- [NEIGH_VAR_PROXY_DELAY] = 0,
- [NEIGH_VAR_LOCKTIME] = 1 * HZ,
- },
- },
- .gc_interval = 30 * HZ,
- .gc_thresh1 = 128,
- .gc_thresh2 = 512,
- .gc_thresh3 = 1024,
-};
-
-static int dn_neigh_construct(struct neighbour *neigh)
-{
- struct net_device *dev = neigh->dev;
- struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n);
- struct dn_dev *dn_db;
- struct neigh_parms *parms;
-
- rcu_read_lock();
- dn_db = rcu_dereference(dev->dn_ptr);
- if (dn_db == NULL) {
- rcu_read_unlock();
- return -EINVAL;
- }
-
- parms = dn_db->neigh_parms;
- if (!parms) {
- rcu_read_unlock();
- return -EINVAL;
- }
-
- __neigh_parms_put(neigh->parms);
- neigh->parms = neigh_parms_clone(parms);
- rcu_read_unlock();
-
- neigh->ops = &dn_neigh_ops;
- neigh->nud_state = NUD_NOARP;
- neigh->output = neigh->ops->connected_output;
-
- if ((dev->type == ARPHRD_IPGRE) || (dev->flags & IFF_POINTOPOINT))
- memcpy(neigh->ha, dev->broadcast, dev->addr_len);
- else if ((dev->type == ARPHRD_ETHER) || (dev->type == ARPHRD_LOOPBACK))
- dn_dn2eth(neigh->ha, dn->addr);
- else {
- net_dbg_ratelimited("Trying to create neigh for hw %d\n",
- dev->type);
- return -EINVAL;
- }
-
- /*
-	 * Make an estimate of the remote block size by assuming that it is
-	 * two less than the device mtu, which is true for ethernet (and
-	 * other things which support long format headers) since there is
-	 * an extra length field (of 16 bits) which isn't part of the
-	 * ethernet headers and which the DECnet specs won't admit is part
-	 * of the DECnet routing headers either.
-	 *
-	 * If we overestimate here it's no big deal, since the NSP
-	 * negotiations will prevent us from sending packets which are too
-	 * large for the remote node to handle. In any case this figure is
-	 * normally updated by a hello message.
- */
- dn->blksize = dev->mtu - 2;
-
- return 0;
-}
-
-static void dn_neigh_error_report(struct neighbour *neigh, struct sk_buff *skb)
-{
- printk(KERN_DEBUG "dn_neigh_error_report: called\n");
- kfree_skb(skb);
-}
-
-static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb)
-{
- struct dst_entry *dst = skb_dst(skb);
- struct dn_route *rt = (struct dn_route *)dst;
- struct net_device *dev = neigh->dev;
- char mac_addr[ETH_ALEN];
- unsigned int seq;
- int err;
-
- dn_dn2eth(mac_addr, rt->rt_local_src);
- do {
- seq = read_seqbegin(&neigh->ha_lock);
- err = dev_hard_header(skb, dev, ntohs(skb->protocol),
- neigh->ha, mac_addr, skb->len);
- } while (read_seqretry(&neigh->ha_lock, seq));
-
- if (err >= 0)
- err = dev_queue_xmit(skb);
- else {
- kfree_skb(skb);
- err = -EINVAL;
- }
- return err;
-}
-
-static int dn_neigh_output_packet(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct dst_entry *dst = skb_dst(skb);
- struct dn_route *rt = (struct dn_route *)dst;
- struct neighbour *neigh = rt->n;
-
- return neigh->output(neigh, skb);
-}
-
-/*
- * For talking to broadcast devices: Ethernet & PPP
- */
-static int dn_long_output(struct neighbour *neigh, struct sock *sk,
- struct sk_buff *skb)
-{
- struct net_device *dev = neigh->dev;
- int headroom = dev->hard_header_len + sizeof(struct dn_long_packet) + 3;
- unsigned char *data;
- struct dn_long_packet *lp;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
-
- if (skb_headroom(skb) < headroom) {
- struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom);
- if (skb2 == NULL) {
- net_crit_ratelimited("dn_long_output: no memory\n");
- kfree_skb(skb);
- return -ENOBUFS;
- }
- consume_skb(skb);
- skb = skb2;
- net_info_ratelimited("dn_long_output: Increasing headroom\n");
- }
-
- data = skb_push(skb, sizeof(struct dn_long_packet) + 3);
- lp = (struct dn_long_packet *)(data+3);
-
- *((__le16 *)data) = cpu_to_le16(skb->len - 2);
- *(data + 2) = 1 | DN_RT_F_PF; /* Padding */
-
- lp->msgflg = DN_RT_PKT_LONG|(cb->rt_flags&(DN_RT_F_IE|DN_RT_F_RQR|DN_RT_F_RTS));
- lp->d_area = lp->d_subarea = 0;
- dn_dn2eth(lp->d_id, cb->dst);
- lp->s_area = lp->s_subarea = 0;
- dn_dn2eth(lp->s_id, cb->src);
- lp->nl2 = 0;
- lp->visit_ct = cb->hops & 0x3f;
- lp->s_class = 0;
- lp->pt = 0;
-
- skb_reset_network_header(skb);
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING,
- &init_net, sk, skb, NULL, neigh->dev,
- dn_neigh_output_packet);
-}
-
-/*
- * For talking to pointopoint and multidrop devices: DDCMP and X.25
- */
-static int dn_short_output(struct neighbour *neigh, struct sock *sk,
- struct sk_buff *skb)
-{
- struct net_device *dev = neigh->dev;
- int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2;
- struct dn_short_packet *sp;
- unsigned char *data;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
-
- if (skb_headroom(skb) < headroom) {
- struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom);
- if (skb2 == NULL) {
- net_crit_ratelimited("dn_short_output: no memory\n");
- kfree_skb(skb);
- return -ENOBUFS;
- }
- consume_skb(skb);
- skb = skb2;
- net_info_ratelimited("dn_short_output: Increasing headroom\n");
- }
-
- data = skb_push(skb, sizeof(struct dn_short_packet) + 2);
- *((__le16 *)data) = cpu_to_le16(skb->len - 2);
- sp = (struct dn_short_packet *)(data+2);
-
- sp->msgflg = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS));
- sp->dstnode = cb->dst;
- sp->srcnode = cb->src;
- sp->forward = cb->hops & 0x3f;
-
- skb_reset_network_header(skb);
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING,
- &init_net, sk, skb, NULL, neigh->dev,
- dn_neigh_output_packet);
-}
-
-/*
- * For talking to DECnet phase III nodes
- * Phase 3 output is the same as short output, except that
- * it clears the area bits before transmission.
- */
-static int dn_phase3_output(struct neighbour *neigh, struct sock *sk,
- struct sk_buff *skb)
-{
- struct net_device *dev = neigh->dev;
- int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2;
- struct dn_short_packet *sp;
- unsigned char *data;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
- if (skb_headroom(skb) < headroom) {
- struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom);
- if (skb2 == NULL) {
- net_crit_ratelimited("dn_phase3_output: no memory\n");
- kfree_skb(skb);
- return -ENOBUFS;
- }
- consume_skb(skb);
- skb = skb2;
- net_info_ratelimited("dn_phase3_output: Increasing headroom\n");
- }
-
- data = skb_push(skb, sizeof(struct dn_short_packet) + 2);
- *((__le16 *)data) = cpu_to_le16(skb->len - 2);
- sp = (struct dn_short_packet *)(data + 2);
-
- sp->msgflg = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS));
- sp->dstnode = cb->dst & cpu_to_le16(0x03ff);
- sp->srcnode = cb->src & cpu_to_le16(0x03ff);
- sp->forward = cb->hops & 0x3f;
-
- skb_reset_network_header(skb);
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING,
- &init_net, sk, skb, NULL, neigh->dev,
- dn_neigh_output_packet);
-}
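
The phase III tweak in numbers (illustrative only): masking with 0x03ff strips the 6-bit area from a 16-bit node address, so 1.2 (0x0402) goes out to a phase III neighbour as plain node 2.

#include <stdio.h>

int main(void)
{
	unsigned short addr = 0x0402;			/* area 1, node 2 */

	printf("phase3 id = 0x%04x\n", addr & 0x03ff);	/* 0x0002 */
	return 0;
}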
-
-int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct dst_entry *dst = skb_dst(skb);
- struct dn_route *rt = (struct dn_route *) dst;
- struct neighbour *neigh = rt->n;
- struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n);
- struct dn_dev *dn_db;
- bool use_long;
-
- rcu_read_lock();
- dn_db = rcu_dereference(neigh->dev->dn_ptr);
- if (dn_db == NULL) {
- rcu_read_unlock();
- return -EINVAL;
- }
- use_long = dn_db->use_long;
- rcu_read_unlock();
-
- if (dn->flags & DN_NDFLAG_P3)
- return dn_phase3_output(neigh, sk, skb);
- if (use_long)
- return dn_long_output(neigh, sk, skb);
- else
- return dn_short_output(neigh, sk, skb);
-}
-
-/*
- * Unfortunately, the neighbour code uses the device in its hash
- * function, so we don't get any advantage from it. This function
- * basically does a neigh_lookup(), but without comparing the device
- * field. This is required for the On-Ethernet cache
- */
-
-/*
- * Pointopoint link receives a hello message
- */
-void dn_neigh_pointopoint_hello(struct sk_buff *skb)
-{
- kfree_skb(skb);
-}
-
-/*
- * Ethernet router hello message received
- */
-int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct rtnode_hello_message *msg = (struct rtnode_hello_message *)skb->data;
-
- struct neighbour *neigh;
- struct dn_neigh *dn;
- struct dn_dev *dn_db;
- __le16 src;
-
- src = dn_eth2dn(msg->id);
-
- neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1);
-
- dn = container_of(neigh, struct dn_neigh, n);
-
- if (neigh) {
- write_lock(&neigh->lock);
-
- neigh->used = jiffies;
- dn_db = rcu_dereference(neigh->dev->dn_ptr);
-
- if (!(neigh->nud_state & NUD_PERMANENT)) {
- neigh->updated = jiffies;
-
- if (neigh->dev->type == ARPHRD_ETHER)
- memcpy(neigh->ha, &eth_hdr(skb)->h_source, ETH_ALEN);
-
- dn->blksize = le16_to_cpu(msg->blksize);
- dn->priority = msg->priority;
-
- dn->flags &= ~DN_NDFLAG_P3;
-
- switch (msg->iinfo & DN_RT_INFO_TYPE) {
- case DN_RT_INFO_L1RT:
- dn->flags &=~DN_NDFLAG_R2;
- dn->flags |= DN_NDFLAG_R1;
- break;
- case DN_RT_INFO_L2RT:
- dn->flags |= DN_NDFLAG_R2;
- }
- }
-
- /* Only use routers in our area */
- if ((le16_to_cpu(src)>>10) == (le16_to_cpu((decnet_address))>>10)) {
- if (!dn_db->router) {
- dn_db->router = neigh_clone(neigh);
- } else {
- if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority)
- neigh_release(xchg(&dn_db->router, neigh_clone(neigh)));
- }
- }
- write_unlock(&neigh->lock);
- neigh_release(neigh);
- }
-
- kfree_skb(skb);
- return 0;
-}
-
-/*
- * Endnode hello message received
- */
-int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct endnode_hello_message *msg = (struct endnode_hello_message *)skb->data;
- struct neighbour *neigh;
- struct dn_neigh *dn;
- __le16 src;
-
- src = dn_eth2dn(msg->id);
-
- neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1);
-
- dn = container_of(neigh, struct dn_neigh, n);
-
- if (neigh) {
- write_lock(&neigh->lock);
-
- neigh->used = jiffies;
-
- if (!(neigh->nud_state & NUD_PERMANENT)) {
- neigh->updated = jiffies;
-
- if (neigh->dev->type == ARPHRD_ETHER)
- memcpy(neigh->ha, &eth_hdr(skb)->h_source, ETH_ALEN);
- dn->flags &= ~(DN_NDFLAG_R1 | DN_NDFLAG_R2);
- dn->blksize = le16_to_cpu(msg->blksize);
- dn->priority = 0;
- }
-
- write_unlock(&neigh->lock);
- neigh_release(neigh);
- }
-
- kfree_skb(skb);
- return 0;
-}
-
-static char *dn_find_slot(char *base, int max, int priority)
-{
- int i;
- unsigned char *min = NULL;
-
- base += 6; /* skip first id */
-
- for(i = 0; i < max; i++) {
- if (!min || (*base < *min))
- min = base;
- base += 7; /* find next priority */
- }
-
- if (!min)
- return NULL;
-
- return (*min < priority) ? (min - 6) : NULL;
-}
-
-struct elist_cb_state {
- struct net_device *dev;
- unsigned char *ptr;
- unsigned char *rs;
- int t, n;
-};
-
-static void neigh_elist_cb(struct neighbour *neigh, void *_info)
-{
- struct elist_cb_state *s = _info;
- struct dn_neigh *dn;
-
- if (neigh->dev != s->dev)
- return;
-
- dn = container_of(neigh, struct dn_neigh, n);
- if (!(dn->flags & (DN_NDFLAG_R1|DN_NDFLAG_R2)))
- return;
-
- if (s->t == s->n)
- s->rs = dn_find_slot(s->ptr, s->n, dn->priority);
- else
- s->t++;
- if (s->rs == NULL)
- return;
-
- dn_dn2eth(s->rs, dn->addr);
- s->rs += 6;
- *(s->rs) = neigh->nud_state & NUD_CONNECTED ? 0x80 : 0x0;
- *(s->rs) |= dn->priority;
- s->rs++;
-}
-
-int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n)
-{
- struct elist_cb_state state;
-
- state.dev = dev;
- state.t = 0;
- state.n = n;
- state.ptr = ptr;
- state.rs = ptr;
-
- neigh_for_each(&dn_neigh_table, neigh_elist_cb, &state);
-
- return state.t;
-}
-
-
-#ifdef CONFIG_PROC_FS
-
-static inline void dn_neigh_format_entry(struct seq_file *seq,
- struct neighbour *n)
-{
- struct dn_neigh *dn = container_of(n, struct dn_neigh, n);
- char buf[DN_ASCBUF_LEN];
-
- read_lock(&n->lock);
- seq_printf(seq, "%-7s %s%s%s %02x %02d %07ld %-8s\n",
- dn_addr2asc(le16_to_cpu(dn->addr), buf),
- (dn->flags&DN_NDFLAG_R1) ? "1" : "-",
- (dn->flags&DN_NDFLAG_R2) ? "2" : "-",
- (dn->flags&DN_NDFLAG_P3) ? "3" : "-",
- dn->n.nud_state,
- refcount_read(&dn->n.refcnt),
- dn->blksize,
- (dn->n.dev) ? dn->n.dev->name : "?");
- read_unlock(&n->lock);
-}
-
-static int dn_neigh_seq_show(struct seq_file *seq, void *v)
-{
- if (v == SEQ_START_TOKEN) {
- seq_puts(seq, "Addr Flags State Use Blksize Dev\n");
- } else {
- dn_neigh_format_entry(seq, v);
- }
-
- return 0;
-}
-
-static void *dn_neigh_seq_start(struct seq_file *seq, loff_t *pos)
-{
- return neigh_seq_start(seq, pos, &dn_neigh_table,
- NEIGH_SEQ_NEIGH_ONLY);
-}
-
-static const struct seq_operations dn_neigh_seq_ops = {
- .start = dn_neigh_seq_start,
- .next = neigh_seq_next,
- .stop = neigh_seq_stop,
- .show = dn_neigh_seq_show,
-};
-#endif
-
-void __init dn_neigh_init(void)
-{
- neigh_table_init(NEIGH_DN_TABLE, &dn_neigh_table);
- proc_create_net("decnet_neigh", 0444, init_net.proc_net,
- &dn_neigh_seq_ops, sizeof(struct neigh_seq_state));
-}
-
-void __exit dn_neigh_cleanup(void)
-{
- remove_proc_entry("decnet_neigh", init_net.proc_net);
- neigh_table_clear(NEIGH_DN_TABLE, &dn_neigh_table);
-}
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
deleted file mode 100644
index c59be5b04479..000000000000
--- a/net/decnet/dn_nsp_in.c
+++ /dev/null
@@ -1,907 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Network Services Protocol (Input)
- *
- * Author: Eduardo Marcelo Serrat <emserrat@geocities.com>
- *
- * Changes:
- *
- * Steve Whitehouse: Split into dn_nsp_in.c and dn_nsp_out.c from
- * original dn_nsp.c.
- * Steve Whitehouse: Updated to work with my new routing architecture.
- * Steve Whitehouse: Add changes from Eduardo Serrat's patches.
- * Steve Whitehouse: Put all ack handling code in a common routine.
- * Steve Whitehouse: Put other common bits into dn_nsp_rx()
- * Steve Whitehouse: More checks on skb->len to catch bogus packets
- * Fixed various race conditions and possible nasties.
- * Steve Whitehouse: Now handles returned conninit frames.
- * David S. Miller: New socket locking
- * Steve Whitehouse: Fixed lockup when socket filtering was enabled.
- * Paul Koning: Fix to push CC sockets into RUN when acks are
- * received.
- * Steve Whitehouse:
- * Patrick Caulfield: Checking conninits for correctness & sending of error
- * responses.
- * Steve Whitehouse: Added backlog congestion level return codes.
- * Patrick Caulfield:
- * Steve Whitehouse: Added flow control support (outbound)
- * Steve Whitehouse: Prepare for nonlinear skbs
- */
-
-/******************************************************************************
- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
-
-*******************************************************************************/
-
-#include <linux/errno.h>
-#include <linux/filter.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#include <linux/inet.h>
-#include <linux/route.h>
-#include <linux/slab.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/termios.h>
-#include <linux/interrupt.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
-#include <linux/init.h>
-#include <linux/poll.h>
-#include <linux/netfilter_decnet.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/dn.h>
-#include <net/dn_nsp.h>
-#include <net/dn_dev.h>
-#include <net/dn_route.h>
-
-extern int decnet_log_martians;
-
-static void dn_log_martian(struct sk_buff *skb, const char *msg)
-{
- if (decnet_log_martians) {
- char *devname = skb->dev ? skb->dev->name : "???";
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- net_info_ratelimited("DECnet: Martian packet (%s) dev=%s src=0x%04hx dst=0x%04hx srcport=0x%04hx dstport=0x%04hx\n",
- msg, devname,
- le16_to_cpu(cb->src),
- le16_to_cpu(cb->dst),
- le16_to_cpu(cb->src_port),
- le16_to_cpu(cb->dst_port));
- }
-}
-
-/*
- * For this function we've flipped the cross-subchannel bit
- * if the message is an otherdata or linkservice message. Thus
- * we can use it to work out what to update.
- */
-static void dn_ack(struct sock *sk, struct sk_buff *skb, unsigned short ack)
-{
- struct dn_scp *scp = DN_SK(sk);
- unsigned short type = ((ack >> 12) & 0x0003);
- int wakeup = 0;
-
- switch (type) {
- case 0: /* ACK - Data */
- if (dn_after(ack, scp->ackrcv_dat)) {
- scp->ackrcv_dat = ack & 0x0fff;
- wakeup |= dn_nsp_check_xmit_queue(sk, skb,
- &scp->data_xmit_queue,
- ack);
- }
- break;
- case 1: /* NAK - Data */
- break;
- case 2: /* ACK - OtherData */
- if (dn_after(ack, scp->ackrcv_oth)) {
- scp->ackrcv_oth = ack & 0x0fff;
- wakeup |= dn_nsp_check_xmit_queue(sk, skb,
- &scp->other_xmit_queue,
- ack);
- }
- break;
- case 3: /* NAK - OtherData */
- break;
- }
-
- if (wakeup && !sock_flag(sk, SOCK_DEAD))
- sk->sk_state_change(sk);
-}
-
-/*
- * This function is a universal ack processor.
- */
-static int dn_process_ack(struct sock *sk, struct sk_buff *skb, int oth)
-{
- __le16 *ptr = (__le16 *)skb->data;
- int len = 0;
- unsigned short ack;
-
- if (skb->len < 2)
- return len;
-
- if ((ack = le16_to_cpu(*ptr)) & 0x8000) {
- skb_pull(skb, 2);
- ptr++;
- len += 2;
- if ((ack & 0x4000) == 0) {
- if (oth)
- ack ^= 0x2000;
- dn_ack(sk, skb, ack);
- }
- }
-
- if (skb->len < 2)
- return len;
-
- if ((ack = le16_to_cpu(*ptr)) & 0x8000) {
- skb_pull(skb, 2);
- len += 2;
- if ((ack & 0x4000) == 0) {
- if (oth)
- ack ^= 0x2000;
- dn_ack(sk, skb, ack);
- }
- }
-
- return len;
-}
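
A decoding sketch for the 16-bit ack words handled above, following dn_ack()'s switch: bit 15 marks the word as an ack field, a set bit 14 means the number is ignored here, bits 13..12 select subchannel and ACK/NAK (0 ACK-data, 1 NAK-data, 2 ACK-otherdata, 3 NAK-otherdata), and the low 12 bits carry the sequence number; dn_process_ack() XORs bit 13 for otherdata messages before handing the word over.

#include <stdio.h>

int main(void)
{
	unsigned short ack = 0xA005;		/* ACK-otherdata, seq 5 */
	static const char *kind[] = {
		"ACK-data", "NAK-data", "ACK-otherdata", "NAK-otherdata"
	};

	if ((ack & 0x8000) && !(ack & 0x4000))
		printf("%s seq=%u\n", kind[(ack >> 12) & 3], ack & 0x0fff);
	return 0;
}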
-
-
-/**
- * dn_check_idf - Check an image data field format is correct.
- * @pptr: Pointer to pointer to image data
- * @len: Pointer to length of image data
- * @max: The maximum allowed length of the data in the image data field
- * @follow_on: Check that this many bytes exist beyond the end of the image data
- *
- * Returns: 0 if ok, -1 on error
- */
-static inline int dn_check_idf(unsigned char **pptr, int *len, unsigned char max, unsigned char follow_on)
-{
- unsigned char *ptr = *pptr;
- unsigned char flen = *ptr++;
-
- (*len)--;
- if (flen > max)
- return -1;
- if ((flen + follow_on) > *len)
- return -1;
-
- *len -= flen;
- *pptr = ptr + flen;
- return 0;
-}
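
A minimal user-space restatement of the check above: image data fields are length-prefixed byte strings, and the test enforces the per-field maximum plus the requirement that follow_on bytes remain after the field. Sketch only; the kernel version operates on the skb data directly.

#include <stdio.h>

static int check_idf(const unsigned char **pptr, int *len,
		     unsigned char max, unsigned char follow_on)
{
	const unsigned char *ptr = *pptr;
	unsigned char flen = *ptr++;

	(*len)--;
	if (flen > max || flen + follow_on > *len)
		return -1;

	*len -= flen;
	*pptr = ptr + flen;
	return 0;
}

int main(void)
{
	unsigned char buf[] = { 3, 'a', 'b', 'c', 1 };	/* field + 1 byte */
	const unsigned char *p = buf;
	int len = sizeof(buf);

	printf("ok=%d\n", check_idf(&p, &len, 39, 1) == 0);	/* ok=1 */
	return 0;
}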
-
-/*
- * Table of reason codes to pass back to node which sent us a badly
- * formed message, plus text messages for the log. A zero entry in
- * the reason field means "don't reply" otherwise a disc init is sent with
- * the specified reason code.
- */
-static struct {
- unsigned short reason;
- const char *text;
-} ci_err_table[] = {
- { 0, "CI: Truncated message" },
- { NSP_REASON_ID, "CI: Destination username error" },
- { NSP_REASON_ID, "CI: Destination username type" },
- { NSP_REASON_US, "CI: Source username error" },
- { 0, "CI: Truncated at menuver" },
- { 0, "CI: Truncated before access or user data" },
- { NSP_REASON_IO, "CI: Access data format error" },
- { NSP_REASON_IO, "CI: User data format error" }
-};
-
-/*
- * This function uses a slightly different lookup method
- * to find its sockets, since it searches on object name/number
- * rather than port numbers. Various tests are done to ensure that
- * the incoming data is in the correct format before it is queued to
- * a socket.
- */
-static struct sock *dn_find_listener(struct sk_buff *skb, unsigned short *reason)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct nsp_conn_init_msg *msg = (struct nsp_conn_init_msg *)skb->data;
- struct sockaddr_dn dstaddr;
- struct sockaddr_dn srcaddr;
- unsigned char type = 0;
- int dstlen;
- int srclen;
- unsigned char *ptr;
- int len;
- int err = 0;
- unsigned char menuver;
-
- memset(&dstaddr, 0, sizeof(struct sockaddr_dn));
- memset(&srcaddr, 0, sizeof(struct sockaddr_dn));
-
- /*
- * 1. Decode & remove message header
- */
-	if (!pskb_may_pull(skb, sizeof(*msg)))
-		goto err_out;
-
-	/* pskb_may_pull() may have moved skb->data, so reload msg */
-	msg = (struct nsp_conn_init_msg *)skb->data;
-	cb->src_port = msg->srcaddr;
-	cb->dst_port = msg->dstaddr;
-	cb->services = msg->services;
-	cb->info = msg->info;
-	cb->segsize = le16_to_cpu(msg->segsize);
-
-	skb_pull(skb, sizeof(*msg));
-
- len = skb->len;
- ptr = skb->data;
-
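-	/*
-	 * err indexes ci_err_table[] below and is bumped before each
-	 * check, so that a failure reports the reason for that check.
-	 */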
- /*
- * 2. Check destination end username format
- */
- dstlen = dn_username2sockaddr(ptr, len, &dstaddr, &type);
- err++;
- if (dstlen < 0)
- goto err_out;
-
- err++;
- if (type > 1)
- goto err_out;
-
- len -= dstlen;
- ptr += dstlen;
-
- /*
- * 3. Check source end username format
- */
- srclen = dn_username2sockaddr(ptr, len, &srcaddr, &type);
- err++;
- if (srclen < 0)
- goto err_out;
-
- len -= srclen;
- ptr += srclen;
- err++;
- if (len < 1)
- goto err_out;
-
- menuver = *ptr;
- ptr++;
- len--;
-
- /*
- * 4. Check that optional data actually exists if menuver says it does
- */
- err++;
- if ((menuver & (DN_MENUVER_ACC | DN_MENUVER_USR)) && (len < 1))
- goto err_out;
-
- /*
- * 5. Check optional access data format
- */
- err++;
- if (menuver & DN_MENUVER_ACC) {
- if (dn_check_idf(&ptr, &len, 39, 1))
- goto err_out;
- if (dn_check_idf(&ptr, &len, 39, 1))
- goto err_out;
- if (dn_check_idf(&ptr, &len, 39, (menuver & DN_MENUVER_USR) ? 1 : 0))
- goto err_out;
- }
-
- /*
- * 6. Check optional user data format
- */
- err++;
- if (menuver & DN_MENUVER_USR) {
- if (dn_check_idf(&ptr, &len, 16, 0))
- goto err_out;
- }
-
- /*
- * 7. Look up socket based on destination end username
- */
- return dn_sklist_find_listener(&dstaddr);
-err_out:
- dn_log_martian(skb, ci_err_table[err].text);
- *reason = ci_err_table[err].reason;
- return NULL;
-}
-
-
-static void dn_nsp_conn_init(struct sock *sk, struct sk_buff *skb)
-{
- if (sk_acceptq_is_full(sk)) {
- kfree_skb(skb);
- return;
- }
-
- sk_acceptq_added(sk);
- skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_state_change(sk);
-}
-
-static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct dn_scp *scp = DN_SK(sk);
- unsigned char *ptr;
-
- if (skb->len < 4)
- goto out;
-
- ptr = skb->data;
- cb->services = *ptr++;
- cb->info = *ptr++;
- cb->segsize = le16_to_cpu(*(__le16 *)ptr);
-
- if ((scp->state == DN_CI) || (scp->state == DN_CD)) {
- scp->persist = 0;
- scp->addrrem = cb->src_port;
- sk->sk_state = TCP_ESTABLISHED;
- scp->state = DN_RUN;
- scp->services_rem = cb->services;
- scp->info_rem = cb->info;
- scp->segsize_rem = cb->segsize;
-
- if ((scp->services_rem & NSP_FC_MASK) == NSP_FC_NONE)
- scp->max_window = decnet_no_fc_max_cwnd;
-
- if (skb->len > 0) {
- u16 dlen = *skb->data;
-			if ((dlen <= 16) && (dlen < skb->len)) {
- scp->conndata_in.opt_optl = cpu_to_le16(dlen);
- skb_copy_from_linear_data_offset(skb, 1,
- scp->conndata_in.opt_data, dlen);
- }
- }
- dn_nsp_send_link(sk, DN_NOCHANGE, 0);
- if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_state_change(sk);
- }
-
-out:
- kfree_skb(skb);
-}
-
-static void dn_nsp_conn_ack(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->state == DN_CI) {
- scp->state = DN_CD;
- scp->persist = 0;
- }
-
- kfree_skb(skb);
-}
-
-static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- unsigned short reason;
-
- if (skb->len < 2)
- goto out;
-
- reason = le16_to_cpu(*(__le16 *)skb->data);
- skb_pull(skb, 2);
-
- scp->discdata_in.opt_status = cpu_to_le16(reason);
- scp->discdata_in.opt_optl = 0;
- memset(scp->discdata_in.opt_data, 0, 16);
-
- if (skb->len > 0) {
- u16 dlen = *skb->data;
-		if ((dlen <= 16) && (dlen < skb->len)) {
- scp->discdata_in.opt_optl = cpu_to_le16(dlen);
- skb_copy_from_linear_data_offset(skb, 1, scp->discdata_in.opt_data, dlen);
- }
- }
-
- scp->addrrem = cb->src_port;
- sk->sk_state = TCP_CLOSE;
-
- switch (scp->state) {
- case DN_CI:
- case DN_CD:
- scp->state = DN_RJ;
- sk->sk_err = ECONNREFUSED;
- break;
- case DN_RUN:
- sk->sk_shutdown |= SHUTDOWN_MASK;
- scp->state = DN_DN;
- break;
- case DN_DI:
- scp->state = DN_DIC;
- break;
- }
-
- if (!sock_flag(sk, SOCK_DEAD)) {
- if (sk->sk_socket->state != SS_UNCONNECTED)
- sk->sk_socket->state = SS_DISCONNECTING;
- sk->sk_state_change(sk);
- }
-
- /*
-	 * It appears that it's possible for remote machines to send disc
- * init messages with no port identifier if we are in the CI and
- * possibly also the CD state. Obviously we shouldn't reply with
- * a message if we don't know what the end point is.
- */
- if (scp->addrrem) {
- dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, GFP_ATOMIC);
- }
- scp->persist_fxn = dn_destroy_timer;
- scp->persist = dn_nsp_persist(sk);
-
-out:
- kfree_skb(skb);
-}
-
-/*
- * disc_conf messages are also called no_resources or no_link
- * messages depending upon the "reason" field.
- */
-static void dn_nsp_disc_conf(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
- unsigned short reason;
-
- if (skb->len != 2)
- goto out;
-
- reason = le16_to_cpu(*(__le16 *)skb->data);
-
- sk->sk_state = TCP_CLOSE;
-
- switch (scp->state) {
- case DN_CI:
- scp->state = DN_NR;
- break;
- case DN_DR:
- if (reason == NSP_REASON_DC)
- scp->state = DN_DRC;
- if (reason == NSP_REASON_NL)
- scp->state = DN_CN;
- break;
- case DN_DI:
- scp->state = DN_DIC;
- break;
- case DN_RUN:
- sk->sk_shutdown |= SHUTDOWN_MASK;
- fallthrough;
- case DN_CC:
- scp->state = DN_CN;
- }
-
- if (!sock_flag(sk, SOCK_DEAD)) {
- if (sk->sk_socket->state != SS_UNCONNECTED)
- sk->sk_socket->state = SS_DISCONNECTING;
- sk->sk_state_change(sk);
- }
-
- scp->persist_fxn = dn_destroy_timer;
- scp->persist = dn_nsp_persist(sk);
-
-out:
- kfree_skb(skb);
-}
-
-static void dn_nsp_linkservice(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
- unsigned short segnum;
- unsigned char lsflags;
- signed char fcval;
- int wake_up = 0;
- char *ptr = skb->data;
- unsigned char fctype = scp->services_rem & NSP_FC_MASK;
-
- if (skb->len != 4)
- goto out;
-
- segnum = le16_to_cpu(*(__le16 *)ptr);
- ptr += 2;
- lsflags = *(unsigned char *)ptr++;
- fcval = *ptr;
-
- /*
-	 * Here we ignore erroneous packets which really should
-	 * cause a connection abort. It is not critical
- * for now though.
- */
- if (lsflags & 0xf8)
- goto out;
-
- if (seq_next(scp->numoth_rcv, segnum)) {
- seq_add(&scp->numoth_rcv, 1);
- switch(lsflags & 0x04) { /* FCVAL INT */
- case 0x00: /* Normal Request */
- switch(lsflags & 0x03) { /* FCVAL MOD */
- case 0x00: /* Request count */
- if (fcval < 0) {
- unsigned char p_fcval = -fcval;
- if ((scp->flowrem_dat > p_fcval) &&
- (fctype == NSP_FC_SCMC)) {
- scp->flowrem_dat -= p_fcval;
- }
- } else if (fcval > 0) {
- scp->flowrem_dat += fcval;
- wake_up = 1;
- }
- break;
- case 0x01: /* Stop outgoing data */
- scp->flowrem_sw = DN_DONTSEND;
- break;
- case 0x02: /* Ok to start again */
- scp->flowrem_sw = DN_SEND;
- dn_nsp_output(sk);
- wake_up = 1;
- }
- break;
- case 0x04: /* Interrupt Request */
- if (fcval > 0) {
- scp->flowrem_oth += fcval;
- wake_up = 1;
- }
- break;
- }
- if (wake_up && !sock_flag(sk, SOCK_DEAD))
- sk->sk_state_change(sk);
- }
-
- dn_nsp_send_oth_ack(sk);
-
-out:
- kfree_skb(skb);
-}
-
-/*
- * Copy of sock_queue_rcv_skb (from sock.h) without
- * bh_lock_sock() (it's already held when this is called) which
- * also allows data and other data to be queued to a socket.
- */
-static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig, struct sk_buff_head *queue)
-{
- int err;
-
-	/* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces
-	   the number of warnings when compiling with -W --ANK
- */
- if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
- (unsigned int)sk->sk_rcvbuf) {
- err = -ENOMEM;
- goto out;
- }
-
- err = sk_filter(sk, skb);
- if (err)
- goto out;
-
- skb_set_owner_r(skb, sk);
- skb_queue_tail(queue, skb);
-
- if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk);
-out:
- return err;
-}
-
-static void dn_nsp_otherdata(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
- unsigned short segnum;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- int queued = 0;
-
- if (skb->len < 2)
- goto out;
-
- cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data);
- skb_pull(skb, 2);
-
- if (seq_next(scp->numoth_rcv, segnum)) {
-
- if (dn_queue_skb(sk, skb, SIGURG, &scp->other_receive_queue) == 0) {
- seq_add(&scp->numoth_rcv, 1);
- scp->other_report = 0;
- queued = 1;
- }
- }
-
- dn_nsp_send_oth_ack(sk);
-out:
- if (!queued)
- kfree_skb(skb);
-}
-
-static void dn_nsp_data(struct sock *sk, struct sk_buff *skb)
-{
- int queued = 0;
- unsigned short segnum;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct dn_scp *scp = DN_SK(sk);
-
- if (skb->len < 2)
- goto out;
-
- cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data);
- skb_pull(skb, 2);
-
- if (seq_next(scp->numdat_rcv, segnum)) {
- if (dn_queue_skb(sk, skb, SIGIO, &sk->sk_receive_queue) == 0) {
- seq_add(&scp->numdat_rcv, 1);
- queued = 1;
- }
-
- if ((scp->flowloc_sw == DN_SEND) && dn_congested(sk)) {
- scp->flowloc_sw = DN_DONTSEND;
- dn_nsp_send_link(sk, DN_DONTSEND, 0);
- }
- }
-
- dn_nsp_send_data_ack(sk);
-out:
- if (!queued)
- kfree_skb(skb);
-}
-
-/*
- * If one of our conninit messages is returned, this function
- * deals with it. It puts the socket into the NO_COMMUNICATION
- * state.
- */
-static void dn_returned_conn_init(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->state == DN_CI) {
- scp->state = DN_NC;
- sk->sk_state = TCP_CLOSE;
- if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_state_change(sk);
- }
-
- kfree_skb(skb);
-}
-
-static int dn_nsp_no_socket(struct sk_buff *skb, unsigned short reason)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- int ret = NET_RX_DROP;
-
- /* Must not reply to returned packets */
- if (cb->rt_flags & DN_RT_F_RTS)
- goto out;
-
- if ((reason != NSP_REASON_OK) && ((cb->nsp_flags & 0x0c) == 0x08)) {
- switch (cb->nsp_flags & 0x70) {
- case 0x10:
- case 0x60: /* (Retransmitted) Connect Init */
- dn_nsp_return_disc(skb, NSP_DISCINIT, reason);
- ret = NET_RX_SUCCESS;
- break;
- case 0x20: /* Connect Confirm */
- dn_nsp_return_disc(skb, NSP_DISCCONF, reason);
- ret = NET_RX_SUCCESS;
- break;
- }
- }
-
-out:
- kfree_skb(skb);
- return ret;
-}
-
-static int dn_nsp_rx_packet(struct net *net, struct sock *sk2,
- struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct sock *sk = NULL;
- unsigned char *ptr = (unsigned char *)skb->data;
- unsigned short reason = NSP_REASON_NL;
-
- if (!pskb_may_pull(skb, 2))
- goto free_out;
-
- skb_reset_transport_header(skb);
- cb->nsp_flags = *ptr++;
-
- if (decnet_debug_level & 2)
- printk(KERN_DEBUG "dn_nsp_rx: Message type 0x%02x\n", (int)cb->nsp_flags);
-
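-	/* Discard anything with the reserved bits (0, 1 and 7) set */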
- if (cb->nsp_flags & 0x83)
- goto free_out;
-
- /*
- * Filter out conninits and useless packet types
- */
- if ((cb->nsp_flags & 0x0c) == 0x08) {
- switch (cb->nsp_flags & 0x70) {
- case 0x00: /* NOP */
- case 0x70: /* Reserved */
- case 0x50: /* Reserved, Phase II node init */
- goto free_out;
- case 0x10:
- case 0x60:
- if (unlikely(cb->rt_flags & DN_RT_F_RTS))
- goto free_out;
- sk = dn_find_listener(skb, &reason);
- goto got_it;
- }
- }
-
- if (!pskb_may_pull(skb, 3))
- goto free_out;
-
- /*
- * Grab the destination address.
- */
- cb->dst_port = *(__le16 *)ptr;
- cb->src_port = 0;
- ptr += 2;
-
- /*
- * If not a connack, grab the source address too.
- */
- if (pskb_may_pull(skb, 5)) {
- cb->src_port = *(__le16 *)ptr;
- ptr += 2;
- skb_pull(skb, 5);
- }
-
- /*
- * Returned packets...
- * Swap src & dst and look up in the normal way.
- */
- if (unlikely(cb->rt_flags & DN_RT_F_RTS)) {
- swap(cb->dst_port, cb->src_port);
- swap(cb->dst, cb->src);
- }
-
- /*
- * Find the socket to which this skb is destined.
- */
- sk = dn_find_by_skb(skb);
-got_it:
- if (sk != NULL) {
- struct dn_scp *scp = DN_SK(sk);
-
- /* Reset backoff */
- scp->nsp_rxtshift = 0;
-
- /*
- * We linearize everything except data segments here.
- */
- if (cb->nsp_flags & ~0x60) {
- if (unlikely(skb_linearize(skb)))
- goto free_out;
- }
-
- return sk_receive_skb(sk, skb, 0);
- }
-
- return dn_nsp_no_socket(skb, reason);
-
-free_out:
- kfree_skb(skb);
- return NET_RX_DROP;
-}
-
-int dn_nsp_rx(struct sk_buff *skb)
-{
- return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_nsp_rx_packet);
-}
-
-/*
- * This is the main receive routine for sockets. It is called
- * from the above when the socket is not busy, and also from
- * sock_release() when there is a backlog queued up.
- */
-int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
- if (cb->rt_flags & DN_RT_F_RTS) {
- if (cb->nsp_flags == 0x18 || cb->nsp_flags == 0x68)
- dn_returned_conn_init(sk, skb);
- else
- kfree_skb(skb);
- return NET_RX_SUCCESS;
- }
-
- /*
- * Control packet.
- */
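-	/* Bits 3:2 == 10 mark a control message; bits 6:4 give its type */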
- if ((cb->nsp_flags & 0x0c) == 0x08) {
- switch (cb->nsp_flags & 0x70) {
- case 0x10:
- case 0x60:
- dn_nsp_conn_init(sk, skb);
- break;
- case 0x20:
- dn_nsp_conn_conf(sk, skb);
- break;
- case 0x30:
- dn_nsp_disc_init(sk, skb);
- break;
- case 0x40:
- dn_nsp_disc_conf(sk, skb);
- break;
- }
-
- } else if (cb->nsp_flags == 0x24) {
- /*
- * Special for connacks, 'cos they don't have
- * ack data or ack otherdata info.
- */
- dn_nsp_conn_ack(sk, skb);
- } else {
- int other = 1;
-
- /* both data and ack frames can kick a CC socket into RUN */
- if ((scp->state == DN_CC) && !sock_flag(sk, SOCK_DEAD)) {
- scp->state = DN_RUN;
- sk->sk_state = TCP_ESTABLISHED;
- sk->sk_state_change(sk);
- }
-
- if ((cb->nsp_flags & 0x1c) == 0)
- other = 0;
- if (cb->nsp_flags == 0x04)
- other = 0;
-
- /*
- * Read out ack data here, this applies equally
- * to data, other data, link service and both
- * ack data and ack otherdata.
- */
- dn_process_ack(sk, skb, other);
-
- /*
- * If we've some sort of data here then call a
- * suitable routine for dealing with it, otherwise
- * the packet is an ack and can be discarded.
- */
- if ((cb->nsp_flags & 0x0c) == 0) {
-
- if (scp->state != DN_RUN)
- goto free_out;
-
- switch (cb->nsp_flags) {
- case 0x10: /* LS */
- dn_nsp_linkservice(sk, skb);
- break;
- case 0x30: /* OD */
- dn_nsp_otherdata(sk, skb);
- break;
- default:
- dn_nsp_data(sk, skb);
- }
-
- } else { /* Ack, chuck it out here */
-free_out:
- kfree_skb(skb);
- }
- }
-
- return NET_RX_SUCCESS;
-}
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
deleted file mode 100644
index eadc89583168..000000000000
--- a/net/decnet/dn_nsp_out.c
+++ /dev/null
@@ -1,695 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Network Services Protocol (Output)
- *
- * Author: Eduardo Marcelo Serrat <emserrat@geocities.com>
- *
- * Changes:
- *
- * Steve Whitehouse: Split into dn_nsp_in.c and dn_nsp_out.c from
- * original dn_nsp.c.
- * Steve Whitehouse: Updated to work with my new routing architecture.
- * Steve Whitehouse: Added changes from Eduardo Serrat's patches.
- * Steve Whitehouse: Now conninits have the "return" bit set.
- * Steve Whitehouse: Fixes to check alloc'd skbs are non NULL!
- * Moved output state machine into one function
- * Steve Whitehouse: New output state machine
- * Paul Koning: Connect Confirm message fix.
- * Eduardo Serrat: Fix to stop dn_nsp_do_disc() sending malformed packets.
- * Steve Whitehouse: dn_nsp_output() and friends needed a spring clean
- * Steve Whitehouse: Moved dn_nsp_send() in here from route.h
- */
-
-/******************************************************************************
- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
-
-*******************************************************************************/
-
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#include <linux/inet.h>
-#include <linux/route.h>
-#include <linux/slab.h>
-#include <net/sock.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/termios.h>
-#include <linux/interrupt.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
-#include <linux/init.h>
-#include <linux/poll.h>
-#include <linux/if_packet.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/dn.h>
-#include <net/dn_nsp.h>
-#include <net/dn_dev.h>
-#include <net/dn_route.h>
-
-
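-/* Retransmission backoff multipliers, indexed by scp->nsp_rxtshift */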
-static int nsp_backoff[NSP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
-
-static void dn_nsp_send(struct sk_buff *skb)
-{
- struct sock *sk = skb->sk;
- struct dn_scp *scp = DN_SK(sk);
- struct dst_entry *dst;
- struct flowidn fld;
-
- skb_reset_transport_header(skb);
- scp->stamp = jiffies;
-
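-	/* Use the cached route if there is one, otherwise look one up and
-	 * come back in via the try_again label.
-	 */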
- dst = sk_dst_check(sk, 0);
- if (dst) {
-try_again:
- skb_dst_set(skb, dst);
- dst_output(&init_net, skb->sk, skb);
- return;
- }
-
- memset(&fld, 0, sizeof(fld));
- fld.flowidn_oif = sk->sk_bound_dev_if;
- fld.saddr = dn_saddr2dn(&scp->addr);
- fld.daddr = dn_saddr2dn(&scp->peer);
- dn_sk_ports_copy(&fld, scp);
- fld.flowidn_proto = DNPROTO_NSP;
- if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, 0) == 0) {
- dst = sk_dst_get(sk);
- sk->sk_route_caps = dst->dev->features;
- goto try_again;
- }
-
- sk->sk_err = EHOSTUNREACH;
- if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_state_change(sk);
-}
-
-
-/*
- * If sk == NULL, then we assume that we are supposed to be making
- * a routing layer skb. If sk != NULL, then we are supposed to be
- * creating an skb for the NSP layer.
- *
- * The eventual aim is for each socket to have a cached header size
- * for its outgoing packets, and to set hdr from this when sk != NULL.
- */
-struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri)
-{
- struct sk_buff *skb;
- int hdr = 64;
-
- if ((skb = alloc_skb(size + hdr, pri)) == NULL)
- return NULL;
-
- skb->protocol = htons(ETH_P_DNA_RT);
- skb->pkt_type = PACKET_OUTGOING;
-
- if (sk)
- skb_set_owner_w(skb, sk);
-
- skb_reserve(skb, hdr);
-
- return skb;
-}
-
-/*
- * Calculate persist timer based upon the smoothed round
- * trip time and the variance. Backoff according to the
- * nsp_backoff[] array.
- */
-unsigned long dn_nsp_persist(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
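-	/* nsp_srtt is kept scaled by 8 (see dn_nsp_rtt()), so this is
-	 * roughly srtt + rttvar/2 jiffies before backoff is applied.
-	 */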
- unsigned long t = ((scp->nsp_srtt >> 2) + scp->nsp_rttvar) >> 1;
-
- t *= nsp_backoff[scp->nsp_rxtshift];
-
- if (t < HZ) t = HZ;
- if (t > (600*HZ)) t = (600*HZ);
-
- if (scp->nsp_rxtshift < NSP_MAXRXTSHIFT)
- scp->nsp_rxtshift++;
-
- /* printk(KERN_DEBUG "rxtshift %lu, t=%lu\n", scp->nsp_rxtshift, t); */
-
- return t;
-}
-
-/*
- * This is called each time we get an estimate for the rtt
- * on the link.
- */
-static void dn_nsp_rtt(struct sock *sk, long rtt)
-{
- struct dn_scp *scp = DN_SK(sk);
- long srtt = (long)scp->nsp_srtt;
- long rttvar = (long)scp->nsp_rttvar;
- long delta;
-
- /*
- * If the jiffies clock flips over in the middle of timestamp
- * gathering this value might turn out negative, so we make sure
-	 * that it is always positive here.
- */
- if (rtt < 0)
- rtt = -rtt;
- /*
- * Add new rtt to smoothed average
- */
- delta = ((rtt << 3) - srtt);
- srtt += (delta >> 3);
- if (srtt >= 1)
- scp->nsp_srtt = (unsigned long)srtt;
- else
- scp->nsp_srtt = 1;
-
- /*
-	 * Add new rtt variance to the smoothed variance
- */
- delta >>= 1;
- rttvar += ((((delta>0)?(delta):(-delta)) - rttvar) >> 2);
- if (rttvar >= 1)
- scp->nsp_rttvar = (unsigned long)rttvar;
- else
- scp->nsp_rttvar = 1;
-
- /* printk(KERN_DEBUG "srtt=%lu rttvar=%lu\n", scp->nsp_srtt, scp->nsp_rttvar); */
-}
-
-/**
- * dn_nsp_clone_and_send - Send a data packet by cloning it
- * @skb: The packet to clone and transmit
- * @gfp: memory allocation flag
- *
- * Clone a queued data or other data packet and transmit it.
- *
- * Returns: The number of times the packet has been sent previously
- */
-static inline unsigned int dn_nsp_clone_and_send(struct sk_buff *skb,
- gfp_t gfp)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct sk_buff *skb2;
- int ret = 0;
-
- if ((skb2 = skb_clone(skb, gfp)) != NULL) {
- ret = cb->xmit_count;
- cb->xmit_count++;
- cb->stamp = jiffies;
- skb2->sk = skb->sk;
- dn_nsp_send(skb2);
- }
-
- return ret;
-}
-
-/**
- * dn_nsp_output - Try and send something from socket queues
- * @sk: The socket whose queues are to be investigated
- *
- * Try and send the packet on the end of the data and other data queues.
- * Other data gets priority over data, and if we retransmit a packet we
- * reduce the window by dividing it in two.
- *
- */
-void dn_nsp_output(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct sk_buff *skb;
- unsigned int reduce_win = 0;
-
- /*
- * First we check for otherdata/linkservice messages
- */
- if ((skb = skb_peek(&scp->other_xmit_queue)) != NULL)
- reduce_win = dn_nsp_clone_and_send(skb, GFP_ATOMIC);
-
- /*
- * If we may not send any data, we don't.
- * If we are still trying to get some other data down the
- * channel, we don't try and send any data.
- */
- if (reduce_win || (scp->flowrem_sw != DN_SEND))
- goto recalc_window;
-
- if ((skb = skb_peek(&scp->data_xmit_queue)) != NULL)
- reduce_win = dn_nsp_clone_and_send(skb, GFP_ATOMIC);
-
- /*
- * If we've sent any frame more than once, we cut the
- * send window size in half. There is always a minimum
- * window size of one available.
- */
-recalc_window:
- if (reduce_win) {
- scp->snd_window >>= 1;
- if (scp->snd_window < NSP_MIN_WINDOW)
- scp->snd_window = NSP_MIN_WINDOW;
- }
-}
-
-int dn_nsp_xmit_timeout(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- dn_nsp_output(sk);
-
- if (!skb_queue_empty(&scp->data_xmit_queue) ||
- !skb_queue_empty(&scp->other_xmit_queue))
- scp->persist = dn_nsp_persist(sk);
-
- return 0;
-}
-
-static inline __le16 *dn_mk_common_header(struct dn_scp *scp, struct sk_buff *skb, unsigned char msgflag, int len)
-{
- unsigned char *ptr = skb_push(skb, len);
-
- BUG_ON(len < 5);
-
- *ptr++ = msgflag;
- *((__le16 *)ptr) = scp->addrrem;
- ptr += 2;
- *((__le16 *)ptr) = scp->addrloc;
- ptr += 2;
- return (__le16 __force *)ptr;
-}
-
-static __le16 *dn_mk_ack_header(struct sock *sk, struct sk_buff *skb, unsigned char msgflag, int hlen, int other)
-{
- struct dn_scp *scp = DN_SK(sk);
- unsigned short acknum = scp->numdat_rcv & 0x0FFF;
- unsigned short ackcrs = scp->numoth_rcv & 0x0FFF;
- __le16 *ptr;
-
- BUG_ON(hlen < 9);
-
- scp->ackxmt_dat = acknum;
- scp->ackxmt_oth = ackcrs;
- acknum |= 0x8000;
- ackcrs |= 0x8000;
-
- /* If this is an "other data/ack" message, swap acknum and ackcrs */
- if (other)
- swap(acknum, ackcrs);
-
- /* Set "cross subchannel" bit in ackcrs */
- ackcrs |= 0x2000;
-
- ptr = dn_mk_common_header(scp, skb, msgflag, hlen);
-
- *ptr++ = cpu_to_le16(acknum);
- *ptr++ = cpu_to_le16(ackcrs);
-
- return ptr;
-}
-
-static __le16 *dn_nsp_mk_data_header(struct sock *sk, struct sk_buff *skb, int oth)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- __le16 *ptr = dn_mk_ack_header(sk, skb, cb->nsp_flags, 11, oth);
-
- if (unlikely(oth)) {
- cb->segnum = scp->numoth;
- seq_add(&scp->numoth, 1);
- } else {
- cb->segnum = scp->numdat;
- seq_add(&scp->numdat, 1);
- }
- *(ptr++) = cpu_to_le16(cb->segnum);
-
- return ptr;
-}
-
-void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb,
- gfp_t gfp, int oth)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- unsigned long t = ((scp->nsp_srtt >> 2) + scp->nsp_rttvar) >> 1;
-
- cb->xmit_count = 0;
- dn_nsp_mk_data_header(sk, skb, oth);
-
- /*
- * Slow start: If we have been idle for more than
- * one RTT, then reset window to min size.
- */
- if ((jiffies - scp->stamp) > t)
- scp->snd_window = NSP_MIN_WINDOW;
-
- if (oth)
- skb_queue_tail(&scp->other_xmit_queue, skb);
- else
- skb_queue_tail(&scp->data_xmit_queue, skb);
-
- if (scp->flowrem_sw != DN_SEND)
- return;
-
- dn_nsp_clone_and_send(skb, gfp);
-}
-
-
-int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb, struct sk_buff_head *q, unsigned short acknum)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct dn_scp *scp = DN_SK(sk);
- struct sk_buff *skb2, *n, *ack = NULL;
- int wakeup = 0;
- int try_retrans = 0;
- unsigned long reftime = cb->stamp;
- unsigned long pkttime;
- unsigned short xmit_count;
- unsigned short segnum;
-
- skb_queue_walk_safe(q, skb2, n) {
- struct dn_skb_cb *cb2 = DN_SKB_CB(skb2);
-
- if (dn_before_or_equal(cb2->segnum, acknum))
- ack = skb2;
-
- /* printk(KERN_DEBUG "ack: %s %04x %04x\n", ack ? "ACK" : "SKIP", (int)cb2->segnum, (int)acknum); */
-
- if (ack == NULL)
- continue;
-
- /* printk(KERN_DEBUG "check_xmit_queue: %04x, %d\n", acknum, cb2->xmit_count); */
-
- /* Does _last_ packet acked have xmit_count > 1 */
- try_retrans = 0;
- /* Remember to wake up the sending process */
- wakeup = 1;
- /* Keep various statistics */
- pkttime = cb2->stamp;
- xmit_count = cb2->xmit_count;
- segnum = cb2->segnum;
- /* Remove and drop ack'ed packet */
- skb_unlink(ack, q);
- kfree_skb(ack);
- ack = NULL;
-
- /*
- * We don't expect to see acknowledgements for packets we
- * haven't sent yet.
- */
- WARN_ON(xmit_count == 0);
-
- /*
- * If the packet has only been sent once, we can use it
- * to calculate the RTT and also open the window a little
- * further.
- */
- if (xmit_count == 1) {
- if (dn_equal(segnum, acknum))
- dn_nsp_rtt(sk, (long)(pkttime - reftime));
-
- if (scp->snd_window < scp->max_window)
- scp->snd_window++;
- }
-
- /*
- * Packet has been sent more than once. If this is the last
- * packet to be acknowledged then we want to send the next
- * packet in the send queue again (assumes the remote host does
- * go-back-N error control).
- */
- if (xmit_count > 1)
- try_retrans = 1;
- }
-
- if (try_retrans)
- dn_nsp_output(sk);
-
- return wakeup;
-}
-
-void dn_nsp_send_data_ack(struct sock *sk)
-{
- struct sk_buff *skb = NULL;
-
- if ((skb = dn_alloc_skb(sk, 9, GFP_ATOMIC)) == NULL)
- return;
-
- skb_reserve(skb, 9);
- dn_mk_ack_header(sk, skb, 0x04, 9, 0);
- dn_nsp_send(skb);
-}
-
-void dn_nsp_send_oth_ack(struct sock *sk)
-{
- struct sk_buff *skb = NULL;
-
- if ((skb = dn_alloc_skb(sk, 9, GFP_ATOMIC)) == NULL)
- return;
-
- skb_reserve(skb, 9);
- dn_mk_ack_header(sk, skb, 0x14, 9, 1);
- dn_nsp_send(skb);
-}
-
-
-void dn_send_conn_ack (struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct sk_buff *skb = NULL;
- struct nsp_conn_ack_msg *msg;
-
- if ((skb = dn_alloc_skb(sk, 3, sk->sk_allocation)) == NULL)
- return;
-
- msg = skb_put(skb, 3);
- msg->msgflg = 0x24;
- msg->dstaddr = scp->addrrem;
-
- dn_nsp_send(skb);
-}
-
-static int dn_nsp_retrans_conn_conf(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->state == DN_CC)
- dn_send_conn_conf(sk, GFP_ATOMIC);
-
- return 0;
-}
-
-void dn_send_conn_conf(struct sock *sk, gfp_t gfp)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct sk_buff *skb = NULL;
- struct nsp_conn_init_msg *msg;
- __u8 len = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
-
- if ((skb = dn_alloc_skb(sk, 50 + len, gfp)) == NULL)
- return;
-
- msg = skb_put(skb, sizeof(*msg));
- msg->msgflg = 0x28;
- msg->dstaddr = scp->addrrem;
- msg->srcaddr = scp->addrloc;
- msg->services = scp->services_loc;
- msg->info = scp->info_loc;
- msg->segsize = cpu_to_le16(scp->segsize_loc);
-
- skb_put_u8(skb, len);
-
- if (len > 0)
- skb_put_data(skb, scp->conndata_out.opt_data, len);
-
-
- dn_nsp_send(skb);
-
- scp->persist = dn_nsp_persist(sk);
- scp->persist_fxn = dn_nsp_retrans_conn_conf;
-}
-
-
-static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg,
- unsigned short reason, gfp_t gfp,
- struct dst_entry *dst,
- int ddl, unsigned char *dd, __le16 rem, __le16 loc)
-{
- struct sk_buff *skb = NULL;
- int size = 7 + ddl + ((msgflg == NSP_DISCINIT) ? 1 : 0);
- unsigned char *msg;
-
- if ((dst == NULL) || (rem == 0)) {
- net_dbg_ratelimited("DECnet: dn_nsp_do_disc: BUG! Please report this to SteveW@ACM.org rem=%u dst=%p\n",
- le16_to_cpu(rem), dst);
- return;
- }
-
- if ((skb = dn_alloc_skb(sk, size, gfp)) == NULL)
- return;
-
- msg = skb_put(skb, size);
- *msg++ = msgflg;
- *(__le16 *)msg = rem;
- msg += 2;
- *(__le16 *)msg = loc;
- msg += 2;
- *(__le16 *)msg = cpu_to_le16(reason);
- msg += 2;
- if (msgflg == NSP_DISCINIT)
- *msg++ = ddl;
-
- if (ddl) {
- memcpy(msg, dd, ddl);
- }
-
- /*
- * This doesn't go via the dn_nsp_send() function since we need
- * to be able to send disc packets out which have no socket
- * associations.
- */
- skb_dst_set(skb, dst_clone(dst));
- dst_output(&init_net, skb->sk, skb);
-}
-
-
-void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg,
- unsigned short reason, gfp_t gfp)
-{
- struct dn_scp *scp = DN_SK(sk);
- int ddl = 0;
-
- if (msgflg == NSP_DISCINIT)
- ddl = le16_to_cpu(scp->discdata_out.opt_optl);
-
- if (reason == 0)
- reason = le16_to_cpu(scp->discdata_out.opt_status);
-
- dn_nsp_do_disc(sk, msgflg, reason, gfp, __sk_dst_get(sk), ddl,
- scp->discdata_out.opt_data, scp->addrrem, scp->addrloc);
-}
-
-
-void dn_nsp_return_disc(struct sk_buff *skb, unsigned char msgflg,
- unsigned short reason)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- int ddl = 0;
- gfp_t gfp = GFP_ATOMIC;
-
- dn_nsp_do_disc(NULL, msgflg, reason, gfp, skb_dst(skb), ddl,
- NULL, cb->src_port, cb->dst_port);
-}
-
-
-void dn_nsp_send_link(struct sock *sk, unsigned char lsflags, char fcval)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct sk_buff *skb;
- unsigned char *ptr;
- gfp_t gfp = GFP_ATOMIC;
-
- if ((skb = dn_alloc_skb(sk, DN_MAX_NSP_DATA_HEADER + 2, gfp)) == NULL)
- return;
-
- skb_reserve(skb, DN_MAX_NSP_DATA_HEADER);
- ptr = skb_put(skb, 2);
- DN_SKB_CB(skb)->nsp_flags = 0x10;
- *ptr++ = lsflags;
- *ptr = fcval;
-
- dn_nsp_queue_xmit(sk, skb, gfp, 1);
-
- scp->persist = dn_nsp_persist(sk);
- scp->persist_fxn = dn_nsp_xmit_timeout;
-}
-
-static int dn_nsp_retrans_conninit(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->state == DN_CI)
- dn_nsp_send_conninit(sk, NSP_RCI);
-
- return 0;
-}
-
-void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct nsp_conn_init_msg *msg;
- unsigned char aux;
- unsigned char menuver;
- struct dn_skb_cb *cb;
- unsigned char type = 1;
- gfp_t allocation = (msgflg == NSP_CI) ? sk->sk_allocation : GFP_ATOMIC;
- struct sk_buff *skb = dn_alloc_skb(sk, 200, allocation);
-
- if (!skb)
- return;
-
- cb = DN_SKB_CB(skb);
- msg = skb_put(skb, sizeof(*msg));
-
- msg->msgflg = msgflg;
-	msg->dstaddr = 0x0000;		/* Remote Node will assign it */
-
- msg->srcaddr = scp->addrloc;
- msg->services = scp->services_loc; /* Requested flow control */
- msg->info = scp->info_loc; /* Version Number */
- msg->segsize = cpu_to_le16(scp->segsize_loc); /* Max segment size */
-
- if (scp->peer.sdn_objnum)
- type = 0;
-
- skb_put(skb, dn_sockaddr2username(&scp->peer,
- skb_tail_pointer(skb), type));
- skb_put(skb, dn_sockaddr2username(&scp->addr,
- skb_tail_pointer(skb), 2));
-
- menuver = DN_MENUVER_ACC | DN_MENUVER_USR;
- if (scp->peer.sdn_flags & SDF_PROXY)
- menuver |= DN_MENUVER_PRX;
- if (scp->peer.sdn_flags & SDF_UICPROXY)
- menuver |= DN_MENUVER_UIC;
-
- skb_put_u8(skb, menuver); /* Menu Version */
-
- aux = scp->accessdata.acc_userl;
- skb_put_u8(skb, aux);
- if (aux > 0)
- skb_put_data(skb, scp->accessdata.acc_user, aux);
-
- aux = scp->accessdata.acc_passl;
- skb_put_u8(skb, aux);
- if (aux > 0)
- skb_put_data(skb, scp->accessdata.acc_pass, aux);
-
- aux = scp->accessdata.acc_accl;
- skb_put_u8(skb, aux);
- if (aux > 0)
- skb_put_data(skb, scp->accessdata.acc_acc, aux);
-
- aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
- skb_put_u8(skb, aux);
- if (aux > 0)
- skb_put_data(skb, scp->conndata_out.opt_data, aux);
-
- scp->persist = dn_nsp_persist(sk);
- scp->persist_fxn = dn_nsp_retrans_conninit;
-
- cb->rt_flags = DN_RT_F_RQR;
-
- dn_nsp_send(skb);
-}
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
deleted file mode 100644
index 7e85f2a1ae25..000000000000
--- a/net/decnet/dn_route.c
+++ /dev/null
@@ -1,1922 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Routing Functions (Endnode and Router)
- *
- * Authors: Steve Whitehouse <SteveW@ACM.org>
- * Eduardo Marcelo Serrat <emserrat@geocities.com>
- *
- * Changes:
- * Steve Whitehouse : Fixes to allow "intra-ethernet" and
- * "return-to-sender" bits on outgoing
- * packets.
- * Steve Whitehouse : Timeouts for cached routes.
- * Steve Whitehouse : Use dst cache for input routes too.
- * Steve Whitehouse : Fixed error values in dn_send_skb.
- * Steve Whitehouse : Rework routing functions to better fit
- * DECnet routing design
- * Alexey Kuznetsov : New SMP locking
- * Steve Whitehouse : More SMP locking changes & dn_cache_dump()
- * Steve Whitehouse : Prerouting NF hook, now really is prerouting.
- * Fixed possible skb leak in rtnetlink funcs.
- * Steve Whitehouse : Dave Miller's dynamic hash table sizing and
- * Alexey Kuznetsov's finer grained locking
- * from ipv4/route.c.
- * Steve Whitehouse : Routing is now starting to look like a
- * sensible set of code now, mainly due to
- * my copying the IPv4 routing code. The
- * hooks here are modified and will continue
- * to evolve for a while.
- * Steve Whitehouse : Real SMP at last :-) Also new netfilter
- * stuff. Look out raw sockets your days
- * are numbered!
- * Steve Whitehouse : Added return-to-sender functions. Added
- * backlog congestion level return codes.
- * Steve Whitehouse : Fixed bug where routes were set up with
- * no ref count on net devices.
- * Steve Whitehouse : RCU for the route cache
- * Steve Whitehouse : Preparations for the flow cache
- * Steve Whitehouse : Prepare for nonlinear skbs
- */
-
-/******************************************************************************
- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
-
-*******************************************************************************/
-
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#include <linux/inet.h>
-#include <linux/route.h>
-#include <linux/in_route.h>
-#include <linux/slab.h>
-#include <net/sock.h>
-#include <linux/mm.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <linux/rtnetlink.h>
-#include <linux/string.h>
-#include <linux/netfilter_decnet.h>
-#include <linux/rcupdate.h>
-#include <linux/times.h>
-#include <linux/export.h>
-#include <asm/errno.h>
-#include <net/net_namespace.h>
-#include <net/netlink.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/fib_rules.h>
-#include <net/dn.h>
-#include <net/dn_dev.h>
-#include <net/dn_nsp.h>
-#include <net/dn_route.h>
-#include <net/dn_neigh.h>
-#include <net/dn_fib.h>
-
-struct dn_rt_hash_bucket {
- struct dn_route __rcu *chain;
- spinlock_t lock;
-};
-
-extern struct neigh_table dn_neigh_table;
-
-
-static unsigned char dn_hiord_addr[6] = {0xAA, 0x00, 0x04, 0x00, 0x00, 0x00};
-
-static const int dn_rt_min_delay = 2 * HZ;
-static const int dn_rt_max_delay = 10 * HZ;
-static const int dn_rt_mtu_expires = 10 * 60 * HZ;
-
-static unsigned long dn_rt_deadline;
-
-static int dn_dst_gc(struct dst_ops *ops);
-static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
-static unsigned int dn_dst_default_advmss(const struct dst_entry *dst);
-static unsigned int dn_dst_mtu(const struct dst_entry *dst);
-static void dn_dst_destroy(struct dst_entry *);
-static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how);
-static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
-static void dn_dst_link_failure(struct sk_buff *);
-static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb , u32 mtu,
- bool confirm_neigh);
-static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb);
-static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
- struct sk_buff *skb,
- const void *daddr);
-static int dn_route_input(struct sk_buff *);
-static void dn_run_flush(struct timer_list *unused);
-
-static struct dn_rt_hash_bucket *dn_rt_hash_table;
-static unsigned int dn_rt_hash_mask;
-
-static struct timer_list dn_route_timer;
-static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush);
-int decnet_dst_gc_interval = 2;
-
-static struct dst_ops dn_dst_ops = {
- .family = PF_DECnet,
- .gc_thresh = 128,
- .gc = dn_dst_gc,
- .check = dn_dst_check,
- .default_advmss = dn_dst_default_advmss,
- .mtu = dn_dst_mtu,
- .cow_metrics = dst_cow_metrics_generic,
- .destroy = dn_dst_destroy,
- .ifdown = dn_dst_ifdown,
- .negative_advice = dn_dst_negative_advice,
- .link_failure = dn_dst_link_failure,
- .update_pmtu = dn_dst_update_pmtu,
- .redirect = dn_dst_redirect,
- .neigh_lookup = dn_dst_neigh_lookup,
-};
-
-static void dn_dst_destroy(struct dst_entry *dst)
-{
- struct dn_route *rt = (struct dn_route *) dst;
-
- if (rt->n)
- neigh_release(rt->n);
- dst_destroy_metrics_generic(dst);
-}
-
-static void dn_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how)
-{
- if (how) {
- struct dn_route *rt = (struct dn_route *) dst;
- struct neighbour *n = rt->n;
-
- if (n && n->dev == dev) {
- n->dev = dev_net(dev)->loopback_dev;
- dev_hold(n->dev);
- dev_put(dev);
- }
- }
-}
-
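-/*
- * Hash on src XOR dst, folded down to the table size by XOR shifts.
- */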
-static __inline__ unsigned int dn_hash(__le16 src, __le16 dst)
-{
- __u16 tmp = (__u16 __force)(src ^ dst);
- tmp ^= (tmp >> 3);
- tmp ^= (tmp >> 5);
- tmp ^= (tmp >> 10);
- return dn_rt_hash_mask & (unsigned int)tmp;
-}
-
-static void dn_dst_check_expire(struct timer_list *unused)
-{
- int i;
- struct dn_route *rt;
- struct dn_route __rcu **rtp;
- unsigned long now = jiffies;
- unsigned long expire = 120 * HZ;
-
- for (i = 0; i <= dn_rt_hash_mask; i++) {
- rtp = &dn_rt_hash_table[i].chain;
-
- spin_lock(&dn_rt_hash_table[i].lock);
- while ((rt = rcu_dereference_protected(*rtp,
- lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
- if (atomic_read(&rt->dst.__refcnt) > 1 ||
- (now - rt->dst.lastuse) < expire) {
- rtp = &rt->dn_next;
- continue;
- }
- *rtp = rt->dn_next;
- rt->dn_next = NULL;
- dst_dev_put(&rt->dst);
- dst_release(&rt->dst);
- }
- spin_unlock(&dn_rt_hash_table[i].lock);
-
- if ((jiffies - now) > 0)
- break;
- }
-
- mod_timer(&dn_route_timer, now + decnet_dst_gc_interval * HZ);
-}
-
-static int dn_dst_gc(struct dst_ops *ops)
-{
- struct dn_route *rt;
- struct dn_route __rcu **rtp;
- int i;
- unsigned long now = jiffies;
- unsigned long expire = 10 * HZ;
-
- for (i = 0; i <= dn_rt_hash_mask; i++) {
-
- spin_lock_bh(&dn_rt_hash_table[i].lock);
- rtp = &dn_rt_hash_table[i].chain;
-
- while ((rt = rcu_dereference_protected(*rtp,
- lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
- if (atomic_read(&rt->dst.__refcnt) > 1 ||
- (now - rt->dst.lastuse) < expire) {
- rtp = &rt->dn_next;
- continue;
- }
- *rtp = rt->dn_next;
- rt->dn_next = NULL;
- dst_dev_put(&rt->dst);
- dst_release(&rt->dst);
- break;
- }
- spin_unlock_bh(&dn_rt_hash_table[i].lock);
- }
-
- return 0;
-}
-
-/*
- * The decnet standards don't impose a particular minimum mtu; what they
- * do insist on is that the routing layer accepts a datagram at least
- * 230 bytes long. Here we have to subtract the routing header length from
- * 230 to get the minimum acceptable mtu. If there is no neighbour, then we
- * assume the worst and use a long header size.
- *
- * We update both the mtu and the advertised mss (i.e. the segment size we
- * advertise to the other end).
- */
-static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu,
- bool confirm_neigh)
-{
- struct dn_route *rt = (struct dn_route *) dst;
- struct neighbour *n = rt->n;
- u32 min_mtu = 230;
- struct dn_dev *dn;
-
- dn = n ? rcu_dereference_raw(n->dev->dn_ptr) : NULL;
-
- if (dn && dn->use_long == 0)
- min_mtu -= 6;
- else
- min_mtu -= 21;
-
- if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= min_mtu) {
- if (!(dst_metric_locked(dst, RTAX_MTU))) {
- dst_metric_set(dst, RTAX_MTU, mtu);
- dst_set_expires(dst, dn_rt_mtu_expires);
- }
- if (!(dst_metric_locked(dst, RTAX_ADVMSS))) {
- u32 mss = mtu - DN_MAX_NSP_DATA_HEADER;
- u32 existing_mss = dst_metric_raw(dst, RTAX_ADVMSS);
- if (!existing_mss || existing_mss > mss)
- dst_metric_set(dst, RTAX_ADVMSS, mss);
- }
- }
-}
-
-static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb)
-{
-}
-
-/*
- * Called when a route has been marked obsolete (e.g. by a routing cache flush).
- */
-static struct dst_entry *dn_dst_check(struct dst_entry *dst, __u32 cookie)
-{
- return NULL;
-}
-
-static struct dst_entry *dn_dst_negative_advice(struct dst_entry *dst)
-{
- dst_release(dst);
- return NULL;
-}
-
-static void dn_dst_link_failure(struct sk_buff *skb)
-{
-}
-
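-/* Branch-free compare: returns non-zero only if all key fields match */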
-static inline int compare_keys(struct flowidn *fl1, struct flowidn *fl2)
-{
- return ((fl1->daddr ^ fl2->daddr) |
- (fl1->saddr ^ fl2->saddr) |
- (fl1->flowidn_mark ^ fl2->flowidn_mark) |
- (fl1->flowidn_scope ^ fl2->flowidn_scope) |
- (fl1->flowidn_oif ^ fl2->flowidn_oif) |
- (fl1->flowidn_iif ^ fl2->flowidn_iif)) == 0;
-}
-
-static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_route **rp)
-{
- struct dn_route *rth;
- struct dn_route __rcu **rthp;
- unsigned long now = jiffies;
-
- rthp = &dn_rt_hash_table[hash].chain;
-
- spin_lock_bh(&dn_rt_hash_table[hash].lock);
- while ((rth = rcu_dereference_protected(*rthp,
- lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) {
- if (compare_keys(&rth->fld, &rt->fld)) {
- /* Put it first */
- *rthp = rth->dn_next;
- rcu_assign_pointer(rth->dn_next,
- dn_rt_hash_table[hash].chain);
- rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
-
- dst_hold_and_use(&rth->dst, now);
- spin_unlock_bh(&dn_rt_hash_table[hash].lock);
-
- dst_release_immediate(&rt->dst);
- *rp = rth;
- return 0;
- }
- rthp = &rth->dn_next;
- }
-
- rcu_assign_pointer(rt->dn_next, dn_rt_hash_table[hash].chain);
- rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
-
- dst_hold_and_use(&rt->dst, now);
- spin_unlock_bh(&dn_rt_hash_table[hash].lock);
- *rp = rt;
- return 0;
-}
-
-static void dn_run_flush(struct timer_list *unused)
-{
- int i;
- struct dn_route *rt, *next;
-
-	for (i = 0; i <= dn_rt_hash_mask; i++) {
- spin_lock_bh(&dn_rt_hash_table[i].lock);
-
- rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL);
- if (!rt)
- goto nothing_to_declare;
-
- for (; rt; rt = next) {
- next = rcu_dereference_raw(rt->dn_next);
- RCU_INIT_POINTER(rt->dn_next, NULL);
- dst_dev_put(&rt->dst);
- dst_release(&rt->dst);
- }
-
-nothing_to_declare:
- spin_unlock_bh(&dn_rt_hash_table[i].lock);
- }
-}
-
-static DEFINE_SPINLOCK(dn_rt_flush_lock);
-
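-/*
- * Schedule a flush of the routing cache. A negative delay means
- * "after the default minimum delay", zero means "flush now".
- */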
-void dn_rt_cache_flush(int delay)
-{
- unsigned long now = jiffies;
- int user_mode = !in_interrupt();
-
- if (delay < 0)
- delay = dn_rt_min_delay;
-
- spin_lock_bh(&dn_rt_flush_lock);
-
- if (del_timer(&dn_rt_flush_timer) && delay > 0 && dn_rt_deadline) {
- long tmo = (long)(dn_rt_deadline - now);
-
- if (user_mode && tmo < dn_rt_max_delay - dn_rt_min_delay)
- tmo = 0;
-
- if (delay > tmo)
- delay = tmo;
- }
-
- if (delay <= 0) {
- spin_unlock_bh(&dn_rt_flush_lock);
- dn_run_flush(NULL);
- return;
- }
-
- if (dn_rt_deadline == 0)
- dn_rt_deadline = now + dn_rt_max_delay;
-
- dn_rt_flush_timer.expires = now + delay;
- add_timer(&dn_rt_flush_timer);
- spin_unlock_bh(&dn_rt_flush_lock);
-}
-
-/**
- * dn_return_short - Return a short packet to its sender
- * @skb: The packet to return
- *
- */
-static int dn_return_short(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb;
- unsigned char *ptr;
- __le16 *src;
- __le16 *dst;
-
- /* Add back headers */
- skb_push(skb, skb->data - skb_network_header(skb));
-
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (!skb)
- return NET_RX_DROP;
-
- cb = DN_SKB_CB(skb);
- /* Skip packet length and point to flags */
- ptr = skb->data + 2;
- *ptr++ = (cb->rt_flags & ~DN_RT_F_RQR) | DN_RT_F_RTS;
-
- dst = (__le16 *)ptr;
- ptr += 2;
- src = (__le16 *)ptr;
- ptr += 2;
- *ptr = 0; /* Zero hop count */
-
- swap(*src, *dst);
-
- skb->pkt_type = PACKET_OUTGOING;
- dn_rt_finish_output(skb, NULL, NULL);
- return NET_RX_SUCCESS;
-}
-
-/**
- * dn_return_long - Return a long packet to its sender
- * @skb: The long format packet to return
- *
- */
-static int dn_return_long(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb;
- unsigned char *ptr;
- unsigned char *src_addr, *dst_addr;
- unsigned char tmp[ETH_ALEN];
-
- /* Add back all headers */
- skb_push(skb, skb->data - skb_network_header(skb));
-
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (!skb)
- return NET_RX_DROP;
-
- cb = DN_SKB_CB(skb);
- /* Ignore packet length and point to flags */
- ptr = skb->data + 2;
-
- /* Skip padding */
- if (*ptr & DN_RT_F_PF) {
- char padlen = (*ptr & ~DN_RT_F_PF);
- ptr += padlen;
- }
-
- *ptr++ = (cb->rt_flags & ~DN_RT_F_RQR) | DN_RT_F_RTS;
- ptr += 2;
- dst_addr = ptr;
- ptr += 8;
- src_addr = ptr;
- ptr += 6;
- *ptr = 0; /* Zero hop count */
-
- /* Swap source and destination */
- memcpy(tmp, src_addr, ETH_ALEN);
- memcpy(src_addr, dst_addr, ETH_ALEN);
- memcpy(dst_addr, tmp, ETH_ALEN);
-
- skb->pkt_type = PACKET_OUTGOING;
- dn_rt_finish_output(skb, dst_addr, src_addr);
- return NET_RX_SUCCESS;
-}
-
-/**
- * dn_route_rx_packet - Try and find a route for an incoming packet
- * @net: The applicable net namespace
- * @sk: Socket packet transmitted on
- * @skb: The packet to find a route for
- *
- * Returns: result of input function if route is found, error code otherwise
- */
-static int dn_route_rx_packet(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct dn_skb_cb *cb;
- int err;
-
- err = dn_route_input(skb);
- if (err == 0)
- return dst_input(skb);
-
- cb = DN_SKB_CB(skb);
- if (decnet_debug_level & 4) {
- char *devname = skb->dev ? skb->dev->name : "???";
-
- printk(KERN_DEBUG
- "DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n",
- (int)cb->rt_flags, devname, skb->len,
- le16_to_cpu(cb->src), le16_to_cpu(cb->dst),
- err, skb->pkt_type);
- }
-
- if ((skb->pkt_type == PACKET_HOST) && (cb->rt_flags & DN_RT_F_RQR)) {
- switch (cb->rt_flags & DN_RT_PKT_MSK) {
- case DN_RT_PKT_SHORT:
- return dn_return_short(skb);
- case DN_RT_PKT_LONG:
- return dn_return_long(skb);
- }
- }
-
- kfree_skb(skb);
- return NET_RX_DROP;
-}
-
-static int dn_route_rx_long(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- unsigned char *ptr = skb->data;
-
- if (!pskb_may_pull(skb, 21)) /* 20 for long header, 1 for shortest nsp */
- goto drop_it;
-
- skb_pull(skb, 20);
- skb_reset_transport_header(skb);
-
- /* Destination info */
- ptr += 2;
- cb->dst = dn_eth2dn(ptr);
- if (memcmp(ptr, dn_hiord_addr, 4) != 0)
- goto drop_it;
- ptr += 6;
-
-
- /* Source info */
- ptr += 2;
- cb->src = dn_eth2dn(ptr);
- if (memcmp(ptr, dn_hiord_addr, 4) != 0)
- goto drop_it;
- ptr += 6;
- /* Other junk */
- ptr++;
- cb->hops = *ptr++; /* Visit Count */
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_route_rx_packet);
-
-drop_it:
- kfree_skb(skb);
- return NET_RX_DROP;
-}
-
-
-
-static int dn_route_rx_short(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- unsigned char *ptr = skb->data;
-
- if (!pskb_may_pull(skb, 6)) /* 5 for short header + 1 for shortest nsp */
- goto drop_it;
-
- skb_pull(skb, 5);
- skb_reset_transport_header(skb);
-
- cb->dst = *(__le16 *)ptr;
- ptr += 2;
- cb->src = *(__le16 *)ptr;
- ptr += 2;
- cb->hops = *ptr & 0x3f;
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_route_rx_packet);
-
-drop_it:
- kfree_skb(skb);
- return NET_RX_DROP;
-}
-
-static int dn_route_discard(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- /*
- * I know we drop the packet here, but that's considered success in
- * this case
- */
- kfree_skb(skb);
- return NET_RX_SUCCESS;
-}
-
-static int dn_route_ptp_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- dn_dev_hello(skb);
- dn_neigh_pointopoint_hello(skb);
- return NET_RX_SUCCESS;
-}
-
-int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
-{
- struct dn_skb_cb *cb;
- unsigned char flags = 0;
- __u16 len = le16_to_cpu(*(__le16 *)skb->data);
- struct dn_dev *dn = rcu_dereference(dev->dn_ptr);
- unsigned char padlen = 0;
-
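-	/* Frames begin with a 16 bit little-endian length field, read
-	 * into len above and stripped below.
-	 */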
- if (!net_eq(dev_net(dev), &init_net))
- goto dump_it;
-
- if (dn == NULL)
- goto dump_it;
-
- skb = skb_share_check(skb, GFP_ATOMIC);
- if (!skb)
- goto out;
-
- if (!pskb_may_pull(skb, 3))
- goto dump_it;
-
- skb_pull(skb, 2);
-
- if (len > skb->len)
- goto dump_it;
-
- skb_trim(skb, len);
-
- flags = *skb->data;
-
- cb = DN_SKB_CB(skb);
- cb->stamp = jiffies;
- cb->iif = dev->ifindex;
-
- /*
- * If we have padding, remove it.
- */
- if (flags & DN_RT_F_PF) {
- padlen = flags & ~DN_RT_F_PF;
- if (!pskb_may_pull(skb, padlen + 1))
- goto dump_it;
- skb_pull(skb, padlen);
- flags = *skb->data;
- }
-
- skb_reset_network_header(skb);
-
- /*
-	 * Weed out future-version DECnet frames
- */
- if (flags & DN_RT_F_VER)
- goto dump_it;
-
- cb->rt_flags = flags;
-
- if (decnet_debug_level & 1)
- printk(KERN_DEBUG
- "dn_route_rcv: got 0x%02x from %s [%d %d %d]\n",
- (int)flags, dev->name, len, skb->len,
- padlen);
-
- if (flags & DN_RT_PKT_CNTL) {
- if (unlikely(skb_linearize(skb)))
- goto dump_it;
-
- switch (flags & DN_RT_CNTL_MSK) {
- case DN_RT_PKT_INIT:
- dn_dev_init_pkt(skb);
- break;
- case DN_RT_PKT_VERI:
- dn_dev_veri_pkt(skb);
- break;
- }
-
- if (dn->parms.state != DN_DEV_S_RU)
- goto dump_it;
-
- switch (flags & DN_RT_CNTL_MSK) {
- case DN_RT_PKT_HELO:
- return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_route_ptp_hello);
-
- case DN_RT_PKT_L1RT:
- case DN_RT_PKT_L2RT:
- return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_route_discard);
- case DN_RT_PKT_ERTH:
- return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_neigh_router_hello);
-
- case DN_RT_PKT_EEDH:
- return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_neigh_endnode_hello);
- }
- } else {
- if (dn->parms.state != DN_DEV_S_RU)
- goto dump_it;
-
- skb_pull(skb, 1); /* Pull flags */
-
- switch (flags & DN_RT_PKT_MSK) {
- case DN_RT_PKT_LONG:
- return dn_route_rx_long(skb);
- case DN_RT_PKT_SHORT:
- return dn_route_rx_short(skb);
- }
- }
-
-dump_it:
- kfree_skb(skb);
-out:
- return NET_RX_DROP;
-}
-
-static int dn_output(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct dst_entry *dst = skb_dst(skb);
- struct dn_route *rt = (struct dn_route *)dst;
- struct net_device *dev = dst->dev;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
- int err = -EINVAL;
-
- if (rt->n == NULL)
- goto error;
-
- skb->dev = dev;
-
- cb->src = rt->rt_saddr;
- cb->dst = rt->rt_daddr;
-
- /*
- * Always set the Intra-Ethernet bit on all outgoing packets
-	 * originated on this node. The only valid flag from the upper
-	 * layers is return-to-sender-requested. Set hop count to 0 too.
- */
- cb->rt_flags &= ~DN_RT_F_RQR;
- cb->rt_flags |= DN_RT_F_IE;
- cb->hops = 0;
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT,
- &init_net, sk, skb, NULL, dev,
- dn_to_neigh_output);
-
-error:
- net_dbg_ratelimited("dn_output: This should not happen\n");
-
- kfree_skb(skb);
-
- return err;
-}
-
-static int dn_forward(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct dst_entry *dst = skb_dst(skb);
- struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr);
- struct dn_route *rt;
- int header_len;
- struct net_device *dev = skb->dev;
-
- if (skb->pkt_type != PACKET_HOST)
- goto drop;
-
- /* Ensure that we have enough space for headers */
- rt = (struct dn_route *)skb_dst(skb);
- header_len = dn_db->use_long ? 21 : 6;
- if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+header_len))
- goto drop;
-
- /*
- * Hop count exceeded.
- */
- if (++cb->hops > 30)
- goto drop;
-
- skb->dev = rt->dst.dev;
-
- /*
-	 * If the packet goes out on the same interface it came in on, set
- * the Intra-Ethernet bit. This has no effect for short
- * packets, so we don't need to test for them here.
- */
- cb->rt_flags &= ~DN_RT_F_IE;
- if (rt->rt_flags & RTCF_DOREDIRECT)
- cb->rt_flags |= DN_RT_F_IE;
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD,
- &init_net, NULL, skb, dev, skb->dev,
- dn_to_neigh_output);
-
-drop:
- kfree_skb(skb);
- return NET_RX_DROP;
-}
-
-/*
- * Used to catch bugs. This should never normally get
- * called.
- */
-static int dn_rt_bug_out(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
- net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n",
- le16_to_cpu(cb->src), le16_to_cpu(cb->dst));
-
- kfree_skb(skb);
-
- return NET_RX_DROP;
-}
-
-static int dn_rt_bug(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
- net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n",
- le16_to_cpu(cb->src), le16_to_cpu(cb->dst));
-
- kfree_skb(skb);
-
- return NET_RX_DROP;
-}
-
-static unsigned int dn_dst_default_advmss(const struct dst_entry *dst)
-{
- return dn_mss_from_pmtu(dst->dev, dst_mtu(dst));
-}
-
-static unsigned int dn_dst_mtu(const struct dst_entry *dst)
-{
- unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
-
- return mtu ? : dst->dev->mtu;
-}
-
-static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
- struct sk_buff *skb,
- const void *daddr)
-{
- return __neigh_lookup_errno(&dn_neigh_table, daddr, dst->dev);
-}
-
-static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
-{
- struct dn_fib_info *fi = res->fi;
- struct net_device *dev = rt->dst.dev;
- unsigned int mss_metric;
- struct neighbour *n;
-
- if (fi) {
- if (DN_FIB_RES_GW(*res) &&
- DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
- rt->rt_gateway = DN_FIB_RES_GW(*res);
- dst_init_metrics(&rt->dst, fi->fib_metrics, true);
- }
- rt->rt_type = res->type;
-
- if (dev != NULL && rt->n == NULL) {
- n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev);
- if (IS_ERR(n))
- return PTR_ERR(n);
- rt->n = n;
- }
-
- if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
- dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu);
- mss_metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS);
- if (mss_metric) {
- unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
- if (mss_metric > mss)
- dst_metric_set(&rt->dst, RTAX_ADVMSS, mss);
- }
- return 0;
-}
-
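-/*
- * Return how many leading bits of the two addresses match (0..16).
- */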
-static inline int dn_match_addr(__le16 addr1, __le16 addr2)
-{
- __u16 tmp = le16_to_cpu(addr1) ^ le16_to_cpu(addr2);
- int match = 16;
- while (tmp) {
- tmp >>= 1;
- match--;
- }
- return match;
-}
-
-static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int scope)
-{
- __le16 saddr = 0;
- struct dn_dev *dn_db;
- struct dn_ifaddr *ifa;
- int best_match = 0;
- int ret;
-
- rcu_read_lock();
- dn_db = rcu_dereference(dev->dn_ptr);
- for (ifa = rcu_dereference(dn_db->ifa_list);
- ifa != NULL;
- ifa = rcu_dereference(ifa->ifa_next)) {
- if (ifa->ifa_scope > scope)
- continue;
- if (!daddr) {
- saddr = ifa->ifa_local;
- break;
- }
-		ret = dn_match_addr(daddr, ifa->ifa_local);
-		if (ret > best_match) {
-			best_match = ret;
-			saddr = ifa->ifa_local;
-		}
-		if (best_match == 0)
-			saddr = ifa->ifa_local;
- }
- rcu_read_unlock();
-
- return saddr;
-}
-
-static inline __le16 __dn_fib_res_prefsrc(struct dn_fib_res *res)
-{
- return dnet_select_source(DN_FIB_RES_DEV(*res), DN_FIB_RES_GW(*res), res->scope);
-}
-
-static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_res *res)
-{
- __le16 mask = dnet_make_mask(res->prefixlen);
- return (daddr&~mask)|res->fi->fib_nh->nh_gw;
-}
-
-static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *oldflp, int try_hard)
-{
- struct flowidn fld = {
- .daddr = oldflp->daddr,
- .saddr = oldflp->saddr,
- .flowidn_scope = RT_SCOPE_UNIVERSE,
- .flowidn_mark = oldflp->flowidn_mark,
- .flowidn_iif = LOOPBACK_IFINDEX,
- .flowidn_oif = oldflp->flowidn_oif,
- };
- struct dn_route *rt = NULL;
- struct net_device *dev_out = NULL, *dev;
- struct neighbour *neigh = NULL;
- unsigned int hash;
- unsigned int flags = 0;
- struct dn_fib_res res = { .fi = NULL, .type = RTN_UNICAST };
- int err;
- int free_res = 0;
- __le16 gateway = 0;
-
- if (decnet_debug_level & 16)
- printk(KERN_DEBUG
- "dn_route_output_slow: dst=%04x src=%04x mark=%d"
- " iif=%d oif=%d\n", le16_to_cpu(oldflp->daddr),
- le16_to_cpu(oldflp->saddr),
- oldflp->flowidn_mark, LOOPBACK_IFINDEX,
- oldflp->flowidn_oif);
-
-	/* If we have an output interface, verify it's a DECnet device */
- if (oldflp->flowidn_oif) {
- dev_out = dev_get_by_index(&init_net, oldflp->flowidn_oif);
- err = -ENODEV;
- if (dev_out && dev_out->dn_ptr == NULL) {
- dev_put(dev_out);
- dev_out = NULL;
- }
- if (dev_out == NULL)
- goto out;
- }
-
-	/* If we have a source address, verify that it's a local address */
- if (oldflp->saddr) {
- err = -EADDRNOTAVAIL;
-
- if (dev_out) {
- if (dn_dev_islocal(dev_out, oldflp->saddr))
- goto source_ok;
- dev_put(dev_out);
- goto out;
- }
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, dev) {
- if (!dev->dn_ptr)
- continue;
- if (!dn_dev_islocal(dev, oldflp->saddr))
- continue;
- if ((dev->flags & IFF_LOOPBACK) &&
- oldflp->daddr &&
- !dn_dev_islocal(dev, oldflp->daddr))
- continue;
-
- dev_out = dev;
- break;
- }
- rcu_read_unlock();
- if (dev_out == NULL)
- goto out;
- dev_hold(dev_out);
-source_ok:
- ;
- }
-
-	/* No destination? Assume it's local */
- if (!fld.daddr) {
- fld.daddr = fld.saddr;
-
- dev_put(dev_out);
- err = -EINVAL;
- dev_out = init_net.loopback_dev;
- if (!dev_out->dn_ptr)
- goto out;
- err = -EADDRNOTAVAIL;
- dev_hold(dev_out);
- if (!fld.daddr) {
- fld.daddr =
- fld.saddr = dnet_select_source(dev_out, 0,
- RT_SCOPE_HOST);
- if (!fld.daddr)
- goto done;
- }
- fld.flowidn_oif = LOOPBACK_IFINDEX;
- res.type = RTN_LOCAL;
- goto make_route;
- }
-
- if (decnet_debug_level & 16)
- printk(KERN_DEBUG
- "dn_route_output_slow: initial checks complete."
- " dst=%04x src=%04x oif=%d try_hard=%d\n",
- le16_to_cpu(fld.daddr), le16_to_cpu(fld.saddr),
- fld.flowidn_oif, try_hard);
-
-	/*
-	 * N.B. If the kernel is compiled without router support, then
-	 * dn_fib_lookup() will evaluate to non-zero, so this if () block
-	 * will always be executed.
-	 */
- err = -ESRCH;
- if (try_hard || (err = dn_fib_lookup(&fld, &res)) != 0) {
- struct dn_dev *dn_db;
- if (err != -ESRCH)
- goto out;
-		/*
-		 * Here the fallback is basically the standard algorithm for
-		 * routing in endnodes, as described in the DECnet routing
-		 * docs.
-		 *
-		 * If we are not trying hard, look in the neighbour cache.
-		 * The result is tested to ensure that if a specific output
-		 * device/source address was requested, then we honour that
-		 * here.
-		 */
- if (!try_hard) {
- neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fld.daddr);
- if (neigh) {
- if ((oldflp->flowidn_oif &&
- (neigh->dev->ifindex != oldflp->flowidn_oif)) ||
- (oldflp->saddr &&
- (!dn_dev_islocal(neigh->dev,
- oldflp->saddr)))) {
- neigh_release(neigh);
- neigh = NULL;
- } else {
- dev_put(dev_out);
- if (dn_dev_islocal(neigh->dev, fld.daddr)) {
- dev_out = init_net.loopback_dev;
- res.type = RTN_LOCAL;
- } else {
- dev_out = neigh->dev;
- }
- dev_hold(dev_out);
- goto select_source;
- }
- }
- }
-
-		/* Not there? Perhaps it's a local address */
- if (dev_out == NULL)
- dev_out = dn_dev_get_default();
- err = -ENODEV;
- if (dev_out == NULL)
- goto out;
- dn_db = rcu_dereference_raw(dev_out->dn_ptr);
- if (!dn_db)
- goto e_inval;
- /* Possible improvement - check all devices for local addr */
- if (dn_dev_islocal(dev_out, fld.daddr)) {
- dev_put(dev_out);
- dev_out = init_net.loopback_dev;
- dev_hold(dev_out);
- res.type = RTN_LOCAL;
- goto select_source;
- }
-		/* Not local either... try sending it to the default router */
- neigh = neigh_clone(dn_db->router);
- BUG_ON(neigh && neigh->dev != dev_out);
-
-		/* OK then, we assume it's directly connected and move on */
-select_source:
- if (neigh)
- gateway = ((struct dn_neigh *)neigh)->addr;
- if (gateway == 0)
- gateway = fld.daddr;
- if (fld.saddr == 0) {
- fld.saddr = dnet_select_source(dev_out, gateway,
- res.type == RTN_LOCAL ?
- RT_SCOPE_HOST :
- RT_SCOPE_LINK);
- if (fld.saddr == 0 && res.type != RTN_LOCAL)
- goto e_addr;
- }
- fld.flowidn_oif = dev_out->ifindex;
- goto make_route;
- }
- free_res = 1;
-
- if (res.type == RTN_NAT)
- goto e_inval;
-
- if (res.type == RTN_LOCAL) {
- if (!fld.saddr)
- fld.saddr = fld.daddr;
- dev_put(dev_out);
- dev_out = init_net.loopback_dev;
- dev_hold(dev_out);
- if (!dev_out->dn_ptr)
- goto e_inval;
- fld.flowidn_oif = dev_out->ifindex;
- if (res.fi)
- dn_fib_info_put(res.fi);
- res.fi = NULL;
- goto make_route;
- }
-
- if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0)
- dn_fib_select_multipath(&fld, &res);
-
- /*
- * We could add some logic to deal with default routes here and
- * get rid of some of the special casing above.
- */
-
- if (!fld.saddr)
- fld.saddr = DN_FIB_RES_PREFSRC(res);
-
- dev_put(dev_out);
- dev_out = DN_FIB_RES_DEV(res);
- dev_hold(dev_out);
- fld.flowidn_oif = dev_out->ifindex;
- gateway = DN_FIB_RES_GW(res);
-
-make_route:
- if (dev_out->flags & IFF_LOOPBACK)
- flags |= RTCF_LOCAL;
-
- rt = dst_alloc(&dn_dst_ops, dev_out, 0, DST_OBSOLETE_NONE, 0);
- if (rt == NULL)
- goto e_nobufs;
-
- rt->dn_next = NULL;
- memset(&rt->fld, 0, sizeof(rt->fld));
- rt->fld.saddr = oldflp->saddr;
- rt->fld.daddr = oldflp->daddr;
- rt->fld.flowidn_oif = oldflp->flowidn_oif;
- rt->fld.flowidn_iif = 0;
- rt->fld.flowidn_mark = oldflp->flowidn_mark;
-
- rt->rt_saddr = fld.saddr;
- rt->rt_daddr = fld.daddr;
- rt->rt_gateway = gateway ? gateway : fld.daddr;
- rt->rt_local_src = fld.saddr;
-
- rt->rt_dst_map = fld.daddr;
- rt->rt_src_map = fld.saddr;
-
- rt->n = neigh;
- neigh = NULL;
-
- rt->dst.lastuse = jiffies;
- rt->dst.output = dn_output;
- rt->dst.input = dn_rt_bug;
- rt->rt_flags = flags;
- if (flags & RTCF_LOCAL)
- rt->dst.input = dn_nsp_rx;
-
- err = dn_rt_set_next_hop(rt, &res);
- if (err)
- goto e_neighbour;
-
- hash = dn_hash(rt->fld.saddr, rt->fld.daddr);
- /* dn_insert_route() increments dst->__refcnt */
- dn_insert_route(rt, hash, (struct dn_route **)pprt);
-
-done:
- if (neigh)
- neigh_release(neigh);
- if (free_res)
- dn_fib_res_put(&res);
- dev_put(dev_out);
-out:
- return err;
-
-e_addr:
- err = -EADDRNOTAVAIL;
- goto done;
-e_inval:
- err = -EINVAL;
- goto done;
-e_nobufs:
- err = -ENOBUFS;
- goto done;
-e_neighbour:
- dst_release_immediate(&rt->dst);
- goto e_nobufs;
-}
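
Stripped of the locking and reference counting, dn_route_output_slow() is a
decision ladder: a FIB answer wins outright; failing that (when not trying
hard) the neighbour cache is consulted; then a local address; then the
default router from the device's dn_db; and as a last resort the
destination is assumed to be on-link. A toy model of that ladder — the enum
and every function name here are illustrative stand-ins, not kernel API:

    #include <stdio.h>

    enum route_kind { VIA_FIB, VIA_NEIGH_CACHE, LOCAL,
                      VIA_DEFAULT_ROUTER, ON_LINK };

    /* Stubs standing in for dn_fib_lookup(), the neighbour table, etc. */
    static int fib_lookup_ok(unsigned dst)   { (void)dst; return 0; }
    static int neigh_cache_has(unsigned dst) { (void)dst; return 0; }
    static int addr_is_local(unsigned dst)   { (void)dst; return 0; }
    static int have_default_router(void)     { return 1; }

    static enum route_kind resolve_output(unsigned dst, int try_hard)
    {
            if (fib_lookup_ok(dst))
                    return VIA_FIB;            /* the tables answered      */
            if (!try_hard && neigh_cache_has(dst))
                    return VIA_NEIGH_CACHE;    /* previously seen endnode  */
            if (addr_is_local(dst))
                    return LOCAL;              /* loop back to ourselves   */
            if (have_default_router())
                    return VIA_DEFAULT_ROUTER; /* endnode default routing  */
            return ON_LINK;                    /* "close eyes and pray"    */
    }

    int main(void)
    {
            printf("%d\n", resolve_output(0x0402, 0)); /* 3: default router */
            return 0;
    }
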
-
-
-/*
- * N.B. The flags may be moved into the flowi at some future stage.
- */
-static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *flp, int flags)
-{
- unsigned int hash = dn_hash(flp->saddr, flp->daddr);
- struct dn_route *rt = NULL;
-
- if (!(flags & MSG_TRYHARD)) {
- rcu_read_lock_bh();
- for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt;
- rt = rcu_dereference_bh(rt->dn_next)) {
- if ((flp->daddr == rt->fld.daddr) &&
- (flp->saddr == rt->fld.saddr) &&
- (flp->flowidn_mark == rt->fld.flowidn_mark) &&
- dn_is_output_route(rt) &&
- (rt->fld.flowidn_oif == flp->flowidn_oif)) {
- dst_hold_and_use(&rt->dst, jiffies);
- rcu_read_unlock_bh();
- *pprt = &rt->dst;
- return 0;
- }
- }
- rcu_read_unlock_bh();
- }
-
- return dn_route_output_slow(pprt, flp, flags);
-}
-
-static int dn_route_output_key(struct dst_entry **pprt, struct flowidn *flp, int flags)
-{
- int err;
-
- err = __dn_route_output_key(pprt, flp, flags);
- if (err == 0 && flp->flowidn_proto) {
- *pprt = xfrm_lookup(&init_net, *pprt,
- flowidn_to_flowi(flp), NULL, 0);
- if (IS_ERR(*pprt)) {
- err = PTR_ERR(*pprt);
- *pprt = NULL;
- }
- }
- return err;
-}
-
-int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *fl, struct sock *sk, int flags)
-{
- int err;
-
- err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD);
- if (err == 0 && fl->flowidn_proto) {
- *pprt = xfrm_lookup(&init_net, *pprt,
- flowidn_to_flowi(fl), sk, 0);
- if (IS_ERR(*pprt)) {
- err = PTR_ERR(*pprt);
- *pprt = NULL;
- }
- }
- return err;
-}
-
-static int dn_route_input_slow(struct sk_buff *skb)
-{
- struct dn_route *rt = NULL;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct net_device *in_dev = skb->dev;
- struct net_device *out_dev = NULL;
- struct dn_dev *dn_db;
- struct neighbour *neigh = NULL;
- unsigned int hash;
- int flags = 0;
- __le16 gateway = 0;
- __le16 local_src = 0;
- struct flowidn fld = {
- .daddr = cb->dst,
- .saddr = cb->src,
- .flowidn_scope = RT_SCOPE_UNIVERSE,
- .flowidn_mark = skb->mark,
- .flowidn_iif = skb->dev->ifindex,
- };
- struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE };
- int err = -EINVAL;
- int free_res = 0;
-
- dev_hold(in_dev);
-
- dn_db = rcu_dereference(in_dev->dn_ptr);
- if (!dn_db)
- goto out;
-
- /* Zero source addresses are not allowed */
- if (fld.saddr == 0)
- goto out;
-
- /*
- * In this case we've just received a packet from a source
- * outside ourselves pretending to come from us. We don't
- * allow it any further to prevent routing loops, spoofing and
- * other nasties. Loopback packets already have the dst attached
- * so this only affects packets which have originated elsewhere.
- */
- err = -ENOTUNIQ;
- if (dn_dev_islocal(in_dev, cb->src))
- goto out;
-
- err = dn_fib_lookup(&fld, &res);
- if (err) {
- if (err != -ESRCH)
- goto out;
- /*
- * Is the destination us ?
- */
- if (!dn_dev_islocal(in_dev, cb->dst))
- goto e_inval;
-
- res.type = RTN_LOCAL;
- } else {
- __le16 src_map = fld.saddr;
- free_res = 1;
-
- out_dev = DN_FIB_RES_DEV(res);
- if (out_dev == NULL) {
- net_crit_ratelimited("Bug in dn_route_input_slow() No output device\n");
- goto e_inval;
- }
- dev_hold(out_dev);
-
- if (res.r)
- src_map = fld.saddr; /* no NAT support for now */
-
- gateway = DN_FIB_RES_GW(res);
- if (res.type == RTN_NAT) {
- fld.daddr = dn_fib_rules_map_destination(fld.daddr, &res);
- dn_fib_res_put(&res);
- free_res = 0;
- if (dn_fib_lookup(&fld, &res))
- goto e_inval;
- free_res = 1;
- if (res.type != RTN_UNICAST)
- goto e_inval;
- flags |= RTCF_DNAT;
- gateway = fld.daddr;
- }
- fld.saddr = src_map;
- }
-
- switch (res.type) {
- case RTN_UNICAST:
-		/*
-		 * Forwarding check: we only check for forwarding being
-		 * turned off here. If you want to forward only intra-area,
-		 * it's up to you to set the routing tables up correctly.
-		 */
- if (dn_db->parms.forwarding == 0)
- goto e_inval;
-
- if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0)
- dn_fib_select_multipath(&fld, &res);
-
- /*
- * Check for out_dev == in_dev. We use the RTCF_DOREDIRECT
- * flag as a hint to set the intra-ethernet bit when
- * forwarding. If we've got NAT in operation, we don't do
- * this optimisation.
- */
- if (out_dev == in_dev && !(flags & RTCF_NAT))
- flags |= RTCF_DOREDIRECT;
-
- local_src = DN_FIB_RES_PREFSRC(res);
- break;
- case RTN_BLACKHOLE:
- case RTN_UNREACHABLE:
- break;
- case RTN_LOCAL:
- flags |= RTCF_LOCAL;
- fld.saddr = cb->dst;
- fld.daddr = cb->src;
-
- /* Routing tables gave us a gateway */
- if (gateway)
- goto make_route;
-
-		/* Packet was intra-ethernet, so we know it's on-link */
- if (cb->rt_flags & DN_RT_F_IE) {
- gateway = cb->src;
- goto make_route;
- }
-
- /* Use the default router if there is one */
- neigh = neigh_clone(dn_db->router);
- if (neigh) {
- gateway = ((struct dn_neigh *)neigh)->addr;
- goto make_route;
- }
-
- /* Close eyes and pray */
- gateway = cb->src;
- goto make_route;
- default:
- goto e_inval;
- }
-
-make_route:
- rt = dst_alloc(&dn_dst_ops, out_dev, 1, DST_OBSOLETE_NONE, 0);
- if (rt == NULL)
- goto e_nobufs;
-
- rt->dn_next = NULL;
- memset(&rt->fld, 0, sizeof(rt->fld));
- rt->rt_saddr = fld.saddr;
- rt->rt_daddr = fld.daddr;
- rt->rt_gateway = fld.daddr;
- if (gateway)
- rt->rt_gateway = gateway;
- rt->rt_local_src = local_src ? local_src : rt->rt_saddr;
-
- rt->rt_dst_map = fld.daddr;
- rt->rt_src_map = fld.saddr;
-
- rt->fld.saddr = cb->src;
- rt->fld.daddr = cb->dst;
- rt->fld.flowidn_oif = 0;
- rt->fld.flowidn_iif = in_dev->ifindex;
- rt->fld.flowidn_mark = fld.flowidn_mark;
-
- rt->n = neigh;
- rt->dst.lastuse = jiffies;
- rt->dst.output = dn_rt_bug_out;
- switch (res.type) {
- case RTN_UNICAST:
- rt->dst.input = dn_forward;
- break;
- case RTN_LOCAL:
- rt->dst.output = dn_output;
- rt->dst.input = dn_nsp_rx;
- rt->dst.dev = in_dev;
- flags |= RTCF_LOCAL;
- break;
- default:
- case RTN_UNREACHABLE:
- case RTN_BLACKHOLE:
- rt->dst.input = dst_discard;
- }
- rt->rt_flags = flags;
-
- err = dn_rt_set_next_hop(rt, &res);
- if (err)
- goto e_neighbour;
-
- hash = dn_hash(rt->fld.saddr, rt->fld.daddr);
- /* dn_insert_route() increments dst->__refcnt */
- dn_insert_route(rt, hash, &rt);
- skb_dst_set(skb, &rt->dst);
-
-done:
- if (neigh)
- neigh_release(neigh);
- if (free_res)
- dn_fib_res_put(&res);
- dev_put(in_dev);
- dev_put(out_dev);
-out:
- return err;
-
-e_inval:
- err = -EINVAL;
- goto done;
-
-e_nobufs:
- err = -ENOBUFS;
- goto done;
-
-e_neighbour:
- dst_release_immediate(&rt->dst);
- goto done;
-}
-
-static int dn_route_input(struct sk_buff *skb)
-{
- struct dn_route *rt;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- unsigned int hash = dn_hash(cb->src, cb->dst);
-
- if (skb_dst(skb))
- return 0;
-
- rcu_read_lock();
- for (rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
- rt = rcu_dereference(rt->dn_next)) {
- if ((rt->fld.saddr == cb->src) &&
- (rt->fld.daddr == cb->dst) &&
- (rt->fld.flowidn_oif == 0) &&
- (rt->fld.flowidn_mark == skb->mark) &&
- (rt->fld.flowidn_iif == cb->iif)) {
- dst_hold_and_use(&rt->dst, jiffies);
- rcu_read_unlock();
- skb_dst_set(skb, (struct dst_entry *)rt);
- return 0;
- }
- }
- rcu_read_unlock();
-
- return dn_route_input_slow(skb);
-}
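
Both cache probes (__dn_route_output_key() earlier and dn_route_input()
here) walk a single hash chain and insist on an exact match of the whole
flow key; the output side keys on oif, the input side on iif with oif == 0.
A self-contained model of that exact-match walk, with illustrative
structure and field names:

    #include <stdio.h>
    #include <stddef.h>

    struct toy_route {
            unsigned short saddr, daddr;
            unsigned int mark, iif, oif;
            struct toy_route *next;
    };

    /* First entry whose whole key matches wins, as in dn_route_input();
     * any mismatch at all falls through to the slow path (NULL here). */
    static struct toy_route *cache_walk(struct toy_route *chain,
                                        unsigned short src,
                                        unsigned short dst,
                                        unsigned int mark,
                                        unsigned int iif)
    {
            struct toy_route *rt;

            for (rt = chain; rt; rt = rt->next)
                    if (rt->saddr == src && rt->daddr == dst &&
                        rt->mark == mark && rt->iif == iif &&
                        rt->oif == 0)
                            return rt;
            return NULL;
    }

    int main(void)
    {
            struct toy_route b = { 0x0402, 0x0801, 0, 2, 0, NULL };
            struct toy_route a = { 0x0402, 0x0801, 0, 1, 0, &b };

            /* hit: the second chain entry matches on iif == 2 */
            printf("%s\n", cache_walk(&a, 0x0402, 0x0801, 0, 2) == &b
                   ? "hit" : "miss");
            return 0;
    }
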
-
-static int dn_rt_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
- int event, int nowait, unsigned int flags)
-{
- struct dn_route *rt = (struct dn_route *)skb_dst(skb);
- struct rtmsg *r;
- struct nlmsghdr *nlh;
- long expires;
-
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
- if (!nlh)
- return -EMSGSIZE;
-
- r = nlmsg_data(nlh);
- r->rtm_family = AF_DECnet;
- r->rtm_dst_len = 16;
- r->rtm_src_len = 0;
- r->rtm_tos = 0;
- r->rtm_table = RT_TABLE_MAIN;
- r->rtm_type = rt->rt_type;
- r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
- r->rtm_scope = RT_SCOPE_UNIVERSE;
- r->rtm_protocol = RTPROT_UNSPEC;
-
- if (rt->rt_flags & RTCF_NOTIFY)
- r->rtm_flags |= RTM_F_NOTIFY;
-
- if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN) < 0 ||
- nla_put_le16(skb, RTA_DST, rt->rt_daddr) < 0)
- goto errout;
-
- if (rt->fld.saddr) {
- r->rtm_src_len = 16;
- if (nla_put_le16(skb, RTA_SRC, rt->fld.saddr) < 0)
- goto errout;
- }
- if (rt->dst.dev &&
- nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex) < 0)
- goto errout;
-
-	/*
-	 * Note to self: change this if input routes ever reverse direction,
-	 * i.e. when they deal only with inputs and not with replies as they
-	 * do currently.
-	 */
- if (nla_put_le16(skb, RTA_PREFSRC, rt->rt_local_src) < 0)
- goto errout;
-
- if (rt->rt_daddr != rt->rt_gateway &&
- nla_put_le16(skb, RTA_GATEWAY, rt->rt_gateway) < 0)
- goto errout;
-
- if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
- goto errout;
-
- expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
- if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires,
- rt->dst.error) < 0)
- goto errout;
-
- if (dn_is_input_route(rt) &&
- nla_put_u32(skb, RTA_IIF, rt->fld.flowidn_iif) < 0)
- goto errout;
-
- nlmsg_end(skb, nlh);
- return 0;
-
-errout:
- nlmsg_cancel(skb, nlh);
- return -EMSGSIZE;
-}
-
-const struct nla_policy rtm_dn_policy[RTA_MAX + 1] = {
- [RTA_DST] = { .type = NLA_U16 },
- [RTA_SRC] = { .type = NLA_U16 },
- [RTA_IIF] = { .type = NLA_U32 },
- [RTA_OIF] = { .type = NLA_U32 },
- [RTA_GATEWAY] = { .type = NLA_U16 },
- [RTA_PRIORITY] = { .type = NLA_U32 },
- [RTA_PREFSRC] = { .type = NLA_U16 },
- [RTA_METRICS] = { .type = NLA_NESTED },
- [RTA_MULTIPATH] = { .type = NLA_NESTED },
- [RTA_TABLE] = { .type = NLA_U32 },
- [RTA_MARK] = { .type = NLA_U32 },
-};
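
The policy above is what dn_cache_getroute() validates a userspace
RTM_GETROUTE request against. A sketch of such a request over a raw
NETLINK_ROUTE socket — it only makes sense against a historical kernel
that still carried DECnet, and the destination address is an arbitrary
example with byte-order details glossed over:

    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>
    #include <linux/rtnetlink.h>

    int main(void)
    {
            struct {
                    struct nlmsghdr nlh;
                    struct rtmsg rtm;
                    struct rtattr rta;
                    unsigned short dst;      /* __le16 DECnet address */
            } req;
            struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
            int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

            if (fd < 0)
                    return 1;

            memset(&req, 0, sizeof(req));
            req.nlh.nlmsg_len = sizeof(req);
            req.nlh.nlmsg_type = RTM_GETROUTE;
            req.nlh.nlmsg_flags = NLM_F_REQUEST;
            req.rtm.rtm_family = AF_DECnet;  /* dispatches to PF_DECnet */
            req.rta.rta_type = RTA_DST;      /* NLA_U16 per the policy  */
            req.rta.rta_len = RTA_LENGTH(sizeof(req.dst));
            req.dst = 0x0402;                /* e.g. node 1.2           */

            sendto(fd, &req, sizeof(req), 0,
                   (struct sockaddr *)&kernel, sizeof(kernel));
            /* A recv() here would return the rtmsg filled in by
             * dn_rt_fill_info(), or a netlink error. */
            close(fd);
            return 0;
    }
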
-
-/*
- * This is called by both endnodes and routers now.
- */
-static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(in_skb->sk);
- struct rtmsg *rtm = nlmsg_data(nlh);
- struct dn_route *rt = NULL;
- struct dn_skb_cb *cb;
- int err;
- struct sk_buff *skb;
- struct flowidn fld;
- struct nlattr *tb[RTA_MAX+1];
-
- if (!net_eq(net, &init_net))
- return -EINVAL;
-
- err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
- rtm_dn_policy, extack);
- if (err < 0)
- return err;
-
- memset(&fld, 0, sizeof(fld));
- fld.flowidn_proto = DNPROTO_NSP;
-
- skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (skb == NULL)
- return -ENOBUFS;
- skb_reset_mac_header(skb);
- cb = DN_SKB_CB(skb);
-
- if (tb[RTA_SRC])
- fld.saddr = nla_get_le16(tb[RTA_SRC]);
-
- if (tb[RTA_DST])
- fld.daddr = nla_get_le16(tb[RTA_DST]);
-
- if (tb[RTA_IIF])
- fld.flowidn_iif = nla_get_u32(tb[RTA_IIF]);
-
- if (fld.flowidn_iif) {
- struct net_device *dev;
- dev = __dev_get_by_index(&init_net, fld.flowidn_iif);
- if (!dev || !dev->dn_ptr) {
- kfree_skb(skb);
- return -ENODEV;
- }
- skb->protocol = htons(ETH_P_DNA_RT);
- skb->dev = dev;
- cb->src = fld.saddr;
- cb->dst = fld.daddr;
- local_bh_disable();
- err = dn_route_input(skb);
- local_bh_enable();
- memset(cb, 0, sizeof(struct dn_skb_cb));
- rt = (struct dn_route *)skb_dst(skb);
-		if (!err && rt->dst.error)
- err = rt->dst.error;
- } else {
- if (tb[RTA_OIF])
- fld.flowidn_oif = nla_get_u32(tb[RTA_OIF]);
-
- err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0);
- }
-
- skb->dev = NULL;
- if (err)
- goto out_free;
- skb_dst_set(skb, &rt->dst);
- if (rtm->rtm_flags & RTM_F_NOTIFY)
- rt->rt_flags |= RTCF_NOTIFY;
-
- err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0);
- if (err < 0) {
- err = -EMSGSIZE;
- goto out_free;
- }
-
- return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).portid);
-
-out_free:
- kfree_skb(skb);
- return err;
-}
-
-/*
- * For routers, this is called from dn_fib_dump, but for endnodes it's
- * called directly from the rtnetlink dispatch table.
- */
-int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct net *net = sock_net(skb->sk);
- struct dn_route *rt;
- int h, s_h;
- int idx, s_idx;
- struct rtmsg *rtm;
-
- if (!net_eq(net, &init_net))
- return 0;
-
- if (nlmsg_len(cb->nlh) < sizeof(struct rtmsg))
- return -EINVAL;
-
- rtm = nlmsg_data(cb->nlh);
- if (!(rtm->rtm_flags & RTM_F_CLONED))
- return 0;
-
- s_h = cb->args[0];
- s_idx = idx = cb->args[1];
- for (h = 0; h <= dn_rt_hash_mask; h++) {
- if (h < s_h)
- continue;
- if (h > s_h)
- s_idx = 0;
- rcu_read_lock_bh();
- for (rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
- rt;
- rt = rcu_dereference_bh(rt->dn_next), idx++) {
- if (idx < s_idx)
- continue;
- skb_dst_set(skb, dst_clone(&rt->dst));
- if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, RTM_NEWROUTE,
- 1, NLM_F_MULTI) < 0) {
- skb_dst_drop(skb);
- rcu_read_unlock_bh();
- goto done;
- }
- skb_dst_drop(skb);
- }
- rcu_read_unlock_bh();
- }
-
-done:
- cb->args[0] = h;
- cb->args[1] = idx;
- return skb->len;
-}
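
dn_cache_dump() is restartable: rtnetlink may invoke it repeatedly for one
dump, and cb->args[0]/cb->args[1] carry the bucket and chain index at which
the previous pass ran out of skb space. The same resume discipline in
miniature — the table shape and the per-pass budget are made up:

    #include <stdio.h>

    #define BUCKETS 4
    #define PER_BUCKET 3

    /* Emit up to 'budget' items per pass, remembering (h, idx) in args[]
     * the way the dump above uses cb->args[0] and cb->args[1]. */
    static int dump_pass(long args[2], int budget)
    {
            int h, idx, emitted = 0;

            for (h = args[0]; h < BUCKETS; h++, args[1] = 0)
                    for (idx = args[1]; idx < PER_BUCKET; idx++) {
                            if (emitted == budget) {
                                    args[0] = h;    /* resume point */
                                    args[1] = idx;
                                    return emitted;
                            }
                            printf("item %d.%d\n", h, idx);
                            emitted++;
                    }
            args[0] = BUCKETS;                      /* done */
            return emitted;
    }

    int main(void)
    {
            long args[2] = { 0, 0 };

            while (dump_pass(args, 5) == 5)
                    ;       /* keep calling until a pass comes up short */
            return 0;
    }
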
-
-#ifdef CONFIG_PROC_FS
-struct dn_rt_cache_iter_state {
- int bucket;
-};
-
-static struct dn_route *dn_rt_cache_get_first(struct seq_file *seq)
-{
- struct dn_route *rt = NULL;
- struct dn_rt_cache_iter_state *s = seq->private;
-
- for (s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) {
- rcu_read_lock_bh();
- rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
- if (rt)
- break;
- rcu_read_unlock_bh();
- }
- return rt;
-}
-
-static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_route *rt)
-{
- struct dn_rt_cache_iter_state *s = seq->private;
-
- rt = rcu_dereference_bh(rt->dn_next);
- while (!rt) {
- rcu_read_unlock_bh();
- if (--s->bucket < 0)
- break;
- rcu_read_lock_bh();
- rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
- }
- return rt;
-}
-
-static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
-{
- struct dn_route *rt = dn_rt_cache_get_first(seq);
-
- if (rt) {
- while (*pos && (rt = dn_rt_cache_get_next(seq, rt)))
- --*pos;
- }
- return *pos ? NULL : rt;
-}
-
-static void *dn_rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct dn_route *rt = dn_rt_cache_get_next(seq, v);
- ++*pos;
- return rt;
-}
-
-static void dn_rt_cache_seq_stop(struct seq_file *seq, void *v)
-{
- if (v)
- rcu_read_unlock_bh();
-}
-
-static int dn_rt_cache_seq_show(struct seq_file *seq, void *v)
-{
- struct dn_route *rt = v;
- char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN];
-
- seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n",
- rt->dst.dev ? rt->dst.dev->name : "*",
- dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1),
- dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2),
- atomic_read(&rt->dst.__refcnt),
- rt->dst.__use, 0);
- return 0;
-}
-
-static const struct seq_operations dn_rt_cache_seq_ops = {
- .start = dn_rt_cache_seq_start,
- .next = dn_rt_cache_seq_next,
- .stop = dn_rt_cache_seq_stop,
- .show = dn_rt_cache_seq_show,
-};
-#endif /* CONFIG_PROC_FS */
-
-void __init dn_route_init(void)
-{
- int i, goal, order;
-
- dn_dst_ops.kmem_cachep =
- kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
- dst_entries_init(&dn_dst_ops);
- timer_setup(&dn_route_timer, dn_dst_check_expire, 0);
- dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
- add_timer(&dn_route_timer);
-
- goal = totalram_pages() >> (26 - PAGE_SHIFT);
-
- for (order = 0; (1UL << order) < goal; order++)
- /* NOTHING */;
-
- /*
-	 * Only want 2048 entries max, since the table is very, very unlikely
- * to be larger than that.
- */
- while (order && ((((1UL << order) * PAGE_SIZE) /
- sizeof(struct dn_rt_hash_bucket)) >= 2048))
- order--;
-
- do {
- dn_rt_hash_mask = (1UL << order) * PAGE_SIZE /
- sizeof(struct dn_rt_hash_bucket);
- while (dn_rt_hash_mask & (dn_rt_hash_mask - 1))
- dn_rt_hash_mask--;
- dn_rt_hash_table = (struct dn_rt_hash_bucket *)
- __get_free_pages(GFP_ATOMIC, order);
- } while (dn_rt_hash_table == NULL && --order > 0);
-
- if (!dn_rt_hash_table)
- panic("Failed to allocate DECnet route cache hash table\n");
-
- printk(KERN_INFO
- "DECnet: Routing cache hash table of %u buckets, %ldKbytes\n",
- dn_rt_hash_mask,
- (long)(dn_rt_hash_mask*sizeof(struct dn_rt_hash_bucket))/1024);
-
- dn_rt_hash_mask--;
- for (i = 0; i <= dn_rt_hash_mask; i++) {
- spin_lock_init(&dn_rt_hash_table[i].lock);
- dn_rt_hash_table[i].chain = NULL;
- }
-
- dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
-
- proc_create_seq_private("decnet_cache", 0444, init_net.proc_net,
- &dn_rt_cache_seq_ops,
- sizeof(struct dn_rt_cache_iter_state), NULL);
-
-#ifdef CONFIG_DECNET_ROUTER
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE,
- dn_cache_getroute, dn_fib_dump, 0);
-#else
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE,
- dn_cache_getroute, dn_cache_dump, 0);
-#endif
-}
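
The sizing loops in dn_route_init() pick roughly one bucket per 64 MB of
RAM, clamp the table to 2048 entries, and round the bucket count down to a
power of two before deriving the mask. Worked through below for an assumed
1 GiB machine with 4 KiB pages and a 16-byte dn_rt_hash_bucket (all three
figures are assumptions chosen for round numbers):

    #include <stdio.h>

    int main(void)
    {
            unsigned long page_size = 4096, pages = 262144; /* 1 GiB RAM */
            unsigned long bucket_sz = 16;  /* assumed sizeof(bucket)     */
            unsigned long goal = pages >> (26 - 12);        /* = 16      */
            unsigned long mask;
            int order;

            for (order = 0; (1UL << order) < goal; order++)
                    ;                                       /* order = 4 */

            while (order &&
                   ((1UL << order) * page_size / bucket_sz) >= 2048)
                    order--;                                /* order = 2 */

            mask = (1UL << order) * page_size / bucket_sz;  /* 1024      */
            while (mask & (mask - 1))
                    mask--;             /* already a power of two here   */

            /* prints: order=2 buckets=1024 mask=0x3ff */
            printf("order=%d buckets=%lu mask=0x%lx\n",
                   order, mask, mask - 1);
            return 0;
    }
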
-
-void __exit dn_route_cleanup(void)
-{
- del_timer(&dn_route_timer);
- dn_run_flush(NULL);
-
- remove_proc_entry("decnet_cache", init_net.proc_net);
- dst_entries_destroy(&dn_dst_ops);
-}
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
deleted file mode 100644
index ee73057529cf..000000000000
--- a/net/decnet/dn_rules.c
+++ /dev/null
@@ -1,253 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Routing Forwarding Information Base (Rules)
- *
- * Author: Steve Whitehouse <SteveW@ACM.org>
- * Mostly copied from Alexey Kuznetsov's ipv4/fib_rules.c
- *
- *
- * Changes:
- * Steve Whitehouse <steve@chygwyn.com>
- * Updated for Thomas Graf's generic rules
- *
- */
-#include <linux/net.h>
-#include <linux/init.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
-#include <linux/netdevice.h>
-#include <linux/spinlock.h>
-#include <linux/list.h>
-#include <linux/rcupdate.h>
-#include <linux/export.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/fib_rules.h>
-#include <net/dn.h>
-#include <net/dn_fib.h>
-#include <net/dn_neigh.h>
-#include <net/dn_dev.h>
-#include <net/dn_route.h>
-
-static struct fib_rules_ops *dn_fib_rules_ops;
-
-struct dn_fib_rule
-{
- struct fib_rule common;
- unsigned char dst_len;
- unsigned char src_len;
- __le16 src;
- __le16 srcmask;
- __le16 dst;
- __le16 dstmask;
- __le16 srcmap;
- u8 flags;
-};
-
-
-int dn_fib_lookup(struct flowidn *flp, struct dn_fib_res *res)
-{
- struct fib_lookup_arg arg = {
- .result = res,
- };
- int err;
-
- err = fib_rules_lookup(dn_fib_rules_ops,
- flowidn_to_flowi(flp), 0, &arg);
- res->r = arg.rule;
-
- return err;
-}
-
-static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp,
- int flags, struct fib_lookup_arg *arg)
-{
- struct flowidn *fld = &flp->u.dn;
- int err = -EAGAIN;
- struct dn_fib_table *tbl;
-
- switch(rule->action) {
- case FR_ACT_TO_TBL:
- break;
-
- case FR_ACT_UNREACHABLE:
- err = -ENETUNREACH;
- goto errout;
-
- case FR_ACT_PROHIBIT:
- err = -EACCES;
- goto errout;
-
- case FR_ACT_BLACKHOLE:
- default:
- err = -EINVAL;
- goto errout;
- }
-
- tbl = dn_fib_get_table(rule->table, 0);
- if (tbl == NULL)
- goto errout;
-
- err = tbl->lookup(tbl, fld, (struct dn_fib_res *)arg->result);
- if (err > 0)
- err = -EAGAIN;
-errout:
- return err;
-}
-
-static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
-{
- struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
- struct flowidn *fld = &fl->u.dn;
- __le16 daddr = fld->daddr;
- __le16 saddr = fld->saddr;
-
- if (((saddr ^ r->src) & r->srcmask) ||
- ((daddr ^ r->dst) & r->dstmask))
- return 0;
-
- return 1;
-}
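
dn_fib_rule_match() is a pure masked compare: the rule matches when source
and destination each agree with the rule's address on every bit the
corresponding mask covers. In miniature, with host-order values and the
DECnet area taken as the top 6 bits (illustrative, not the on-wire __le16
layout):

    #include <stdio.h>

    /* Returns 1 when both masked compares agree, as the match above. */
    static int rule_matches(unsigned short saddr, unsigned short daddr,
                            unsigned short src, unsigned short srcmask,
                            unsigned short dst, unsigned short dstmask)
    {
            return !(((saddr ^ src) & srcmask) ||
                     ((daddr ^ dst) & dstmask));
    }

    int main(void)
    {
            /* Source rule for area 1 (mask 0xFC00), any destination. */
            printf("%d\n", rule_matches(0x0402, 0x1234,
                                        0x0400, 0xFC00, 0, 0)); /* 1 */
            printf("%d\n", rule_matches(0x0802, 0x1234,
                                        0x0400, 0xFC00, 0, 0)); /* 0 */
            return 0;
    }
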
-
-static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
- struct fib_rule_hdr *frh,
- struct nlattr **tb,
- struct netlink_ext_ack *extack)
-{
- int err = -EINVAL;
- struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
-
- if (frh->tos) {
- NL_SET_ERR_MSG(extack, "Invalid tos value");
- goto errout;
- }
-
- if (rule->table == RT_TABLE_UNSPEC) {
- if (rule->action == FR_ACT_TO_TBL) {
- struct dn_fib_table *table;
-
- table = dn_fib_empty_table();
- if (table == NULL) {
- err = -ENOBUFS;
- goto errout;
- }
-
- rule->table = table->n;
- }
- }
-
- if (frh->src_len)
- r->src = nla_get_le16(tb[FRA_SRC]);
-
- if (frh->dst_len)
- r->dst = nla_get_le16(tb[FRA_DST]);
-
- r->src_len = frh->src_len;
- r->srcmask = dnet_make_mask(r->src_len);
- r->dst_len = frh->dst_len;
- r->dstmask = dnet_make_mask(r->dst_len);
- err = 0;
-errout:
- return err;
-}
-
-static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
- struct nlattr **tb)
-{
- struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
-
- if (frh->src_len && (r->src_len != frh->src_len))
- return 0;
-
- if (frh->dst_len && (r->dst_len != frh->dst_len))
- return 0;
-
- if (frh->src_len && (r->src != nla_get_le16(tb[FRA_SRC])))
- return 0;
-
- if (frh->dst_len && (r->dst != nla_get_le16(tb[FRA_DST])))
- return 0;
-
- return 1;
-}
-
-unsigned int dnet_addr_type(__le16 addr)
-{
- struct flowidn fld = { .daddr = addr };
- struct dn_fib_res res;
- unsigned int ret = RTN_UNICAST;
- struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0);
-
- res.r = NULL;
-
- if (tb) {
- if (!tb->lookup(tb, &fld, &res)) {
- ret = res.type;
- dn_fib_res_put(&res);
- }
- }
- return ret;
-}
-
-static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
- struct fib_rule_hdr *frh)
-{
- struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
-
- frh->dst_len = r->dst_len;
- frh->src_len = r->src_len;
- frh->tos = 0;
-
- if ((r->dst_len &&
- nla_put_le16(skb, FRA_DST, r->dst)) ||
- (r->src_len &&
- nla_put_le16(skb, FRA_SRC, r->src)))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return -ENOBUFS;
-}
-
-static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops)
-{
- dn_rt_cache_flush(-1);
-}
-
-static const struct fib_rules_ops __net_initconst dn_fib_rules_ops_template = {
- .family = AF_DECnet,
- .rule_size = sizeof(struct dn_fib_rule),
- .addr_size = sizeof(u16),
- .action = dn_fib_rule_action,
- .match = dn_fib_rule_match,
- .configure = dn_fib_rule_configure,
- .compare = dn_fib_rule_compare,
- .fill = dn_fib_rule_fill,
- .flush_cache = dn_fib_rule_flush_cache,
- .nlgroup = RTNLGRP_DECnet_RULE,
- .owner = THIS_MODULE,
- .fro_net = &init_net,
-};
-
-void __init dn_fib_rules_init(void)
-{
- dn_fib_rules_ops =
- fib_rules_register(&dn_fib_rules_ops_template, &init_net);
- BUG_ON(IS_ERR(dn_fib_rules_ops));
- BUG_ON(fib_default_rule_add(dn_fib_rules_ops, 0x7fff,
- RT_TABLE_MAIN, 0));
-}
-
-void __exit dn_fib_rules_cleanup(void)
-{
- rtnl_lock();
- fib_rules_unregister(dn_fib_rules_ops);
- rtnl_unlock();
- rcu_barrier();
-}
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
deleted file mode 100644
index 4086f9c746af..000000000000
--- a/net/decnet/dn_table.c
+++ /dev/null
@@ -1,929 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Routing Forwarding Information Base (Routing Tables)
- *
- * Author: Steve Whitehouse <SteveW@ACM.org>
- * Mostly copied from the IPv4 routing code
- *
- *
- * Changes:
- *
- */
-#include <linux/string.h>
-#include <linux/net.h>
-#include <linux/socket.h>
-#include <linux/slab.h>
-#include <linux/sockios.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
-#include <linux/proc_fs.h>
-#include <linux/netdevice.h>
-#include <linux/timer.h>
-#include <linux/spinlock.h>
-#include <linux/atomic.h>
-#include <linux/uaccess.h>
-#include <linux/route.h> /* RTF_xxx */
-#include <net/neighbour.h>
-#include <net/netlink.h>
-#include <net/tcp.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/fib_rules.h>
-#include <net/dn.h>
-#include <net/dn_route.h>
-#include <net/dn_fib.h>
-#include <net/dn_neigh.h>
-#include <net/dn_dev.h>
-
-struct dn_zone
-{
- struct dn_zone *dz_next;
- struct dn_fib_node **dz_hash;
- int dz_nent;
- int dz_divisor;
- u32 dz_hashmask;
-#define DZ_HASHMASK(dz) ((dz)->dz_hashmask)
- int dz_order;
- __le16 dz_mask;
-#define DZ_MASK(dz) ((dz)->dz_mask)
-};
-
-struct dn_hash
-{
- struct dn_zone *dh_zones[17];
- struct dn_zone *dh_zone_list;
-};
-
-#define dz_key_0(key) ((key).datum = 0)
-
-#define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\
- for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
-
-#define endfor_nexthops(fi) }
-
-#define DN_MAX_DIVISOR 1024
-#define DN_S_ZOMBIE 1
-#define DN_S_ACCESSED 2
-
-#define DN_FIB_SCAN(f, fp) \
-for( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next)
-
-#define DN_FIB_SCAN_KEY(f, fp, key) \
-for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next)
-
-#define RT_TABLE_MIN 1
-#define DN_FIB_TABLE_HASHSZ 256
-static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ];
-static DEFINE_RWLOCK(dn_fib_tables_lock);
-
-static struct kmem_cache *dn_hash_kmem __read_mostly;
-static int dn_fib_hash_zombies;
-
-static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz)
-{
- u16 h = le16_to_cpu(key.datum)>>(16 - dz->dz_order);
- h ^= (h >> 10);
- h ^= (h >> 6);
- h &= DZ_HASHMASK(dz);
- return *(dn_fib_idx_t *)&h;
-}
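
This dn_hash() folds a route key into a zone's bucket array: the key is
first narrowed to its significant prefix bits, then stirred with two
xor-shifts, then masked down to the bucket count. Traced by hand for one
value — the le16 conversion is omitted and the key taken in host order:

    #include <stdio.h>
    #include <stdint.h>

    /* Mirror of dn_hash() above for a zone of the given prefix order. */
    static unsigned zone_hash(uint16_t key, int order, uint32_t hashmask)
    {
            uint16_t h = key >> (16 - order);

            h ^= h >> 10;   /* 0xA2C4 ^ 0x0028 = 0xA2EC */
            h ^= h >> 6;    /* 0xA2EC ^ 0x028B = 0xA067 */
            return h & hashmask;
    }

    int main(void)
    {
            /* /16 zone with 16 buckets: key 0xA2C4 lands in bucket 7. */
            printf("%u\n", zone_hash(0xA2C4, 16, 0x0F));
            return 0;
    }
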
-
-static inline dn_fib_key_t dz_key(__le16 dst, struct dn_zone *dz)
-{
- dn_fib_key_t k;
- k.datum = dst & DZ_MASK(dz);
- return k;
-}
-
-static inline struct dn_fib_node **dn_chain_p(dn_fib_key_t key, struct dn_zone *dz)
-{
- return &dz->dz_hash[dn_hash(key, dz).datum];
-}
-
-static inline struct dn_fib_node *dz_chain(dn_fib_key_t key, struct dn_zone *dz)
-{
- return dz->dz_hash[dn_hash(key, dz).datum];
-}
-
-static inline int dn_key_eq(dn_fib_key_t a, dn_fib_key_t b)
-{
- return a.datum == b.datum;
-}
-
-static inline int dn_key_leq(dn_fib_key_t a, dn_fib_key_t b)
-{
- return a.datum <= b.datum;
-}
-
-static inline void dn_rebuild_zone(struct dn_zone *dz,
- struct dn_fib_node **old_ht,
- int old_divisor)
-{
- struct dn_fib_node *f, **fp, *next;
- int i;
-
- for(i = 0; i < old_divisor; i++) {
- for(f = old_ht[i]; f; f = next) {
- next = f->fn_next;
- for(fp = dn_chain_p(f->fn_key, dz);
- *fp && dn_key_leq((*fp)->fn_key, f->fn_key);
- fp = &(*fp)->fn_next)
- /* NOTHING */;
- f->fn_next = *fp;
- *fp = f;
- }
- }
-}
-
-static void dn_rehash_zone(struct dn_zone *dz)
-{
- struct dn_fib_node **ht, **old_ht;
- int old_divisor, new_divisor;
- u32 new_hashmask;
-
- old_divisor = dz->dz_divisor;
-
- switch (old_divisor) {
- case 16:
- new_divisor = 256;
- new_hashmask = 0xFF;
- break;
- default:
- printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n",
- old_divisor);
- fallthrough;
- case 256:
- new_divisor = 1024;
- new_hashmask = 0x3FF;
- break;
- }
-
- ht = kcalloc(new_divisor, sizeof(struct dn_fib_node*), GFP_KERNEL);
- if (ht == NULL)
- return;
-
- write_lock_bh(&dn_fib_tables_lock);
- old_ht = dz->dz_hash;
- dz->dz_hash = ht;
- dz->dz_hashmask = new_hashmask;
- dz->dz_divisor = new_divisor;
- dn_rebuild_zone(dz, old_ht, old_divisor);
- write_unlock_bh(&dn_fib_tables_lock);
- kfree(old_ht);
-}
-
-static void dn_free_node(struct dn_fib_node *f)
-{
- dn_fib_release_info(DN_FIB_INFO(f));
- kmem_cache_free(dn_hash_kmem, f);
-}
-
-
-static struct dn_zone *dn_new_zone(struct dn_hash *table, int z)
-{
- int i;
- struct dn_zone *dz = kzalloc(sizeof(struct dn_zone), GFP_KERNEL);
- if (!dz)
- return NULL;
-
- if (z) {
- dz->dz_divisor = 16;
- dz->dz_hashmask = 0x0F;
- } else {
- dz->dz_divisor = 1;
- dz->dz_hashmask = 0;
- }
-
- dz->dz_hash = kcalloc(dz->dz_divisor, sizeof(struct dn_fib_node *), GFP_KERNEL);
- if (!dz->dz_hash) {
- kfree(dz);
- return NULL;
- }
-
- dz->dz_order = z;
- dz->dz_mask = dnet_make_mask(z);
-
- for(i = z + 1; i <= 16; i++)
- if (table->dh_zones[i])
- break;
-
- write_lock_bh(&dn_fib_tables_lock);
- if (i>16) {
- dz->dz_next = table->dh_zone_list;
- table->dh_zone_list = dz;
- } else {
- dz->dz_next = table->dh_zones[i]->dz_next;
- table->dh_zones[i]->dz_next = dz;
- }
- table->dh_zones[z] = dz;
- write_unlock_bh(&dn_fib_tables_lock);
- return dz;
-}
-
-
-static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct nlattr *attrs[], struct dn_fib_info *fi)
-{
- struct rtnexthop *nhp;
- int nhlen;
-
- if (attrs[RTA_PRIORITY] &&
- nla_get_u32(attrs[RTA_PRIORITY]) != fi->fib_priority)
- return 1;
-
- if (attrs[RTA_OIF] || attrs[RTA_GATEWAY]) {
- if ((!attrs[RTA_OIF] || nla_get_u32(attrs[RTA_OIF]) == fi->fib_nh->nh_oif) &&
- (!attrs[RTA_GATEWAY] || nla_get_le16(attrs[RTA_GATEWAY]) != fi->fib_nh->nh_gw))
- return 0;
- return 1;
- }
-
- if (!attrs[RTA_MULTIPATH])
- return 0;
-
- nhp = nla_data(attrs[RTA_MULTIPATH]);
- nhlen = nla_len(attrs[RTA_MULTIPATH]);
-
- for_nexthops(fi) {
- int attrlen = nhlen - sizeof(struct rtnexthop);
- __le16 gw;
-
- if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
- return -EINVAL;
- if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
- return 1;
- if (attrlen) {
- struct nlattr *gw_attr;
-
- gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY);
- gw = gw_attr ? nla_get_le16(gw_attr) : 0;
-
- if (gw && gw != nh->nh_gw)
- return 1;
- }
- nhp = RTNH_NEXT(nhp);
- } endfor_nexthops(fi);
-
- return 0;
-}
-
-static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi)
-{
- size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
- + nla_total_size(4) /* RTA_TABLE */
- + nla_total_size(2) /* RTA_DST */
- + nla_total_size(4) /* RTA_PRIORITY */
- + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
-
- /* space for nested metrics */
- payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
-
- if (fi->fib_nhs) {
- /* Also handles the special case fib_nhs == 1 */
-
- /* each nexthop is packed in an attribute */
- size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
-
- /* may contain a gateway attribute */
- nhsize += nla_total_size(4);
-
- /* all nexthops are packed in a nested attribute */
- payload += nla_total_size(fi->fib_nhs * nhsize);
- }
-
- return payload;
-}
-
-static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
- u32 tb_id, u8 type, u8 scope, void *dst, int dst_len,
- struct dn_fib_info *fi, unsigned int flags)
-{
- struct rtmsg *rtm;
- struct nlmsghdr *nlh;
-
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
- if (!nlh)
- return -EMSGSIZE;
-
- rtm = nlmsg_data(nlh);
- rtm->rtm_family = AF_DECnet;
- rtm->rtm_dst_len = dst_len;
- rtm->rtm_src_len = 0;
- rtm->rtm_tos = 0;
- rtm->rtm_table = tb_id;
- rtm->rtm_flags = fi->fib_flags;
- rtm->rtm_scope = scope;
- rtm->rtm_type = type;
- rtm->rtm_protocol = fi->fib_protocol;
-
- if (nla_put_u32(skb, RTA_TABLE, tb_id) < 0)
- goto errout;
-
- if (rtm->rtm_dst_len &&
- nla_put(skb, RTA_DST, 2, dst) < 0)
- goto errout;
-
- if (fi->fib_priority &&
- nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority) < 0)
- goto errout;
-
- if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
- goto errout;
-
- if (fi->fib_nhs == 1) {
- if (fi->fib_nh->nh_gw &&
- nla_put_le16(skb, RTA_GATEWAY, fi->fib_nh->nh_gw) < 0)
- goto errout;
-
- if (fi->fib_nh->nh_oif &&
- nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif) < 0)
- goto errout;
- }
-
- if (fi->fib_nhs > 1) {
- struct rtnexthop *nhp;
- struct nlattr *mp_head;
-
- mp_head = nla_nest_start_noflag(skb, RTA_MULTIPATH);
- if (!mp_head)
- goto errout;
-
- for_nexthops(fi) {
- if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp))))
- goto errout;
-
- nhp->rtnh_flags = nh->nh_flags & 0xFF;
- nhp->rtnh_hops = nh->nh_weight - 1;
- nhp->rtnh_ifindex = nh->nh_oif;
-
- if (nh->nh_gw &&
- nla_put_le16(skb, RTA_GATEWAY, nh->nh_gw) < 0)
- goto errout;
-
- nhp->rtnh_len = skb_tail_pointer(skb) - (unsigned char *)nhp;
- } endfor_nexthops(fi);
-
- nla_nest_end(skb, mp_head);
- }
-
- nlmsg_end(skb, nlh);
- return 0;
-
-errout:
- nlmsg_cancel(skb, nlh);
- return -EMSGSIZE;
-}
-
-
-static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
- struct nlmsghdr *nlh, struct netlink_skb_parms *req)
-{
- struct sk_buff *skb;
- u32 portid = req ? req->portid : 0;
- int err = -ENOBUFS;
-
- skb = nlmsg_new(dn_fib_nlmsg_size(DN_FIB_INFO(f)), GFP_KERNEL);
- if (skb == NULL)
- goto errout;
-
- err = dn_fib_dump_info(skb, portid, nlh->nlmsg_seq, event, tb_id,
- f->fn_type, f->fn_scope, &f->fn_key, z,
- DN_FIB_INFO(f), 0);
- if (err < 0) {
- /* -EMSGSIZE implies BUG in dn_fib_nlmsg_size() */
- WARN_ON(err == -EMSGSIZE);
- kfree_skb(skb);
- goto errout;
- }
- rtnl_notify(skb, &init_net, portid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
- return;
-errout:
- if (err < 0)
- rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err);
-}
-
-static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
- struct netlink_callback *cb,
- struct dn_fib_table *tb,
- struct dn_zone *dz,
- struct dn_fib_node *f)
-{
- int i, s_i;
-
- s_i = cb->args[4];
- for(i = 0; f; i++, f = f->fn_next) {
- if (i < s_i)
- continue;
- if (f->fn_state & DN_S_ZOMBIE)
- continue;
- if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWROUTE,
- tb->n,
- (f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type,
- f->fn_scope, &f->fn_key, dz->dz_order,
- f->fn_info, NLM_F_MULTI) < 0) {
- cb->args[4] = i;
- return -1;
- }
- }
- cb->args[4] = i;
- return skb->len;
-}
-
-static __inline__ int dn_hash_dump_zone(struct sk_buff *skb,
- struct netlink_callback *cb,
- struct dn_fib_table *tb,
- struct dn_zone *dz)
-{
- int h, s_h;
-
- s_h = cb->args[3];
- for(h = 0; h < dz->dz_divisor; h++) {
- if (h < s_h)
- continue;
- if (h > s_h)
- memset(&cb->args[4], 0, sizeof(cb->args) - 4*sizeof(cb->args[0]));
- if (dz->dz_hash == NULL || dz->dz_hash[h] == NULL)
- continue;
- if (dn_hash_dump_bucket(skb, cb, tb, dz, dz->dz_hash[h]) < 0) {
- cb->args[3] = h;
- return -1;
- }
- }
- cb->args[3] = h;
- return skb->len;
-}
-
-static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb,
- struct netlink_callback *cb)
-{
- int m, s_m;
- struct dn_zone *dz;
- struct dn_hash *table = (struct dn_hash *)tb->data;
-
- s_m = cb->args[2];
- read_lock(&dn_fib_tables_lock);
- for(dz = table->dh_zone_list, m = 0; dz; dz = dz->dz_next, m++) {
- if (m < s_m)
- continue;
- if (m > s_m)
- memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
-
- if (dn_hash_dump_zone(skb, cb, tb, dz) < 0) {
- cb->args[2] = m;
- read_unlock(&dn_fib_tables_lock);
- return -1;
- }
- }
- read_unlock(&dn_fib_tables_lock);
- cb->args[2] = m;
-
- return skb->len;
-}
-
-int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct net *net = sock_net(skb->sk);
- unsigned int h, s_h;
- unsigned int e = 0, s_e;
- struct dn_fib_table *tb;
- int dumped = 0;
-
- if (!net_eq(net, &init_net))
- return 0;
-
- if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
- ((struct rtmsg *)nlmsg_data(cb->nlh))->rtm_flags&RTM_F_CLONED)
- return dn_cache_dump(skb, cb);
-
- s_h = cb->args[0];
- s_e = cb->args[1];
-
- for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_h = 0) {
- e = 0;
- hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist) {
- if (e < s_e)
- goto next;
- if (dumped)
- memset(&cb->args[2], 0, sizeof(cb->args) -
- 2 * sizeof(cb->args[0]));
- if (tb->dump(tb, skb, cb) < 0)
- goto out;
- dumped = 1;
-next:
- e++;
- }
- }
-out:
- cb->args[1] = e;
- cb->args[0] = h;
-
- return skb->len;
-}
-
-static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[],
- struct nlmsghdr *n, struct netlink_skb_parms *req)
-{
- struct dn_hash *table = (struct dn_hash *)tb->data;
- struct dn_fib_node *new_f, *f, **fp, **del_fp;
- struct dn_zone *dz;
- struct dn_fib_info *fi;
- int z = r->rtm_dst_len;
- int type = r->rtm_type;
- dn_fib_key_t key;
- int err;
-
- if (z > 16)
- return -EINVAL;
-
- dz = table->dh_zones[z];
- if (!dz && !(dz = dn_new_zone(table, z)))
- return -ENOBUFS;
-
- dz_key_0(key);
- if (attrs[RTA_DST]) {
- __le16 dst = nla_get_le16(attrs[RTA_DST]);
- if (dst & ~DZ_MASK(dz))
- return -EINVAL;
- key = dz_key(dst, dz);
- }
-
- if ((fi = dn_fib_create_info(r, attrs, n, &err)) == NULL)
- return err;
-
- if (dz->dz_nent > (dz->dz_divisor << 2) &&
-	    dz->dz_divisor < DN_MAX_DIVISOR &&
- (z==16 || (1<<z) > dz->dz_divisor))
- dn_rehash_zone(dz);
-
- fp = dn_chain_p(key, dz);
-
- DN_FIB_SCAN(f, fp) {
- if (dn_key_leq(key, f->fn_key))
- break;
- }
-
- del_fp = NULL;
-
- if (f && (f->fn_state & DN_S_ZOMBIE) &&
- dn_key_eq(f->fn_key, key)) {
- del_fp = fp;
- fp = &f->fn_next;
- f = *fp;
- goto create;
- }
-
- DN_FIB_SCAN_KEY(f, fp, key) {
- if (fi->fib_priority <= DN_FIB_INFO(f)->fib_priority)
- break;
- }
-
- if (f && dn_key_eq(f->fn_key, key) &&
- fi->fib_priority == DN_FIB_INFO(f)->fib_priority) {
- struct dn_fib_node **ins_fp;
-
- err = -EEXIST;
- if (n->nlmsg_flags & NLM_F_EXCL)
- goto out;
-
- if (n->nlmsg_flags & NLM_F_REPLACE) {
- del_fp = fp;
- fp = &f->fn_next;
- f = *fp;
- goto replace;
- }
-
- ins_fp = fp;
- err = -EEXIST;
-
- DN_FIB_SCAN_KEY(f, fp, key) {
- if (fi->fib_priority != DN_FIB_INFO(f)->fib_priority)
- break;
- if (f->fn_type == type &&
- f->fn_scope == r->rtm_scope &&
- DN_FIB_INFO(f) == fi)
- goto out;
- }
-
- if (!(n->nlmsg_flags & NLM_F_APPEND)) {
- fp = ins_fp;
- f = *fp;
- }
- }
-
-create:
- err = -ENOENT;
- if (!(n->nlmsg_flags & NLM_F_CREATE))
- goto out;
-
-replace:
- err = -ENOBUFS;
- new_f = kmem_cache_zalloc(dn_hash_kmem, GFP_KERNEL);
- if (new_f == NULL)
- goto out;
-
- new_f->fn_key = key;
- new_f->fn_type = type;
- new_f->fn_scope = r->rtm_scope;
- DN_FIB_INFO(new_f) = fi;
-
- new_f->fn_next = f;
- write_lock_bh(&dn_fib_tables_lock);
- *fp = new_f;
- write_unlock_bh(&dn_fib_tables_lock);
- dz->dz_nent++;
-
- if (del_fp) {
- f = *del_fp;
- write_lock_bh(&dn_fib_tables_lock);
- *del_fp = f->fn_next;
- write_unlock_bh(&dn_fib_tables_lock);
-
- if (!(f->fn_state & DN_S_ZOMBIE))
- dn_rtmsg_fib(RTM_DELROUTE, f, z, tb->n, n, req);
- if (f->fn_state & DN_S_ACCESSED)
- dn_rt_cache_flush(-1);
- dn_free_node(f);
- dz->dz_nent--;
- } else {
- dn_rt_cache_flush(-1);
- }
-
- dn_rtmsg_fib(RTM_NEWROUTE, new_f, z, tb->n, n, req);
-
- return 0;
-out:
- dn_fib_release_info(fi);
- return err;
-}
-
-
-static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[],
- struct nlmsghdr *n, struct netlink_skb_parms *req)
-{
- struct dn_hash *table = (struct dn_hash*)tb->data;
- struct dn_fib_node **fp, **del_fp, *f;
- int z = r->rtm_dst_len;
- struct dn_zone *dz;
- dn_fib_key_t key;
- int matched;
-
-
- if (z > 16)
- return -EINVAL;
-
- if ((dz = table->dh_zones[z]) == NULL)
- return -ESRCH;
-
- dz_key_0(key);
- if (attrs[RTA_DST]) {
- __le16 dst = nla_get_le16(attrs[RTA_DST]);
- if (dst & ~DZ_MASK(dz))
- return -EINVAL;
- key = dz_key(dst, dz);
- }
-
- fp = dn_chain_p(key, dz);
-
- DN_FIB_SCAN(f, fp) {
- if (dn_key_eq(f->fn_key, key))
- break;
- if (dn_key_leq(key, f->fn_key))
- return -ESRCH;
- }
-
- matched = 0;
- del_fp = NULL;
- DN_FIB_SCAN_KEY(f, fp, key) {
- struct dn_fib_info *fi = DN_FIB_INFO(f);
-
- if (f->fn_state & DN_S_ZOMBIE)
- return -ESRCH;
-
- matched++;
-
- if (del_fp == NULL &&
- (!r->rtm_type || f->fn_type == r->rtm_type) &&
- (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) &&
- (!r->rtm_protocol ||
- fi->fib_protocol == r->rtm_protocol) &&
- dn_fib_nh_match(r, n, attrs, fi) == 0)
- del_fp = fp;
- }
-
- if (del_fp) {
- f = *del_fp;
- dn_rtmsg_fib(RTM_DELROUTE, f, z, tb->n, n, req);
-
- if (matched != 1) {
- write_lock_bh(&dn_fib_tables_lock);
- *del_fp = f->fn_next;
- write_unlock_bh(&dn_fib_tables_lock);
-
- if (f->fn_state & DN_S_ACCESSED)
- dn_rt_cache_flush(-1);
- dn_free_node(f);
- dz->dz_nent--;
- } else {
- f->fn_state |= DN_S_ZOMBIE;
- if (f->fn_state & DN_S_ACCESSED) {
- f->fn_state &= ~DN_S_ACCESSED;
- dn_rt_cache_flush(-1);
- }
- if (++dn_fib_hash_zombies > 128)
- dn_fib_flush();
- }
-
- return 0;
- }
-
- return -ESRCH;
-}
-
-static inline int dn_flush_list(struct dn_fib_node **fp, int z, struct dn_hash *table)
-{
- int found = 0;
- struct dn_fib_node *f;
-
- while((f = *fp) != NULL) {
- struct dn_fib_info *fi = DN_FIB_INFO(f);
-
- if (fi && ((f->fn_state & DN_S_ZOMBIE) || (fi->fib_flags & RTNH_F_DEAD))) {
- write_lock_bh(&dn_fib_tables_lock);
- *fp = f->fn_next;
- write_unlock_bh(&dn_fib_tables_lock);
-
- dn_free_node(f);
- found++;
- continue;
- }
- fp = &f->fn_next;
- }
-
- return found;
-}
-
-static int dn_fib_table_flush(struct dn_fib_table *tb)
-{
- struct dn_hash *table = (struct dn_hash *)tb->data;
- struct dn_zone *dz;
- int found = 0;
-
- dn_fib_hash_zombies = 0;
- for(dz = table->dh_zone_list; dz; dz = dz->dz_next) {
- int i;
- int tmp = 0;
- for(i = dz->dz_divisor-1; i >= 0; i--)
- tmp += dn_flush_list(&dz->dz_hash[i], dz->dz_order, table);
- dz->dz_nent -= tmp;
- found += tmp;
- }
-
- return found;
-}
-
-static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowidn *flp, struct dn_fib_res *res)
-{
- int err;
- struct dn_zone *dz;
- struct dn_hash *t = (struct dn_hash *)tb->data;
-
- read_lock(&dn_fib_tables_lock);
- for(dz = t->dh_zone_list; dz; dz = dz->dz_next) {
- struct dn_fib_node *f;
- dn_fib_key_t k = dz_key(flp->daddr, dz);
-
- for(f = dz_chain(k, dz); f; f = f->fn_next) {
- if (!dn_key_eq(k, f->fn_key)) {
- if (dn_key_leq(k, f->fn_key))
- break;
- else
- continue;
- }
-
- f->fn_state |= DN_S_ACCESSED;
-
- if (f->fn_state&DN_S_ZOMBIE)
- continue;
-
- if (f->fn_scope < flp->flowidn_scope)
- continue;
-
- err = dn_fib_semantic_match(f->fn_type, DN_FIB_INFO(f), flp, res);
-
- if (err == 0) {
- res->type = f->fn_type;
- res->scope = f->fn_scope;
- res->prefixlen = dz->dz_order;
- goto out;
- }
- if (err < 0)
- goto out;
- }
- }
- err = 1;
-out:
- read_unlock(&dn_fib_tables_lock);
- return err;
-}
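
The zone walk above gives longest-prefix semantics: dn_new_zone() keeps
dh_zone_list sorted from most- to least-specific order, so the first zone
whose masked key matches wins. The same ladder in a standalone sketch —
host-order values and a hand-rolled mask stand in for the real __le16
handling and dnet_make_mask():

    #include <stdio.h>

    struct zone { int order; unsigned short prefix; const char *via; };

    static const char *lookup(unsigned short dst,
                              const struct zone *z, int nzones)
    {
            int i;

            for (i = 0; i < nzones; i++) {
                    unsigned short mask = z[i].order ?
                            (unsigned short)(~0U << (16 - z[i].order)) : 0;

                    if ((dst & mask) == z[i].prefix)
                            return z[i].via;   /* most specific first */
            }
            return "unreachable";
    }

    int main(void)
    {
            /* Ordered /16, /10, /0 — as dh_zone_list would hold them. */
            static const struct zone zones[] = {
                    { 16, 0x0402, "host route" },
                    { 10, 0x0400, "area route" },
                    {  0, 0x0000, "default"    },
            };

            printf("%s\n", lookup(0x0402, zones, 3)); /* host route */
            printf("%s\n", lookup(0x0403, zones, 3)); /* area route */
            printf("%s\n", lookup(0x8001, zones, 3)); /* default    */
            return 0;
    }
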
-
-
-struct dn_fib_table *dn_fib_get_table(u32 n, int create)
-{
- struct dn_fib_table *t;
- unsigned int h;
-
- if (n < RT_TABLE_MIN)
- return NULL;
-
- if (n > RT_TABLE_MAX)
- return NULL;
-
- h = n & (DN_FIB_TABLE_HASHSZ - 1);
- rcu_read_lock();
- hlist_for_each_entry_rcu(t, &dn_fib_table_hash[h], hlist) {
- if (t->n == n) {
- rcu_read_unlock();
- return t;
- }
- }
- rcu_read_unlock();
-
- if (!create)
- return NULL;
-
- if (in_interrupt()) {
- net_dbg_ratelimited("DECnet: BUG! Attempt to create routing table from interrupt\n");
- return NULL;
- }
-
- t = kzalloc(sizeof(struct dn_fib_table) + sizeof(struct dn_hash),
- GFP_KERNEL);
- if (t == NULL)
- return NULL;
-
- t->n = n;
- t->insert = dn_fib_table_insert;
- t->delete = dn_fib_table_delete;
- t->lookup = dn_fib_table_lookup;
- t->flush = dn_fib_table_flush;
- t->dump = dn_fib_table_dump;
- hlist_add_head_rcu(&t->hlist, &dn_fib_table_hash[h]);
-
- return t;
-}
-
-struct dn_fib_table *dn_fib_empty_table(void)
-{
- u32 id;
-
- for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++)
- if (dn_fib_get_table(id, 0) == NULL)
- return dn_fib_get_table(id, 1);
- return NULL;
-}
-
-void dn_fib_flush(void)
-{
- int flushed = 0;
- struct dn_fib_table *tb;
- unsigned int h;
-
- for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
- hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist)
- flushed += tb->flush(tb);
- }
-
- if (flushed)
- dn_rt_cache_flush(-1);
-}
-
-void __init dn_fib_table_init(void)
-{
- dn_hash_kmem = kmem_cache_create("dn_fib_info_cache",
- sizeof(struct dn_fib_info),
- 0, SLAB_HWCACHE_ALIGN,
- NULL);
-}
-
-void __exit dn_fib_table_cleanup(void)
-{
- struct dn_fib_table *t;
- struct hlist_node *next;
- unsigned int h;
-
- write_lock(&dn_fib_tables_lock);
- for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
- hlist_for_each_entry_safe(t, next, &dn_fib_table_hash[h],
- hlist) {
- hlist_del(&t->hlist);
- kfree(t);
- }
- }
- write_unlock(&dn_fib_tables_lock);
-}
diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c
deleted file mode 100644
index aa4155875ca8..000000000000
--- a/net/decnet/dn_timer.c
+++ /dev/null
@@ -1,104 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Socket Timer Functions
- *
- * Author: Steve Whitehouse <SteveW@ACM.org>
- *
- *
- * Changes:
- * Steve Whitehouse : Made keepalive timer part of the same
- * timer idea.
- * Steve Whitehouse : Added checks for sk->sock_readers
- * David S. Miller : New socket locking
- * Steve Whitehouse : Timer grabs socket ref.
- */
-#include <linux/net.h>
-#include <linux/socket.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/timer.h>
-#include <linux/spinlock.h>
-#include <net/sock.h>
-#include <linux/atomic.h>
-#include <linux/jiffies.h>
-#include <net/flow.h>
-#include <net/dn.h>
-
-/*
- * Slow timer is for everything else (n * 500 ms)
- */
-
-#define SLOW_INTERVAL (HZ/2)
-
-static void dn_slow_timer(struct timer_list *t);
-
-void dn_start_slow_timer(struct sock *sk)
-{
- timer_setup(&sk->sk_timer, dn_slow_timer, 0);
- sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL);
-}
-
-void dn_stop_slow_timer(struct sock *sk)
-{
- sk_stop_timer(sk, &sk->sk_timer);
-}
-
-static void dn_slow_timer(struct timer_list *t)
-{
- struct sock *sk = from_timer(sk, t, sk_timer);
- struct dn_scp *scp = DN_SK(sk);
-
- bh_lock_sock(sk);
-
- if (sock_owned_by_user(sk)) {
- sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 10);
- goto out;
- }
-
- /*
- * The persist timer is the standard slow timer used for retransmits
- * in both connection establishment and disconnection as well as
- * in the RUN state. The different states are catered for by changing
- * the function pointer in the socket. Setting the timer to a value
- * of zero turns it off. We allow the persist_fxn to turn the
- * timer off in a permanent way by returning non-zero, so that
- * timer based routines may remove sockets. This is why we have a
- * sock_hold()/sock_put() around the timer to prevent the socket
- * going away in the middle.
- */
- if (scp->persist && scp->persist_fxn) {
- if (scp->persist <= SLOW_INTERVAL) {
- scp->persist = 0;
-
- if (scp->persist_fxn(sk))
- goto out;
- } else {
- scp->persist -= SLOW_INTERVAL;
- }
- }
-
-	/*
-	 * Check for keepalive timeout. This runs after the other timer
-	 * because if the previous timer caused a retransmit, we don't
-	 * need to do this. scp->stamp is the last time that we sent a
-	 * packet. The keepalive function sends a link service packet to
-	 * the other end. If it remains unacknowledged, the standard
-	 * socket timers will eventually shut the socket down. Each time
-	 * we do this, scp->stamp will be updated, so we won't try to
-	 * send another until scp->keepalive has passed since the last
-	 * successful transmission.
-	 */
- if (scp->keepalive && scp->keepalive_fxn && (scp->state == DN_RUN)) {
- if (time_after_eq(jiffies, scp->stamp + scp->keepalive))
- scp->keepalive_fxn(sk);
- }
-
- sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL);
-out:
- bh_unlock_sock(sk);
- sock_put(sk);
-}
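
The comments above describe two soft timers multiplexed onto one 500 ms
tick: a countdown "persist" timer whose handler may cancel further ticking,
and a keepalive that fires only once scp->stamp is old enough. A toy model
of that multiplexing, with plain integers in place of jiffies and socket
state (all names illustrative):

    #include <stdio.h>

    #define SLOW_INTERVAL 500               /* ms per tick, as HZ/2 above */

    struct toy_scp {
            long persist;                   /* ms left; 0 = off            */
            long keepalive;                 /* allowed quiet ms; 0 = off   */
            long stamp;                     /* last transmit time, ms      */
    };

    static int persist_fired(struct toy_scp *s, long now)
    {
            (void)s;
            printf("persist fired at %ld\n", now);
            return 0;                       /* nonzero would stop ticking  */
    }

    static void slow_tick(struct toy_scp *s, long now)
    {
            if (s->persist) {
                    if (s->persist <= SLOW_INTERVAL) {
                            s->persist = 0;
                            if (persist_fired(s, now))
                                    return;
                    } else {
                            s->persist -= SLOW_INTERVAL;
                    }
            }
            if (s->keepalive && now - s->stamp >= s->keepalive) {
                    printf("keepalive probe at %ld\n", now);
                    s->stamp = now;         /* a real probe updates stamp  */
            }
    }

    int main(void)
    {
            struct toy_scp s = { 1200, 2000, 0 };
            long now;

            for (now = SLOW_INTERVAL; now <= 4000; now += SLOW_INTERVAL)
                    slow_tick(&s, now);     /* persist at 1500; probes at
                                               2000 and 4000 */
            return 0;
    }
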
diff --git a/net/decnet/netfilter/Kconfig b/net/decnet/netfilter/Kconfig
deleted file mode 100644
index 14ec4ef95fab..000000000000
--- a/net/decnet/netfilter/Kconfig
+++ /dev/null
@@ -1,17 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# DECnet netfilter configuration
-#
-
-menu "DECnet: Netfilter Configuration"
- depends on DECNET && NETFILTER
- depends on NETFILTER_ADVANCED
-
-config DECNET_NF_GRABULATOR
- tristate "Routing message grabulator (for userland routing daemon)"
- help
- Enable this module if you want to use the userland DECnet routing
- daemon. You will also need to enable routing support for DECnet
- unless you just want to monitor routing messages from other nodes.
-
-endmenu
diff --git a/net/decnet/netfilter/Makefile b/net/decnet/netfilter/Makefile
deleted file mode 100644
index 429c84289d0f..000000000000
--- a/net/decnet/netfilter/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile for DECnet netfilter modules
-#
-
-obj-$(CONFIG_DECNET_NF_GRABULATOR) += dn_rtmsg.o
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
deleted file mode 100644
index 26a9193df783..000000000000
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ /dev/null
@@ -1,158 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Routing Message Grabulator
- *
- * (C) 2000 ChyGwyn Limited - https://www.chygwyn.com/
- *
- * Author: Steven Whitehouse <steve@chygwyn.com>
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <linux/netfilter.h>
-#include <linux/spinlock.h>
-#include <net/netlink.h>
-#include <linux/netfilter_decnet.h>
-
-#include <net/sock.h>
-#include <net/flow.h>
-#include <net/dn.h>
-#include <net/dn_route.h>
-
-static struct sock *dnrmg = NULL;
-
-
-static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp)
-{
- struct sk_buff *skb = NULL;
- size_t size;
- sk_buff_data_t old_tail;
- struct nlmsghdr *nlh;
- unsigned char *ptr;
- struct nf_dn_rtmsg *rtm;
-
- size = NLMSG_ALIGN(rt_skb->len) +
- NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg));
- skb = nlmsg_new(size, GFP_ATOMIC);
- if (!skb) {
- *errp = -ENOMEM;
- return NULL;
- }
- old_tail = skb->tail;
- nlh = nlmsg_put(skb, 0, 0, 0, size, 0);
- if (!nlh) {
- kfree_skb(skb);
- *errp = -ENOMEM;
- return NULL;
- }
- rtm = (struct nf_dn_rtmsg *)nlmsg_data(nlh);
- rtm->nfdn_ifindex = rt_skb->dev->ifindex;
- ptr = NFDN_RTMSG(rtm);
- skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len);
- nlh->nlmsg_len = skb->tail - old_tail;
- return skb;
-}
-
-static void dnrmg_send_peer(struct sk_buff *skb)
-{
- struct sk_buff *skb2;
- int status = 0;
- int group = 0;
- unsigned char flags = *skb->data;
-
- switch (flags & DN_RT_CNTL_MSK) {
- case DN_RT_PKT_L1RT:
- group = DNRNG_NLGRP_L1;
- break;
- case DN_RT_PKT_L2RT:
- group = DNRNG_NLGRP_L2;
- break;
- default:
- return;
- }
-
- skb2 = dnrmg_build_message(skb, &status);
- if (skb2 == NULL)
- return;
- NETLINK_CB(skb2).dst_group = group;
- netlink_broadcast(dnrmg, skb2, 0, group, GFP_ATOMIC);
-}
-
-
-static unsigned int dnrmg_hook(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- dnrmg_send_peer(skb);
- return NF_ACCEPT;
-}
-
-
-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err), NULL); return; } while (0)
-
-static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
-{
- struct nlmsghdr *nlh = nlmsg_hdr(skb);
-
- if (skb->len < sizeof(*nlh) ||
- nlh->nlmsg_len < sizeof(*nlh) ||
- skb->len < nlh->nlmsg_len)
- return;
-
- if (!netlink_capable(skb, CAP_NET_ADMIN))
- RCV_SKB_FAIL(-EPERM);
-
- /* Eventually we might send routing messages too */
-
- RCV_SKB_FAIL(-EINVAL);
-}
-
-static const struct nf_hook_ops dnrmg_ops = {
- .hook = dnrmg_hook,
- .pf = NFPROTO_DECNET,
- .hooknum = NF_DN_ROUTE,
- .priority = NF_DN_PRI_DNRTMSG,
-};
-
-static int __init dn_rtmsg_init(void)
-{
- int rv = 0;
- struct netlink_kernel_cfg cfg = {
- .groups = DNRNG_NLGRP_MAX,
- .input = dnrmg_receive_user_skb,
- };
-
- dnrmg = netlink_kernel_create(&init_net, NETLINK_DNRTMSG, &cfg);
- if (dnrmg == NULL) {
- printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket");
- return -ENOMEM;
- }
-
- rv = nf_register_net_hook(&init_net, &dnrmg_ops);
- if (rv) {
- netlink_kernel_release(dnrmg);
- }
-
- return rv;
-}
-
-static void __exit dn_rtmsg_fini(void)
-{
- nf_unregister_net_hook(&init_net, &dnrmg_ops);
- netlink_kernel_release(dnrmg);
-}
-
-
-MODULE_DESCRIPTION("DECnet Routing Message Grabulator");
-MODULE_AUTHOR("Steven Whitehouse <steve@chygwyn.com>");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_DNRTMSG);
-
-module_init(dn_rtmsg_init);
-module_exit(dn_rtmsg_fini);
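
[editor's note] For reference, the interface deleted above was consumed from userspace by the DECnet routing daemon: the kernel copied each level 1/2 routing packet into a netlink message and broadcast it to the DNRNG_NLGRP_L1/L2 groups. A minimal listener sketch, assuming the retired NETLINK_DNRTMSG protocol number (14 in older <linux/netlink.h>) and the group numbering from <linux/netfilter_decnet.h>:

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>

#ifndef NETLINK_DNRTMSG
#define NETLINK_DNRTMSG	14	/* retired along with this code */
#endif
#ifndef DNRNG_NLGRP_L1
#define DNRNG_NLGRP_L1	1	/* from <linux/netfilter_decnet.h> */
#endif

int main(void)
{
	struct sockaddr_nl snl = {
		.nl_family = AF_NETLINK,
		/* nl_groups is a bitmask: group N is bit (1 << (N - 1)) */
		.nl_groups = 1 << (DNRNG_NLGRP_L1 - 1),
	};
	char buf[4096];
	ssize_t len;
	int fd;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_DNRTMSG);
	if (fd < 0)
		return 1;
	if (bind(fd, (struct sockaddr *)&snl, sizeof(snl)) < 0)
		return 1;

	/* Each datagram is an nlmsghdr followed by struct nf_dn_rtmsg
	 * and the raw routing message copied in by dnrmg_build_message().
	 */
	while ((len = recv(fd, buf, sizeof(buf), 0)) > 0)
		printf("received %zd bytes of routing data\n", len);

	close(fd);
	return 0;
}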
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
deleted file mode 100644
index 67b5ab2657b7..000000000000
--- a/net/decnet/sysctl_net_decnet.c
+++ /dev/null
@@ -1,362 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet sysctl support functions
- *
- * Author: Steve Whitehouse <SteveW@ACM.org>
- *
- *
- * Changes:
- * Steve Whitehouse - C99 changes and default device handling
- * Steve Whitehouse - Memory buffer settings, like the tcp ones
- *
- */
-#include <linux/mm.h>
-#include <linux/sysctl.h>
-#include <linux/fs.h>
-#include <linux/netdevice.h>
-#include <linux/string.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-
-#include <linux/uaccess.h>
-
-#include <net/dn.h>
-#include <net/dn_dev.h>
-#include <net/dn_route.h>
-
-
-int decnet_debug_level;
-int decnet_time_wait = 30;
-int decnet_dn_count = 1;
-int decnet_di_count = 3;
-int decnet_dr_count = 3;
-int decnet_log_martians = 1;
-int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW;
-
-/* Reasonable defaults, I hope, based on tcp's defaults */
-long sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
-int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
-int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
-
-#ifdef CONFIG_SYSCTL
-extern int decnet_dst_gc_interval;
-static int min_decnet_time_wait[] = { 5 };
-static int max_decnet_time_wait[] = { 600 };
-static int min_state_count[] = { 1 };
-static int max_state_count[] = { NSP_MAXRXTSHIFT };
-static int min_decnet_dst_gc_interval[] = { 1 };
-static int max_decnet_dst_gc_interval[] = { 60 };
-static int min_decnet_no_fc_max_cwnd[] = { NSP_MIN_WINDOW };
-static int max_decnet_no_fc_max_cwnd[] = { NSP_MAX_WINDOW };
-static char node_name[7] = "???";
-
-static struct ctl_table_header *dn_table_header = NULL;
-
-/*
- * ctype.h :-)
- */
-#define ISNUM(x) (((x) >= '0') && ((x) <= '9'))
-#define ISLOWER(x) (((x) >= 'a') && ((x) <= 'z'))
-#define ISUPPER(x) (((x) >= 'A') && ((x) <= 'Z'))
-#define ISALPHA(x) (ISLOWER(x) || ISUPPER(x))
-#define INVALID_END_CHAR(x) (ISNUM(x) || ISALPHA(x))
-
-static void strip_it(char *str)
-{
- for(;;) {
- switch (*str) {
- case ' ':
- case '\n':
- case '\r':
- case ':':
- *str = 0;
- fallthrough;
- case 0:
- return;
- }
- str++;
- }
-}
-
-/*
- * Simple routine to parse an ascii DECnet address
- * into a network order address.
- */
-static int parse_addr(__le16 *addr, char *str)
-{
- __u16 area, node;
-
- while(*str && !ISNUM(*str)) str++;
-
- if (*str == 0)
- return -1;
-
- area = (*str++ - '0');
- if (ISNUM(*str)) {
- area *= 10;
- area += (*str++ - '0');
- }
-
- if (*str++ != '.')
- return -1;
-
- if (!ISNUM(*str))
- return -1;
-
- node = *str++ - '0';
- if (ISNUM(*str)) {
- node *= 10;
- node += (*str++ - '0');
- }
- if (ISNUM(*str)) {
- node *= 10;
- node += (*str++ - '0');
- }
- if (ISNUM(*str)) {
- node *= 10;
- node += (*str++ - '0');
- }
-
- if ((node > 1023) || (area > 63))
- return -1;
-
- if (INVALID_END_CHAR(*str))
- return -1;
-
- *addr = cpu_to_le16((area << 10) | node);
-
- return 0;
-}
-
-static int dn_node_address_handler(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- char addr[DN_ASCBUF_LEN];
- size_t len;
- __le16 dnaddr;
-
- if (!*lenp || (*ppos && !write)) {
- *lenp = 0;
- return 0;
- }
-
- if (write) {
- len = (*lenp < DN_ASCBUF_LEN) ? *lenp : (DN_ASCBUF_LEN-1);
- memcpy(addr, buffer, len);
- addr[len] = 0;
- strip_it(addr);
-
- if (parse_addr(&dnaddr, addr))
- return -EINVAL;
-
- dn_dev_devices_off();
-
- decnet_address = dnaddr;
-
- dn_dev_devices_on();
-
- *ppos += len;
-
- return 0;
- }
-
- dn_addr2asc(le16_to_cpu(decnet_address), addr);
- len = strlen(addr);
- addr[len++] = '\n';
-
- if (len > *lenp)
- len = *lenp;
- memcpy(buffer, addr, len);
- *lenp = len;
- *ppos += len;
-
- return 0;
-}
-
-static int dn_def_dev_handler(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- size_t len;
- struct net_device *dev;
- char devname[17];
-
- if (!*lenp || (*ppos && !write)) {
- *lenp = 0;
- return 0;
- }
-
- if (write) {
- if (*lenp > 16)
- return -E2BIG;
-
- memcpy(devname, buffer, *lenp);
- devname[*lenp] = 0;
- strip_it(devname);
-
- dev = dev_get_by_name(&init_net, devname);
- if (dev == NULL)
- return -ENODEV;
-
- if (dev->dn_ptr == NULL) {
- dev_put(dev);
- return -ENODEV;
- }
-
- if (dn_dev_set_default(dev, 1)) {
- dev_put(dev);
- return -ENODEV;
- }
- *ppos += *lenp;
-
- return 0;
- }
-
- dev = dn_dev_get_default();
- if (dev == NULL) {
- *lenp = 0;
- return 0;
- }
-
- strcpy(devname, dev->name);
- dev_put(dev);
- len = strlen(devname);
- devname[len++] = '\n';
-
- if (len > *lenp) len = *lenp;
-
- memcpy(buffer, devname, len);
- *lenp = len;
- *ppos += len;
-
- return 0;
-}
-
-static struct ctl_table dn_table[] = {
- {
- .procname = "node_address",
- .maxlen = 7,
- .mode = 0644,
- .proc_handler = dn_node_address_handler,
- },
- {
- .procname = "node_name",
- .data = node_name,
- .maxlen = 7,
- .mode = 0644,
- .proc_handler = proc_dostring,
- },
- {
- .procname = "default_device",
- .maxlen = 16,
- .mode = 0644,
- .proc_handler = dn_def_dev_handler,
- },
- {
- .procname = "time_wait",
- .data = &decnet_time_wait,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_decnet_time_wait,
- .extra2 = &max_decnet_time_wait
- },
- {
- .procname = "dn_count",
- .data = &decnet_dn_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_state_count,
- .extra2 = &max_state_count
- },
- {
- .procname = "di_count",
- .data = &decnet_di_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_state_count,
- .extra2 = &max_state_count
- },
- {
- .procname = "dr_count",
- .data = &decnet_dr_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_state_count,
- .extra2 = &max_state_count
- },
- {
- .procname = "dst_gc_interval",
- .data = &decnet_dst_gc_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_decnet_dst_gc_interval,
- .extra2 = &max_decnet_dst_gc_interval
- },
- {
- .procname = "no_fc_max_cwnd",
- .data = &decnet_no_fc_max_cwnd,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_decnet_no_fc_max_cwnd,
- .extra2 = &max_decnet_no_fc_max_cwnd
- },
- {
- .procname = "decnet_mem",
- .data = &sysctl_decnet_mem,
- .maxlen = sizeof(sysctl_decnet_mem),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax
- },
- {
- .procname = "decnet_rmem",
- .data = &sysctl_decnet_rmem,
- .maxlen = sizeof(sysctl_decnet_rmem),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "decnet_wmem",
- .data = &sysctl_decnet_wmem,
- .maxlen = sizeof(sysctl_decnet_wmem),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "debug",
- .data = &decnet_debug_level,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- { }
-};
-
-void dn_register_sysctl(void)
-{
- dn_table_header = register_net_sysctl(&init_net, "net/decnet", dn_table);
-}
-
-void dn_unregister_sysctl(void)
-{
- unregister_net_sysctl_table(dn_table_header);
-}
-
-#else /* CONFIG_SYSCTL */
-void dn_unregister_sysctl(void)
-{
-}
-void dn_register_sysctl(void)
-{
-}
-
-#endif
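
[editor's note] The parse_addr() routine deleted above encodes a DECnet address as a 16-bit little-endian value with the 6-bit area in the high bits and the 10-bit node in the low bits, which is exactly where the area <= 63 and node <= 1023 bounds come from. A standalone sketch of the same packing (dn_pack() is a hypothetical helper mirroring the kernel's arithmetic):

#include <stdint.h>
#include <stdio.h>

static uint16_t dn_pack(uint16_t area, uint16_t node)
{
	return (area << 10) | node;	/* stored little-endian on the wire */
}

int main(void)
{
	uint16_t addr = dn_pack(1, 10);	/* the ASCII form "1.10" */

	printf("1.10 -> 0x%04x (area %u, node %u)\n",
	       addr, addr >> 10, addr & 0x3ff);
	return 0;
}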
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 8cb87b5067ee..3eef72ce99a4 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -87,10 +87,10 @@ config NET_DSA_TAG_MTK
Mediatek switches.
config NET_DSA_TAG_KSZ
- tristate "Tag driver for Microchip 8795/9477/9893 families of switches"
+ tristate "Tag driver for Microchip 8795/937x/9477/9893 families of switches"
help
Say Y if you want to enable support for tagging frames for the
- Microchip 8795/9477/9893 families of switches.
+ Microchip 8795/937x/9477/9893 families of switches.
config NET_DSA_TAG_OCELOT
tristate "Tag driver for Ocelot family of switches, using NPI port"
@@ -132,6 +132,13 @@ config NET_DSA_TAG_RTL8_4
Say Y or M if you want to enable support for tagging frames for Realtek
switches with 8 byte protocol 4 tags, such as the Realtek RTL8365MB-VC.
+config NET_DSA_TAG_RZN1_A5PSW
+ tristate "Tag driver for Renesas RZ/N1 A5PSW switch"
+ help
+ Say Y or M if you want to enable support for tagging frames for
+ Renesas RZ/N1 embedded switch that uses an 8 byte tag located after
+ destination MAC address.
+
config NET_DSA_TAG_LAN9303
tristate "Tag driver for SMSC/Microchip LAN9303 family of switches"
help
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 9f75820e7c98..bf57ef3bce2a 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,7 +1,15 @@
# SPDX-License-Identifier: GPL-2.0
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o tag_8021q.o
+dsa_core-y += \
+ dsa.o \
+ dsa2.o \
+ master.o \
+ netlink.o \
+ port.o \
+ slave.o \
+ switch.o \
+ tag_8021q.o
# tagging formats
obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o
@@ -17,6 +25,7 @@ obj-$(CONFIG_NET_DSA_TAG_OCELOT_8021Q) += tag_ocelot_8021q.o
obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o
obj-$(CONFIG_NET_DSA_TAG_RTL8_4) += tag_rtl8_4.o
+obj-$(CONFIG_NET_DSA_TAG_RZN1_A5PSW) += tag_rzn1_a5psw.o
obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o
obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index d9d0d227092c..64b14f655b23 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -7,19 +7,10 @@
#include <linux/device.h>
#include <linux/list.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
#include <linux/module.h>
-#include <linux/notifier.h>
-#include <linux/of.h>
-#include <linux/of_mdio.h>
-#include <linux/of_platform.h>
-#include <linux/of_net.h>
#include <linux/netdevice.h>
#include <linux/sysfs.h>
-#include <linux/phy_fixed.h>
#include <linux/ptp_classify.h>
-#include <linux/etherdevice.h>
#include "dsa_priv.h"
@@ -349,6 +340,7 @@ void dsa_flush_workqueue(void)
{
flush_workqueue(dsa_owq);
}
+EXPORT_SYMBOL_GPL(dsa_flush_workqueue);
int dsa_devlink_param_get(struct devlink *dl, u32 id,
struct devlink_param_gset_ctx *ctx)
@@ -466,6 +458,66 @@ struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
}
EXPORT_SYMBOL_GPL(dsa_port_from_netdev);
+bool dsa_db_equal(const struct dsa_db *a, const struct dsa_db *b)
+{
+ if (a->type != b->type)
+ return false;
+
+ switch (a->type) {
+ case DSA_DB_PORT:
+ return a->dp == b->dp;
+ case DSA_DB_LAG:
+ return a->lag.dev == b->lag.dev;
+ case DSA_DB_BRIDGE:
+ return a->bridge.num == b->bridge.num;
+ default:
+ WARN_ON(1);
+ return false;
+ }
+}
+
+bool dsa_fdb_present_in_other_db(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db)
+{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_mac_addr *a;
+
+ lockdep_assert_held(&dp->addr_lists_lock);
+
+ list_for_each_entry(a, &dp->fdbs, list) {
+ if (!ether_addr_equal(a->addr, addr) || a->vid != vid)
+ continue;
+
+ if (a->db.type == db.type && !dsa_db_equal(&a->db, &db))
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(dsa_fdb_present_in_other_db);
+
+bool dsa_mdb_present_in_other_db(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct dsa_db db)
+{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_mac_addr *a;
+
+ lockdep_assert_held(&dp->addr_lists_lock);
+
+ list_for_each_entry(a, &dp->mdbs, list) {
+ if (!ether_addr_equal(a->addr, mdb->addr) || a->vid != mdb->vid)
+ continue;
+
+ if (a->db.type == db.type && !dsa_db_equal(&a->db, &db))
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(dsa_mdb_present_in_other_db);
+
static int __init dsa_init_module(void)
{
int rc;
@@ -484,8 +536,16 @@ static int __init dsa_init_module(void)
dsa_tag_driver_register(&DSA_TAG_DRIVER_NAME(none_ops),
THIS_MODULE);
+ rc = rtnl_link_register(&dsa_link_ops);
+ if (rc)
+ goto netlink_register_fail;
+
return 0;
+netlink_register_fail:
+ dsa_tag_driver_unregister(&DSA_TAG_DRIVER_NAME(none_ops));
+ dsa_slave_unregister_notifier();
+ dev_remove_pack(&dsa_pack_type);
register_notifier_fail:
destroy_workqueue(dsa_owq);
@@ -495,6 +555,7 @@ module_init(dsa_init_module);
static void __exit dsa_cleanup_module(void)
{
+ rtnl_link_unregister(&dsa_link_ops);
dsa_tag_driver_unregister(&DSA_TAG_DRIVER_NAME(none_ops));
dsa_slave_unregister_notifier();
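
[editor's note] The dsa_db_equal() / dsa_fdb_present_in_other_db() / dsa_mdb_present_in_other_db() helpers added above support FDB isolation: before deleting a hardware entry, a driver can check whether the same address/VID pair is still referenced from a different bridge, LAG or port database. A sketch of the intended call site (kernel-context fragment, not standalone; the "foo" driver and foo_hw_fdb_del() are hypothetical):

static int foo_port_fdb_del(struct dsa_switch *ds, int port,
			    const unsigned char *addr, u16 vid,
			    struct dsa_db db)
{
	/* The DSA core calls this under dp->addr_lists_lock,
	 * which the helper asserts via lockdep.
	 */
	if (dsa_fdb_present_in_other_db(ds, port, addr, vid, db))
		return 0;	/* still referenced elsewhere, keep it */

	return foo_hw_fdb_del(ds, port, addr, vid);
}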
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 3d21521453fe..e504a18fc125 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -13,8 +13,10 @@
#include <linux/slab.h>
#include <linux/rtnetlink.h>
#include <linux/of.h>
+#include <linux/of_mdio.h>
#include <linux/of_net.h>
#include <net/devlink.h>
+#include <net/sch_generic.h>
#include "dsa_priv.h"
@@ -71,27 +73,24 @@ int dsa_broadcast(unsigned long e, void *v)
}
/**
- * dsa_lag_map() - Map LAG netdev to a linear LAG ID
+ * dsa_lag_map() - Map LAG structure to a linear LAG array
* @dst: Tree in which to record the mapping.
- * @lag: Netdev that is to be mapped to an ID.
+ * @lag: LAG structure that is to be mapped to the tree's array.
*
- * dsa_lag_id/dsa_lag_dev can then be used to translate between the
+ * dsa_lag_id/dsa_lag_by_id can then be used to translate between the
* two spaces. The size of the mapping space is determined by the
* driver by setting ds->num_lag_ids. It is perfectly legal to leave
* it unset if it is not needed, in which case these functions become
* no-ops.
*/
-void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag)
+void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag)
{
unsigned int id;
- if (dsa_lag_id(dst, lag) >= 0)
- /* Already mapped */
- return;
-
- for (id = 0; id < dst->lags_len; id++) {
- if (!dsa_lag_dev(dst, id)) {
- dst->lags[id] = lag;
+ for (id = 1; id <= dst->lags_len; id++) {
+ if (!dsa_lag_by_id(dst, id)) {
+ dst->lags[id - 1] = lag;
+ lag->id = id;
return;
}
}
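
[editor's note] One subtlety in the rewritten dsa_lag_map(): LAG IDs are now handed out 1-based (0 meaning "not in a LAG") while dst->lags[] stays 0-based, so every lookup must subtract one. A sketch of the invariant the dsa_lag_by_id() accessor is expected to uphold:

/* sketch only: what the 1-based mapping implies for lookups */
static struct dsa_lag *example_lag_by_id(struct dsa_switch_tree *dst,
					 unsigned int id)
{
	if (!id || id > dst->lags_len)	/* id 0 means "not in a LAG" */
		return NULL;

	return dst->lags[id - 1];
}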
@@ -107,28 +106,36 @@ void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag)
/**
* dsa_lag_unmap() - Remove a LAG ID mapping
* @dst: Tree in which the mapping is recorded.
- * @lag: Netdev that was mapped.
+ * @lag: LAG structure that was mapped.
*
* As there may be multiple users of the mapping, it is only removed
* if there are no other references to it.
*/
-void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag)
+void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag)
{
- struct dsa_port *dp;
unsigned int id;
- dsa_lag_foreach_port(dp, dst, lag)
- /* There are remaining users of this mapping */
- return;
-
dsa_lags_foreach_id(id, dst) {
- if (dsa_lag_dev(dst, id) == lag) {
- dst->lags[id] = NULL;
+ if (dsa_lag_by_id(dst, id) == lag) {
+ dst->lags[id - 1] = NULL;
+ lag->id = 0;
break;
}
}
}
+struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst,
+ const struct net_device *lag_dev)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_lag_dev_get(dp) == lag_dev)
+ return dp->lag;
+
+ return NULL;
+}
+
struct dsa_bridge *dsa_tree_bridge_find(struct dsa_switch_tree *dst,
const struct net_device *br)
{
@@ -380,6 +387,20 @@ static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
return NULL;
}
+struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst)
+{
+ struct device_node *ethernet;
+ struct net_device *master;
+ struct dsa_port *cpu_dp;
+
+ cpu_dp = dsa_tree_find_first_cpu(dst);
+ ethernet = of_parse_phandle(cpu_dp->dn, "ethernet", 0);
+ master = of_find_net_device_by_node(ethernet);
+ of_node_put(ethernet);
+
+ return master;
+}
+
/* Assign the default CPU port (the first one in the tree) to all ports of the
* fabric which don't already have one as part of their own switch.
*/
@@ -440,6 +461,72 @@ static void dsa_tree_teardown_cpu_ports(struct dsa_switch_tree *dst)
dp->cpu_dp = NULL;
}
+static int dsa_port_devlink_setup(struct dsa_port *dp)
+{
+ struct devlink_port *dlp = &dp->devlink_port;
+ struct dsa_switch_tree *dst = dp->ds->dst;
+ struct devlink_port_attrs attrs = {};
+ struct devlink *dl = dp->ds->devlink;
+ struct dsa_switch *ds = dp->ds;
+ const unsigned char *id;
+ unsigned char len;
+ int err;
+
+ memset(dlp, 0, sizeof(*dlp));
+ devlink_port_init(dl, dlp);
+
+ if (ds->ops->port_setup) {
+ err = ds->ops->port_setup(ds, dp->index);
+ if (err)
+ return err;
+ }
+
+ id = (const unsigned char *)&dst->index;
+ len = sizeof(dst->index);
+
+ attrs.phys.port_number = dp->index;
+ memcpy(attrs.switch_id.id, id, len);
+ attrs.switch_id.id_len = len;
+
+ switch (dp->type) {
+ case DSA_PORT_TYPE_UNUSED:
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_UNUSED;
+ break;
+ case DSA_PORT_TYPE_CPU:
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_CPU;
+ break;
+ case DSA_PORT_TYPE_DSA:
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_DSA;
+ break;
+ case DSA_PORT_TYPE_USER:
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
+ break;
+ }
+
+ devlink_port_attrs_set(dlp, &attrs);
+ err = devlink_port_register(dl, dlp, dp->index);
+ if (err) {
+ if (ds->ops->port_teardown)
+ ds->ops->port_teardown(ds, dp->index);
+ return err;
+ }
+
+ return 0;
+}
+
+static void dsa_port_devlink_teardown(struct dsa_port *dp)
+{
+ struct devlink_port *dlp = &dp->devlink_port;
+ struct dsa_switch *ds = dp->ds;
+
+ devlink_port_unregister(dlp);
+
+ if (ds->ops->port_teardown)
+ ds->ops->port_teardown(ds, dp->index);
+
+ devlink_port_fini(dlp);
+}
+
static int dsa_port_setup(struct dsa_port *dp)
{
struct devlink_port *dlp = &dp->devlink_port;
@@ -451,25 +538,25 @@ static int dsa_port_setup(struct dsa_port *dp)
if (dp->setup)
return 0;
- mutex_init(&dp->addr_lists_lock);
- INIT_LIST_HEAD(&dp->fdbs);
- INIT_LIST_HEAD(&dp->mdbs);
-
- if (ds->ops->port_setup) {
- err = ds->ops->port_setup(ds, dp->index);
- if (err)
- return err;
- }
+ err = dsa_port_devlink_setup(dp);
+ if (err)
+ return err;
switch (dp->type) {
case DSA_PORT_TYPE_UNUSED:
dsa_port_disable(dp);
break;
case DSA_PORT_TYPE_CPU:
- err = dsa_port_link_register_of(dp);
- if (err)
- break;
- dsa_port_link_registered = true;
+ if (dp->dn) {
+ err = dsa_shared_port_link_register_of(dp);
+ if (err)
+ break;
+ dsa_port_link_registered = true;
+ } else {
+ dev_warn(ds->dev,
+ "skipping link registration for CPU port %d\n",
+ dp->index);
+ }
err = dsa_port_enable(dp, NULL);
if (err)
@@ -478,10 +565,16 @@ static int dsa_port_setup(struct dsa_port *dp)
break;
case DSA_PORT_TYPE_DSA:
- err = dsa_port_link_register_of(dp);
- if (err)
- break;
- dsa_port_link_registered = true;
+ if (dp->dn) {
+ err = dsa_shared_port_link_register_of(dp);
+ if (err)
+ break;
+ dsa_port_link_registered = true;
+ } else {
+ dev_warn(ds->dev,
+ "skipping link registration for DSA port %d\n",
+ dp->index);
+ }
err = dsa_port_enable(dp, NULL);
if (err)
@@ -502,10 +595,9 @@ static int dsa_port_setup(struct dsa_port *dp)
if (err && dsa_port_enabled)
dsa_port_disable(dp);
if (err && dsa_port_link_registered)
- dsa_port_link_unregister_of(dp);
+ dsa_shared_port_link_unregister_of(dp);
if (err) {
- if (ds->ops->port_teardown)
- ds->ops->port_teardown(ds, dp->index);
+ dsa_port_devlink_teardown(dp);
return err;
}
@@ -514,61 +606,13 @@ static int dsa_port_setup(struct dsa_port *dp)
return 0;
}
-static int dsa_port_devlink_setup(struct dsa_port *dp)
-{
- struct devlink_port *dlp = &dp->devlink_port;
- struct dsa_switch_tree *dst = dp->ds->dst;
- struct devlink_port_attrs attrs = {};
- struct devlink *dl = dp->ds->devlink;
- const unsigned char *id;
- unsigned char len;
- int err;
-
- id = (const unsigned char *)&dst->index;
- len = sizeof(dst->index);
-
- attrs.phys.port_number = dp->index;
- memcpy(attrs.switch_id.id, id, len);
- attrs.switch_id.id_len = len;
- memset(dlp, 0, sizeof(*dlp));
-
- switch (dp->type) {
- case DSA_PORT_TYPE_UNUSED:
- attrs.flavour = DEVLINK_PORT_FLAVOUR_UNUSED;
- break;
- case DSA_PORT_TYPE_CPU:
- attrs.flavour = DEVLINK_PORT_FLAVOUR_CPU;
- break;
- case DSA_PORT_TYPE_DSA:
- attrs.flavour = DEVLINK_PORT_FLAVOUR_DSA;
- break;
- case DSA_PORT_TYPE_USER:
- attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
- break;
- }
-
- devlink_port_attrs_set(dlp, &attrs);
- err = devlink_port_register(dl, dlp, dp->index);
-
- if (!err)
- dp->devlink_port_setup = true;
-
- return err;
-}
-
static void dsa_port_teardown(struct dsa_port *dp)
{
struct devlink_port *dlp = &dp->devlink_port;
- struct dsa_switch *ds = dp->ds;
- struct dsa_mac_addr *a, *tmp;
- struct net_device *slave;
if (!dp->setup)
return;
- if (ds->ops->port_teardown)
- ds->ops->port_teardown(ds, dp->index);
-
devlink_port_type_clear(dlp);
switch (dp->type) {
@@ -576,72 +620,31 @@ static void dsa_port_teardown(struct dsa_port *dp)
break;
case DSA_PORT_TYPE_CPU:
dsa_port_disable(dp);
- dsa_port_link_unregister_of(dp);
+ if (dp->dn)
+ dsa_shared_port_link_unregister_of(dp);
break;
case DSA_PORT_TYPE_DSA:
dsa_port_disable(dp);
- dsa_port_link_unregister_of(dp);
+ if (dp->dn)
+ dsa_shared_port_link_unregister_of(dp);
break;
case DSA_PORT_TYPE_USER:
- slave = dp->slave;
-
- if (slave) {
+ if (dp->slave) {
+ dsa_slave_destroy(dp->slave);
dp->slave = NULL;
- dsa_slave_destroy(slave);
}
break;
}
- list_for_each_entry_safe(a, tmp, &dp->fdbs, list) {
- list_del(&a->list);
- kfree(a);
- }
-
- list_for_each_entry_safe(a, tmp, &dp->mdbs, list) {
- list_del(&a->list);
- kfree(a);
- }
+ dsa_port_devlink_teardown(dp);
dp->setup = false;
}
-static void dsa_port_devlink_teardown(struct dsa_port *dp)
+static int dsa_port_setup_as_unused(struct dsa_port *dp)
{
- struct devlink_port *dlp = &dp->devlink_port;
-
- if (dp->devlink_port_setup)
- devlink_port_unregister(dlp);
- dp->devlink_port_setup = false;
-}
-
-/* Destroy the current devlink port, and create a new one which has the UNUSED
- * flavour. At this point, any call to ds->ops->port_setup has been already
- * balanced out by a call to ds->ops->port_teardown, so we know that any
- * devlink port regions the driver had are now unregistered. We then call its
- * ds->ops->port_setup again, in order for the driver to re-create them on the
- * new devlink port.
- */
-static int dsa_port_reinit_as_unused(struct dsa_port *dp)
-{
- struct dsa_switch *ds = dp->ds;
- int err;
-
- dsa_port_devlink_teardown(dp);
dp->type = DSA_PORT_TYPE_UNUSED;
- err = dsa_port_devlink_setup(dp);
- if (err)
- return err;
-
- if (ds->ops->port_setup) {
- /* On error, leave the devlink port registered,
- * dsa_switch_teardown will clean it up later.
- */
- err = ds->ops->port_setup(ds, dp->index);
- if (err)
- return err;
- }
-
- return 0;
+ return dsa_port_setup(dp);
}
static int dsa_devlink_info_get(struct devlink *dl,
@@ -821,22 +824,18 @@ static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds)
{
const struct dsa_device_ops *tag_ops = ds->dst->tag_ops;
struct dsa_switch_tree *dst = ds->dst;
- struct dsa_port *cpu_dp;
int err;
if (tag_ops->proto == dst->default_proto)
goto connect;
- dsa_switch_for_each_cpu_port(cpu_dp, ds) {
- rtnl_lock();
- err = ds->ops->change_tag_protocol(ds, cpu_dp->index,
- tag_ops->proto);
- rtnl_unlock();
- if (err) {
- dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n",
- tag_ops->name, ERR_PTR(err));
- return err;
- }
+ rtnl_lock();
+ err = ds->ops->change_tag_protocol(ds, tag_ops->proto);
+ rtnl_unlock();
+ if (err) {
+ dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n",
+ tag_ops->name, ERR_PTR(err));
+ return err;
}
connect:
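
[editor's note] This hunk also reflects a driver-facing signature change: ->change_tag_protocol() is now invoked once per switch rather than once per CPU port, so the port argument moves out of the core and the driver applies the protocol to all of its CPU ports itself. A hedged sketch of the adjusted callback (hypothetical "foo" driver; foo_set_port_tagger() is invented for illustration):

static int foo_change_tag_protocol(struct dsa_switch *ds,
				   enum dsa_tag_protocol proto)
{
	struct dsa_port *cpu_dp;
	int err;

	/* The core no longer iterates CPU ports; the driver does. */
	dsa_switch_for_each_cpu_port(cpu_dp, ds) {
		err = foo_set_port_tagger(ds, cpu_dp->index, proto);
		if (err)
			return err;
	}

	return 0;
}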
@@ -868,7 +867,7 @@ disconnect:
static int dsa_switch_setup(struct dsa_switch *ds)
{
struct dsa_devlink_priv *dl_priv;
- struct dsa_port *dp;
+ struct device_node *dn;
int err;
if (ds->setup)
@@ -891,18 +890,9 @@ static int dsa_switch_setup(struct dsa_switch *ds)
dl_priv = devlink_priv(ds->devlink);
dl_priv->ds = ds;
- /* Setup devlink port instances now, so that the switch
- * setup() can register regions etc, against the ports
- */
- dsa_switch_for_each_port(dp, ds) {
- err = dsa_port_devlink_setup(dp);
- if (err)
- goto unregister_devlink_ports;
- }
-
err = dsa_switch_register_notifier(ds);
if (err)
- goto unregister_devlink_ports;
+ goto devlink_free;
ds->configure_vlan_while_not_filtering = true;
@@ -923,7 +913,10 @@ static int dsa_switch_setup(struct dsa_switch *ds)
dsa_slave_mii_bus_init(ds);
- err = mdiobus_register(ds->slave_mii_bus);
+ dn = of_get_child_by_name(ds->dev->of_node, "mdio");
+
+ err = of_mdiobus_register(ds->slave_mii_bus, dn);
+ of_node_put(dn);
if (err < 0)
goto free_slave_mii_bus;
}
@@ -940,9 +933,7 @@ teardown:
ds->ops->teardown(ds);
unregister_notifier:
dsa_switch_unregister_notifier(ds);
-unregister_devlink_ports:
- dsa_switch_for_each_port(dp, ds)
- dsa_port_devlink_teardown(dp);
+devlink_free:
devlink_free(ds->devlink);
ds->devlink = NULL;
return err;
@@ -950,8 +941,6 @@ unregister_devlink_ports:
static void dsa_switch_teardown(struct dsa_switch *ds)
{
- struct dsa_port *dp;
-
if (!ds->setup)
return;
@@ -970,8 +959,6 @@ static void dsa_switch_teardown(struct dsa_switch *ds)
dsa_switch_unregister_notifier(ds);
if (ds->devlink) {
- dsa_switch_for_each_port(dp, ds)
- dsa_port_devlink_teardown(dp);
devlink_free(ds->devlink);
ds->devlink = NULL;
}
@@ -1024,7 +1011,7 @@ static int dsa_tree_setup_ports(struct dsa_switch_tree *dst)
if (dsa_port_is_user(dp) || dsa_port_is_unused(dp)) {
err = dsa_port_setup(dp);
if (err) {
- err = dsa_port_reinit_as_unused(dp);
+ err = dsa_port_setup_as_unused(dp);
if (err)
goto teardown;
}
@@ -1057,33 +1044,48 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
{
- struct dsa_port *dp;
- int err;
+ struct dsa_port *cpu_dp;
+ int err = 0;
rtnl_lock();
- list_for_each_entry(dp, &dst->ports, list) {
- if (dsa_port_is_cpu(dp)) {
- err = dsa_master_setup(dp->master, dp);
- if (err)
- return err;
- }
+ dsa_tree_for_each_cpu_port(cpu_dp, dst) {
+ struct net_device *master = cpu_dp->master;
+ bool admin_up = (master->flags & IFF_UP) &&
+ !qdisc_tx_is_noop(master);
+
+ err = dsa_master_setup(master, cpu_dp);
+ if (err)
+ break;
+
+ /* Replay master state event */
+ dsa_tree_master_admin_state_change(dst, master, admin_up);
+ dsa_tree_master_oper_state_change(dst, master,
+ netif_oper_up(master));
}
rtnl_unlock();
- return 0;
+ return err;
}
static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
{
- struct dsa_port *dp;
+ struct dsa_port *cpu_dp;
rtnl_lock();
- list_for_each_entry(dp, &dst->ports, list)
- if (dsa_port_is_cpu(dp))
- dsa_master_teardown(dp->master);
+ dsa_tree_for_each_cpu_port(cpu_dp, dst) {
+ struct net_device *master = cpu_dp->master;
+
+ /* Synthesizing an "admin down" state is sufficient for
+ * the switches to get a notification if the master is
+ * currently up and running.
+ */
+ dsa_tree_master_admin_state_change(dst, master, false);
+
+ dsa_master_teardown(master);
+ }
rtnl_unlock();
}
@@ -1137,17 +1139,17 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
if (err)
goto teardown_cpu_ports;
- err = dsa_tree_setup_master(dst);
+ err = dsa_tree_setup_ports(dst);
if (err)
goto teardown_switches;
- err = dsa_tree_setup_ports(dst);
+ err = dsa_tree_setup_master(dst);
if (err)
- goto teardown_master;
+ goto teardown_ports;
err = dsa_tree_setup_lags(dst);
if (err)
- goto teardown_ports;
+ goto teardown_master;
dst->setup = true;
@@ -1155,10 +1157,10 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
return 0;
-teardown_ports:
- dsa_tree_teardown_ports(dst);
teardown_master:
dsa_tree_teardown_master(dst);
+teardown_ports:
+ dsa_tree_teardown_ports(dst);
teardown_switches:
dsa_tree_teardown_switches(dst);
teardown_cpu_ports:
@@ -1176,10 +1178,10 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
dsa_tree_teardown_lags(dst);
- dsa_tree_teardown_ports(dst);
-
dsa_tree_teardown_master(dst);
+ dsa_tree_teardown_ports(dst);
+
dsa_tree_teardown_switches(dst);
dsa_tree_teardown_cpu_ports(dst);
@@ -1230,7 +1232,6 @@ out_disconnect:
* they would have formed disjoint trees (different "dsa,member" values).
*/
int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
- struct net_device *master,
const struct dsa_device_ops *tag_ops,
const struct dsa_device_ops *old_tag_ops)
{
@@ -1246,12 +1247,9 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
* attempts to change the tagging protocol. If we ever lift the IFF_UP
* restriction, there needs to be another mutex which serializes this.
*/
- if (master->flags & IFF_UP)
- goto out_unlock;
-
- list_for_each_entry(dp, &dst->ports, list) {
- if (!dsa_port_is_user(dp))
- continue;
+ dsa_tree_for_each_user_port(dp, dst) {
+ if (dsa_port_to_master(dp)->flags & IFF_UP)
+ goto out_unlock;
if (dp->slave->flags & IFF_UP)
goto out_unlock;
@@ -1261,7 +1259,7 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
info.tag_ops = tag_ops;
err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
if (err)
- return err;
+ goto out_unwind_tagger;
err = dsa_tree_bind_tag_proto(dst, tag_ops);
if (err)
@@ -1279,6 +1277,64 @@ out_unlock:
return err;
}
+static void dsa_tree_master_state_change(struct dsa_switch_tree *dst,
+ struct net_device *master)
+{
+ struct dsa_notifier_master_state_info info;
+ struct dsa_port *cpu_dp = master->dsa_ptr;
+
+ info.master = master;
+ info.operational = dsa_port_master_is_operational(cpu_dp);
+
+ dsa_tree_notify(dst, DSA_NOTIFIER_MASTER_STATE_CHANGE, &info);
+}
+
+void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst,
+ struct net_device *master,
+ bool up)
+{
+ struct dsa_port *cpu_dp = master->dsa_ptr;
+ bool notify = false;
+
+ /* Don't keep track of admin state on LAG DSA masters,
+ * but rather just of physical DSA masters
+ */
+ if (netif_is_lag_master(master))
+ return;
+
+ if ((dsa_port_master_is_operational(cpu_dp)) !=
+ (up && cpu_dp->master_oper_up))
+ notify = true;
+
+ cpu_dp->master_admin_up = up;
+
+ if (notify)
+ dsa_tree_master_state_change(dst, master);
+}
+
+void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst,
+ struct net_device *master,
+ bool up)
+{
+ struct dsa_port *cpu_dp = master->dsa_ptr;
+ bool notify = false;
+
+ /* Don't keep track of oper state on LAG DSA masters,
+ * but rather just of physical DSA masters
+ */
+ if (netif_is_lag_master(master))
+ return;
+
+ if ((dsa_port_master_is_operational(cpu_dp)) !=
+ (cpu_dp->master_admin_up && up))
+ notify = true;
+
+ cpu_dp->master_oper_up = up;
+
+ if (notify)
+ dsa_tree_master_state_change(dst, master);
+}
+
static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
{
struct dsa_switch_tree *dst = ds->dst;
@@ -1295,6 +1351,11 @@ static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
dp->ds = ds;
dp->index = index;
+ mutex_init(&dp->addr_lists_lock);
+ mutex_init(&dp->vlans_lock);
+ INIT_LIST_HEAD(&dp->fdbs);
+ INIT_LIST_HEAD(&dp->mdbs);
+ INIT_LIST_HEAD(&dp->vlans);
INIT_LIST_HEAD(&dp->list);
list_add_tail(&dp->list, &dst->ports);
@@ -1348,9 +1409,9 @@ static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp,
static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master,
const char *user_protocol)
{
+ const struct dsa_device_ops *tag_ops = NULL;
struct dsa_switch *ds = dp->ds;
struct dsa_switch_tree *dst = ds->dst;
- const struct dsa_device_ops *tag_ops;
enum dsa_tag_protocol default_proto;
/* Find out which protocol the switch would prefer. */
@@ -1373,10 +1434,17 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master,
}
tag_ops = dsa_find_tagger_by_name(user_protocol);
- } else {
- tag_ops = dsa_tag_driver_get(default_proto);
+ if (IS_ERR(tag_ops)) {
+ dev_warn(ds->dev,
+ "Failed to find a tagging driver for protocol %s, using default\n",
+ user_protocol);
+ tag_ops = NULL;
+ }
}
+ if (!tag_ops)
+ tag_ops = dsa_tag_driver_get(default_proto);
+
if (IS_ERR(tag_ops)) {
if (PTR_ERR(tag_ops) == -ENOPROTOOPT)
return -EPROBE_DEFER;
@@ -1436,6 +1504,7 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
const char *user_protocol;
master = of_find_net_device_by_node(ethernet);
+ of_node_put(ethernet);
if (!master)
return -EPROBE_DEFER;
@@ -1633,6 +1702,9 @@ static void dsa_switch_release_ports(struct dsa_switch *ds)
struct dsa_port *dp, *next;
dsa_switch_for_each_port_safe(dp, next, ds) {
+ WARN_ON(!list_empty(&dp->fdbs));
+ WARN_ON(!list_empty(&dp->mdbs));
+ WARN_ON(!list_empty(&dp->vlans));
list_del(&dp->list);
kfree(dp);
}
@@ -1718,38 +1790,30 @@ EXPORT_SYMBOL_GPL(dsa_unregister_switch);
void dsa_switch_shutdown(struct dsa_switch *ds)
{
struct net_device *master, *slave_dev;
- LIST_HEAD(unregister_list);
struct dsa_port *dp;
mutex_lock(&dsa2_mutex);
+
+ if (!ds->setup)
+ goto out;
+
rtnl_lock();
dsa_switch_for_each_user_port(dp, ds) {
- master = dp->cpu_dp->master;
+ master = dsa_port_to_master(dp);
slave_dev = dp->slave;
netdev_upper_dev_unlink(master, slave_dev);
- /* Just unlinking ourselves as uppers of the master is not
- * sufficient. When the master net device unregisters, that will
- * also call dev_close, which we will catch as NETDEV_GOING_DOWN
- * and trigger a dev_close on our own devices (dsa_slave_close).
- * In turn, that will call dev_mc_unsync on the master's net
- * device. If the master is also a DSA switch port, this will
- * trigger dsa_slave_set_rx_mode which will call dev_mc_sync on
- * its own master. Lockdep will complain about the fact that
- * all cascaded masters have the same dsa_master_addr_list_lock_key,
- * which it normally would not do if the cascaded masters would
- * be in a proper upper/lower relationship, which we've just
- * destroyed.
- * To suppress the lockdep warnings, let's actually unregister
- * the DSA slave interfaces too, to avoid the nonsensical
- * multicast address list synchronization on shutdown.
- */
- unregister_netdevice_queue(slave_dev, &unregister_list);
}
- unregister_netdevice_many(&unregister_list);
+
+ /* Disconnect from further netdevice notifiers on the master,
+ * since netdev_uses_dsa() will now return false.
+ */
+ dsa_switch_for_each_cpu_port(dp, ds)
+ dp->master->dsa_ptr = NULL;
rtnl_unlock();
+out:
mutex_unlock(&dsa2_mutex);
}
EXPORT_SYMBOL_GPL(dsa_switch_shutdown);
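
[editor's note] The master state tracking added to dsa2.c reduces to one predicate: a master is "operational" only while it is both administratively up and oper up, and DSA_NOTIFIER_MASTER_STATE_CHANGE fires only when that combined value flips. Roughly what the dsa_port_master_is_operational() helper called above evaluates (the real definition lives in the DSA headers):

/* sketch of the predicate behind the notifier */
static bool example_master_is_operational(const struct dsa_port *cpu_dp)
{
	return cpu_dp->master_admin_up && cpu_dp->master_oper_up;
}

LAG DSA masters are deliberately excluded from this bookkeeping, since their state is an aggregate of the physical masters underneath.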
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 760306f0012f..6e65c7ffd6f3 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -25,6 +25,8 @@ enum {
DSA_NOTIFIER_FDB_DEL,
DSA_NOTIFIER_HOST_FDB_ADD,
DSA_NOTIFIER_HOST_FDB_DEL,
+ DSA_NOTIFIER_LAG_FDB_ADD,
+ DSA_NOTIFIER_LAG_FDB_DEL,
DSA_NOTIFIER_LAG_CHANGE,
DSA_NOTIFIER_LAG_JOIN,
DSA_NOTIFIER_LAG_LEAVE,
@@ -34,12 +36,15 @@ enum {
DSA_NOTIFIER_HOST_MDB_DEL,
DSA_NOTIFIER_VLAN_ADD,
DSA_NOTIFIER_VLAN_DEL,
+ DSA_NOTIFIER_HOST_VLAN_ADD,
+ DSA_NOTIFIER_HOST_VLAN_DEL,
DSA_NOTIFIER_MTU,
DSA_NOTIFIER_TAG_PROTO,
DSA_NOTIFIER_TAG_PROTO_CONNECT,
DSA_NOTIFIER_TAG_PROTO_DISCONNECT,
DSA_NOTIFIER_TAG_8021Q_VLAN_ADD,
DSA_NOTIFIER_TAG_8021Q_VLAN_DEL,
+ DSA_NOTIFIER_MASTER_STATE_CHANGE,
};
/* DSA_NOTIFIER_AGEING_TIME */
@@ -49,50 +54,53 @@ struct dsa_notifier_ageing_time_info {
/* DSA_NOTIFIER_BRIDGE_* */
struct dsa_notifier_bridge_info {
+ const struct dsa_port *dp;
struct dsa_bridge bridge;
- int tree_index;
- int sw_index;
- int port;
bool tx_fwd_offload;
+ struct netlink_ext_ack *extack;
};
/* DSA_NOTIFIER_FDB_* */
struct dsa_notifier_fdb_info {
- int sw_index;
- int port;
+ const struct dsa_port *dp;
const unsigned char *addr;
u16 vid;
+ struct dsa_db db;
+};
+
+/* DSA_NOTIFIER_LAG_FDB_* */
+struct dsa_notifier_lag_fdb_info {
+ struct dsa_lag *lag;
+ const unsigned char *addr;
+ u16 vid;
+ struct dsa_db db;
};
/* DSA_NOTIFIER_MDB_* */
struct dsa_notifier_mdb_info {
+ const struct dsa_port *dp;
const struct switchdev_obj_port_mdb *mdb;
- int sw_index;
- int port;
+ struct dsa_db db;
};
/* DSA_NOTIFIER_LAG_* */
struct dsa_notifier_lag_info {
- struct net_device *lag;
- int sw_index;
- int port;
-
+ const struct dsa_port *dp;
+ struct dsa_lag lag;
struct netdev_lag_upper_info *info;
+ struct netlink_ext_ack *extack;
};
/* DSA_NOTIFIER_VLAN_* */
struct dsa_notifier_vlan_info {
+ const struct dsa_port *dp;
const struct switchdev_obj_port_vlan *vlan;
- int sw_index;
- int port;
struct netlink_ext_ack *extack;
};
/* DSA_NOTIFIER_MTU */
struct dsa_notifier_mtu_info {
- bool targeted_match;
- int sw_index;
- int port;
+ const struct dsa_port *dp;
int mtu;
};
@@ -103,16 +111,19 @@ struct dsa_notifier_tag_proto_info {
/* DSA_NOTIFIER_TAG_8021Q_VLAN_* */
struct dsa_notifier_tag_8021q_vlan_info {
- int tree_index;
- int sw_index;
- int port;
+ const struct dsa_port *dp;
u16 vid;
};
+/* DSA_NOTIFIER_MASTER_STATE_CHANGE */
+struct dsa_notifier_master_state_info {
+ const struct net_device *master;
+ bool operational;
+};
+
struct dsa_switchdev_event_work {
- struct dsa_switch *ds;
- int port;
struct net_device *dev;
+ struct net_device *orig_dev;
struct work_struct work;
unsigned long event;
/* Specific for SWITCHDEV_FDB_ADD_TO_DEVICE and
@@ -123,6 +134,21 @@ struct dsa_switchdev_event_work {
bool host_addr;
};
+enum dsa_standalone_event {
+ DSA_UC_ADD,
+ DSA_UC_DEL,
+ DSA_MC_ADD,
+ DSA_MC_DEL,
+};
+
+struct dsa_standalone_event_work {
+ struct work_struct work;
+ struct net_device *dev;
+ enum dsa_standalone_event event;
+ unsigned char addr[ETH_ALEN];
+ u16 vid;
+};
+
struct dsa_slave_priv {
/* Copy of CPU port xmit for faster access in slave transmit hot path */
struct sk_buff * (*xmit)(struct sk_buff *skb,
@@ -146,8 +172,9 @@ const struct dsa_device_ops *dsa_tag_driver_get(int tag_protocol);
void dsa_tag_driver_put(const struct dsa_device_ops *ops);
const struct dsa_device_ops *dsa_find_tagger_by_name(const char *buf);
+bool dsa_db_equal(const struct dsa_db *a, const struct dsa_db *b);
+
bool dsa_schedule_work(struct work_struct *work);
-void dsa_flush_workqueue(void);
const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops);
static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops)
@@ -158,6 +185,11 @@ static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops)
/* master.c */
int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp);
void dsa_master_teardown(struct net_device *dev);
+int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp,
+ struct netdev_lag_upper_info *uinfo,
+ struct netlink_ext_ack *extack);
+void dsa_master_lag_teardown(struct net_device *lag_dev,
+ struct dsa_port *cpu_dp);
static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
int device, int port)
@@ -174,10 +206,16 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
return NULL;
}
+/* netlink.c */
+extern struct rtnl_link_ops dsa_link_ops __read_mostly;
+
/* port.c */
void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
const struct dsa_device_ops *tag_ops);
int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age);
+int dsa_port_set_mst_state(struct dsa_port *dp,
+ const struct switchdev_mst_state *state,
+ struct netlink_ext_ack *extack);
int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
void dsa_port_disable_rt(struct dsa_port *dp);
@@ -197,25 +235,40 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
struct netlink_ext_ack *extack);
bool dsa_port_skip_vlan_configuration(struct dsa_port *dp);
int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock);
-int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
- bool targeted_match);
+int dsa_port_mst_enable(struct dsa_port *dp, bool on,
+ struct netlink_ext_ack *extack);
+int dsa_port_vlan_msti(struct dsa_port *dp,
+ const struct switchdev_vlan_msti *msti);
+int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu);
int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
u16 vid);
int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
u16 vid);
-int dsa_port_host_fdb_add(struct dsa_port *dp, const unsigned char *addr,
- u16 vid);
-int dsa_port_host_fdb_del(struct dsa_port *dp, const unsigned char *addr,
- u16 vid);
+int dsa_port_standalone_host_fdb_add(struct dsa_port *dp,
+ const unsigned char *addr, u16 vid);
+int dsa_port_standalone_host_fdb_del(struct dsa_port *dp,
+ const unsigned char *addr, u16 vid);
+int dsa_port_bridge_host_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
+int dsa_port_bridge_host_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
+int dsa_port_lag_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
+int dsa_port_lag_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data);
int dsa_port_mdb_add(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb);
int dsa_port_mdb_del(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb);
-int dsa_port_host_mdb_add(const struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb);
-int dsa_port_host_mdb_del(const struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_standalone_host_mdb_add(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_standalone_host_mdb_del(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb);
int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack);
@@ -227,6 +280,11 @@ int dsa_port_vlan_add(struct dsa_port *dp,
struct netlink_ext_ack *extack);
int dsa_port_vlan_del(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan);
+int dsa_port_host_vlan_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack);
+int dsa_port_host_vlan_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan);
int dsa_port_mrp_add(const struct dsa_port *dp,
const struct switchdev_obj_mrp *mrp);
int dsa_port_mrp_del(const struct dsa_port *dp,
@@ -236,12 +294,16 @@ int dsa_port_mrp_add_ring_role(const struct dsa_port *dp,
int dsa_port_mrp_del_ring_role(const struct dsa_port *dp,
const struct switchdev_obj_ring_role_mrp *mrp);
int dsa_port_phylink_create(struct dsa_port *dp);
-int dsa_port_link_register_of(struct dsa_port *dp);
-void dsa_port_link_unregister_of(struct dsa_port *dp);
+void dsa_port_phylink_destroy(struct dsa_port *dp);
+int dsa_shared_port_link_register_of(struct dsa_port *dp);
+void dsa_shared_port_link_unregister_of(struct dsa_port *dp);
int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr);
void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr);
int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast);
void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast);
+void dsa_port_set_host_flood(struct dsa_port *dp, bool uc, bool mc);
+int dsa_port_change_master(struct dsa_port *dp, struct net_device *master,
+ struct netlink_ext_ack *extack);
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
@@ -255,8 +317,12 @@ int dsa_slave_suspend(struct net_device *slave_dev);
int dsa_slave_resume(struct net_device *slave_dev);
int dsa_slave_register_notifier(void);
void dsa_slave_unregister_notifier(void);
+void dsa_slave_sync_ha(struct net_device *dev);
+void dsa_slave_unsync_ha(struct net_device *dev);
void dsa_slave_setup_tagger(struct net_device *slave);
int dsa_slave_change_mtu(struct net_device *dev, int new_mtu);
+int dsa_slave_change_master(struct net_device *dev, struct net_device *master,
+ struct netlink_ext_ack *extack);
int dsa_slave_manage_vlan_filtering(struct net_device *dev,
bool vlan_filtering);
@@ -272,7 +338,7 @@ dsa_slave_to_master(const struct net_device *dev)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
- return dp->cpu_dp->master;
+ return dsa_port_to_master(dp);
}
/* If under a bridge with vlan_filtering=0, make sure to send pvid-tagged
@@ -473,15 +539,37 @@ static inline void *dsa_etype_header_pos_tx(struct sk_buff *skb)
int dsa_switch_register_notifier(struct dsa_switch *ds);
void dsa_switch_unregister_notifier(struct dsa_switch *ds);
+static inline bool dsa_switch_supports_uc_filtering(struct dsa_switch *ds)
+{
+ return ds->ops->port_fdb_add && ds->ops->port_fdb_del &&
+ ds->fdb_isolation && !ds->vlan_filtering_is_global &&
+ !ds->needs_standalone_vlan_filtering;
+}
+
+static inline bool dsa_switch_supports_mc_filtering(struct dsa_switch *ds)
+{
+ return ds->ops->port_mdb_add && ds->ops->port_mdb_del &&
+ ds->fdb_isolation && !ds->vlan_filtering_is_global &&
+ !ds->needs_standalone_vlan_filtering;
+}
+
/* dsa2.c */
-void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag);
-void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag);
+void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag);
+void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag);
+struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst,
+ const struct net_device *lag_dev);
+struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst);
int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v);
int dsa_broadcast(unsigned long e, void *v);
int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
- struct net_device *master,
const struct dsa_device_ops *tag_ops,
const struct dsa_device_ops *old_tag_ops);
+void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst,
+ struct net_device *master,
+ bool up);
+void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst,
+ struct net_device *master,
+ bool up);
unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max);
void dsa_bridge_num_put(const struct net_device *bridge_dev,
unsigned int bridge_num);
@@ -489,10 +577,6 @@ struct dsa_bridge *dsa_tree_bridge_find(struct dsa_switch_tree *dst,
const struct net_device *br);
/* tag_8021q.c */
-int dsa_tag_8021q_bridge_join(struct dsa_switch *ds,
- struct dsa_notifier_bridge_info *info);
-int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds,
- struct dsa_notifier_bridge_info *info);
int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
struct dsa_notifier_tag_8021q_vlan_info *info);
int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds,
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 2199104ca7df..40367ab41cf8 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -58,7 +58,7 @@ static void dsa_master_get_regs(struct net_device *dev,
}
cpu_info = (struct ethtool_drvinfo *)data;
- strlcpy(cpu_info->driver, "dsa", sizeof(cpu_info->driver));
+ strscpy(cpu_info->driver, "dsa", sizeof(cpu_info->driver));
data += sizeof(*cpu_info);
cpu_regs = (struct ethtool_regs *)data;
data += sizeof(*cpu_regs);
@@ -226,6 +226,9 @@ static int dsa_master_ethtool_setup(struct net_device *dev)
struct dsa_switch *ds = cpu_dp->ds;
struct ethtool_ops *ops;
+ if (netif_is_lag_master(dev))
+ return 0;
+
ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL);
if (!ops)
return -ENOMEM;
@@ -250,6 +253,9 @@ static void dsa_master_ethtool_teardown(struct net_device *dev)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
+ if (netif_is_lag_master(dev))
+ return;
+
dev->ethtool_ops = cpu_dp->orig_ethtool_ops;
cpu_dp->orig_ethtool_ops = NULL;
}
@@ -257,14 +263,22 @@ static void dsa_master_ethtool_teardown(struct net_device *dev)
static void dsa_netdev_ops_set(struct net_device *dev,
const struct dsa_netdevice_ops *ops)
{
+ if (netif_is_lag_master(dev))
+ return;
+
dev->dsa_ptr->netdev_ops = ops;
}
+/* Keep the master always promiscuous if the tagging protocol requires that
+ * (garbles MAC DA) or if it doesn't support unicast filtering, case in which
+ * it would revert to promiscuous mode as soon as we call dev_uc_add() on it
+ * anyway.
+ */
static void dsa_master_set_promiscuity(struct net_device *dev, int inc)
{
const struct dsa_device_ops *ops = dev->dsa_ptr->tag_ops;
- if (!ops->promisc_on_master)
+ if ((dev->priv_flags & IFF_UNICAST_FLT) && !ops->promisc_on_master)
return;
ASSERT_RTNL();
@@ -302,7 +316,7 @@ static ssize_t tagging_store(struct device *d, struct device_attribute *attr,
*/
goto out;
- err = dsa_tree_change_tag_proto(cpu_dp->ds->dst, dev, new_tag_ops,
+ err = dsa_tree_change_tag_proto(cpu_dp->ds->dst, new_tag_ops,
old_tag_ops);
if (err) {
/* On failure the old tagger is restored, so we don't need the
@@ -330,21 +344,43 @@ static const struct attribute_group dsa_group = {
.attrs = dsa_slave_attrs,
};
-static struct lock_class_key dsa_master_addr_list_lock_key;
+static void dsa_master_reset_mtu(struct net_device *dev)
+{
+ int err;
+
+ err = dev_set_mtu(dev, ETH_DATA_LEN);
+ if (err)
+ netdev_dbg(dev,
+ "Unable to reset MTU to exclude DSA overheads\n");
+}
int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
{
+ const struct dsa_device_ops *tag_ops = cpu_dp->tag_ops;
struct dsa_switch *ds = cpu_dp->ds;
struct device_link *consumer_link;
- int ret;
+ int mtu, ret;
+
+ mtu = ETH_DATA_LEN + dsa_tag_protocol_overhead(tag_ops);
/* The DSA master must use SET_NETDEV_DEV for this to work. */
- consumer_link = device_link_add(ds->dev, dev->dev.parent,
- DL_FLAG_AUTOREMOVE_CONSUMER);
- if (!consumer_link)
- netdev_err(dev,
- "Failed to create a device link to DSA switch %s\n",
- dev_name(ds->dev));
+ if (!netif_is_lag_master(dev)) {
+ consumer_link = device_link_add(ds->dev, dev->dev.parent,
+ DL_FLAG_AUTOREMOVE_CONSUMER);
+ if (!consumer_link)
+ netdev_err(dev,
+ "Failed to create a device link to DSA switch %s\n",
+ dev_name(ds->dev));
+ }
+
+ /* The switch driver may not implement ->port_change_mtu(), case in
+ * which dsa_slave_change_mtu() will not update the master MTU either,
+ * so we need to do that here.
+ */
+ ret = dev_set_mtu(dev, mtu);
+ if (ret)
+ netdev_warn(dev, "error %d setting MTU to %d to include DSA overhead\n",
+ ret, mtu);
/* If we use a tagging format that doesn't have an ethertype
* field, make sure that all packets from this point on get
@@ -353,8 +389,6 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
wmb();
dev->dsa_ptr = cpu_dp;
- lockdep_set_class(&dev->addr_list_lock,
- &dsa_master_addr_list_lock_key);
dsa_master_set_promiscuity(dev, 1);
@@ -383,6 +417,7 @@ void dsa_master_teardown(struct net_device *dev)
sysfs_remove_group(&dev->dev.kobj, &dsa_group);
dsa_netdev_ops_set(dev, NULL);
dsa_master_ethtool_teardown(dev);
+ dsa_master_reset_mtu(dev);
dsa_master_set_promiscuity(dev, -1);
dev->dsa_ptr = NULL;
@@ -393,3 +428,52 @@ void dsa_master_teardown(struct net_device *dev)
*/
wmb();
}
+
+int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp,
+ struct netdev_lag_upper_info *uinfo,
+ struct netlink_ext_ack *extack)
+{
+ bool master_setup = false;
+ int err;
+
+ if (!netdev_uses_dsa(lag_dev)) {
+ err = dsa_master_setup(lag_dev, cpu_dp);
+ if (err)
+ return err;
+
+ master_setup = true;
+ }
+
+ err = dsa_port_lag_join(cpu_dp, lag_dev, uinfo, extack);
+ if (err) {
+ if (extack && !extack->_msg)
+ NL_SET_ERR_MSG_MOD(extack,
+ "CPU port failed to join LAG");
+ goto out_master_teardown;
+ }
+
+ return 0;
+
+out_master_teardown:
+ if (master_setup)
+ dsa_master_teardown(lag_dev);
+ return err;
+}
+
+/* Tear down a master if there isn't any other user port on it,
+ * optionally also destroying LAG information.
+ */
+void dsa_master_lag_teardown(struct net_device *lag_dev,
+ struct dsa_port *cpu_dp)
+{
+ struct net_device *upper;
+ struct list_head *iter;
+
+ dsa_port_lag_leave(cpu_dp, lag_dev);
+
+ netdev_for_each_upper_dev_rcu(lag_dev, upper, iter)
+ if (dsa_slave_dev_check(upper))
+ return;
+
+ dsa_master_teardown(lag_dev);
+}
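
[editor's note] dsa_master_setup() now sizes the master itself: ETH_DATA_LEN plus the tagger overhead, so full-sized user-port frames survive tagging even when the switch driver lacks ->port_change_mtu(). In numbers, for a tagger with an 8-byte header:

int overhead = dsa_tag_protocol_overhead(tag_ops);	/* 8 in this example */
int mtu = ETH_DATA_LEN + overhead;			/* 1500 + 8 = 1508 */

dsa_master_teardown() undoes this via the new dsa_master_reset_mtu(), restoring ETH_DATA_LEN.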
diff --git a/net/dsa/netlink.c b/net/dsa/netlink.c
new file mode 100644
index 000000000000..ecf9ed1de185
--- /dev/null
+++ b/net/dsa/netlink.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2022 NXP
+ */
+#include <linux/netdevice.h>
+#include <net/rtnetlink.h>
+
+#include "dsa_priv.h"
+
+static const struct nla_policy dsa_policy[IFLA_DSA_MAX + 1] = {
+ [IFLA_DSA_MASTER] = { .type = NLA_U32 },
+};
+
+static int dsa_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ if (!data)
+ return 0;
+
+ if (data[IFLA_DSA_MASTER]) {
+ u32 ifindex = nla_get_u32(data[IFLA_DSA_MASTER]);
+ struct net_device *master;
+
+ master = __dev_get_by_index(dev_net(dev), ifindex);
+ if (!master)
+ return -EINVAL;
+
+ err = dsa_slave_change_master(dev, master, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static size_t dsa_get_size(const struct net_device *dev)
+{
+ return nla_total_size(sizeof(u32)) + /* IFLA_DSA_MASTER */
+ 0;
+}
+
+static int dsa_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct net_device *master = dsa_slave_to_master(dev);
+
+ if (nla_put_u32(skb, IFLA_DSA_MASTER, master->ifindex))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+struct rtnl_link_ops dsa_link_ops __read_mostly = {
+ .kind = "dsa",
+ .priv_size = sizeof(struct dsa_port),
+ .maxtype = IFLA_DSA_MAX,
+ .policy = dsa_policy,
+ .changelink = dsa_changelink,
+ .get_size = dsa_get_size,
+ .fill_info = dsa_fill_info,
+ .netns_refund = true,
+};
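
[editor's note] The new "dsa" rtnl_link_ops expose the master of a user port through RTM_NEWLINK/RTM_GETLINK, which recent iproute2 wires up as something like `ip link set swp0 type dsa master eth1` (exact syntax hedged). A minimal libmnl sketch of the same changelink request, assuming IFLA_DSA_MASTER is present in the installed <linux/if_link.h> and that swp0/eth1 are placeholder interface names:

#include <net/if.h>
#include <linux/if_link.h>
#include <linux/rtnetlink.h>
#include <libmnl/libmnl.h>

int main(void)
{
	char buf[MNL_SOCKET_BUFFER_SIZE];
	struct nlattr *linkinfo, *data;
	struct mnl_socket *nl;
	struct nlmsghdr *nlh;
	struct ifinfomsg *ifm;

	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = RTM_NEWLINK;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;

	ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm));
	ifm->ifi_family = AF_UNSPEC;
	ifm->ifi_index = if_nametoindex("swp0");	/* user port */

	linkinfo = mnl_attr_nest_start(nlh, IFLA_LINKINFO);
	mnl_attr_put_str(nlh, IFLA_INFO_KIND, "dsa");
	data = mnl_attr_nest_start(nlh, IFLA_INFO_DATA);
	/* dsa_changelink() resolves this ifindex to the new master
	 * and delegates to dsa_slave_change_master().
	 */
	mnl_attr_put_u32(nlh, IFLA_DSA_MASTER, if_nametoindex("eth1"));
	mnl_attr_nest_end(nlh, data);
	mnl_attr_nest_end(nlh, linkinfo);

	nl = mnl_socket_open(NETLINK_ROUTE);
	if (!nl || mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0)
		return 1;
	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0)
		return 1;
	mnl_socket_close(nl);
	return 0;
}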
diff --git a/net/dsa/port.c b/net/dsa/port.c
index bd78192e0e47..208168276995 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -7,6 +7,7 @@
*/
#include <linux/if_bridge.h>
+#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/of_mdio.h>
#include <linux/of_net.h>
@@ -30,12 +31,11 @@ static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v)
return dsa_tree_notify(dp->ds->dst, e, v);
}
-static void dsa_port_notify_bridge_fdb_flush(const struct dsa_port *dp)
+static void dsa_port_notify_bridge_fdb_flush(const struct dsa_port *dp, u16 vid)
{
struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
struct switchdev_notifier_fdb_info info = {
- /* flush all VLANs */
- .vid = 0,
+ .vid = vid,
};
/* When the port becomes standalone it has already left the bridge.
@@ -57,7 +57,42 @@ static void dsa_port_fast_age(const struct dsa_port *dp)
ds->ops->port_fast_age(ds, dp->index);
- dsa_port_notify_bridge_fdb_flush(dp);
+ /* flush all VLANs */
+ dsa_port_notify_bridge_fdb_flush(dp, 0);
+}
+
+static int dsa_port_vlan_fast_age(const struct dsa_port *dp, u16 vid)
+{
+ struct dsa_switch *ds = dp->ds;
+ int err;
+
+ if (!ds->ops->port_vlan_fast_age)
+ return -EOPNOTSUPP;
+
+ err = ds->ops->port_vlan_fast_age(ds, dp->index, vid);
+
+ if (!err)
+ dsa_port_notify_bridge_fdb_flush(dp, vid);
+
+ return err;
+}
+
+static int dsa_port_msti_fast_age(const struct dsa_port *dp, u16 msti)
+{
+ DECLARE_BITMAP(vids, VLAN_N_VID) = { 0 };
+ int err, vid;
+
+ err = br_mst_get_info(dsa_port_bridge_dev_get(dp), msti, vids);
+ if (err)
+ return err;
+
+ for_each_set_bit(vid, vids, VLAN_N_VID) {
+ err = dsa_port_vlan_fast_age(dp, vid);
+ if (err)
+ return err;
+ }
+
+ return 0;
}
static bool dsa_port_can_configure_learning(struct dsa_port *dp)
@@ -111,11 +146,50 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age)
static void dsa_port_set_state_now(struct dsa_port *dp, u8 state,
bool do_fast_age)
{
+ struct dsa_switch *ds = dp->ds;
int err;
err = dsa_port_set_state(dp, state, do_fast_age);
+ if (err && err != -EOPNOTSUPP) {
+ dev_err(ds->dev, "port %d failed to set STP state %u: %pe\n",
+ dp->index, state, ERR_PTR(err));
+ }
+}
+
+int dsa_port_set_mst_state(struct dsa_port *dp,
+ const struct switchdev_mst_state *state,
+ struct netlink_ext_ack *extack)
+{
+ struct dsa_switch *ds = dp->ds;
+ u8 prev_state;
+ int err;
+
+ if (!ds->ops->port_mst_state_set)
+ return -EOPNOTSUPP;
+
+ err = br_mst_get_state(dsa_port_to_bridge_port(dp), state->msti,
+ &prev_state);
if (err)
- pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
+ return err;
+
+ err = ds->ops->port_mst_state_set(ds, dp->index, state);
+ if (err)
+ return err;
+
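+ /* Fast-age only when the port was actually learning (learning enabled
+ * while in LEARNING or FORWARDING) and the new MST state stops it from
+ * doing so; otherwise there are no dynamic entries to flush.
+ */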
+ if (!(dp->learning &&
+ (prev_state == BR_STATE_LEARNING ||
+ prev_state == BR_STATE_FORWARDING) &&
+ (state->state == BR_STATE_DISABLED ||
+ state->state == BR_STATE_BLOCKING ||
+ state->state == BR_STATE_LISTENING)))
+ return 0;
+
+ err = dsa_port_msti_fast_age(dp, state->msti);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack,
+ "Unable to flush associated VLANs");
+
+ return 0;
}
int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy)
@@ -172,11 +246,65 @@ void dsa_port_disable(struct dsa_port *dp)
rtnl_unlock();
}
+static void dsa_port_reset_vlan_filtering(struct dsa_port *dp,
+ struct dsa_bridge bridge)
+{
+ struct netlink_ext_ack extack = {0};
+ bool change_vlan_filtering = false;
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_port *other_dp;
+ bool vlan_filtering;
+ int err;
+
+ if (ds->needs_standalone_vlan_filtering &&
+ !br_vlan_enabled(bridge.dev)) {
+ change_vlan_filtering = true;
+ vlan_filtering = true;
+ } else if (!ds->needs_standalone_vlan_filtering &&
+ br_vlan_enabled(bridge.dev)) {
+ change_vlan_filtering = true;
+ vlan_filtering = false;
+ }
+
+ /* If the bridge was vlan_filtering, the bridge core doesn't trigger an
+ * event for changing the vlan_filtering setting when slave ports leave
+ * it. That is a good thing, because it lets us handle this, and also
+ * handle the case where the switch's vlan_filtering setting is global
+ * (not per port). When that happens, the correct moment to trigger the
+ * vlan_filtering callback is only when the last port leaves the last
+ * VLAN-aware bridge.
+ */
+ if (change_vlan_filtering && ds->vlan_filtering_is_global) {
+ dsa_switch_for_each_port(other_dp, ds) {
+ struct net_device *br = dsa_port_bridge_dev_get(other_dp);
+
+ if (br && br_vlan_enabled(br)) {
+ change_vlan_filtering = false;
+ break;
+ }
+ }
+ }
+
+ if (!change_vlan_filtering)
+ return;
+
+ err = dsa_port_vlan_filtering(dp, vlan_filtering, &extack);
+ if (extack._msg) {
+ dev_err(ds->dev, "port %d: %s\n", dp->index,
+ extack._msg);
+ }
+ if (err && err != -EOPNOTSUPP) {
+ dev_err(ds->dev,
+ "port %d failed to reset VLAN filtering to %d: %pe\n",
+ dp->index, vlan_filtering, ERR_PTR(err));
+ }
+}
+
static int dsa_port_inherit_brport_flags(struct dsa_port *dp,
struct netlink_ext_ack *extack)
{
const unsigned long mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD |
- BR_BCAST_FLOOD;
+ BR_BCAST_FLOOD | BR_PORT_LOCKED;
struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
int flag, err;
@@ -200,7 +328,7 @@ static void dsa_port_clear_brport_flags(struct dsa_port *dp)
{
const unsigned long val = BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
const unsigned long mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD |
- BR_BCAST_FLOOD;
+ BR_BCAST_FLOOD | BR_PORT_LOCKED;
int flag, err;
for_each_set_bit(flag, &mask, 32) {
@@ -243,7 +371,8 @@ static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp,
return 0;
}
-static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp)
+static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp,
+ struct dsa_bridge bridge)
{
/* Configure the port for standalone mode (no address learning,
* flood everything).
@@ -263,7 +392,7 @@ static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp)
*/
dsa_port_set_state_now(dp, BR_STATE_FORWARDING, true);
- /* VLAN filtering is handled by dsa_switch_bridge_leave */
+ dsa_port_reset_vlan_filtering(dp, bridge);
/* Ageing time may be global to the switch chip, so don't change it
* here because we have no good reason (or value) to change it to.
@@ -321,18 +450,30 @@ static void dsa_port_bridge_destroy(struct dsa_port *dp,
kfree(bridge);
}
+static bool dsa_port_supports_mst(struct dsa_port *dp)
+{
+ struct dsa_switch *ds = dp->ds;
+
+ return ds->ops->vlan_msti_set &&
+ ds->ops->port_mst_state_set &&
+ ds->ops->port_vlan_fast_age &&
+ dsa_port_can_configure_learning(dp);
+}
+
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
struct netlink_ext_ack *extack)
{
struct dsa_notifier_bridge_info info = {
- .tree_index = dp->ds->dst->index,
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
+ .extack = extack,
};
struct net_device *dev = dp->slave;
struct net_device *brport_dev;
int err;
+ if (br_mst_enabled(br) && !dsa_port_supports_mst(dp))
+ return -EOPNOTSUPP;
+
/* Here the interface is already bridged. Reflect the current
* configuration so that drivers can program their chips accordingly.
*/
@@ -367,6 +508,7 @@ out_rollback_unoffload:
switchdev_bridge_port_unoffload(brport_dev, dp,
&dsa_slave_switchdev_notifier,
&dsa_slave_switchdev_blocking_notifier);
+ dsa_flush_workqueue();
out_rollback_unbridge:
dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info);
out_rollback:
@@ -392,13 +534,18 @@ void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br)
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
{
struct dsa_notifier_bridge_info info = {
- .tree_index = dp->ds->dst->index,
- .sw_index = dp->ds->index,
- .port = dp->index,
- .bridge = *dp->bridge,
+ .dp = dp,
};
int err;
+ /* If the port could not be offloaded to begin with, then
+ * there is nothing to do.
+ */
+ if (!dp->bridge)
+ return;
+
+ info.bridge = *dp->bridge;
+
/* Here the port is already unbridged. Reflect the current configuration
* so that drivers can program their chips accordingly.
*/
@@ -410,19 +557,18 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
"port %d failed to notify DSA_NOTIFIER_BRIDGE_LEAVE: %pe\n",
dp->index, ERR_PTR(err));
- dsa_port_switchdev_unsync_attrs(dp);
+ dsa_port_switchdev_unsync_attrs(dp, info.bridge);
}
int dsa_port_lag_change(struct dsa_port *dp,
struct netdev_lag_lower_state_info *linfo)
{
struct dsa_notifier_lag_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
};
bool tx_enabled;
- if (!dp->lag_dev)
+ if (!dp->lag)
return 0;
/* On statically configured aggregates (e.g. loadbalance
@@ -440,27 +586,70 @@ int dsa_port_lag_change(struct dsa_port *dp,
return dsa_port_notify(dp, DSA_NOTIFIER_LAG_CHANGE, &info);
}
-int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag,
+static int dsa_port_lag_create(struct dsa_port *dp,
+ struct net_device *lag_dev)
+{
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_lag *lag;
+
+ lag = dsa_tree_lag_find(ds->dst, lag_dev);
+ if (lag) {
+ refcount_inc(&lag->refcount);
+ dp->lag = lag;
+ return 0;
+ }
+
+ lag = kzalloc(sizeof(*lag), GFP_KERNEL);
+ if (!lag)
+ return -ENOMEM;
+
+ refcount_set(&lag->refcount, 1);
+ mutex_init(&lag->fdb_lock);
+ INIT_LIST_HEAD(&lag->fdbs);
+ lag->dev = lag_dev;
+ dsa_lag_map(ds->dst, lag);
+ dp->lag = lag;
+
+ return 0;
+}
+
+static void dsa_port_lag_destroy(struct dsa_port *dp)
+{
+ struct dsa_lag *lag = dp->lag;
+
+ dp->lag = NULL;
+ dp->lag_tx_enabled = false;
+
+ if (!refcount_dec_and_test(&lag->refcount))
+ return;
+
+ WARN_ON(!list_empty(&lag->fdbs));
+ dsa_lag_unmap(dp->ds->dst, lag);
+ kfree(lag);
+}
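/* Illustrative lifecycle of the refcounted LAG object above (a sketch,
 * not part of the patch): the first port joining lag_dev allocates the
 * struct dsa_lag and maps a hardware LAG ID; subsequent ports only bump
 * lag->refcount; the last port leaving drops it to zero, unmaps the LAG
 * ID and frees the structure, warning if any LAG FDB entries leaked.
 */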
+
+int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev,
struct netdev_lag_upper_info *uinfo,
struct netlink_ext_ack *extack)
{
struct dsa_notifier_lag_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
- .lag = lag,
+ .dp = dp,
.info = uinfo,
+ .extack = extack,
};
struct net_device *bridge_dev;
int err;
- dsa_lag_map(dp->ds->dst, lag);
- dp->lag_dev = lag;
+ err = dsa_port_lag_create(dp, lag_dev);
+ if (err)
+ goto err_lag_create;
+ info.lag = *dp->lag;
err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_JOIN, &info);
if (err)
goto err_lag_join;
- bridge_dev = netdev_master_upper_dev_get(lag);
+ bridge_dev = netdev_master_upper_dev_get(lag_dev);
if (!bridge_dev || !netif_is_bridge_master(bridge_dev))
return 0;
@@ -473,12 +662,12 @@ int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag,
err_bridge_join:
dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info);
err_lag_join:
- dp->lag_dev = NULL;
- dsa_lag_unmap(dp->ds->dst, lag);
+ dsa_port_lag_destroy(dp);
+err_lag_create:
return err;
}
-void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag)
+void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev)
{
struct net_device *br = dsa_port_bridge_dev_get(dp);
@@ -486,17 +675,15 @@ void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag)
dsa_port_pre_bridge_leave(dp, br);
}
-void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag)
+void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev)
{
struct net_device *br = dsa_port_bridge_dev_get(dp);
struct dsa_notifier_lag_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
- .lag = lag,
+ .dp = dp,
};
int err;
- if (!dp->lag_dev)
+ if (!dp->lag)
return;
/* Port might have been part of a LAG that in turn was
@@ -505,16 +692,15 @@ void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag)
if (br)
dsa_port_bridge_leave(dp, br);
- dp->lag_tx_enabled = false;
- dp->lag_dev = NULL;
+ info.lag = *dp->lag;
+
+ dsa_port_lag_destroy(dp);
err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info);
if (err)
dev_err(dp->ds->dev,
"port %d failed to notify DSA_NOTIFIER_LAG_LEAVE: %pe\n",
dp->index, ERR_PTR(err));
-
- dsa_lag_unmap(dp->ds->dst, lag);
}
/* Must be called under rcu_read_lock() */
@@ -619,7 +805,7 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
ds->vlan_filtering = vlan_filtering;
dsa_switch_for_each_user_port(other_dp, ds) {
- struct net_device *slave = dp->slave;
+ struct net_device *slave = other_dp->slave;
/* We might be called in the unbind path, so not
* all slave devices might still be registered.
@@ -686,6 +872,17 @@ int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock)
return 0;
}
+int dsa_port_mst_enable(struct dsa_port *dp, bool on,
+ struct netlink_ext_ack *extack)
+{
+ if (on && !dsa_port_supports_mst(dp)) {
+ NL_SET_ERR_MSG_MOD(extack, "Hardware does not support MST");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack)
@@ -729,13 +926,29 @@ int dsa_port_bridge_flags(struct dsa_port *dp,
return 0;
}
-int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
- bool targeted_match)
+void dsa_port_set_host_flood(struct dsa_port *dp, bool uc, bool mc)
+{
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->ops->port_set_host_flood)
+ ds->ops->port_set_host_flood(ds, dp->index, uc, mc);
+}
+
+int dsa_port_vlan_msti(struct dsa_port *dp,
+ const struct switchdev_vlan_msti *msti)
+{
+ struct dsa_switch *ds = dp->ds;
+
+ if (!ds->ops->vlan_msti_set)
+ return -EOPNOTSUPP;
+
+ return ds->ops->vlan_msti_set(ds, *dp->bridge, msti);
+}
+
+int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu)
{
struct dsa_notifier_mtu_info info = {
- .sw_index = dp->ds->index,
- .targeted_match = targeted_match,
- .port = dp->index,
+ .dp = dp,
.mtu = new_mtu,
};
@@ -746,12 +959,22 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
u16 vid)
{
struct dsa_notifier_fdb_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.addr = addr,
.vid = vid,
+ .db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
+ },
};
+ /* Refcounting takes bridge.num as a key, and should be global for all
+ * bridges in the absence of FDB isolation, and per bridge otherwise.
+ * Force the bridge.num to zero here in the absence of FDB isolation.
+ */
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
+
return dsa_port_notify(dp, DSA_NOTIFIER_FDB_ADD, &info);
}
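/* A sketch of the keying effect (illustrative, not part of the patch):
 * without FDB isolation, entries added via br0 (bridge.num 1) and br1
 * (bridge.num 2) collapse onto the same refcount key, so the cross-chip
 * notifier layer programs a given {addr, vid} once and refcounts it per
 * caller:
 *
 *   struct dsa_db db = {
 *           .type = DSA_DB_BRIDGE,
 *           .bridge = *dp->bridge,
 *   };
 *   if (!dp->ds->fdb_isolation)
 *           db.bridge.num = 0;  // all bridges share one database
 */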
@@ -759,54 +982,157 @@ int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
u16 vid)
{
struct dsa_notifier_fdb_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.addr = addr,
.vid = vid,
-
+ .db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
+ },
};
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
+
return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info);
}
-int dsa_port_host_fdb_add(struct dsa_port *dp, const unsigned char *addr,
- u16 vid)
+static int dsa_port_host_fdb_add(struct dsa_port *dp,
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db)
{
struct dsa_notifier_fdb_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.addr = addr,
.vid = vid,
+ .db = db,
};
- struct dsa_port *cpu_dp = dp->cpu_dp;
- int err;
- err = dev_uc_add(cpu_dp->master, addr);
- if (err)
- return err;
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_ADD, &info);
}
-int dsa_port_host_fdb_del(struct dsa_port *dp, const unsigned char *addr,
- u16 vid)
+int dsa_port_standalone_host_fdb_add(struct dsa_port *dp,
+ const unsigned char *addr, u16 vid)
+{
+ struct dsa_db db = {
+ .type = DSA_DB_PORT,
+ .dp = dp,
+ };
+
+ return dsa_port_host_fdb_add(dp, addr, vid, db);
+}
+
+int dsa_port_bridge_host_fdb_add(struct dsa_port *dp,
+ const unsigned char *addr, u16 vid)
+{
+ struct net_device *master = dsa_port_to_master(dp);
+ struct dsa_db db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
+ };
+ int err;
+
+ /* Avoid a call to __dev_set_promiscuity() on the master, which
+ * requires rtnl_lock(), since we can't guarantee that is held here,
+ * and we can't take it either.
+ */
+ if (master->priv_flags & IFF_UNICAST_FLT) {
+ err = dev_uc_add(master, addr);
+ if (err)
+ return err;
+ }
+
+ return dsa_port_host_fdb_add(dp, addr, vid, db);
+}
+
+static int dsa_port_host_fdb_del(struct dsa_port *dp,
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db)
{
struct dsa_notifier_fdb_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.addr = addr,
.vid = vid,
+ .db = db,
};
- struct dsa_port *cpu_dp = dp->cpu_dp;
- int err;
- err = dev_uc_del(cpu_dp->master, addr);
- if (err)
- return err;
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_DEL, &info);
}
+int dsa_port_standalone_host_fdb_del(struct dsa_port *dp,
+ const unsigned char *addr, u16 vid)
+{
+ struct dsa_db db = {
+ .type = DSA_DB_PORT,
+ .dp = dp,
+ };
+
+ return dsa_port_host_fdb_del(dp, addr, vid, db);
+}
+
+int dsa_port_bridge_host_fdb_del(struct dsa_port *dp,
+ const unsigned char *addr, u16 vid)
+{
+ struct net_device *master = dsa_port_to_master(dp);
+ struct dsa_db db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
+ };
+ int err;
+
+ if (master->priv_flags & IFF_UNICAST_FLT) {
+ err = dev_uc_del(master, addr);
+ if (err)
+ return err;
+ }
+
+ return dsa_port_host_fdb_del(dp, addr, vid, db);
+}
+
+int dsa_port_lag_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid)
+{
+ struct dsa_notifier_lag_fdb_info info = {
+ .lag = dp->lag,
+ .addr = addr,
+ .vid = vid,
+ .db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
+ },
+ };
+
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_LAG_FDB_ADD, &info);
+}
+
+int dsa_port_lag_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid)
+{
+ struct dsa_notifier_lag_fdb_info info = {
+ .lag = dp->lag,
+ .addr = addr,
+ .vid = vid,
+ .db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
+ },
+ };
+
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_LAG_FDB_DEL, &info);
+}
+
int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data)
{
struct dsa_switch *ds = dp->ds;
@@ -822,11 +1148,17 @@ int dsa_port_mdb_add(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb)
{
struct dsa_notifier_mdb_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.mdb = mdb,
+ .db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
+ },
};
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
+
return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info);
}
@@ -834,48 +1166,106 @@ int dsa_port_mdb_del(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb)
{
struct dsa_notifier_mdb_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.mdb = mdb,
+ .db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
+ },
};
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
+
return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info);
}
-int dsa_port_host_mdb_add(const struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb)
+static int dsa_port_host_mdb_add(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct dsa_db db)
{
struct dsa_notifier_mdb_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.mdb = mdb,
+ .db = db,
+ };
+
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_ADD, &info);
+}
+
+int dsa_port_standalone_host_mdb_add(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb)
+{
+ struct dsa_db db = {
+ .type = DSA_DB_PORT,
+ .dp = dp,
+ };
+
+ return dsa_port_host_mdb_add(dp, mdb, db);
+}
+
+int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb)
+{
+ struct net_device *master = dsa_port_to_master(dp);
+ struct dsa_db db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
};
- struct dsa_port *cpu_dp = dp->cpu_dp;
int err;
- err = dev_mc_add(cpu_dp->master, mdb->addr);
+ err = dev_mc_add(master, mdb->addr);
if (err)
return err;
- return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_ADD, &info);
+ return dsa_port_host_mdb_add(dp, mdb, db);
}
-int dsa_port_host_mdb_del(const struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb)
+static int dsa_port_host_mdb_del(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct dsa_db db)
{
struct dsa_notifier_mdb_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.mdb = mdb,
+ .db = db,
+ };
+
+ if (!dp->ds->fdb_isolation)
+ info.db.bridge.num = 0;
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_DEL, &info);
+}
+
+int dsa_port_standalone_host_mdb_del(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb)
+{
+ struct dsa_db db = {
+ .type = DSA_DB_PORT,
+ .dp = dp,
+ };
+
+ return dsa_port_host_mdb_del(dp, mdb, db);
+}
+
+int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb)
+{
+ struct net_device *master = dsa_port_to_master(dp);
+ struct dsa_db db = {
+ .type = DSA_DB_BRIDGE,
+ .bridge = *dp->bridge,
};
- struct dsa_port *cpu_dp = dp->cpu_dp;
int err;
- err = dev_mc_del(cpu_dp->master, mdb->addr);
+ err = dev_mc_del(master, mdb->addr);
if (err)
return err;
- return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_DEL, &info);
+ return dsa_port_host_mdb_del(dp, mdb, db);
}
int dsa_port_vlan_add(struct dsa_port *dp,
@@ -883,8 +1273,7 @@ int dsa_port_vlan_add(struct dsa_port *dp,
struct netlink_ext_ack *extack)
{
struct dsa_notifier_vlan_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.vlan = vlan,
.extack = extack,
};
@@ -896,14 +1285,53 @@ int dsa_port_vlan_del(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan)
{
struct dsa_notifier_vlan_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.vlan = vlan,
};
return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
}
+int dsa_port_host_vlan_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *master = dsa_port_to_master(dp);
+ struct dsa_notifier_vlan_info info = {
+ .dp = dp,
+ .vlan = vlan,
+ .extack = extack,
+ };
+ int err;
+
+ err = dsa_port_notify(dp, DSA_NOTIFIER_HOST_VLAN_ADD, &info);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ vlan_vid_add(master, htons(ETH_P_8021Q), vlan->vid);
+
+ return err;
+}
+
+int dsa_port_host_vlan_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan)
+{
+ struct net_device *master = dsa_port_to_master(dp);
+ struct dsa_notifier_vlan_info info = {
+ .dp = dp,
+ .vlan = vlan,
+ };
+ int err;
+
+ err = dsa_port_notify(dp, DSA_NOTIFIER_HOST_VLAN_DEL, &info);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ vlan_vid_del(master, htons(ETH_P_8021Q), vlan->vid);
+
+ return err;
+}
+
int dsa_port_mrp_add(const struct dsa_port *dp,
const struct switchdev_obj_mrp *mrp)
{
@@ -948,6 +1376,136 @@ int dsa_port_mrp_del_ring_role(const struct dsa_port *dp,
return ds->ops->port_mrp_del_ring_role(ds, dp->index, mrp);
}
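+/* Program a new CPU port affinity into the driver. With fail_on_err set
+ * (the initial switchover), a driver error aborts the operation; with it
+ * clear (the rollback path), the error is only logged and the software
+ * state is updated anyway, since there is nothing left to fall back to.
+ */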
+static int dsa_port_assign_master(struct dsa_port *dp,
+ struct net_device *master,
+ struct netlink_ext_ack *extack,
+ bool fail_on_err)
+{
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index, err;
+
+ err = ds->ops->port_change_master(ds, port, master, extack);
+ if (err && !fail_on_err)
+ dev_err(ds->dev, "port %d failed to assign master %s: %pe\n",
+ port, master->name, ERR_PTR(err));
+
+ if (err && fail_on_err)
+ return err;
+
+ dp->cpu_dp = master->dsa_ptr;
+ dp->cpu_port_in_lag = netif_is_lag_master(master);
+
+ return 0;
+}
+
+/* Change the dp->cpu_dp affinity for a user port. Note that both cross-chip
+ * notifiers and drivers have implicit assumptions about user-to-CPU-port
+ * mappings, so we unfortunately cannot delay the deletion of the objects
+ * (switchdev, standalone addresses, standalone VLANs) on the old CPU port
+ * until the new CPU port has been set up. So we need to completely tear down
+ * the old CPU port before changing it, and restore it on errors during the
+ * bringup of the new one.
+ */
+int dsa_port_change_master(struct dsa_port *dp, struct net_device *master,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *bridge_dev = dsa_port_bridge_dev_get(dp);
+ struct net_device *old_master = dsa_port_to_master(dp);
+ struct net_device *dev = dp->slave;
+ struct dsa_switch *ds = dp->ds;
+ bool vlan_filtering;
+ int err, tmp;
+
+ /* Bridges may hold host FDB, MDB and VLAN objects. These need to be
+ * migrated, so dynamically unoffload and later reoffload the bridge
+ * port.
+ */
+ if (bridge_dev) {
+ dsa_port_pre_bridge_leave(dp, bridge_dev);
+ dsa_port_bridge_leave(dp, bridge_dev);
+ }
+
+ /* The port might still be VLAN filtering even if it's no longer
+ * under a bridge, either due to ds->vlan_filtering_is_global or
+ * ds->needs_standalone_vlan_filtering. In turn this means VLANs are
+ * still installed on the CPU port.
+ */
+ vlan_filtering = dsa_port_is_vlan_filtering(dp);
+ if (vlan_filtering) {
+ err = dsa_slave_manage_vlan_filtering(dev, false);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to remove standalone VLANs");
+ goto rewind_old_bridge;
+ }
+ }
+
+ /* Standalone addresses, and addresses of upper interfaces like
+ * VLAN, LAG, HSR need to be migrated.
+ */
+ dsa_slave_unsync_ha(dev);
+
+ err = dsa_port_assign_master(dp, master, extack, true);
+ if (err)
+ goto rewind_old_addrs;
+
+ dsa_slave_sync_ha(dev);
+
+ if (vlan_filtering) {
+ err = dsa_slave_manage_vlan_filtering(dev, true);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to restore standalone VLANs");
+ goto rewind_new_addrs;
+ }
+ }
+
+ if (bridge_dev) {
+ err = dsa_port_bridge_join(dp, bridge_dev, extack);
+ if (err && err == -EOPNOTSUPP) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to reoffload bridge");
+ goto rewind_new_vlan;
+ }
+ }
+
+ return 0;
+
+rewind_new_vlan:
+ if (vlan_filtering)
+ dsa_slave_manage_vlan_filtering(dev, false);
+
+rewind_new_addrs:
+ dsa_slave_unsync_ha(dev);
+
+ dsa_port_assign_master(dp, old_master, NULL, false);
+
+/* Restore the objects on the old CPU port */
+rewind_old_addrs:
+ dsa_slave_sync_ha(dev);
+
+ if (vlan_filtering) {
+ tmp = dsa_slave_manage_vlan_filtering(dev, true);
+ if (tmp) {
+ dev_err(ds->dev,
+ "port %d failed to restore standalone VLANs: %pe\n",
+ dp->index, ERR_PTR(tmp));
+ }
+ }
+
+rewind_old_bridge:
+ if (bridge_dev) {
+ tmp = dsa_port_bridge_join(dp, bridge_dev, extack);
+ if (tmp) {
+ dev_err(ds->dev,
+ "port %d failed to rejoin bridge %s: %pe\n",
+ dp->index, bridge_dev->name, ERR_PTR(tmp));
+ }
+ }
+
+ return err;
+}
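/* A sketch of the ordering implemented above (illustrative):
 *
 *   bridge leave -> drop standalone VLANs -> unsync host addresses
 *     -> assign new master ->
 *   resync host addresses -> restore standalone VLANs -> bridge rejoin
 *
 * and each rewind_* label undoes the completed steps in reverse order,
 * restoring the objects on the old CPU port on failure.
 */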
+
void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
const struct dsa_device_ops *tag_ops)
{
@@ -1011,6 +1569,20 @@ static void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config,
}
}
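+/* Returning ERR_PTR(-EOPNOTSUPP) when the driver has no
+ * phylink_mac_select_pcs() op signals to phylink (which probes this
+ * method with PHY_INTERFACE_MODE_NA at creation time) that no separate
+ * PCS is in use for this port.
+ */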
+static struct phylink_pcs *
+dsa_port_phylink_mac_select_pcs(struct phylink_config *config,
+ phy_interface_t interface)
+{
+ struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct phylink_pcs *pcs = ERR_PTR(-EOPNOTSUPP);
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->ops->phylink_mac_select_pcs)
+ pcs = ds->ops->phylink_mac_select_pcs(ds, dp->index, interface);
+
+ return pcs;
+}
+
static void dsa_port_phylink_mac_config(struct phylink_config *config,
unsigned int mode,
const struct phylink_link_state *state)
@@ -1077,6 +1649,7 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
static const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
.validate = dsa_port_phylink_validate,
+ .mac_select_pcs = dsa_port_phylink_mac_select_pcs,
.mac_pcs_get_state = dsa_port_phylink_mac_pcs_get_state,
.mac_config = dsa_port_phylink_mac_config,
.mac_an_restart = dsa_port_phylink_mac_an_restart,
@@ -1088,6 +1661,7 @@ int dsa_port_phylink_create(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
phy_interface_t mode;
+ struct phylink *pl;
int err;
err = of_get_phy_mode(dp->dn, &mode);
@@ -1104,17 +1678,25 @@ int dsa_port_phylink_create(struct dsa_port *dp)
if (ds->ops->phylink_get_caps)
ds->ops->phylink_get_caps(ds, dp->index, &dp->pl_config);
- dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(dp->dn),
- mode, &dsa_port_phylink_mac_ops);
- if (IS_ERR(dp->pl)) {
- pr_err("error creating PHYLINK: %ld\n", PTR_ERR(dp->pl));
- return PTR_ERR(dp->pl);
+ pl = phylink_create(&dp->pl_config, of_fwnode_handle(dp->dn),
+ mode, &dsa_port_phylink_mac_ops);
+ if (IS_ERR(pl)) {
+ pr_err("error creating PHYLINK: %ld\n", PTR_ERR(pl));
+ return PTR_ERR(pl);
}
+ dp->pl = pl;
+
return 0;
}
-static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
+void dsa_port_phylink_destroy(struct dsa_port *dp)
+{
+ phylink_destroy(dp->pl);
+ dp->pl = NULL;
+}
+
+static int dsa_shared_port_setup_phy_of(struct dsa_port *dp, bool enable)
{
struct dsa_switch *ds = dp->ds;
struct phy_device *phydev;
@@ -1152,7 +1734,7 @@ err_put_dev:
return err;
}
-static int dsa_port_fixed_link_register_of(struct dsa_port *dp)
+static int dsa_shared_port_fixed_link_register_of(struct dsa_port *dp)
{
struct device_node *dn = dp->dn;
struct dsa_switch *ds = dp->ds;
@@ -1186,7 +1768,7 @@ static int dsa_port_fixed_link_register_of(struct dsa_port *dp)
return 0;
}
-static int dsa_port_phylink_register(struct dsa_port *dp)
+static int dsa_shared_port_phylink_register(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
struct device_node *port_dn = dp->dn;
@@ -1194,7 +1776,6 @@ static int dsa_port_phylink_register(struct dsa_port *dp)
dp->pl_config.dev = ds->dev;
dp->pl_config.type = PHYLINK_DEV;
- dp->pl_config.pcs_poll = ds->pcs_poll;
err = dsa_port_phylink_create(dp);
if (err)
@@ -1209,23 +1790,187 @@ static int dsa_port_phylink_register(struct dsa_port *dp)
return 0;
err_phy_connect:
- phylink_destroy(dp->pl);
+ dsa_port_phylink_destroy(dp);
return err;
}
-int dsa_port_link_register_of(struct dsa_port *dp)
+/* During the initial DSA driver migration to OF, port nodes were sometimes
+ * added to device trees with no indication of how they should operate from a
+ * link management perspective (phy-handle, fixed-link, etc). Additionally, the
+ * phy-mode may be absent. The interpretation of these port OF nodes depends on
+ * their type.
+ *
+ * User ports with no phy-handle or fixed-link are expected to connect to an
+ * internal PHY located on the ds->slave_mii_bus at an MDIO address equal to
+ * the port number. This description is still actively supported.
+ *
+ * Shared (CPU and DSA) ports with no phy-handle or fixed-link are expected to
+ * operate at the maximum speed that their phy-mode is capable of. If the
+ * phy-mode is absent, they are expected to operate using the phy-mode
+ * supported by the port that gives the highest link speed. It is unspecified
+ * if the port should use flow control or not, half duplex or full duplex, or
+ * if the phy-mode is a SERDES link, whether in-band autoneg is expected to be
+ * enabled or not.
+ *
+ * In the latter case of shared ports, omitting the link management description
+ * from the firmware node is deprecated and strongly discouraged. DSA uses
+ * phylink, which rejects the firmware nodes of these ports for lacking
+ * required properties.
+ *
+ * For switches in this table, DSA will skip enforcing validation and will
+ * later omit registering a phylink instance for the shared ports, if they lack
+ * a fixed-link, a phy-handle, or a managed = "in-band-status" property.
+ * It becomes the responsibility of the driver to ensure that these ports
+ * operate at the maximum speed (whatever this means) and will interoperate
+ * with the DSA master or other cascade port, since phylink methods will not be
+ * invoked for them.
+ *
+ * If you are considering expanding this table for newly introduced switches,
+ * think again. It is OK to remove switches from this table if there aren't DT
+ * blobs in circulation which rely on defaulting the shared ports.
+ */
+static const char * const dsa_switches_apply_workarounds[] = {
+#if IS_ENABLED(CONFIG_NET_DSA_XRS700X)
+ "arrow,xrs7003e",
+ "arrow,xrs7003f",
+ "arrow,xrs7004e",
+ "arrow,xrs7004f",
+#endif
+#if IS_ENABLED(CONFIG_B53)
+ "brcm,bcm5325",
+ "brcm,bcm53115",
+ "brcm,bcm53125",
+ "brcm,bcm53128",
+ "brcm,bcm5365",
+ "brcm,bcm5389",
+ "brcm,bcm5395",
+ "brcm,bcm5397",
+ "brcm,bcm5398",
+ "brcm,bcm53010-srab",
+ "brcm,bcm53011-srab",
+ "brcm,bcm53012-srab",
+ "brcm,bcm53018-srab",
+ "brcm,bcm53019-srab",
+ "brcm,bcm5301x-srab",
+ "brcm,bcm11360-srab",
+ "brcm,bcm58522-srab",
+ "brcm,bcm58525-srab",
+ "brcm,bcm58535-srab",
+ "brcm,bcm58622-srab",
+ "brcm,bcm58623-srab",
+ "brcm,bcm58625-srab",
+ "brcm,bcm88312-srab",
+ "brcm,cygnus-srab",
+ "brcm,nsp-srab",
+ "brcm,omega-srab",
+ "brcm,bcm3384-switch",
+ "brcm,bcm6328-switch",
+ "brcm,bcm6368-switch",
+ "brcm,bcm63xx-switch",
+#endif
+#if IS_ENABLED(CONFIG_NET_DSA_BCM_SF2)
+ "brcm,bcm7445-switch-v4.0",
+ "brcm,bcm7278-switch-v4.0",
+ "brcm,bcm7278-switch-v4.8",
+#endif
+#if IS_ENABLED(CONFIG_NET_DSA_LANTIQ_GSWIP)
+ "lantiq,xrx200-gswip",
+ "lantiq,xrx300-gswip",
+ "lantiq,xrx330-gswip",
+#endif
+#if IS_ENABLED(CONFIG_NET_DSA_MV88E6060)
+ "marvell,mv88e6060",
+#endif
+#if IS_ENABLED(CONFIG_NET_DSA_MV88E6XXX)
+ "marvell,mv88e6085",
+ "marvell,mv88e6190",
+ "marvell,mv88e6250",
+#endif
+#if IS_ENABLED(CONFIG_NET_DSA_MICROCHIP_KSZ_COMMON)
+ "microchip,ksz8765",
+ "microchip,ksz8794",
+ "microchip,ksz8795",
+ "microchip,ksz8863",
+ "microchip,ksz8873",
+ "microchip,ksz9477",
+ "microchip,ksz9897",
+ "microchip,ksz9893",
+ "microchip,ksz9563",
+ "microchip,ksz8563",
+ "microchip,ksz9567",
+#endif
+#if IS_ENABLED(CONFIG_NET_DSA_SMSC_LAN9303_MDIO)
+ "smsc,lan9303-mdio",
+#endif
+#if IS_ENABLED(CONFIG_NET_DSA_SMSC_LAN9303_I2C)
+ "smsc,lan9303-i2c",
+#endif
+ NULL,
+};
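/* For contrast, a shared port firmware node that passes the validation
 * below and thus needs no workaround (a hypothetical example):
 *
 *   port@5 {
 *           reg = <5>;
 *           ethernet = <&gmac0>;
 *           phy-mode = "rgmii-id";
 *
 *           fixed-link {
 *                   speed = <1000>;
 *                   full-duplex;
 *           };
 *   };
 */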
+
+static void dsa_shared_port_validate_of(struct dsa_port *dp,
+ bool *missing_phy_mode,
+ bool *missing_link_description)
+{
+ struct device_node *dn = dp->dn, *phy_np;
+ struct dsa_switch *ds = dp->ds;
+ phy_interface_t mode;
+
+ *missing_phy_mode = false;
+ *missing_link_description = false;
+
+ if (of_get_phy_mode(dn, &mode)) {
+ *missing_phy_mode = true;
+ dev_err(ds->dev,
+ "OF node %pOF of %s port %d lacks the required \"phy-mode\" property\n",
+ dn, dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index);
+ }
+
+ /* Note: of_phy_is_fixed_link() also returns true for
+ * managed = "in-band-status"
+ */
+ if (of_phy_is_fixed_link(dn))
+ return;
+
+ phy_np = of_parse_phandle(dn, "phy-handle", 0);
+ if (phy_np) {
+ of_node_put(phy_np);
+ return;
+ }
+
+ *missing_link_description = true;
+
+ dev_err(ds->dev,
+ "OF node %pOF of %s port %d lacks the required \"phy-handle\", \"fixed-link\" or \"managed\" properties\n",
+ dn, dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index);
+}
+
+int dsa_shared_port_link_register_of(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
- struct device_node *phy_np;
+ bool missing_link_description;
+ bool missing_phy_mode;
int port = dp->index;
+ dsa_shared_port_validate_of(dp, &missing_phy_mode,
+ &missing_link_description);
+
+ if ((missing_phy_mode || missing_link_description) &&
+ !of_device_compatible_match(ds->dev->of_node,
+ dsa_switches_apply_workarounds))
+ return -EINVAL;
+
if (!ds->ops->adjust_link) {
- phy_np = of_parse_phandle(dp->dn, "phy-handle", 0);
- if (of_phy_is_fixed_link(dp->dn) || phy_np) {
+ if (missing_link_description) {
+ dev_warn(ds->dev,
+ "Skipping phylink registration for %s port %d\n",
+ dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index);
+ } else {
if (ds->ops->phylink_mac_link_down)
ds->ops->phylink_mac_link_down(ds, port,
MLO_AN_FIXED, PHY_INTERFACE_MODE_NA);
- return dsa_port_phylink_register(dp);
+
+ return dsa_shared_port_phylink_register(dp);
}
return 0;
}
@@ -1234,12 +1979,12 @@ int dsa_port_link_register_of(struct dsa_port *dp)
"Using legacy PHYLIB callbacks. Please migrate to PHYLINK!\n");
if (of_phy_is_fixed_link(dp->dn))
- return dsa_port_fixed_link_register_of(dp);
+ return dsa_shared_port_fixed_link_register_of(dp);
else
- return dsa_port_setup_phy_of(dp, true);
+ return dsa_shared_port_setup_phy_of(dp, true);
}
-void dsa_port_link_unregister_of(struct dsa_port *dp)
+void dsa_shared_port_link_unregister_of(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
@@ -1247,73 +1992,15 @@ void dsa_port_link_unregister_of(struct dsa_port *dp)
rtnl_lock();
phylink_disconnect_phy(dp->pl);
rtnl_unlock();
- phylink_destroy(dp->pl);
- dp->pl = NULL;
+ dsa_port_phylink_destroy(dp);
return;
}
if (of_phy_is_fixed_link(dp->dn))
of_phy_deregister_fixed_link(dp->dn);
else
- dsa_port_setup_phy_of(dp, false);
-}
-
-int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data)
-{
- struct phy_device *phydev;
- int ret = -EOPNOTSUPP;
-
- if (of_phy_is_fixed_link(dp->dn))
- return ret;
-
- phydev = dsa_port_get_phy_device(dp);
- if (IS_ERR_OR_NULL(phydev))
- return ret;
-
- ret = phy_ethtool_get_strings(phydev, data);
- put_device(&phydev->mdio.dev);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(dsa_port_get_phy_strings);
-
-int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data)
-{
- struct phy_device *phydev;
- int ret = -EOPNOTSUPP;
-
- if (of_phy_is_fixed_link(dp->dn))
- return ret;
-
- phydev = dsa_port_get_phy_device(dp);
- if (IS_ERR_OR_NULL(phydev))
- return ret;
-
- ret = phy_ethtool_get_stats(phydev, NULL, data);
- put_device(&phydev->mdio.dev);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(dsa_port_get_ethtool_phy_stats);
-
-int dsa_port_get_phy_sset_count(struct dsa_port *dp)
-{
- struct phy_device *phydev;
- int ret = -EOPNOTSUPP;
-
- if (of_phy_is_fixed_link(dp->dn))
- return ret;
-
- phydev = dsa_port_get_phy_device(dp);
- if (IS_ERR_OR_NULL(phydev))
- return ret;
-
- ret = phy_ethtool_get_sset_count(phydev);
- put_device(&phydev->mdio.dev);
-
- return ret;
+ dsa_shared_port_setup_phy_of(dp, false);
}
-EXPORT_SYMBOL_GPL(dsa_port_get_phy_sset_count);
int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr)
{
@@ -1351,9 +2038,7 @@ void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr)
int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast)
{
struct dsa_notifier_tag_8021q_vlan_info info = {
- .tree_index = dp->ds->dst->index,
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.vid = vid,
};
@@ -1366,9 +2051,7 @@ int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast)
void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast)
{
struct dsa_notifier_tag_8021q_vlan_info info = {
- .tree_index = dp->ds->dst->index,
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.vid = vid,
};
int err;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 22241afcac81..a9fde48cffd4 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -19,10 +19,193 @@
#include <net/tc_act/tc_mirred.h>
#include <linux/if_bridge.h>
#include <linux/if_hsr.h>
+#include <net/dcbnl.h>
#include <linux/netpoll.h>
#include "dsa_priv.h"
+static void dsa_slave_standalone_event_work(struct work_struct *work)
+{
+ struct dsa_standalone_event_work *standalone_work =
+ container_of(work, struct dsa_standalone_event_work, work);
+ const unsigned char *addr = standalone_work->addr;
+ struct net_device *dev = standalone_work->dev;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct switchdev_obj_port_mdb mdb;
+ struct dsa_switch *ds = dp->ds;
+ u16 vid = standalone_work->vid;
+ int err;
+
+ switch (standalone_work->event) {
+ case DSA_UC_ADD:
+ err = dsa_port_standalone_host_fdb_add(dp, addr, vid);
+ if (err) {
+ dev_err(ds->dev,
+ "port %d failed to add %pM vid %d to fdb: %d\n",
+ dp->index, addr, vid, err);
+ break;
+ }
+ break;
+
+ case DSA_UC_DEL:
+ err = dsa_port_standalone_host_fdb_del(dp, addr, vid);
+ if (err) {
+ dev_err(ds->dev,
+ "port %d failed to delete %pM vid %d from fdb: %d\n",
+ dp->index, addr, vid, err);
+ }
+
+ break;
+ case DSA_MC_ADD:
+ ether_addr_copy(mdb.addr, addr);
+ mdb.vid = vid;
+
+ err = dsa_port_standalone_host_mdb_add(dp, &mdb);
+ if (err) {
+ dev_err(ds->dev,
+ "port %d failed to add %pM vid %d to mdb: %d\n",
+ dp->index, addr, vid, err);
+ break;
+ }
+ break;
+ case DSA_MC_DEL:
+ ether_addr_copy(mdb.addr, addr);
+ mdb.vid = vid;
+
+ err = dsa_port_standalone_host_mdb_del(dp, &mdb);
+ if (err) {
+ dev_err(ds->dev,
+ "port %d failed to delete %pM vid %d from mdb: %d\n",
+ dp->index, addr, vid, err);
+ }
+
+ break;
+ }
+
+ kfree(standalone_work);
+}
+
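+/* Called while the address lists are walked under the addr_list lock in
+ * atomic (BH) context, whereas programming host FDB/MDB entries may
+ * sleep. Hence the GFP_ATOMIC allocation and the deferral to the DSA
+ * ordered workqueue.
+ */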
+static int dsa_slave_schedule_standalone_work(struct net_device *dev,
+ enum dsa_standalone_event event,
+ const unsigned char *addr,
+ u16 vid)
+{
+ struct dsa_standalone_event_work *standalone_work;
+
+ standalone_work = kzalloc(sizeof(*standalone_work), GFP_ATOMIC);
+ if (!standalone_work)
+ return -ENOMEM;
+
+ INIT_WORK(&standalone_work->work, dsa_slave_standalone_event_work);
+ standalone_work->event = event;
+ standalone_work->dev = dev;
+
+ ether_addr_copy(standalone_work->addr, addr);
+ standalone_work->vid = vid;
+
+ dsa_schedule_work(&standalone_work->work);
+
+ return 0;
+}
+
+static int dsa_slave_sync_uc(struct net_device *dev,
+ const unsigned char *addr)
+{
+ struct net_device *master = dsa_slave_to_master(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ dev_uc_add(master, addr);
+
+ if (!dsa_switch_supports_uc_filtering(dp->ds))
+ return 0;
+
+ return dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, addr, 0);
+}
+
+static int dsa_slave_unsync_uc(struct net_device *dev,
+ const unsigned char *addr)
+{
+ struct net_device *master = dsa_slave_to_master(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ dev_uc_del(master, addr);
+
+ if (!dsa_switch_supports_uc_filtering(dp->ds))
+ return 0;
+
+ return dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, addr, 0);
+}
+
+static int dsa_slave_sync_mc(struct net_device *dev,
+ const unsigned char *addr)
+{
+ struct net_device *master = dsa_slave_to_master(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ dev_mc_add(master, addr);
+
+ if (!dsa_switch_supports_mc_filtering(dp->ds))
+ return 0;
+
+ return dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, addr, 0);
+}
+
+static int dsa_slave_unsync_mc(struct net_device *dev,
+ const unsigned char *addr)
+{
+ struct net_device *master = dsa_slave_to_master(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ dev_mc_del(master, addr);
+
+ if (!dsa_switch_supports_mc_filtering(dp->ds))
+ return 0;
+
+ return dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, addr, 0);
+}
+
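+/* Replay (or withdraw) every synced unicast and multicast address
+ * towards the DSA master and, where supported, the host FDB/MDB. Used
+ * when a port changes its DSA master, so that host addresses follow the
+ * new CPU port; flushing the workqueue ensures the deferred work has
+ * completed before the caller continues.
+ */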
+void dsa_slave_sync_ha(struct net_device *dev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+ struct netdev_hw_addr *ha;
+
+ netif_addr_lock_bh(dev);
+
+ netdev_for_each_synced_mc_addr(ha, dev)
+ dsa_slave_sync_mc(dev, ha->addr);
+
+ netdev_for_each_synced_uc_addr(ha, dev)
+ dsa_slave_sync_uc(dev, ha->addr);
+
+ netif_addr_unlock_bh(dev);
+
+ if (dsa_switch_supports_uc_filtering(ds) ||
+ dsa_switch_supports_mc_filtering(ds))
+ dsa_flush_workqueue();
+}
+
+void dsa_slave_unsync_ha(struct net_device *dev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+ struct netdev_hw_addr *ha;
+
+ netif_addr_lock_bh(dev);
+
+ netdev_for_each_synced_uc_addr(ha, dev)
+ dsa_slave_unsync_uc(dev, ha->addr);
+
+ netdev_for_each_synced_mc_addr(ha, dev)
+ dsa_slave_unsync_mc(dev, ha->addr);
+
+ netif_addr_unlock_bh(dev);
+
+ if (dsa_switch_supports_uc_filtering(ds) ||
+ dsa_switch_supports_mc_filtering(ds))
+ dsa_flush_workqueue();
+}
+
/* slave mii_bus handling ***************************************************/
static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
{
@@ -67,6 +250,7 @@ static int dsa_slave_open(struct net_device *dev)
{
struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
int err;
err = dev_open(master, NULL);
@@ -75,38 +259,30 @@ static int dsa_slave_open(struct net_device *dev)
goto out;
}
- if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) {
- err = dev_uc_add(master, dev->dev_addr);
- if (err < 0)
+ if (dsa_switch_supports_uc_filtering(ds)) {
+ err = dsa_port_standalone_host_fdb_add(dp, dev->dev_addr, 0);
+ if (err)
goto out;
}
- if (dev->flags & IFF_ALLMULTI) {
- err = dev_set_allmulti(master, 1);
- if (err < 0)
- goto del_unicast;
- }
- if (dev->flags & IFF_PROMISC) {
- err = dev_set_promiscuity(master, 1);
+ if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) {
+ err = dev_uc_add(master, dev->dev_addr);
if (err < 0)
- goto clear_allmulti;
+ goto del_host_addr;
}
err = dsa_port_enable_rt(dp, dev->phydev);
if (err)
- goto clear_promisc;
+ goto del_unicast;
return 0;
-clear_promisc:
- if (dev->flags & IFF_PROMISC)
- dev_set_promiscuity(master, -1);
-clear_allmulti:
- if (dev->flags & IFF_ALLMULTI)
- dev_set_allmulti(master, -1);
del_unicast:
if (!ether_addr_equal(dev->dev_addr, master->dev_addr))
dev_uc_del(master, dev->dev_addr);
+del_host_addr:
+ if (dsa_switch_supports_uc_filtering(ds))
+ dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0);
out:
return err;
}
@@ -115,68 +291,97 @@ static int dsa_slave_close(struct net_device *dev)
{
struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
dsa_port_disable_rt(dp);
- dev_mc_unsync(master, dev);
- dev_uc_unsync(master, dev);
- if (dev->flags & IFF_ALLMULTI)
- dev_set_allmulti(master, -1);
- if (dev->flags & IFF_PROMISC)
- dev_set_promiscuity(master, -1);
-
if (!ether_addr_equal(dev->dev_addr, master->dev_addr))
dev_uc_del(master, dev->dev_addr);
+ if (dsa_switch_supports_uc_filtering(ds))
+ dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0);
+
return 0;
}
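+/* Map the user port's rx mode onto CPU port flooding: promiscuous mode
+ * needs both unknown unicast and unknown multicast to reach the host,
+ * while allmulti only needs unknown multicast.
+ */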
+static void dsa_slave_manage_host_flood(struct net_device *dev)
+{
+ bool mc = dev->flags & (IFF_PROMISC | IFF_ALLMULTI);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ bool uc = dev->flags & IFF_PROMISC;
+
+ dsa_port_set_host_flood(dp, uc, mc);
+}
+
static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
{
struct net_device *master = dsa_slave_to_master(dev);
- if (dev->flags & IFF_UP) {
- if (change & IFF_ALLMULTI)
- dev_set_allmulti(master,
- dev->flags & IFF_ALLMULTI ? 1 : -1);
- if (change & IFF_PROMISC)
- dev_set_promiscuity(master,
- dev->flags & IFF_PROMISC ? 1 : -1);
- }
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+
+ if (change & IFF_ALLMULTI)
+ dev_set_allmulti(master,
+ dev->flags & IFF_ALLMULTI ? 1 : -1);
+ if (change & IFF_PROMISC)
+ dev_set_promiscuity(master,
+ dev->flags & IFF_PROMISC ? 1 : -1);
+
+ if (dsa_switch_supports_uc_filtering(ds) &&
+ dsa_switch_supports_mc_filtering(ds))
+ dsa_slave_manage_host_flood(dev);
}
static void dsa_slave_set_rx_mode(struct net_device *dev)
{
- struct net_device *master = dsa_slave_to_master(dev);
-
- dev_mc_sync(master, dev);
- dev_uc_sync(master, dev);
+ __dev_mc_sync(dev, dsa_slave_sync_mc, dsa_slave_unsync_mc);
+ __dev_uc_sync(dev, dsa_slave_sync_uc, dsa_slave_unsync_uc);
}
static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
{
struct net_device *master = dsa_slave_to_master(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
struct sockaddr *addr = a;
int err;
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
+ /* If the port is down, the address isn't synced yet to hardware or
+ * to the DSA master, so there is nothing to change.
+ */
if (!(dev->flags & IFF_UP))
- goto out;
+ goto out_change_dev_addr;
+
+ if (dsa_switch_supports_uc_filtering(ds)) {
+ err = dsa_port_standalone_host_fdb_add(dp, addr->sa_data, 0);
+ if (err)
+ return err;
+ }
if (!ether_addr_equal(addr->sa_data, master->dev_addr)) {
err = dev_uc_add(master, addr->sa_data);
if (err < 0)
- return err;
+ goto del_unicast;
}
if (!ether_addr_equal(dev->dev_addr, master->dev_addr))
dev_uc_del(master, dev->dev_addr);
-out:
+ if (dsa_switch_supports_uc_filtering(ds))
+ dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0);
+
+out_change_dev_addr:
eth_hw_addr_set(dev, addr->sa_data);
return 0;
+
+del_unicast:
+ if (dsa_switch_supports_uc_filtering(ds))
+ dsa_port_standalone_host_fdb_del(dp, addr->sa_data, 0);
+
+ return err;
}
struct dsa_slave_dump_ctx {
@@ -288,6 +493,12 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
ret = dsa_port_set_state(dp, attr->u.stp_state, true);
break;
+ case SWITCHDEV_ATTR_ID_PORT_MST_STATE:
+ if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
+ return -EOPNOTSUPP;
+
+ ret = dsa_port_set_mst_state(dp, &attr->u.mst_state, extack);
+ break;
case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev))
return -EOPNOTSUPP;
@@ -301,6 +512,12 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
ret = dsa_port_ageing_time(dp, attr->u.ageing_time);
break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_MST:
+ if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev))
+ return -EOPNOTSUPP;
+
+ ret = dsa_port_mst_enable(dp, attr->u.mst, extack);
+ break;
case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
return -EOPNOTSUPP;
@@ -314,6 +531,12 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack);
break;
+ case SWITCHDEV_ATTR_ID_VLAN_MSTI:
+ if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev))
+ return -EOPNOTSUPP;
+
+ ret = dsa_port_vlan_msti(dp, &attr->u.vlan_msti);
+ break;
default:
ret = -EOPNOTSUPP;
break;
@@ -348,9 +571,8 @@ static int dsa_slave_vlan_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack)
{
- struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
- struct switchdev_obj_port_vlan vlan;
+ struct switchdev_obj_port_vlan *vlan;
int err;
if (dsa_port_skip_vlan_configuration(dp)) {
@@ -358,14 +580,14 @@ static int dsa_slave_vlan_add(struct net_device *dev,
return 0;
}
- vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj);
+ vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
/* Deny adding a bridge VLAN when there is already an 802.1Q upper with
* the same VID.
*/
if (br_vlan_enabled(dsa_port_bridge_dev_get(dp))) {
rcu_read_lock();
- err = dsa_slave_vlan_check_for_8021q_uppers(dev, &vlan);
+ err = dsa_slave_vlan_check_for_8021q_uppers(dev, vlan);
rcu_read_unlock();
if (err) {
NL_SET_ERR_MSG_MOD(extack,
@@ -374,21 +596,36 @@ static int dsa_slave_vlan_add(struct net_device *dev,
}
}
- err = dsa_port_vlan_add(dp, &vlan, extack);
- if (err)
- return err;
+ return dsa_port_vlan_add(dp, vlan, extack);
+}
+
+/* Offload a VLAN installed on the bridge or on a foreign interface by
+ * installing it as a VLAN towards the CPU port.
+ */
+static int dsa_slave_host_vlan_add(struct net_device *dev,
+ const struct switchdev_obj *obj,
+ struct netlink_ext_ack *extack)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct switchdev_obj_port_vlan vlan;
+
+ /* Do nothing if this is a software bridge */
+ if (!dp->bridge)
+ return -EOPNOTSUPP;
+
+ if (dsa_port_skip_vlan_configuration(dp)) {
+ NL_SET_ERR_MSG_MOD(extack, "skipping configuration of VLAN");
+ return 0;
+ }
- /* We need the dedicated CPU port to be a member of the VLAN as well.
- * Even though drivers often handle CPU membership in special ways,
+ vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj);
+
+ /* Even though drivers often handle CPU membership in special ways,
* it doesn't make sense to program a PVID, so clear this flag.
*/
vlan.flags &= ~BRIDGE_VLAN_INFO_PVID;
- err = dsa_port_vlan_add(dp->cpu_dp, &vlan, extack);
- if (err)
- return err;
-
- return vlan_vid_add(master, htons(ETH_P_8021Q), vlan.vid);
+ return dsa_port_host_vlan_add(dp, &vlan, extack);
}
static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx,
@@ -412,13 +649,13 @@ static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx,
if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
return -EOPNOTSUPP;
- err = dsa_port_host_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
+ err = dsa_port_bridge_host_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
- if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
- return -EOPNOTSUPP;
-
- err = dsa_slave_vlan_add(dev, obj, extack);
+ if (dsa_port_offloads_bridge_port(dp, obj->orig_dev))
+ err = dsa_slave_vlan_add(dev, obj, extack);
+ else
+ err = dsa_slave_host_vlan_add(dev, obj, extack);
break;
case SWITCHDEV_OBJ_ID_MRP:
if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
@@ -444,26 +681,33 @@ static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx,
static int dsa_slave_vlan_del(struct net_device *dev,
const struct switchdev_obj *obj)
{
- struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
struct switchdev_obj_port_vlan *vlan;
- int err;
if (dsa_port_skip_vlan_configuration(dp))
return 0;
vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
- /* Do not deprogram the CPU port as it may be shared with other user
- * ports which can be members of this VLAN as well.
- */
- err = dsa_port_vlan_del(dp, vlan);
- if (err)
- return err;
+ return dsa_port_vlan_del(dp, vlan);
+}
+
+static int dsa_slave_host_vlan_del(struct net_device *dev,
+ const struct switchdev_obj *obj)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct switchdev_obj_port_vlan *vlan;
- vlan_vid_del(master, htons(ETH_P_8021Q), vlan->vid);
+ /* Do nothing if this is a software bridge */
+ if (!dp->bridge)
+ return -EOPNOTSUPP;
- return 0;
+ if (dsa_port_skip_vlan_configuration(dp))
+ return 0;
+
+ vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
+
+ return dsa_port_host_vlan_del(dp, vlan);
}
static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx,
@@ -486,13 +730,13 @@ static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx,
if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
return -EOPNOTSUPP;
- err = dsa_port_host_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
+ err = dsa_port_bridge_host_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
- if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
- return -EOPNOTSUPP;
-
- err = dsa_slave_vlan_del(dev, obj);
+ if (dsa_port_offloads_bridge_port(dp, obj->orig_dev))
+ err = dsa_slave_vlan_del(dev, obj);
+ else
+ err = dsa_slave_host_vlan_del(dev, obj);
break;
case SWITCHDEV_OBJ_ID_MRP:
if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev))
@@ -515,26 +759,6 @@ static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx,
return err;
}
-static int dsa_slave_get_port_parent_id(struct net_device *dev,
- struct netdev_phys_item_id *ppid)
-{
- struct dsa_port *dp = dsa_slave_to_port(dev);
- struct dsa_switch *ds = dp->ds;
- struct dsa_switch_tree *dst = ds->dst;
-
- /* For non-legacy ports, devlink is used and it takes
- * care of the name generation. This ndo implementation
- * should be removed with legacy support.
- */
- if (dp->ds->devlink)
- return -EOPNOTSUPP;
-
- ppid->id_len = sizeof(dst->index);
- memcpy(&ppid->id, &dst->index, ppid->id_len);
-
- return 0;
-}
-
static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
struct sk_buff *skb)
{
@@ -644,9 +868,9 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
static void dsa_slave_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
{
- strlcpy(drvinfo->driver, "dsa", sizeof(drvinfo->driver));
- strlcpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version));
- strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info));
+ strscpy(drvinfo->driver, "dsa", sizeof(drvinfo->driver));
+ strscpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version));
+ strscpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info));
}
static int dsa_slave_get_regs_len(struct net_device *dev)
@@ -753,10 +977,10 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev,
s = per_cpu_ptr(dev->tstats, i);
do {
start = u64_stats_fetch_begin_irq(&s->syncp);
- tx_packets = s->tx_packets;
- tx_bytes = s->tx_bytes;
- rx_packets = s->rx_packets;
- rx_bytes = s->rx_bytes;
+ tx_packets = u64_stats_read(&s->tx_packets);
+ tx_bytes = u64_stats_read(&s->tx_bytes);
+ rx_packets = u64_stats_read(&s->rx_packets);
+ rx_bytes = u64_stats_read(&s->rx_bytes);
} while (u64_stats_fetch_retry_irq(&s->syncp, start));
data[0] += tx_packets;
data[1] += tx_bytes;
@@ -820,6 +1044,18 @@ dsa_slave_get_eth_ctrl_stats(struct net_device *dev,
ds->ops->get_eth_ctrl_stats(ds, dp->index, ctrl_stats);
}
+static void
+dsa_slave_get_rmon_stats(struct net_device *dev,
+ struct ethtool_rmon_stats *rmon_stats,
+ const struct ethtool_rmon_hist_range **ranges)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->ops->get_rmon_stats)
+ ds->ops->get_rmon_stats(ds, dp->index, rmon_stats, ranges);
+}
+
static void dsa_slave_net_selftest(struct net_device *ndev,
struct ethtool_test *etest, u64 *buf)
{
@@ -915,6 +1151,16 @@ static int dsa_slave_set_link_ksettings(struct net_device *dev,
return phylink_ethtool_ksettings_set(dp->pl, cmd);
}
+static void dsa_slave_get_pause_stats(struct net_device *dev,
+ struct ethtool_pause_stats *pause_stats)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->ops->get_pause_stats)
+ ds->ops->get_pause_stats(ds, dp->index, pause_stats);
+}
+
static void dsa_slave_get_pauseparam(struct net_device *dev,
struct ethtool_pauseparam *pause)
{
@@ -973,24 +1219,6 @@ static void dsa_slave_poll_controller(struct net_device *dev)
}
#endif
-static int dsa_slave_get_phys_port_name(struct net_device *dev,
- char *name, size_t len)
-{
- struct dsa_port *dp = dsa_slave_to_port(dev);
-
- /* For non-legacy ports, devlink is used and it takes
- * care of the name generation. This ndo implementation
- * should be removed with legacy support.
- */
- if (dp->ds->devlink)
- return -EOPNOTSUPP;
-
- if (snprintf(name, len, "p%d", dp->index) >= len)
- return -EINVAL;
-
- return 0;
-}
-
static struct dsa_mall_tc_entry *
dsa_slave_mall_tc_entry_find(struct net_device *dev, unsigned long cookie)
{
@@ -1009,6 +1237,7 @@ dsa_slave_add_cls_matchall_mirred(struct net_device *dev,
struct tc_cls_matchall_offload *cls,
bool ingress)
{
+ struct netlink_ext_ack *extack = cls->common.extack;
struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_mall_mirror_tc_entry *mirror;
@@ -1046,7 +1275,7 @@ dsa_slave_add_cls_matchall_mirred(struct net_device *dev,
mirror->to_local_port = to_dp->index;
mirror->ingress = ingress;
- err = ds->ops->port_mirror_add(ds, dp->index, mirror, ingress);
+ err = ds->ops->port_mirror_add(ds, dp->index, mirror, ingress, extack);
if (err) {
kfree(mall_tc_entry);
return err;
@@ -1316,8 +1545,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port,
void *type_data)
{
- struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp;
- struct net_device *master = cpu_dp->master;
+ struct net_device *master = dsa_port_to_master(dsa_to_port(ds, port));
if (!master->netdev_ops->ndo_setup_tc)
return -EOPNOTSUPP;
@@ -1385,7 +1613,6 @@ static int dsa_slave_get_ts_info(struct net_device *dev,
static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
u16 vid)
{
- struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
struct switchdev_obj_port_vlan vlan = {
.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
@@ -1405,7 +1632,7 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
}
/* And CPU port... */
- ret = dsa_port_vlan_add(dp->cpu_dp, &vlan, &extack);
+ ret = dsa_port_host_vlan_add(dp, &vlan, &extack);
if (ret) {
if (extack._msg)
netdev_err(dev, "CPU port %d: %s\n", dp->cpu_dp->index,
@@ -1413,13 +1640,12 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
return ret;
}
- return vlan_vid_add(master, proto, vid);
+ return 0;
}
static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
u16 vid)
{
- struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
struct switchdev_obj_port_vlan vlan = {
.vid = vid,
@@ -1428,16 +1654,11 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
};
int err;
- /* Do not deprogram the CPU port as it may be shared with other user
- * ports which can be members of this VLAN as well.
- */
err = dsa_port_vlan_del(dp, &vlan);
if (err)
return err;
- vlan_vid_del(master, proto, vid);
-
- return 0;
+ return dsa_port_host_vlan_del(dp, &vlan);
}
static int dsa_slave_restore_vlan(struct net_device *vdev, int vid, void *arg)
@@ -1624,11 +1845,9 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
{
struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
- struct dsa_port *dp_iter;
- struct dsa_port *cpu_dp;
- int port = p->dp->index;
+ struct dsa_port *cpu_dp = dp->cpu_dp;
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_port *other_dp;
int largest_mtu = 0;
int new_master_mtu;
int old_master_mtu;
@@ -1639,33 +1858,28 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
if (!ds->ops->port_change_mtu)
return -EOPNOTSUPP;
- list_for_each_entry(dp_iter, &ds->dst->ports, list) {
+ dsa_tree_for_each_user_port(other_dp, ds->dst) {
int slave_mtu;
- if (!dsa_port_is_user(dp_iter))
- continue;
-
/* During probe, this function will be called for each slave
* device, while not all of them have been allocated. That's
* ok, it doesn't change what the maximum is, so ignore it.
*/
- if (!dp_iter->slave)
+ if (!other_dp->slave)
continue;
/* Pretend that we already applied the setting, which we
* actually haven't (still haven't done all integrity checks)
*/
- if (dp_iter == dp)
+ if (dp == other_dp)
slave_mtu = new_mtu;
else
- slave_mtu = dp_iter->slave->mtu;
+ slave_mtu = other_dp->slave->mtu;
if (largest_mtu < slave_mtu)
largest_mtu = slave_mtu;
}
- cpu_dp = dsa_to_port(ds, port)->cpu_dp;
-
mtu_limit = min_t(int, master->max_mtu, dev->max_mtu);
old_master_mtu = master->mtu;
new_master_mtu = largest_mtu + dsa_tag_protocol_overhead(cpu_dp->tag_ops);
@@ -1684,15 +1898,14 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
goto out_master_failed;
/* We only need to propagate the MTU of the CPU port to
- * upstream switches, so create a non-targeted notifier which
- * updates all switches.
+ * upstream switches, so emit a notifier which updates them.
*/
- err = dsa_port_mtu_change(cpu_dp, cpu_mtu, false);
+ err = dsa_port_mtu_change(cpu_dp, cpu_mtu);
if (err)
goto out_cpu_failed;
}
- err = dsa_port_mtu_change(dp, new_mtu, true);
+ err = ds->ops->port_change_mtu(ds, dp->index, new_mtu);
if (err)
goto out_port_failed;
@@ -1705,8 +1918,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
out_port_failed:
if (new_master_mtu != old_master_mtu)
dsa_port_mtu_change(cpu_dp, old_master_mtu -
- dsa_tag_protocol_overhead(cpu_dp->tag_ops),
- false);
+ dsa_tag_protocol_overhead(cpu_dp->tag_ops));
out_cpu_failed:
if (new_master_mtu != old_master_mtu)
dev_set_mtu(master, old_master_mtu);
@@ -1714,6 +1926,209 @@ out_master_failed:
return err;
}
+static int __maybe_unused
+dsa_slave_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+ unsigned long mask, new_prio;
+ int err, port = dp->index;
+
+ if (!ds->ops->port_set_default_prio)
+ return -EOPNOTSUPP;
+
+ err = dcb_ieee_setapp(dev, app);
+ if (err)
+ return err;
+
+ mask = dcb_ieee_getapp_mask(dev, app);
+ new_prio = __fls(mask);
+
+ err = ds->ops->port_set_default_prio(ds, port, new_prio);
+ if (err) {
+ dcb_ieee_delapp(dev, app);
+ return err;
+ }
+
+ return 0;
+}
+
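A driver-side sketch of the op invoked above, assuming a simple per-port default-priority register; every FOO_*/foo_* name is hypothetical:

static int foo_port_set_default_prio(struct dsa_switch *ds, int port, u8 prio)
{
	if (prio >= FOO_NUM_PRIOS)
		return -ERANGE;

	/* foo_rmw() stands in for a read-modify-write register helper */
	return foo_rmw(ds, FOO_PORT_DEF_PRIO(port), FOO_DEF_PRIO_MASK,
		       FOO_DEF_PRIO(prio));
}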
+static int __maybe_unused
+dsa_slave_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+ unsigned long mask, new_prio;
+ int err, port = dp->index;
+ u8 dscp = app->protocol;
+
+ if (!ds->ops->port_add_dscp_prio)
+ return -EOPNOTSUPP;
+
+ if (dscp >= 64) {
+ netdev_err(dev, "DSCP APP entry with protocol value %u is invalid\n",
+ dscp);
+ return -EINVAL;
+ }
+
+ err = dcb_ieee_setapp(dev, app);
+ if (err)
+ return err;
+
+ mask = dcb_ieee_getapp_mask(dev, app);
+ new_prio = __fls(mask);
+
+ err = ds->ops->port_add_dscp_prio(ds, port, dscp, new_prio);
+ if (err) {
+ dcb_ieee_delapp(dev, app);
+ return err;
+ }
+
+ return 0;
+}
+
+static int __maybe_unused dsa_slave_dcbnl_ieee_setapp(struct net_device *dev,
+ struct dcb_app *app)
+{
+ switch (app->selector) {
+ case IEEE_8021QAZ_APP_SEL_ETHERTYPE:
+ switch (app->protocol) {
+ case 0:
+ return dsa_slave_dcbnl_set_default_prio(dev, app);
+ default:
+ return -EOPNOTSUPP;
+ }
+ break;
+ case IEEE_8021QAZ_APP_SEL_DSCP:
+ return dsa_slave_dcbnl_add_dscp_prio(dev, app);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int __maybe_unused
+dsa_slave_dcbnl_del_default_prio(struct net_device *dev, struct dcb_app *app)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+ unsigned long mask, new_prio;
+ int err, port = dp->index;
+
+ if (!ds->ops->port_set_default_prio)
+ return -EOPNOTSUPP;
+
+ err = dcb_ieee_delapp(dev, app);
+ if (err)
+ return err;
+
+ mask = dcb_ieee_getapp_mask(dev, app);
+ new_prio = mask ? __fls(mask) : 0;
+
+ err = ds->ops->port_set_default_prio(ds, port, new_prio);
+ if (err) {
+ dcb_ieee_setapp(dev, app);
+ return err;
+ }
+
+ return 0;
+}
+
+static int __maybe_unused
+dsa_slave_dcbnl_del_dscp_prio(struct net_device *dev, struct dcb_app *app)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+ int err, port = dp->index;
+ u8 dscp = app->protocol;
+
+ if (!ds->ops->port_del_dscp_prio)
+ return -EOPNOTSUPP;
+
+ err = dcb_ieee_delapp(dev, app);
+ if (err)
+ return err;
+
+ err = ds->ops->port_del_dscp_prio(ds, port, dscp, app->priority);
+ if (err) {
+ dcb_ieee_setapp(dev, app);
+ return err;
+ }
+
+ return 0;
+}
+
+static int __maybe_unused dsa_slave_dcbnl_ieee_delapp(struct net_device *dev,
+ struct dcb_app *app)
+{
+ switch (app->selector) {
+ case IEEE_8021QAZ_APP_SEL_ETHERTYPE:
+ switch (app->protocol) {
+ case 0:
+ return dsa_slave_dcbnl_del_default_prio(dev, app);
+ default:
+ return -EOPNOTSUPP;
+ }
+ break;
+ case IEEE_8021QAZ_APP_SEL_DSCP:
+ return dsa_slave_dcbnl_del_dscp_prio(dev, app);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/* Pre-populate the DCB application priority table with the priorities
+ * configured during switch setup, which we read from hardware here.
+ */
+static int dsa_slave_dcbnl_init(struct net_device *dev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+ int err;
+
+ if (ds->ops->port_get_default_prio) {
+ int prio = ds->ops->port_get_default_prio(ds, port);
+ struct dcb_app app = {
+ .selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE,
+ .protocol = 0,
+ .priority = prio,
+ };
+
+ if (prio < 0)
+ return prio;
+
+ err = dcb_ieee_setapp(dev, &app);
+ if (err)
+ return err;
+ }
+
+ if (ds->ops->port_get_dscp_prio) {
+ int protocol;
+
+ for (protocol = 0; protocol < 64; protocol++) {
+ struct dcb_app app = {
+ .selector = IEEE_8021QAZ_APP_SEL_DSCP,
+ .protocol = protocol,
+ };
+ int prio;
+
+ prio = ds->ops->port_get_dscp_prio(ds, port, protocol);
+ if (prio == -EOPNOTSUPP)
+ continue;
+ if (prio < 0)
+ return prio;
+
+ app.priority = prio;
+
+ err = dcb_ieee_setapp(dev, &app);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
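For symmetry, a hedged sketch of the read-back op that this init loop relies on, returning -EOPNOTSUPP for unmapped DSCP values so they are skipped (foo_*/FOO_* names hypothetical, FIELD_GET() from linux/bitfield.h):

static int foo_port_get_dscp_prio(struct dsa_switch *ds, int port, u8 dscp)
{
	u32 map;
	int err;

	/* foo_read_dscp_map() stands in for a DSCP-table accessor */
	err = foo_read_dscp_map(ds, port, dscp, &map);
	if (err)
		return err;

	/* Unmapped DSCP values are reported as -EOPNOTSUPP so that
	 * dsa_slave_dcbnl_init() skips them instead of failing.
	 */
	if (!(map & FOO_DSCP_VALID))
		return -EOPNOTSUPP;

	return FIELD_GET(FOO_DSCP_PRIO_MASK, map);
}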
static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_drvinfo = dsa_slave_get_drvinfo,
.get_regs_len = dsa_slave_get_regs_len,
@@ -1729,12 +2144,14 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_eth_phy_stats = dsa_slave_get_eth_phy_stats,
.get_eth_mac_stats = dsa_slave_get_eth_mac_stats,
.get_eth_ctrl_stats = dsa_slave_get_eth_ctrl_stats,
+ .get_rmon_stats = dsa_slave_get_rmon_stats,
.set_wol = dsa_slave_set_wol,
.get_wol = dsa_slave_get_wol,
.set_eee = dsa_slave_set_eee,
.get_eee = dsa_slave_get_eee,
.get_link_ksettings = dsa_slave_get_link_ksettings,
.set_link_ksettings = dsa_slave_set_link_ksettings,
+ .get_pause_stats = dsa_slave_get_pause_stats,
.get_pauseparam = dsa_slave_get_pauseparam,
.set_pauseparam = dsa_slave_set_pauseparam,
.get_rxnfc = dsa_slave_get_rxnfc,
@@ -1743,11 +2160,16 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.self_test = dsa_slave_net_selftest,
};
+static const struct dcbnl_rtnl_ops __maybe_unused dsa_slave_dcbnl_ops = {
+ .ieee_setapp = dsa_slave_dcbnl_ieee_setapp,
+ .ieee_delapp = dsa_slave_dcbnl_ieee_delapp,
+};
+
static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
- return dp->ds->devlink ? &dp->devlink_port : NULL;
+ return &dp->devlink_port;
}
static void dsa_slave_get_stats64(struct net_device *dev,
@@ -1766,13 +2188,14 @@ static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx,
struct net_device_path *path)
{
struct dsa_port *dp = dsa_slave_to_port(ctx->dev);
+ struct net_device *master = dsa_port_to_master(dp);
struct dsa_port *cpu_dp = dp->cpu_dp;
path->dev = ctx->dev;
path->type = DEV_PATH_DSA;
path->dsa.proto = cpu_dp->tag_ops->proto;
path->dsa.port = dp->index;
- ctx->dev = cpu_dp->master;
+ ctx->dev = master;
return 0;
}
@@ -1792,10 +2215,8 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_netpoll_cleanup = dsa_slave_netpoll_cleanup,
.ndo_poll_controller = dsa_slave_poll_controller,
#endif
- .ndo_get_phys_port_name = dsa_slave_get_phys_port_name,
.ndo_setup_tc = dsa_slave_setup_tc,
.ndo_get_stats64 = dsa_slave_get_stats64,
- .ndo_get_port_parent_id = dsa_slave_get_port_parent_id,
.ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid,
.ndo_get_devlink_port = dsa_slave_get_devlink_port,
@@ -1883,7 +2304,7 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
if (ret) {
netdev_err(slave_dev, "failed to connect to PHY: %pe\n",
ERR_PTR(ret));
- phylink_destroy(dp->pl);
+ dsa_port_phylink_destroy(dp);
}
return ret;
@@ -1892,9 +2313,9 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
void dsa_slave_setup_tagger(struct net_device *slave)
{
struct dsa_port *dp = dsa_slave_to_port(slave);
+ struct net_device *master = dsa_port_to_master(dp);
struct dsa_slave_priv *p = netdev_priv(slave);
const struct dsa_port *cpu_dp = dp->cpu_dp;
- struct net_device *master = cpu_dp->master;
const struct dsa_switch *ds = dp->ds;
slave->needed_headroom = cpu_dp->tag_ops->needed_headroom;
@@ -1917,15 +2338,6 @@ void dsa_slave_setup_tagger(struct net_device *slave)
slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
}
-static struct lock_class_key dsa_slave_netdev_xmit_lock_key;
-static void dsa_slave_set_lockdep_class_one(struct net_device *dev,
- struct netdev_queue *txq,
- void *_unused)
-{
- lockdep_set_class(&txq->_xmit_lock,
- &dsa_slave_netdev_xmit_lock_key);
-}
-
int dsa_slave_suspend(struct net_device *slave_dev)
{
struct dsa_port *dp = dsa_slave_to_port(slave_dev);
@@ -1960,8 +2372,7 @@ int dsa_slave_resume(struct net_device *slave_dev)
int dsa_slave_create(struct dsa_port *port)
{
- const struct dsa_port *cpu_dp = port->cpu_dp;
- struct net_device *master = cpu_dp->master;
+ struct net_device *master = dsa_port_to_master(port);
struct dsa_switch *ds = port->ds;
const char *name = port->name;
struct net_device *slave_dev;
@@ -1977,20 +2388,23 @@ int dsa_slave_create(struct dsa_port *port)
if (slave_dev == NULL)
return -ENOMEM;
+ slave_dev->rtnl_link_ops = &dsa_link_ops;
slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
+#if IS_ENABLED(CONFIG_DCB)
+ slave_dev->dcbnl_ops = &dsa_slave_dcbnl_ops;
+#endif
if (!is_zero_ether_addr(port->mac))
eth_hw_addr_set(slave_dev, port->mac);
else
eth_hw_addr_inherit(slave_dev, master);
slave_dev->priv_flags |= IFF_NO_QUEUE;
+ if (dsa_switch_supports_uc_filtering(ds))
+ slave_dev->priv_flags |= IFF_UNICAST_FLT;
slave_dev->netdev_ops = &dsa_slave_netdev_ops;
if (ds->ops->port_max_mtu)
slave_dev->max_mtu = ds->ops->port_max_mtu(ds, port->index);
SET_NETDEV_DEVTYPE(slave_dev, &dsa_type);
- netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,
- NULL);
-
SET_NETDEV_DEV(slave_dev, port->ds->dev);
slave_dev->dev.of_node = port->dn;
slave_dev->vlan_features = master->vlan_features;
@@ -2036,6 +2450,17 @@ int dsa_slave_create(struct dsa_port *port)
goto out_phy;
}
+ if (IS_ENABLED(CONFIG_DCB)) {
+ ret = dsa_slave_dcbnl_init(slave_dev);
+ if (ret) {
+ netdev_err(slave_dev,
+ "failed to initialize DCB: %pe\n",
+ ERR_PTR(ret));
+ rtnl_unlock();
+ goto out_unregister;
+ }
+ }
+
ret = netdev_upper_dev_link(master, slave_dev, NULL);
rtnl_unlock();
@@ -2051,7 +2476,7 @@ out_phy:
rtnl_lock();
phylink_disconnect_phy(p->dp->pl);
rtnl_unlock();
- phylink_destroy(p->dp->pl);
+ dsa_port_phylink_destroy(p->dp);
out_gcells:
gro_cells_destroy(&p->gcells);
out_free:
@@ -2074,12 +2499,89 @@ void dsa_slave_destroy(struct net_device *slave_dev)
phylink_disconnect_phy(dp->pl);
rtnl_unlock();
- phylink_destroy(dp->pl);
+ dsa_port_phylink_destroy(dp);
gro_cells_destroy(&p->gcells);
free_percpu(slave_dev->tstats);
free_netdev(slave_dev);
}
+int dsa_slave_change_master(struct net_device *dev, struct net_device *master,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *old_master = dsa_slave_to_master(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+ struct net_device *upper;
+ struct list_head *iter;
+ int err;
+
+ if (master == old_master)
+ return 0;
+
+ if (!ds->ops->port_change_master) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Driver does not support changing DSA master");
+ return -EOPNOTSUPP;
+ }
+
+ if (!netdev_uses_dsa(master)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Interface not eligible as DSA master");
+ return -EOPNOTSUPP;
+ }
+
+ netdev_for_each_upper_dev_rcu(master, upper, iter) {
+ if (dsa_slave_dev_check(upper))
+ continue;
+ if (netif_is_bridge_master(upper))
+ continue;
+ NL_SET_ERR_MSG_MOD(extack, "Cannot join master with unknown uppers");
+ return -EOPNOTSUPP;
+ }
+
+	/* Since we allow live-changing the DSA master, and we auto-open the
+	 * DSA master when the user port opens, we need to ensure that the
+	 * new DSA master is open too.
+	 */
+ if (dev->flags & IFF_UP) {
+ err = dev_open(master, extack);
+ if (err)
+ return err;
+ }
+
+ netdev_upper_dev_unlink(old_master, dev);
+
+ err = netdev_upper_dev_link(master, dev, extack);
+ if (err)
+ goto out_revert_old_master_unlink;
+
+ err = dsa_port_change_master(dp, master, extack);
+ if (err)
+ goto out_revert_master_link;
+
+ /* Update the MTU of the new CPU port through cross-chip notifiers */
+ err = dsa_slave_change_mtu(dev, dev->mtu);
+ if (err && err != -EOPNOTSUPP) {
+ netdev_warn(dev,
+ "nonfatal error updating MTU with new master: %pe\n",
+ ERR_PTR(err));
+ }
+
+ /* If the port doesn't have its own MAC address and relies on the DSA
+ * master's one, inherit it again from the new DSA master.
+ */
+ if (is_zero_ether_addr(dp->mac))
+ eth_hw_addr_inherit(dev, master);
+
+ return 0;
+
+out_revert_master_link:
+ netdev_upper_dev_unlink(master, dev);
+out_revert_old_master_unlink:
+ netdev_upper_dev_link(old_master, dev, NULL);
+ return err;
+}
+
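On the driver side, changing master mostly amounts to re-targeting the port towards another CPU port. A rough sketch, assuming the op mirrors the arguments of dsa_port_change_master() above; foo_set_upstream_port() is a hypothetical helper:

static int foo_port_change_master(struct dsa_switch *ds, int port,
				  struct net_device *master,
				  struct netlink_ext_ack *extack)
{
	struct dsa_port *cpu_dp = master->dsa_ptr;

	/* Works for physical and LAG DSA masters alike, since a LAG DSA
	 * master also carries a (virtual) CPU port in its dsa_ptr.
	 * foo_set_upstream_port() reprograms where this user port's
	 * traffic is steered.
	 */
	return foo_set_upstream_port(ds, port, cpu_dp->index);
}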
bool dsa_slave_dev_check(const struct net_device *dev)
{
return dev->netdev_ops == &dsa_slave_netdev_ops;
@@ -2093,6 +2595,9 @@ static int dsa_slave_changeupper(struct net_device *dev,
struct netlink_ext_ack *extack;
int err = NOTIFY_DONE;
+ if (!dsa_slave_dev_check(dev))
+ return err;
+
extack = netdev_notifier_info_to_extack(&info->info);
if (netif_is_bridge_master(info->upper_dev)) {
@@ -2101,8 +2606,9 @@ static int dsa_slave_changeupper(struct net_device *dev,
if (!err)
dsa_bridge_mtu_normalization(dp);
if (err == -EOPNOTSUPP) {
- NL_SET_ERR_MSG_MOD(extack,
- "Offloading not supported");
+ if (extack && !extack->_msg)
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offloading not supported");
err = 0;
}
err = notifier_from_errno(err);
@@ -2147,6 +2653,9 @@ static int dsa_slave_prechangeupper(struct net_device *dev,
{
struct dsa_port *dp = dsa_slave_to_port(dev);
+ if (!dsa_slave_dev_check(dev))
+ return NOTIFY_DONE;
+
if (netif_is_bridge_master(info->upper_dev) && !info->linking)
dsa_port_pre_bridge_leave(dp, info->upper_dev);
else if (netif_is_lag_master(info->upper_dev) && !info->linking)
@@ -2167,12 +2676,15 @@ dsa_slave_lag_changeupper(struct net_device *dev,
int err = NOTIFY_DONE;
struct dsa_port *dp;
+ if (!netif_is_lag_master(dev))
+ return err;
+
netdev_for_each_lower_dev(dev, lower, iter) {
if (!dsa_slave_dev_check(lower))
continue;
dp = dsa_slave_to_port(lower);
- if (!dp->lag_dev)
+ if (!dp->lag)
/* Software LAG */
continue;
@@ -2196,12 +2708,15 @@ dsa_slave_lag_prechangeupper(struct net_device *dev,
int err = NOTIFY_DONE;
struct dsa_port *dp;
+ if (!netif_is_lag_master(dev))
+ return err;
+
netdev_for_each_lower_dev(dev, lower, iter) {
if (!dsa_slave_dev_check(lower))
continue;
dp = dsa_slave_to_port(lower);
- if (!dp->lag_dev)
+ if (!dp->lag)
/* Software LAG */
continue;
@@ -2303,6 +2818,277 @@ dsa_slave_prechangeupper_sanity_check(struct net_device *dev,
return NOTIFY_DONE;
}
+/* To be eligible as a DSA master, all of a LAG's lower interfaces must
+ * themselves be eligible DSA masters. Additionally, all LAG slaves must be
+ * DSA masters of switches in the same switch tree.
+ */
+ */
+static int dsa_lag_master_validate(struct net_device *lag_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *lower1, *lower2;
+ struct list_head *iter1, *iter2;
+
+ netdev_for_each_lower_dev(lag_dev, lower1, iter1) {
+ netdev_for_each_lower_dev(lag_dev, lower2, iter2) {
+ if (!netdev_uses_dsa(lower1) ||
+ !netdev_uses_dsa(lower2)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "All LAG ports must be eligible as DSA masters");
+ return notifier_from_errno(-EINVAL);
+ }
+
+ if (lower1 == lower2)
+ continue;
+
+ if (!dsa_port_tree_same(lower1->dsa_ptr,
+ lower2->dsa_ptr)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "LAG contains DSA masters of disjoint switch trees");
+ return notifier_from_errno(-EINVAL);
+ }
+ }
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int
+dsa_master_prechangeupper_sanity_check(struct net_device *master,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(&info->info);
+
+ if (!netdev_uses_dsa(master))
+ return NOTIFY_DONE;
+
+ if (!info->linking)
+ return NOTIFY_DONE;
+
+ /* Allow DSA switch uppers */
+ if (dsa_slave_dev_check(info->upper_dev))
+ return NOTIFY_DONE;
+
+ /* Allow bridge uppers of DSA masters, subject to further
+ * restrictions in dsa_bridge_prechangelower_sanity_check()
+ */
+ if (netif_is_bridge_master(info->upper_dev))
+ return NOTIFY_DONE;
+
+ /* Allow LAG uppers, subject to further restrictions in
+ * dsa_lag_master_prechangelower_sanity_check()
+ */
+ if (netif_is_lag_master(info->upper_dev))
+ return dsa_lag_master_validate(info->upper_dev, extack);
+
+ NL_SET_ERR_MSG_MOD(extack,
+ "DSA master cannot join unknown upper interfaces");
+ return notifier_from_errno(-EBUSY);
+}
+
+static int
+dsa_lag_master_prechangelower_sanity_check(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(&info->info);
+ struct net_device *lag_dev = info->upper_dev;
+ struct net_device *lower;
+ struct list_head *iter;
+
+ if (!netdev_uses_dsa(lag_dev) || !netif_is_lag_master(lag_dev))
+ return NOTIFY_DONE;
+
+ if (!info->linking)
+ return NOTIFY_DONE;
+
+ if (!netdev_uses_dsa(dev)) {
+ NL_SET_ERR_MSG(extack,
+ "Only DSA masters can join a LAG DSA master");
+ return notifier_from_errno(-EINVAL);
+ }
+
+ netdev_for_each_lower_dev(lag_dev, lower, iter) {
+ if (!dsa_port_tree_same(dev->dsa_ptr, lower->dsa_ptr)) {
+ NL_SET_ERR_MSG(extack,
+ "Interface is DSA master for a different switch tree than this LAG");
+ return notifier_from_errno(-EINVAL);
+ }
+
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+/* Don't allow bridging of DSA masters, since the bridge layer rx_handler
+ * prevents the DSA fake ethertype handler from being invoked, so we don't
+ * get the chance to strip off and parse the DSA switch tag protocol header
+ * (the bridge layer just returns RX_HANDLER_CONSUMED, stopping RX
+ * processing for these frames).
+ * The only case where that would not be an issue is when bridging can
+ * already be offloaded, such as when the DSA master is itself a DSA or
+ * plain switchdev port, and is bridged only with other ports from the same
+ * hardware device.
+ */
+static int
+dsa_bridge_prechangelower_sanity_check(struct net_device *new_lower,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *br = info->upper_dev;
+ struct netlink_ext_ack *extack;
+ struct net_device *lower;
+ struct list_head *iter;
+
+ if (!netif_is_bridge_master(br))
+ return NOTIFY_DONE;
+
+ if (!info->linking)
+ return NOTIFY_DONE;
+
+ extack = netdev_notifier_info_to_extack(&info->info);
+
+ netdev_for_each_lower_dev(br, lower, iter) {
+ if (!netdev_uses_dsa(new_lower) && !netdev_uses_dsa(lower))
+ continue;
+
+ if (!netdev_port_same_parent_id(lower, new_lower)) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot do software bridging with a DSA master");
+ return notifier_from_errno(-EINVAL);
+ }
+ }
+
+ return NOTIFY_DONE;
+}
+
+static void dsa_tree_migrate_ports_from_lag_master(struct dsa_switch_tree *dst,
+ struct net_device *lag_dev)
+{
+ struct net_device *new_master = dsa_tree_find_first_master(dst);
+ struct dsa_port *dp;
+ int err;
+
+ dsa_tree_for_each_user_port(dp, dst) {
+ if (dsa_port_to_master(dp) != lag_dev)
+ continue;
+
+ err = dsa_slave_change_master(dp->slave, new_master, NULL);
+ if (err) {
+ netdev_err(dp->slave,
+ "failed to restore master to %s: %pe\n",
+ new_master->name, ERR_PTR(err));
+ }
+ }
+}
+
+static int dsa_master_lag_join(struct net_device *master,
+ struct net_device *lag_dev,
+ struct netdev_lag_upper_info *uinfo,
+ struct netlink_ext_ack *extack)
+{
+ struct dsa_port *cpu_dp = master->dsa_ptr;
+ struct dsa_switch_tree *dst = cpu_dp->dst;
+ struct dsa_port *dp;
+ int err;
+
+ err = dsa_master_lag_setup(lag_dev, cpu_dp, uinfo, extack);
+ if (err)
+ return err;
+
+ dsa_tree_for_each_user_port(dp, dst) {
+ if (dsa_port_to_master(dp) != master)
+ continue;
+
+ err = dsa_slave_change_master(dp->slave, lag_dev, extack);
+ if (err)
+ goto restore;
+ }
+
+ return 0;
+
+restore:
+ dsa_tree_for_each_user_port_continue_reverse(dp, dst) {
+ if (dsa_port_to_master(dp) != lag_dev)
+ continue;
+
+ err = dsa_slave_change_master(dp->slave, master, NULL);
+ if (err) {
+ netdev_err(dp->slave,
+ "failed to restore master to %s: %pe\n",
+ master->name, ERR_PTR(err));
+ }
+ }
+
+ dsa_master_lag_teardown(lag_dev, master->dsa_ptr);
+
+ return err;
+}
+
+static void dsa_master_lag_leave(struct net_device *master,
+ struct net_device *lag_dev)
+{
+ struct dsa_port *dp, *cpu_dp = lag_dev->dsa_ptr;
+ struct dsa_switch_tree *dst = cpu_dp->dst;
+ struct dsa_port *new_cpu_dp = NULL;
+ struct net_device *lower;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(lag_dev, lower, iter) {
+ if (netdev_uses_dsa(lower)) {
+ new_cpu_dp = lower->dsa_ptr;
+ break;
+ }
+ }
+
+ if (new_cpu_dp) {
+ /* Update the CPU port of the user ports still under the LAG
+ * so that dsa_port_to_master() continues to work properly
+ */
+ dsa_tree_for_each_user_port(dp, dst)
+ if (dsa_port_to_master(dp) == lag_dev)
+ dp->cpu_dp = new_cpu_dp;
+
+ /* Update the index of the virtual CPU port to match the lowest
+ * physical CPU port
+ */
+ lag_dev->dsa_ptr = new_cpu_dp;
+ wmb();
+ } else {
+ /* If the LAG DSA master has no ports left, migrate back all
+ * user ports to the first physical CPU port
+ */
+ dsa_tree_migrate_ports_from_lag_master(dst, lag_dev);
+ }
+
+ /* This DSA master has left its LAG in any case, so let
+ * the CPU port leave the hardware LAG as well
+ */
+ dsa_master_lag_teardown(lag_dev, master->dsa_ptr);
+}
+
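To make the two branches above concrete, a hedged walk-through with example device names:

/* Worked example: bond0 = { eth0, eth1 } is the DSA master and eth0
 * leaves the bond:
 * - if eth1 (another DSA master) remains, lag_dev->dsa_ptr is repointed
 *   to eth1's CPU port and the user ports keep bond0 as their master;
 * - if no DSA lower remains, every user port under bond0 is migrated
 *   back to the first physical CPU port of the tree.
 * In both cases, the CPU port behind the leaving master exits the
 * hardware LAG via dsa_master_lag_teardown().
 */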
+static int dsa_master_changeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct netlink_ext_ack *extack;
+ int err = NOTIFY_DONE;
+
+ if (!netdev_uses_dsa(dev))
+ return err;
+
+ extack = netdev_notifier_info_to_extack(&info->info);
+
+ if (netif_is_lag_master(info->upper_dev)) {
+ if (info->linking) {
+ err = dsa_master_lag_join(dev, info->upper_dev,
+ info->upper_info, extack);
+ err = notifier_from_errno(err);
+ } else {
+ dsa_master_lag_leave(dev, info->upper_dev);
+ err = NOTIFY_OK;
+ }
+ }
+
+ return err;
+}
+
static int dsa_slave_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
@@ -2314,38 +3100,100 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
int err;
err = dsa_slave_prechangeupper_sanity_check(dev, info);
- if (err != NOTIFY_DONE)
+ if (notifier_to_errno(err))
+ return err;
+
+ err = dsa_master_prechangeupper_sanity_check(dev, info);
+ if (notifier_to_errno(err))
+ return err;
+
+ err = dsa_lag_master_prechangelower_sanity_check(dev, info);
+ if (notifier_to_errno(err))
+ return err;
+
+ err = dsa_bridge_prechangelower_sanity_check(dev, info);
+ if (notifier_to_errno(err))
return err;
- if (dsa_slave_dev_check(dev))
- return dsa_slave_prechangeupper(dev, ptr);
+ err = dsa_slave_prechangeupper(dev, ptr);
+ if (notifier_to_errno(err))
+ return err;
- if (netif_is_lag_master(dev))
- return dsa_slave_lag_prechangeupper(dev, ptr);
+ err = dsa_slave_lag_prechangeupper(dev, ptr);
+ if (notifier_to_errno(err))
+ return err;
break;
}
- case NETDEV_CHANGEUPPER:
- if (dsa_slave_dev_check(dev))
- return dsa_slave_changeupper(dev, ptr);
+ case NETDEV_CHANGEUPPER: {
+ int err;
- if (netif_is_lag_master(dev))
- return dsa_slave_lag_changeupper(dev, ptr);
+ err = dsa_slave_changeupper(dev, ptr);
+ if (notifier_to_errno(err))
+ return err;
+
+ err = dsa_slave_lag_changeupper(dev, ptr);
+ if (notifier_to_errno(err))
+ return err;
+
+ err = dsa_master_changeupper(dev, ptr);
+ if (notifier_to_errno(err))
+ return err;
break;
+ }
case NETDEV_CHANGELOWERSTATE: {
struct netdev_notifier_changelowerstate_info *info = ptr;
struct dsa_port *dp;
- int err;
+ int err = 0;
- if (!dsa_slave_dev_check(dev))
- break;
+ if (dsa_slave_dev_check(dev)) {
+ dp = dsa_slave_to_port(dev);
+
+ err = dsa_port_lag_change(dp, info->lower_state_info);
+ }
+
+ /* Mirror LAG port events on DSA masters that are in
+ * a LAG towards their respective switch CPU ports
+ */
+ if (netdev_uses_dsa(dev)) {
+ dp = dev->dsa_ptr;
- dp = dsa_slave_to_port(dev);
+ err = dsa_port_lag_change(dp, info->lower_state_info);
+ }
- err = dsa_port_lag_change(dp, info->lower_state_info);
return notifier_from_errno(err);
}
+ case NETDEV_CHANGE:
+ case NETDEV_UP: {
+		/* Track the state of the master port.
+		 * A DSA driver may require the master port (and indirectly
+		 * the tagger) to be available for some special operations.
+		 */
+ if (netdev_uses_dsa(dev)) {
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ struct dsa_switch_tree *dst = cpu_dp->ds->dst;
+
+ /* Track when the master port is UP */
+ dsa_tree_master_oper_state_change(dst, dev,
+ netif_oper_up(dev));
+
+			/* Track when the master port is ready and can accept
+			 * packets.
+			 * A NETDEV_UP event is not enough to flag a port as
+			 * ready. We also have to wait for linkwatch_do_dev()
+			 * to dev_activate() and emit a NETDEV_CHANGE event.
+			 * We consider a master port ready when the dev has a
+			 * qdisc assigned, and it is not the noop qdisc.
+			 */
+ dsa_tree_master_admin_state_change(dst, dev,
+ !qdisc_tx_is_noop(dev));
+
+ return NOTIFY_OK;
+ }
+
+ return NOTIFY_DONE;
+ }
case NETDEV_GOING_DOWN: {
struct dsa_port *dp, *cpu_dp;
struct dsa_switch_tree *dst;
@@ -2357,10 +3205,15 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
cpu_dp = dev->dsa_ptr;
dst = cpu_dp->ds->dst;
+ dsa_tree_master_admin_state_change(dst, dev, false);
+
list_for_each_entry(dp, &dst->ports, list) {
if (!dsa_port_is_user(dp))
continue;
+ if (dp->cpu_dp != cpu_dp)
+ continue;
+
list_add(&dp->slave->close_list, &close_list);
}
@@ -2379,43 +3232,40 @@ static void
dsa_fdb_offload_notify(struct dsa_switchdev_event_work *switchdev_work)
{
struct switchdev_notifier_fdb_info info = {};
- struct dsa_switch *ds = switchdev_work->ds;
- struct dsa_port *dp;
-
- if (!dsa_is_user_port(ds, switchdev_work->port))
- return;
info.addr = switchdev_work->addr;
info.vid = switchdev_work->vid;
info.offloaded = true;
- dp = dsa_to_port(ds, switchdev_work->port);
call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED,
- dp->slave, &info.info, NULL);
+ switchdev_work->orig_dev, &info.info, NULL);
}
static void dsa_slave_switchdev_event_work(struct work_struct *work)
{
struct dsa_switchdev_event_work *switchdev_work =
container_of(work, struct dsa_switchdev_event_work, work);
- struct dsa_switch *ds = switchdev_work->ds;
+ const unsigned char *addr = switchdev_work->addr;
+ struct net_device *dev = switchdev_work->dev;
+ u16 vid = switchdev_work->vid;
+ struct dsa_switch *ds;
struct dsa_port *dp;
int err;
- dp = dsa_to_port(ds, switchdev_work->port);
+ dp = dsa_slave_to_port(dev);
+ ds = dp->ds;
switch (switchdev_work->event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
if (switchdev_work->host_addr)
- err = dsa_port_host_fdb_add(dp, switchdev_work->addr,
- switchdev_work->vid);
+ err = dsa_port_bridge_host_fdb_add(dp, addr, vid);
+ else if (dp->lag)
+ err = dsa_port_lag_fdb_add(dp, addr, vid);
else
- err = dsa_port_fdb_add(dp, switchdev_work->addr,
- switchdev_work->vid);
+ err = dsa_port_fdb_add(dp, addr, vid);
if (err) {
dev_err(ds->dev,
"port %d failed to add %pM vid %d to fdb: %d\n",
- dp->index, switchdev_work->addr,
- switchdev_work->vid, err);
+ dp->index, addr, vid, err);
break;
}
dsa_fdb_offload_notify(switchdev_work);
@@ -2423,16 +3273,15 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
case SWITCHDEV_FDB_DEL_TO_DEVICE:
if (switchdev_work->host_addr)
- err = dsa_port_host_fdb_del(dp, switchdev_work->addr,
- switchdev_work->vid);
+ err = dsa_port_bridge_host_fdb_del(dp, addr, vid);
+ else if (dp->lag)
+ err = dsa_port_lag_fdb_del(dp, addr, vid);
else
- err = dsa_port_fdb_del(dp, switchdev_work->addr,
- switchdev_work->vid);
+ err = dsa_port_fdb_del(dp, addr, vid);
if (err) {
dev_err(ds->dev,
"port %d failed to delete %pM vid %d from fdb: %d\n",
- dp->index, switchdev_work->addr,
- switchdev_work->vid, err);
+ dp->index, addr, vid, err);
}
break;
@@ -2470,19 +3319,20 @@ static int dsa_slave_fdb_event(struct net_device *dev,
if (ctx && ctx != dp)
return 0;
- if (!ds->ops->port_fdb_add || !ds->ops->port_fdb_del)
- return -EOPNOTSUPP;
-
- if (dsa_slave_dev_check(orig_dev) &&
- switchdev_fdb_is_dynamically_learned(fdb_info))
+ if (!dp->bridge)
return 0;
- /* FDB entries learned by the software bridge should be installed as
- * host addresses only if the driver requests assisted learning.
- */
- if (switchdev_fdb_is_dynamically_learned(fdb_info) &&
- !ds->assisted_learning_on_cpu_port)
- return 0;
+ if (switchdev_fdb_is_dynamically_learned(fdb_info)) {
+ if (dsa_port_offloads_bridge_port(dp, orig_dev))
+ return 0;
+
+ /* FDB entries learned by the software bridge or by foreign
+ * bridge ports should be installed as host addresses only if
+ * the driver requests assisted learning.
+ */
+ if (!ds->assisted_learning_on_cpu_port)
+ return 0;
+ }
/* Also treat FDB entries on foreign interfaces bridged with us as host
* addresses.
@@ -2490,6 +3340,18 @@ static int dsa_slave_fdb_event(struct net_device *dev,
if (dsa_foreign_dev_check(dev, orig_dev))
host_addr = true;
+ /* Check early that we're not doing work in vain.
+ * Host addresses on LAG ports still require regular FDB ops,
+ * since the CPU port isn't in a LAG.
+ */
+ if (dp->lag && !host_addr) {
+ if (!ds->ops->lag_fdb_add || !ds->ops->lag_fdb_del)
+ return -EOPNOTSUPP;
+ } else {
+ if (!ds->ops->port_fdb_add || !ds->ops->port_fdb_del)
+ return -EOPNOTSUPP;
+ }
+
switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
if (!switchdev_work)
return -ENOMEM;
@@ -2500,10 +3362,9 @@ static int dsa_slave_fdb_event(struct net_device *dev,
host_addr ? " as host address" : "");
INIT_WORK(&switchdev_work->work, dsa_slave_switchdev_event_work);
- switchdev_work->ds = ds;
- switchdev_work->port = dp->index;
switchdev_work->event = event;
switchdev_work->dev = dev;
+ switchdev_work->orig_dev = orig_dev;
ether_addr_copy(switchdev_work->addr, fdb_info->addr);
switchdev_work->vid = fdb_info->vid;
@@ -2532,8 +3393,7 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
err = switchdev_handle_fdb_event_to_device(dev, event, ptr,
dsa_slave_dev_check,
dsa_foreign_dev_check,
- dsa_slave_fdb_event,
- NULL);
+ dsa_slave_fdb_event);
return notifier_from_errno(err);
default:
return NOTIFY_DONE;
@@ -2550,14 +3410,16 @@ static int dsa_slave_switchdev_blocking_event(struct notifier_block *unused,
switch (event) {
case SWITCHDEV_PORT_OBJ_ADD:
- err = switchdev_handle_port_obj_add(dev, ptr,
- dsa_slave_dev_check,
- dsa_slave_port_obj_add);
+ err = switchdev_handle_port_obj_add_foreign(dev, ptr,
+ dsa_slave_dev_check,
+ dsa_foreign_dev_check,
+ dsa_slave_port_obj_add);
return notifier_from_errno(err);
case SWITCHDEV_PORT_OBJ_DEL:
- err = switchdev_handle_port_obj_del(dev, ptr,
- dsa_slave_dev_check,
- dsa_slave_port_obj_del);
+ err = switchdev_handle_port_obj_del_foreign(dev, ptr,
+ dsa_slave_dev_check,
+ dsa_foreign_dev_check,
+ dsa_slave_port_obj_del);
return notifier_from_errno(err);
case SWITCHDEV_PORT_ATTR_SET:
err = switchdev_handle_port_attr_set(dev, ptr,
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index e3c7d2627a61..ce56acdba203 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -49,19 +49,7 @@ static int dsa_switch_ageing_time(struct dsa_switch *ds,
static bool dsa_port_mtu_match(struct dsa_port *dp,
struct dsa_notifier_mtu_info *info)
{
- if (dp->ds->index == info->sw_index && dp->index == info->port)
- return true;
-
- /* Do not propagate to other switches in the tree if the notifier was
- * targeted for a single switch.
- */
- if (info->targeted_match)
- return false;
-
- if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp))
- return true;
-
- return false;
+ return dp == info->dp || dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp);
}
static int dsa_switch_mtu(struct dsa_switch *ds,
@@ -88,91 +76,47 @@ static int dsa_switch_mtu(struct dsa_switch *ds,
static int dsa_switch_bridge_join(struct dsa_switch *ds,
struct dsa_notifier_bridge_info *info)
{
- struct dsa_switch_tree *dst = ds->dst;
int err;
- if (dst->index == info->tree_index && ds->index == info->sw_index) {
+ if (info->dp->ds == ds) {
if (!ds->ops->port_bridge_join)
return -EOPNOTSUPP;
- err = ds->ops->port_bridge_join(ds, info->port, info->bridge,
- &info->tx_fwd_offload);
+ err = ds->ops->port_bridge_join(ds, info->dp->index,
+ info->bridge,
+ &info->tx_fwd_offload,
+ info->extack);
if (err)
return err;
}
- if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
- ds->ops->crosschip_bridge_join) {
- err = ds->ops->crosschip_bridge_join(ds, info->tree_index,
- info->sw_index,
- info->port, info->bridge);
+ if (info->dp->ds != ds && ds->ops->crosschip_bridge_join) {
+ err = ds->ops->crosschip_bridge_join(ds,
+ info->dp->ds->dst->index,
+ info->dp->ds->index,
+ info->dp->index,
+ info->bridge,
+ info->extack);
if (err)
return err;
}
- return dsa_tag_8021q_bridge_join(ds, info);
+ return 0;
}
static int dsa_switch_bridge_leave(struct dsa_switch *ds,
struct dsa_notifier_bridge_info *info)
{
- struct dsa_switch_tree *dst = ds->dst;
- struct netlink_ext_ack extack = {0};
- bool change_vlan_filtering = false;
- bool vlan_filtering;
- struct dsa_port *dp;
- int err;
+ if (info->dp->ds == ds && ds->ops->port_bridge_leave)
+ ds->ops->port_bridge_leave(ds, info->dp->index, info->bridge);
- if (dst->index == info->tree_index && ds->index == info->sw_index &&
- ds->ops->port_bridge_leave)
- ds->ops->port_bridge_leave(ds, info->port, info->bridge);
-
- if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
- ds->ops->crosschip_bridge_leave)
- ds->ops->crosschip_bridge_leave(ds, info->tree_index,
- info->sw_index, info->port,
+ if (info->dp->ds != ds && ds->ops->crosschip_bridge_leave)
+ ds->ops->crosschip_bridge_leave(ds, info->dp->ds->dst->index,
+ info->dp->ds->index,
+ info->dp->index,
info->bridge);
- if (ds->needs_standalone_vlan_filtering &&
- !br_vlan_enabled(info->bridge.dev)) {
- change_vlan_filtering = true;
- vlan_filtering = true;
- } else if (!ds->needs_standalone_vlan_filtering &&
- br_vlan_enabled(info->bridge.dev)) {
- change_vlan_filtering = true;
- vlan_filtering = false;
- }
-
- /* If the bridge was vlan_filtering, the bridge core doesn't trigger an
- * event for changing vlan_filtering setting upon slave ports leaving
- * it. That is a good thing, because that lets us handle it and also
- * handle the case where the switch's vlan_filtering setting is global
- * (not per port). When that happens, the correct moment to trigger the
- * vlan_filtering callback is only when the last port leaves the last
- * VLAN-aware bridge.
- */
- if (change_vlan_filtering && ds->vlan_filtering_is_global) {
- dsa_switch_for_each_port(dp, ds) {
- struct net_device *br = dsa_port_bridge_dev_get(dp);
-
- if (br && br_vlan_enabled(br)) {
- change_vlan_filtering = false;
- break;
- }
- }
- }
-
- if (change_vlan_filtering) {
- err = dsa_port_vlan_filtering(dsa_to_port(ds, info->port),
- vlan_filtering, &extack);
- if (extack._msg)
- dev_err(ds->dev, "port %d: %s\n", info->port,
- extack._msg);
- if (err && err != -EOPNOTSUPP)
- return err;
- }
-
- return dsa_tag_8021q_bridge_leave(ds, info);
+ return 0;
}
/* Matches for all upstream-facing ports (the CPU port and all upstream-facing
@@ -180,16 +124,11 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
* emitted and its dedicated CPU port.
*/
static bool dsa_port_host_address_match(struct dsa_port *dp,
- int info_sw_index, int info_port)
+ const struct dsa_port *targeted_dp)
{
- struct dsa_port *targeted_dp, *cpu_dp;
- struct dsa_switch *targeted_ds;
+ struct dsa_port *cpu_dp = targeted_dp->cpu_dp;
- targeted_ds = dsa_switch_find(dp->ds->dst->index, info_sw_index);
- targeted_dp = dsa_to_port(targeted_ds, info_port);
- cpu_dp = targeted_dp->cpu_dp;
-
- if (dsa_switch_is_upstream_of(dp->ds, targeted_ds))
+ if (dsa_switch_is_upstream_of(dp->ds, targeted_dp->ds))
return dp->index == dsa_towards_port(dp->ds, cpu_dp->ds->index,
cpu_dp->index);
@@ -197,20 +136,22 @@ static bool dsa_port_host_address_match(struct dsa_port *dp,
}
static struct dsa_mac_addr *dsa_mac_addr_find(struct list_head *addr_list,
- const unsigned char *addr,
- u16 vid)
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db)
{
struct dsa_mac_addr *a;
list_for_each_entry(a, addr_list, list)
- if (ether_addr_equal(a->addr, addr) && a->vid == vid)
+ if (ether_addr_equal(a->addr, addr) && a->vid == vid &&
+ dsa_db_equal(&a->db, &db))
return a;
return NULL;
}
static int dsa_port_do_mdb_add(struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb)
+ const struct switchdev_obj_port_mdb *mdb,
+ struct dsa_db db)
{
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
@@ -219,11 +160,11 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp,
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
- return ds->ops->port_mdb_add(ds, port, mdb);
+ return ds->ops->port_mdb_add(ds, port, mdb, db);
mutex_lock(&dp->addr_lists_lock);
- a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid);
+ a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid, db);
if (a) {
refcount_inc(&a->refcount);
goto out;
@@ -235,7 +176,7 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp,
goto out;
}
- err = ds->ops->port_mdb_add(ds, port, mdb);
+ err = ds->ops->port_mdb_add(ds, port, mdb, db);
if (err) {
kfree(a);
goto out;
@@ -243,6 +184,7 @@ static int dsa_port_do_mdb_add(struct dsa_port *dp,
ether_addr_copy(a->addr, mdb->addr);
a->vid = mdb->vid;
+ a->db = db;
refcount_set(&a->refcount, 1);
list_add_tail(&a->list, &dp->mdbs);
@@ -253,7 +195,8 @@ out:
}
static int dsa_port_do_mdb_del(struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb)
+ const struct switchdev_obj_port_mdb *mdb,
+ struct dsa_db db)
{
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
@@ -262,11 +205,11 @@ static int dsa_port_do_mdb_del(struct dsa_port *dp,
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
- return ds->ops->port_mdb_del(ds, port, mdb);
+ return ds->ops->port_mdb_del(ds, port, mdb, db);
mutex_lock(&dp->addr_lists_lock);
- a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid);
+ a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid, db);
if (!a) {
err = -ENOENT;
goto out;
@@ -275,7 +218,7 @@ static int dsa_port_do_mdb_del(struct dsa_port *dp,
if (!refcount_dec_and_test(&a->refcount))
goto out;
- err = ds->ops->port_mdb_del(ds, port, mdb);
+ err = ds->ops->port_mdb_del(ds, port, mdb, db);
if (err) {
refcount_set(&a->refcount, 1);
goto out;
@@ -291,7 +234,7 @@ out:
}
static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
- u16 vid)
+ u16 vid, struct dsa_db db)
{
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
@@ -300,11 +243,11 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
- return ds->ops->port_fdb_add(ds, port, addr, vid);
+ return ds->ops->port_fdb_add(ds, port, addr, vid, db);
mutex_lock(&dp->addr_lists_lock);
- a = dsa_mac_addr_find(&dp->fdbs, addr, vid);
+ a = dsa_mac_addr_find(&dp->fdbs, addr, vid, db);
if (a) {
refcount_inc(&a->refcount);
goto out;
@@ -316,7 +259,7 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
goto out;
}
- err = ds->ops->port_fdb_add(ds, port, addr, vid);
+ err = ds->ops->port_fdb_add(ds, port, addr, vid, db);
if (err) {
kfree(a);
goto out;
@@ -324,6 +267,7 @@ static int dsa_port_do_fdb_add(struct dsa_port *dp, const unsigned char *addr,
ether_addr_copy(a->addr, addr);
a->vid = vid;
+ a->db = db;
refcount_set(&a->refcount, 1);
list_add_tail(&a->list, &dp->fdbs);
@@ -334,7 +278,7 @@ out:
}
static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr,
- u16 vid)
+ u16 vid, struct dsa_db db)
{
struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a;
@@ -343,11 +287,11 @@ static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr,
/* No need to bother with refcounting for user ports */
if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
- return ds->ops->port_fdb_del(ds, port, addr, vid);
+ return ds->ops->port_fdb_del(ds, port, addr, vid, db);
mutex_lock(&dp->addr_lists_lock);
- a = dsa_mac_addr_find(&dp->fdbs, addr, vid);
+ a = dsa_mac_addr_find(&dp->fdbs, addr, vid, db);
if (!a) {
err = -ENOENT;
goto out;
@@ -356,7 +300,7 @@ static int dsa_port_do_fdb_del(struct dsa_port *dp, const unsigned char *addr,
if (!refcount_dec_and_test(&a->refcount))
goto out;
- err = ds->ops->port_fdb_del(ds, port, addr, vid);
+ err = ds->ops->port_fdb_del(ds, port, addr, vid, db);
if (err) {
refcount_set(&a->refcount, 1);
goto out;
@@ -371,6 +315,78 @@ out:
return err;
}
+static int dsa_switch_do_lag_fdb_add(struct dsa_switch *ds, struct dsa_lag *lag,
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db)
+{
+ struct dsa_mac_addr *a;
+ int err = 0;
+
+ mutex_lock(&lag->fdb_lock);
+
+ a = dsa_mac_addr_find(&lag->fdbs, addr, vid, db);
+ if (a) {
+ refcount_inc(&a->refcount);
+ goto out;
+ }
+
+ a = kzalloc(sizeof(*a), GFP_KERNEL);
+ if (!a) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = ds->ops->lag_fdb_add(ds, *lag, addr, vid, db);
+ if (err) {
+ kfree(a);
+ goto out;
+ }
+
+ ether_addr_copy(a->addr, addr);
+ a->vid = vid;
+ a->db = db;
+ refcount_set(&a->refcount, 1);
+ list_add_tail(&a->list, &lag->fdbs);
+
+out:
+ mutex_unlock(&lag->fdb_lock);
+
+ return err;
+}
+
+static int dsa_switch_do_lag_fdb_del(struct dsa_switch *ds, struct dsa_lag *lag,
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db)
+{
+ struct dsa_mac_addr *a;
+ int err = 0;
+
+ mutex_lock(&lag->fdb_lock);
+
+ a = dsa_mac_addr_find(&lag->fdbs, addr, vid, db);
+ if (!a) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ if (!refcount_dec_and_test(&a->refcount))
+ goto out;
+
+ err = ds->ops->lag_fdb_del(ds, *lag, addr, vid, db);
+ if (err) {
+ refcount_set(&a->refcount, 1);
+ goto out;
+ }
+
+ list_del(&a->list);
+ kfree(a);
+
+out:
+ mutex_unlock(&lag->fdb_lock);
+
+ return err;
+}
+
static int dsa_switch_host_fdb_add(struct dsa_switch *ds,
struct dsa_notifier_fdb_info *info)
{
@@ -381,9 +397,16 @@ static int dsa_switch_host_fdb_add(struct dsa_switch *ds,
return -EOPNOTSUPP;
dsa_switch_for_each_port(dp, ds) {
- if (dsa_port_host_address_match(dp, info->sw_index,
- info->port)) {
- err = dsa_port_do_fdb_add(dp, info->addr, info->vid);
+ if (dsa_port_host_address_match(dp, info->dp)) {
+ if (dsa_port_is_cpu(dp) && info->dp->cpu_port_in_lag) {
+ err = dsa_switch_do_lag_fdb_add(ds, dp->lag,
+ info->addr,
+ info->vid,
+ info->db);
+ } else {
+ err = dsa_port_do_fdb_add(dp, info->addr,
+ info->vid, info->db);
+ }
if (err)
break;
}
@@ -402,9 +425,16 @@ static int dsa_switch_host_fdb_del(struct dsa_switch *ds,
return -EOPNOTSUPP;
dsa_switch_for_each_port(dp, ds) {
- if (dsa_port_host_address_match(dp, info->sw_index,
- info->port)) {
- err = dsa_port_do_fdb_del(dp, info->addr, info->vid);
+ if (dsa_port_host_address_match(dp, info->dp)) {
+ if (dsa_port_is_cpu(dp) && info->dp->cpu_port_in_lag) {
+ err = dsa_switch_do_lag_fdb_del(ds, dp->lag,
+ info->addr,
+ info->vid,
+ info->db);
+ } else {
+ err = dsa_port_do_fdb_del(dp, info->addr,
+ info->vid, info->db);
+ }
if (err)
break;
}
@@ -416,36 +446,72 @@ static int dsa_switch_host_fdb_del(struct dsa_switch *ds,
static int dsa_switch_fdb_add(struct dsa_switch *ds,
struct dsa_notifier_fdb_info *info)
{
- int port = dsa_towards_port(ds, info->sw_index, info->port);
+ int port = dsa_towards_port(ds, info->dp->ds->index, info->dp->index);
struct dsa_port *dp = dsa_to_port(ds, port);
if (!ds->ops->port_fdb_add)
return -EOPNOTSUPP;
- return dsa_port_do_fdb_add(dp, info->addr, info->vid);
+ return dsa_port_do_fdb_add(dp, info->addr, info->vid, info->db);
}
static int dsa_switch_fdb_del(struct dsa_switch *ds,
struct dsa_notifier_fdb_info *info)
{
- int port = dsa_towards_port(ds, info->sw_index, info->port);
+ int port = dsa_towards_port(ds, info->dp->ds->index, info->dp->index);
struct dsa_port *dp = dsa_to_port(ds, port);
if (!ds->ops->port_fdb_del)
return -EOPNOTSUPP;
- return dsa_port_do_fdb_del(dp, info->addr, info->vid);
+ return dsa_port_do_fdb_del(dp, info->addr, info->vid, info->db);
+}
+
+static int dsa_switch_lag_fdb_add(struct dsa_switch *ds,
+ struct dsa_notifier_lag_fdb_info *info)
+{
+ struct dsa_port *dp;
+
+ if (!ds->ops->lag_fdb_add)
+ return -EOPNOTSUPP;
+
+ /* Notify switch only if it has a port in this LAG */
+ dsa_switch_for_each_port(dp, ds)
+ if (dsa_port_offloads_lag(dp, info->lag))
+ return dsa_switch_do_lag_fdb_add(ds, info->lag,
+ info->addr, info->vid,
+ info->db);
+
+ return 0;
+}
+
+static int dsa_switch_lag_fdb_del(struct dsa_switch *ds,
+ struct dsa_notifier_lag_fdb_info *info)
+{
+ struct dsa_port *dp;
+
+ if (!ds->ops->lag_fdb_del)
+ return -EOPNOTSUPP;
+
+ /* Notify switch only if it has a port in this LAG */
+ dsa_switch_for_each_port(dp, ds)
+ if (dsa_port_offloads_lag(dp, info->lag))
+ return dsa_switch_do_lag_fdb_del(ds, info->lag,
+ info->addr, info->vid,
+ info->db);
+
+ return 0;
}
static int dsa_switch_lag_change(struct dsa_switch *ds,
struct dsa_notifier_lag_info *info)
{
- if (ds->index == info->sw_index && ds->ops->port_lag_change)
- return ds->ops->port_lag_change(ds, info->port);
+ if (info->dp->ds == ds && ds->ops->port_lag_change)
+ return ds->ops->port_lag_change(ds, info->dp->index);
- if (ds->index != info->sw_index && ds->ops->crosschip_lag_change)
- return ds->ops->crosschip_lag_change(ds, info->sw_index,
- info->port);
+ if (info->dp->ds != ds && ds->ops->crosschip_lag_change)
+ return ds->ops->crosschip_lag_change(ds, info->dp->ds->index,
+ info->dp->index);
return 0;
}
@@ -453,14 +519,14 @@ static int dsa_switch_lag_change(struct dsa_switch *ds,
static int dsa_switch_lag_join(struct dsa_switch *ds,
struct dsa_notifier_lag_info *info)
{
- if (ds->index == info->sw_index && ds->ops->port_lag_join)
- return ds->ops->port_lag_join(ds, info->port, info->lag,
- info->info);
+ if (info->dp->ds == ds && ds->ops->port_lag_join)
+ return ds->ops->port_lag_join(ds, info->dp->index, info->lag,
+ info->info, info->extack);
- if (ds->index != info->sw_index && ds->ops->crosschip_lag_join)
- return ds->ops->crosschip_lag_join(ds, info->sw_index,
- info->port, info->lag,
- info->info);
+ if (info->dp->ds != ds && ds->ops->crosschip_lag_join)
+ return ds->ops->crosschip_lag_join(ds, info->dp->ds->index,
+ info->dp->index, info->lag,
+ info->info, info->extack);
return -EOPNOTSUPP;
}
@@ -468,12 +534,12 @@ static int dsa_switch_lag_join(struct dsa_switch *ds,
static int dsa_switch_lag_leave(struct dsa_switch *ds,
struct dsa_notifier_lag_info *info)
{
- if (ds->index == info->sw_index && ds->ops->port_lag_leave)
- return ds->ops->port_lag_leave(ds, info->port, info->lag);
+ if (info->dp->ds == ds && ds->ops->port_lag_leave)
+ return ds->ops->port_lag_leave(ds, info->dp->index, info->lag);
- if (ds->index != info->sw_index && ds->ops->crosschip_lag_leave)
- return ds->ops->crosschip_lag_leave(ds, info->sw_index,
- info->port, info->lag);
+ if (info->dp->ds != ds && ds->ops->crosschip_lag_leave)
+ return ds->ops->crosschip_lag_leave(ds, info->dp->ds->index,
+ info->dp->index, info->lag);
return -EOPNOTSUPP;
}
@@ -481,25 +547,25 @@ static int dsa_switch_lag_leave(struct dsa_switch *ds,
static int dsa_switch_mdb_add(struct dsa_switch *ds,
struct dsa_notifier_mdb_info *info)
{
- int port = dsa_towards_port(ds, info->sw_index, info->port);
+ int port = dsa_towards_port(ds, info->dp->ds->index, info->dp->index);
struct dsa_port *dp = dsa_to_port(ds, port);
if (!ds->ops->port_mdb_add)
return -EOPNOTSUPP;
- return dsa_port_do_mdb_add(dp, info->mdb);
+ return dsa_port_do_mdb_add(dp, info->mdb, info->db);
}
static int dsa_switch_mdb_del(struct dsa_switch *ds,
struct dsa_notifier_mdb_info *info)
{
- int port = dsa_towards_port(ds, info->sw_index, info->port);
+ int port = dsa_towards_port(ds, info->dp->ds->index, info->dp->index);
struct dsa_port *dp = dsa_to_port(ds, port);
if (!ds->ops->port_mdb_del)
return -EOPNOTSUPP;
- return dsa_port_do_mdb_del(dp, info->mdb);
+ return dsa_port_do_mdb_del(dp, info->mdb, info->db);
}
static int dsa_switch_host_mdb_add(struct dsa_switch *ds,
@@ -512,9 +578,8 @@ static int dsa_switch_host_mdb_add(struct dsa_switch *ds,
return -EOPNOTSUPP;
dsa_switch_for_each_port(dp, ds) {
- if (dsa_port_host_address_match(dp, info->sw_index,
- info->port)) {
- err = dsa_port_do_mdb_add(dp, info->mdb);
+ if (dsa_port_host_address_match(dp, info->dp)) {
+ err = dsa_port_do_mdb_add(dp, info->mdb, info->db);
if (err)
break;
}
@@ -533,9 +598,8 @@ static int dsa_switch_host_mdb_del(struct dsa_switch *ds,
return -EOPNOTSUPP;
dsa_switch_for_each_port(dp, ds) {
- if (dsa_port_host_address_match(dp, info->sw_index,
- info->port)) {
- err = dsa_port_do_mdb_del(dp, info->mdb);
+ if (dsa_port_host_address_match(dp, info->dp)) {
+ err = dsa_port_do_mdb_del(dp, info->mdb, info->db);
if (err)
break;
}
@@ -544,18 +608,128 @@ static int dsa_switch_host_mdb_del(struct dsa_switch *ds,
return err;
}
+/* Port VLANs match on the targeted port and on all DSA ports */
static bool dsa_port_vlan_match(struct dsa_port *dp,
struct dsa_notifier_vlan_info *info)
{
- if (dp->ds->index == info->sw_index && dp->index == info->port)
- return true;
+ return dsa_port_is_dsa(dp) || dp == info->dp;
+}
- if (dsa_port_is_dsa(dp))
- return true;
+/* Host VLANs match on the targeted port's CPU port, and on all DSA ports
+ * (upstream and downstream) of that switch and its upstream switches.
+ */
+static bool dsa_port_host_vlan_match(struct dsa_port *dp,
+ const struct dsa_port *targeted_dp)
+{
+ struct dsa_port *cpu_dp = targeted_dp->cpu_dp;
+
+ if (dsa_switch_is_upstream_of(dp->ds, targeted_dp->ds))
+ return dsa_port_is_dsa(dp) || dp == cpu_dp;
return false;
}
+static struct dsa_vlan *dsa_vlan_find(struct list_head *vlan_list,
+ const struct switchdev_obj_port_vlan *vlan)
+{
+ struct dsa_vlan *v;
+
+ list_for_each_entry(v, vlan_list, list)
+ if (v->vid == vlan->vid)
+ return v;
+
+ return NULL;
+}
+
+static int dsa_port_do_vlan_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack)
+{
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+ struct dsa_vlan *v;
+ int err = 0;
+
+ /* No need to bother with refcounting for user ports. */
+ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
+ return ds->ops->port_vlan_add(ds, port, vlan, extack);
+
+	/* No need to propagate to shared ports the existing VLANs that were
+	 * re-notified when just their flags changed. That would cause a
+	 * refcount bump which we need to avoid, since it would unbalance the
+	 * additions with the deletions.
+	 */
+ if (vlan->changed)
+ return 0;
+
+ mutex_lock(&dp->vlans_lock);
+
+ v = dsa_vlan_find(&dp->vlans, vlan);
+ if (v) {
+ refcount_inc(&v->refcount);
+ goto out;
+ }
+
+ v = kzalloc(sizeof(*v), GFP_KERNEL);
+ if (!v) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = ds->ops->port_vlan_add(ds, port, vlan, extack);
+ if (err) {
+ kfree(v);
+ goto out;
+ }
+
+ v->vid = vlan->vid;
+ refcount_set(&v->refcount, 1);
+ list_add_tail(&v->list, &dp->vlans);
+
+out:
+ mutex_unlock(&dp->vlans_lock);
+
+ return err;
+}
+
+static int dsa_port_do_vlan_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan)
+{
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+ struct dsa_vlan *v;
+ int err = 0;
+
+ /* No need to bother with refcounting for user ports */
+ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
+ return ds->ops->port_vlan_del(ds, port, vlan);
+
+ mutex_lock(&dp->vlans_lock);
+
+ v = dsa_vlan_find(&dp->vlans, vlan);
+ if (!v) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ if (!refcount_dec_and_test(&v->refcount))
+ goto out;
+
+ err = ds->ops->port_vlan_del(ds, port, vlan);
+ if (err) {
+ refcount_set(&v->refcount, 1);
+ goto out;
+ }
+
+ list_del(&v->list);
+ kfree(v);
+
+out:
+ mutex_unlock(&dp->vlans_lock);
+
+ return err;
+}
+
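The refcounting on shared (CPU and DSA) ports balances additions against deletions; an illustrative sequence for two user ports joining and leaving the same VLAN:

/* dsa_port_do_vlan_add(cpu_dp, vid 100) -> hw write,  refcount 0 -> 1
 * dsa_port_do_vlan_add(cpu_dp, vid 100) -> no write,  refcount 1 -> 2
 * dsa_port_do_vlan_del(cpu_dp, vid 100) -> no write,  refcount 2 -> 1
 * dsa_port_do_vlan_del(cpu_dp, vid 100) -> hw delete, entry freed
 */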
static int dsa_switch_vlan_add(struct dsa_switch *ds,
struct dsa_notifier_vlan_info *info)
{
@@ -567,8 +741,8 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
dsa_switch_for_each_port(dp, ds) {
if (dsa_port_vlan_match(dp, info)) {
- err = ds->ops->port_vlan_add(ds, dp->index, info->vlan,
- info->extack);
+ err = dsa_port_do_vlan_add(dp, info->vlan,
+ info->extack);
if (err)
return err;
}
@@ -580,15 +754,61 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
static int dsa_switch_vlan_del(struct dsa_switch *ds,
struct dsa_notifier_vlan_info *info)
{
+ struct dsa_port *dp;
+ int err;
+
if (!ds->ops->port_vlan_del)
return -EOPNOTSUPP;
- if (ds->index == info->sw_index)
- return ds->ops->port_vlan_del(ds, info->port, info->vlan);
+ dsa_switch_for_each_port(dp, ds) {
+ if (dsa_port_vlan_match(dp, info)) {
+ err = dsa_port_do_vlan_del(dp, info->vlan);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int dsa_switch_host_vlan_add(struct dsa_switch *ds,
+ struct dsa_notifier_vlan_info *info)
+{
+ struct dsa_port *dp;
+ int err;
+
+ if (!ds->ops->port_vlan_add)
+ return -EOPNOTSUPP;
+
+ dsa_switch_for_each_port(dp, ds) {
+ if (dsa_port_host_vlan_match(dp, info->dp)) {
+ err = dsa_port_do_vlan_add(dp, info->vlan,
+ info->extack);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int dsa_switch_host_vlan_del(struct dsa_switch *ds,
+ struct dsa_notifier_vlan_info *info)
+{
+ struct dsa_port *dp;
+ int err;
+
+ if (!ds->ops->port_vlan_del)
+ return -EOPNOTSUPP;
+
+ dsa_switch_for_each_port(dp, ds) {
+ if (dsa_port_host_vlan_match(dp, info->dp)) {
+ err = dsa_port_do_vlan_del(dp, info->vlan);
+ if (err)
+ return err;
+ }
+ }
- /* Do not deprogram the DSA links as they may be used as conduit
- * for other VLAN members in the fabric.
- */
return 0;
}
@@ -604,14 +824,12 @@ static int dsa_switch_change_tag_proto(struct dsa_switch *ds,
ASSERT_RTNL();
- dsa_switch_for_each_cpu_port(cpu_dp, ds) {
- err = ds->ops->change_tag_protocol(ds, cpu_dp->index,
- tag_ops->proto);
- if (err)
- return err;
+ err = ds->ops->change_tag_protocol(ds, tag_ops->proto);
+ if (err)
+ return err;
+ dsa_switch_for_each_cpu_port(cpu_dp, ds)
dsa_port_set_tag_protocol(cpu_dp, tag_ops);
- }
/* Now that changing the tag protocol can no longer fail, let's update
* the remaining bits which are "duplicated for faster access", and the
@@ -683,6 +901,18 @@ dsa_switch_disconnect_tag_proto(struct dsa_switch *ds,
return 0;
}
+static int
+dsa_switch_master_state_change(struct dsa_switch *ds,
+ struct dsa_notifier_master_state_info *info)
+{
+ if (!ds->ops->master_state_change)
+ return 0;
+
+ ds->ops->master_state_change(ds, info->master, info->operational);
+
+ return 0;
+}
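A hedged sketch of a driver-side consumer of this op (the foo_* names are invented for illustration): a driver might gate host-directed flooding on whether the DSA master is operational.

static void foo_port_set_host_flood(struct dsa_switch *ds, int port, bool enable);

static void foo_master_state_change(struct dsa_switch *ds,
				    const struct net_device *master,
				    bool operational)
{
	struct dsa_port *cpu_dp = master->dsa_ptr;

	/* Hypothetical helper: flood towards the host only while it can receive */
	foo_port_set_host_flood(ds, cpu_dp->index, operational);
}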
+
static int dsa_switch_event(struct notifier_block *nb,
unsigned long event, void *info)
{
@@ -711,6 +941,12 @@ static int dsa_switch_event(struct notifier_block *nb,
case DSA_NOTIFIER_HOST_FDB_DEL:
err = dsa_switch_host_fdb_del(ds, info);
break;
+ case DSA_NOTIFIER_LAG_FDB_ADD:
+ err = dsa_switch_lag_fdb_add(ds, info);
+ break;
+ case DSA_NOTIFIER_LAG_FDB_DEL:
+ err = dsa_switch_lag_fdb_del(ds, info);
+ break;
case DSA_NOTIFIER_LAG_CHANGE:
err = dsa_switch_lag_change(ds, info);
break;
@@ -738,6 +974,12 @@ static int dsa_switch_event(struct notifier_block *nb,
case DSA_NOTIFIER_VLAN_DEL:
err = dsa_switch_vlan_del(ds, info);
break;
+ case DSA_NOTIFIER_HOST_VLAN_ADD:
+ err = dsa_switch_host_vlan_add(ds, info);
+ break;
+ case DSA_NOTIFIER_HOST_VLAN_DEL:
+ err = dsa_switch_host_vlan_del(ds, info);
+ break;
case DSA_NOTIFIER_MTU:
err = dsa_switch_mtu(ds, info);
break;
@@ -756,6 +998,9 @@ static int dsa_switch_event(struct notifier_block *nb,
case DSA_NOTIFIER_TAG_8021Q_VLAN_DEL:
err = dsa_switch_tag_8021q_vlan_del(ds, info);
break;
+ case DSA_NOTIFIER_MASTER_STATE_CHANGE:
+ err = dsa_switch_master_state_change(ds, info);
+ break;
default:
err = -EOPNOTSUPP;
break;
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 27712a81c967..34e5ec5d3e23 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -2,9 +2,7 @@
/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
*
* This module is not a complete tagger implementation. It only provides
- * primitives for taggers that rely on 802.1Q VLAN tags to use. The
- * dsa_8021q_netdev_ops is registered for API compliance and not used
- * directly by callers.
+ * primitives for taggers that rely on 802.1Q VLAN tags to use.
*/
#include <linux/if_vlan.h>
#include <linux/dsa/8021q.h>
@@ -16,15 +14,11 @@
*
* | 11 | 10 | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
* +-----------+-----+-----------------+-----------+-----------------------+
- * | DIR | VBID| SWITCH_ID | VBID | PORT |
+ * | RSV | VBID| SWITCH_ID | VBID | PORT |
* +-----------+-----+-----------------+-----------+-----------------------+
*
- * DIR - VID[11:10]:
- * Direction flags.
- * * 1 (0b01) for RX VLAN,
- * * 2 (0b10) for TX VLAN.
- * These values make the special VIDs of 0, 1 and 4095 to be left
- * unused by this coding scheme.
+ * RSV - VID[11:10]:
+ * Reserved. Must be set to 3 (0b11).
*
* SWITCH_ID - VID[8:6]:
* Index of switch within DSA tree. Must be between 0 and 7.
@@ -32,18 +26,17 @@
* VBID - { VID[9], VID[5:4] }:
* Virtual bridge ID. If between 1 and 7, packet targets the broadcast
* domain of a bridge. If transmitted as zero, packet targets a single
- * port. Field only valid on transmit, must be ignored on receive.
+ * port.
*
* PORT - VID[3:0]:
* Index of switch port. Must be between 0 and 15.
*/
-#define DSA_8021Q_DIR_SHIFT 10
-#define DSA_8021Q_DIR_MASK GENMASK(11, 10)
-#define DSA_8021Q_DIR(x) (((x) << DSA_8021Q_DIR_SHIFT) & \
- DSA_8021Q_DIR_MASK)
-#define DSA_8021Q_DIR_RX DSA_8021Q_DIR(1)
-#define DSA_8021Q_DIR_TX DSA_8021Q_DIR(2)
+#define DSA_8021Q_RSV_VAL 3
+#define DSA_8021Q_RSV_SHIFT 10
+#define DSA_8021Q_RSV_MASK GENMASK(11, 10)
+#define DSA_8021Q_RSV ((DSA_8021Q_RSV_VAL << DSA_8021Q_RSV_SHIFT) & \
+ DSA_8021Q_RSV_MASK)
#define DSA_8021Q_SWITCH_ID_SHIFT 6
#define DSA_8021Q_SWITCH_ID_MASK GENMASK(8, 6)
@@ -67,34 +60,24 @@
#define DSA_8021Q_PORT(x) (((x) << DSA_8021Q_PORT_SHIFT) & \
DSA_8021Q_PORT_MASK)
-u16 dsa_8021q_bridge_tx_fwd_offload_vid(unsigned int bridge_num)
+u16 dsa_tag_8021q_bridge_vid(unsigned int bridge_num)
{
/* The VBID value of 0 is reserved for precise TX, but it is also
* reserved/invalid for the bridge_num, so all is well.
*/
- return DSA_8021Q_DIR_TX | DSA_8021Q_VBID(bridge_num);
+ return DSA_8021Q_RSV | DSA_8021Q_VBID(bridge_num);
}
-EXPORT_SYMBOL_GPL(dsa_8021q_bridge_tx_fwd_offload_vid);
-
-/* Returns the VID to be inserted into the frame from xmit for switch steering
- * instructions on egress. Encodes switch ID and port ID.
- */
-u16 dsa_tag_8021q_tx_vid(const struct dsa_port *dp)
-{
- return DSA_8021Q_DIR_TX | DSA_8021Q_SWITCH_ID(dp->ds->index) |
- DSA_8021Q_PORT(dp->index);
-}
-EXPORT_SYMBOL_GPL(dsa_tag_8021q_tx_vid);
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_vid);
/* Returns the VID that will be installed as pvid for this switch port, sent as
* tagged egress towards the CPU port and decoded by the rcv function.
*/
-u16 dsa_tag_8021q_rx_vid(const struct dsa_port *dp)
+u16 dsa_tag_8021q_standalone_vid(const struct dsa_port *dp)
{
- return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(dp->ds->index) |
+ return DSA_8021Q_RSV | DSA_8021Q_SWITCH_ID(dp->ds->index) |
DSA_8021Q_PORT(dp->index);
}
-EXPORT_SYMBOL_GPL(dsa_tag_8021q_rx_vid);
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_standalone_vid);
/* Returns the decoded switch ID from the RX VID. */
int dsa_8021q_rx_switch_id(u16 vid)
@@ -110,21 +93,20 @@ int dsa_8021q_rx_source_port(u16 vid)
}
EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port);
-bool vid_is_dsa_8021q_rxvlan(u16 vid)
+/* Returns the decoded VBID from the RX VID. */
+static int dsa_tag_8021q_rx_vbid(u16 vid)
{
- return (vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_RX;
-}
-EXPORT_SYMBOL_GPL(vid_is_dsa_8021q_rxvlan);
+ u16 vbid_hi = (vid & DSA_8021Q_VBID_HI_MASK) >> DSA_8021Q_VBID_HI_SHIFT;
+ u16 vbid_lo = (vid & DSA_8021Q_VBID_LO_MASK) >> DSA_8021Q_VBID_LO_SHIFT;
-bool vid_is_dsa_8021q_txvlan(u16 vid)
-{
- return (vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_TX;
+ return (vbid_hi << 2) | vbid_lo;
}
-EXPORT_SYMBOL_GPL(vid_is_dsa_8021q_txvlan);
bool vid_is_dsa_8021q(u16 vid)
{
- return vid_is_dsa_8021q_rxvlan(vid) || vid_is_dsa_8021q_txvlan(vid);
+ u16 rsv = (vid & DSA_8021Q_RSV_MASK) >> DSA_8021Q_RSV_SHIFT;
+
+ return rsv == DSA_8021Q_RSV_VAL;
}
EXPORT_SYMBOL_GPL(vid_is_dsa_8021q);
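To make the new coding scheme concrete, an editorial round-trip sketch using the helpers above (the SWITCH_ID/VBID composition macros are the file's existing ones; this is not part of the patch):

static void dsa_8021q_vid_example(void)
{
	/* RSV=0b11, SWITCH_ID=2, PORT=5 => VID 0xc85 */
	u16 vid = DSA_8021Q_RSV | DSA_8021Q_SWITCH_ID(2) | DSA_8021Q_PORT(5);

	WARN_ON(!vid_is_dsa_8021q(vid));
	WARN_ON(dsa_8021q_rx_switch_id(vid) != 2);
	WARN_ON(dsa_8021q_rx_source_port(vid) != 5);

	/* A bridge VID for bridge_num 5 (0b101) spreads the VBID over
	 * VID[9] and VID[5:4], and decodes back via the helper above.
	 */
	vid = dsa_tag_8021q_bridge_vid(5);
	WARN_ON(dsa_tag_8021q_rx_vbid(vid) != 5);
}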
@@ -212,15 +194,7 @@ static bool
dsa_port_tag_8021q_vlan_match(struct dsa_port *dp,
struct dsa_notifier_tag_8021q_vlan_info *info)
{
- struct dsa_switch *ds = dp->ds;
-
- if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp))
- return true;
-
- if (ds->dst->index == info->tree_index && ds->index == info->sw_index)
- return dp->index == info->port;
-
- return false;
+ return dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp) || dp == info->dp;
}
int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
@@ -242,12 +216,8 @@ int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
u16 flags = 0;
if (dsa_port_is_user(dp))
- flags |= BRIDGE_VLAN_INFO_UNTAGGED;
-
- if (vid_is_dsa_8021q_rxvlan(info->vid) &&
- dsa_8021q_rx_switch_id(info->vid) == ds->index &&
- dsa_8021q_rx_source_port(info->vid) == dp->index)
- flags |= BRIDGE_VLAN_INFO_PVID;
+ flags |= BRIDGE_VLAN_INFO_UNTAGGED |
+ BRIDGE_VLAN_INFO_PVID;
err = dsa_port_do_tag_8021q_vlan_add(dp, info->vid,
flags);
@@ -279,162 +249,78 @@ int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds,
return 0;
}
-/* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single
- * front-panel switch port (here swp0).
+/* There are 2 ways of offloading tag_8021q VLANs.
*
- * Port identification through VLAN (802.1Q) tags has different requirements
- * for it to work effectively:
- * - On RX (ingress from network): each front-panel port must have a pvid
- * that uniquely identifies it, and the egress of this pvid must be tagged
- * towards the CPU port, so that software can recover the source port based
- * on the VID in the frame. But this would only work for standalone ports;
- * if bridged, this VLAN setup would break autonomous forwarding and would
- * force all switched traffic to pass through the CPU. So we must also make
- * the other front-panel ports members of this VID we're adding, albeit
- * we're not making it their PVID (they'll still have their own).
- * - On TX (ingress from CPU and towards network) we are faced with a problem.
- * If we were to tag traffic (from within DSA) with the port's pvid, all
- * would be well, assuming the switch ports were standalone. Frames would
- * have no choice but to be directed towards the correct front-panel port.
- * But because we also want the RX VLAN to not break bridging, then
- * inevitably that means that we have to give them a choice (of what
- * front-panel port to go out on), and therefore we cannot steer traffic
- * based on the RX VID. So what we do is simply install one more VID on the
- * front-panel and CPU ports, and profit off of the fact that steering will
- * work just by virtue of the fact that there is only one other port that's
- * a member of the VID we're tagging the traffic with - the desired one.
+ * One is to use a hardware TCAM to push the port's standalone VLAN into the
+ * frame when forwarding it to the CPU, as an egress modification rule on the
+ * CPU port. This is preferable because it has no side effects for the
+ * autonomous forwarding path, and accomplishes tag_8021q's primary goal of
+ * identifying the source port of each packet based on VLAN ID.
*
- * So at the end, each front-panel port will have one RX VID (also the PVID),
- * the RX VID of all other front-panel ports that are in the same bridge, and
- * one TX VID. Whereas the CPU port will have the RX and TX VIDs of all
- * front-panel ports, and on top of that, is also tagged-input and
- * tagged-output (VLAN trunk).
+ * The other is to commit the tag_8021q VLAN as a PVID to the VLAN table, and
+ * to configure the port as VLAN-unaware. This is less preferable because
+ * unique source port identification can only be done for standalone ports;
+ * under a VLAN-unaware bridge, all ports share the same tag_8021q VLAN as
+ * PVID, and under a VLAN-aware bridge, packets received by software will not
+ * have tag_8021q VLANs appended, just bridge VLANs.
*
- * CPU port CPU port
- * +-------------+-----+-------------+ +-------------+-----+-------------+
- * | RX VID | | | | TX VID | | |
- * | of swp0 | | | | of swp0 | | |
- * | +-----+ | | +-----+ |
- * | ^ T | | | Tagged |
- * | | | | | ingress |
- * | +-------+---+---+-------+ | | +-----------+ |
- * | | | | | | | | Untagged |
- * | | U v U v U v | | v egress |
- * | +-----+ +-----+ +-----+ +-----+ | | +-----+ +-----+ +-----+ +-----+ |
- * | | | | | | | | | | | | | | | | | | | |
- * | |PVID | | | | | | | | | | | | | | | | | |
- * +-+-----+-+-----+-+-----+-+-----+-+ +-+-----+-+-----+-+-----+-+-----+-+
- * swp0 swp1 swp2 swp3 swp0 swp1 swp2 swp3
+ * For tag_8021q implementations of the second type, this method is used to
+ * replace the standalone tag_8021q VLAN of a port with the tag_8021q VLAN to
+ * be used for VLAN-unaware bridging.
*/
-static bool
-dsa_port_tag_8021q_bridge_match(struct dsa_port *dp,
- struct dsa_notifier_bridge_info *info)
-{
- /* Don't match on self */
- if (dp->ds->dst->index == info->tree_index &&
- dp->ds->index == info->sw_index &&
- dp->index == info->port)
- return false;
-
- if (dsa_port_is_user(dp))
- return dsa_port_offloads_bridge(dp, &info->bridge);
-
- return false;
-}
-
-int dsa_tag_8021q_bridge_join(struct dsa_switch *ds,
- struct dsa_notifier_bridge_info *info)
+int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port,
+ struct dsa_bridge bridge)
{
- struct dsa_switch *targeted_ds;
- struct dsa_port *targeted_dp;
- struct dsa_port *dp;
- u16 targeted_rx_vid;
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ u16 standalone_vid, bridge_vid;
int err;
- if (!ds->tag_8021q_ctx)
- return 0;
-
- targeted_ds = dsa_switch_find(info->tree_index, info->sw_index);
- targeted_dp = dsa_to_port(targeted_ds, info->port);
- targeted_rx_vid = dsa_tag_8021q_rx_vid(targeted_dp);
-
- dsa_switch_for_each_port(dp, ds) {
- u16 rx_vid = dsa_tag_8021q_rx_vid(dp);
+ /* Delete the standalone VLAN of the port and replace it with a
+ * bridging VLAN
+ */
+ standalone_vid = dsa_tag_8021q_standalone_vid(dp);
+ bridge_vid = dsa_tag_8021q_bridge_vid(bridge.num);
- if (!dsa_port_tag_8021q_bridge_match(dp, info))
- continue;
+ err = dsa_port_tag_8021q_vlan_add(dp, bridge_vid, true);
+ if (err)
+ return err;
- /* Install the RX VID of the targeted port in our VLAN table */
- err = dsa_port_tag_8021q_vlan_add(dp, targeted_rx_vid, true);
- if (err)
- return err;
-
- /* Install our RX VID into the targeted port's VLAN table */
- err = dsa_port_tag_8021q_vlan_add(targeted_dp, rx_vid, true);
- if (err)
- return err;
- }
+ dsa_port_tag_8021q_vlan_del(dp, standalone_vid, false);
return 0;
}
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_join);
-int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds,
- struct dsa_notifier_bridge_info *info)
+void dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, int port,
+ struct dsa_bridge bridge)
{
- struct dsa_switch *targeted_ds;
- struct dsa_port *targeted_dp;
- struct dsa_port *dp;
- u16 targeted_rx_vid;
-
- if (!ds->tag_8021q_ctx)
- return 0;
-
- targeted_ds = dsa_switch_find(info->tree_index, info->sw_index);
- targeted_dp = dsa_to_port(targeted_ds, info->port);
- targeted_rx_vid = dsa_tag_8021q_rx_vid(targeted_dp);
-
- dsa_switch_for_each_port(dp, ds) {
- u16 rx_vid = dsa_tag_8021q_rx_vid(dp);
-
- if (!dsa_port_tag_8021q_bridge_match(dp, info))
- continue;
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ u16 standalone_vid, bridge_vid;
+ int err;
- /* Remove the RX VID of the targeted port from our VLAN table */
- dsa_port_tag_8021q_vlan_del(dp, targeted_rx_vid, true);
+ /* Delete the bridging VLAN of the port and replace it with a
+ * standalone VLAN
+ */
+ standalone_vid = dsa_tag_8021q_standalone_vid(dp);
+ bridge_vid = dsa_tag_8021q_bridge_vid(bridge.num);
- /* Remove our RX VID from the targeted port's VLAN table */
- dsa_port_tag_8021q_vlan_del(targeted_dp, rx_vid, true);
+ err = dsa_port_tag_8021q_vlan_add(dp, standalone_vid, false);
+ if (err) {
+ dev_err(ds->dev,
+ "Failed to delete tag_8021q standalone VLAN %d from port %d: %pe\n",
+ standalone_vid, port, ERR_PTR(err));
}
- return 0;
-}
-
-int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port,
- struct dsa_bridge bridge)
-{
- u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge.num);
-
- return dsa_port_tag_8021q_vlan_add(dsa_to_port(ds, port), tx_vid,
- true);
-}
-EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_tx_fwd_offload);
-
-void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port,
- struct dsa_bridge bridge)
-{
- u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge.num);
-
- dsa_port_tag_8021q_vlan_del(dsa_to_port(ds, port), tx_vid, true);
+ dsa_port_tag_8021q_vlan_del(dp, bridge_vid, true);
}
-EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_tx_fwd_unoffload);
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_leave);
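Hedged usage sketch: a driver of the "second type" described in the comment above simply forwards its bridge ops to these helpers. The exact op signature varies between kernel versions, so treat this as a shape rather than a drop-in; the foo_* names are invented.

static int foo_port_bridge_join(struct dsa_switch *ds, int port,
				struct dsa_bridge bridge,
				bool *tx_fwd_offload,
				struct netlink_ext_ack *extack)
{
	return dsa_tag_8021q_bridge_join(ds, port, bridge);
}

static void foo_port_bridge_leave(struct dsa_switch *ds, int port,
				  struct dsa_bridge bridge)
{
	dsa_tag_8021q_bridge_leave(ds, port, bridge);
}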
-/* Set up a port's tag_8021q RX and TX VLAN for standalone mode operation */
+/* Set up a port's standalone tag_8021q VLAN */
static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port)
{
struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
struct dsa_port *dp = dsa_to_port(ds, port);
- u16 rx_vid = dsa_tag_8021q_rx_vid(dp);
- u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
+ u16 vid = dsa_tag_8021q_standalone_vid(dp);
struct net_device *master;
int err;
@@ -444,32 +330,18 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port)
if (!dsa_port_is_user(dp))
return 0;
- master = dp->cpu_dp->master;
+ master = dsa_port_to_master(dp);
- /* Add this user port's RX VID to the membership list of all others
- * (including itself). This is so that bridging will not be hindered.
- * L2 forwarding rules still take precedence when there are no VLAN
- * restrictions, so there are no concerns about leaking traffic.
- */
- err = dsa_port_tag_8021q_vlan_add(dp, rx_vid, false);
+ err = dsa_port_tag_8021q_vlan_add(dp, vid, false);
if (err) {
dev_err(ds->dev,
- "Failed to apply RX VID %d to port %d: %pe\n",
- rx_vid, port, ERR_PTR(err));
+ "Failed to apply standalone VID %d to port %d: %pe\n",
+ vid, port, ERR_PTR(err));
return err;
}
- /* Add @rx_vid to the master's RX filter. */
- vlan_vid_add(master, ctx->proto, rx_vid);
-
- /* Finally apply the TX VID on this port and on the CPU port */
- err = dsa_port_tag_8021q_vlan_add(dp, tx_vid, false);
- if (err) {
- dev_err(ds->dev,
- "Failed to apply TX VID %d on port %d: %pe\n",
- tx_vid, port, ERR_PTR(err));
- return err;
- }
+ /* Add the VLAN to the master's RX filter. */
+ vlan_vid_add(master, ctx->proto, vid);
return err;
}
@@ -478,8 +350,7 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port)
{
struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
struct dsa_port *dp = dsa_to_port(ds, port);
- u16 rx_vid = dsa_tag_8021q_rx_vid(dp);
- u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
+ u16 vid = dsa_tag_8021q_standalone_vid(dp);
struct net_device *master;
/* The CPU port is implicitly configured by
@@ -488,13 +359,11 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port)
if (!dsa_port_is_user(dp))
return;
- master = dp->cpu_dp->master;
-
- dsa_port_tag_8021q_vlan_del(dp, rx_vid, false);
+ master = dsa_port_to_master(dp);
- vlan_vid_del(master, ctx->proto, rx_vid);
+ dsa_port_tag_8021q_vlan_del(dp, vid, false);
- dsa_port_tag_8021q_vlan_del(dp, tx_vid, false);
+ vlan_vid_del(master, ctx->proto, vid);
}
static int dsa_tag_8021q_setup(struct dsa_switch *ds)
@@ -573,23 +442,57 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
}
EXPORT_SYMBOL_GPL(dsa_8021q_xmit);
-void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id)
+struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master,
+ int vbid)
+{
+ struct dsa_port *cpu_dp = master->dsa_ptr;
+ struct dsa_switch_tree *dst = cpu_dp->dst;
+ struct dsa_port *dp;
+
+ if (WARN_ON(!vbid))
+ return NULL;
+
+ dsa_tree_for_each_user_port(dp, dst) {
+ if (!dp->bridge)
+ continue;
+
+ if (dp->stp_state != BR_STATE_LEARNING &&
+ dp->stp_state != BR_STATE_FORWARDING)
+ continue;
+
+ if (dp->cpu_dp != cpu_dp)
+ continue;
+
+ if (dsa_port_bridge_num_get(dp) == vbid)
+ return dp->slave;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_find_port_by_vbid);
+
+void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id,
+ int *vbid)
{
u16 vid, tci;
- skb_push_rcsum(skb, ETH_HLEN);
if (skb_vlan_tag_present(skb)) {
tci = skb_vlan_tag_get(skb);
__vlan_hwaccel_clear_tag(skb);
} else {
+ skb_push_rcsum(skb, ETH_HLEN);
__skb_vlan_pop(skb, &tci);
+ skb_pull_rcsum(skb, ETH_HLEN);
}
- skb_pull_rcsum(skb, ETH_HLEN);
vid = tci & VLAN_VID_MASK;
*source_port = dsa_8021q_rx_source_port(vid);
*switch_id = dsa_8021q_rx_switch_id(vid);
+
+ if (vbid)
+ *vbid = dsa_tag_8021q_rx_vbid(vid);
+
skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
}
EXPORT_SYMBOL_GPL(dsa_8021q_rcv);
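Putting the two RX helpers together, a hedged sketch of the dispatch a tagger is expected to perform (sja1105_rcv() further down follows this same shape; foo_rcv is illustrative):

static struct sk_buff *foo_rcv(struct sk_buff *skb, struct net_device *master)
{
	int source_port = -1, switch_id = -1, vbid = -1;

	dsa_8021q_rcv(skb, &source_port, &switch_id, &vbid);

	if (vbid >= 1)
		/* Imprecise RX: deliver to any suitable port of that bridge */
		skb->dev = dsa_tag_8021q_find_port_by_vbid(master, vbid);
	else
		/* Precise RX: the exact source port was encoded in the VID */
		skb->dev = dsa_master_find_slave(master, switch_id, source_port);

	return skb->dev ? skb : NULL;
}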
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 96dbb8ee2fee..16889ea3e0a7 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -34,7 +34,7 @@
/* Newer Broadcom tag (4 bytes) */
#define BRCM_TAG_LEN 4
-/* Tag is constructed and desconstructed using byte by byte access
+/* Tag is constructed and deconstructed using byte by byte access
* because the tag is placed after the MAC Source Address, which does
* not make it 4-bytes aligned, so this might cause unaligned accesses
* on most systems where this is used.
@@ -103,7 +103,7 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
brcm_tag = skb->data + offset;
- /* Set the ingress opcode, traffic class, tag enforcment is
+ /* Set the ingress opcode, traffic class, tag enforcement is
* deprecated
*/
brcm_tag[0] = (1 << BRCM_OPCODE_SHIFT) |
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 8abf39dcac64..e4b6e3f2a3db 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -127,6 +127,7 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
u8 extra)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct net_device *br_dev;
u8 tag_dev, tag_port;
enum dsa_cmd cmd;
u8 *dsa_header;
@@ -149,7 +150,16 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
tag_port = dp->index;
}
- if (skb->protocol == htons(ETH_P_8021Q)) {
+ br_dev = dsa_port_bridge_dev_get(dp);
+
+ /* If frame is already 802.1Q tagged, we can convert it to a DSA
+ * tag (avoiding a memmove), but only if the port is standalone
+ * (in which case we always send FROM_CPU) or if the port's
+ * bridge has VLAN filtering enabled (in which case the CPU port
+ * will be a member of the VLAN).
+ */
+ if (skb->protocol == htons(ETH_P_8021Q) &&
+ (!br_dev || br_vlan_enabled(br_dev))) {
if (extra) {
skb_push(skb, extra);
dsa_alloc_etype_header(skb, extra);
@@ -166,10 +176,9 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
dsa_header[2] &= ~0x10;
}
} else {
- struct net_device *br = dsa_port_bridge_dev_get(dp);
u16 vid;
- vid = br ? MV88E6XXX_VID_BRIDGED : MV88E6XXX_VID_STANDALONE;
+ vid = br_dev ? MV88E6XXX_VID_BRIDGED : MV88E6XXX_VID_STANDALONE;
skb_push(skb, DSA_HLEN + extra);
dsa_alloc_etype_header(skb, DSA_HLEN + extra);
@@ -246,12 +255,14 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
if (trunk) {
struct dsa_port *cpu_dp = dev->dsa_ptr;
+ struct dsa_lag *lag;
/* The exact source port is not available in the tag,
* so we inject the frame directly on the upper
* team/bond.
*/
- skb->dev = dsa_lag_dev(cpu_dp->dst, source_port);
+ lag = dsa_lag_by_id(cpu_dp->dst, source_port + 1);
+ skb->dev = lag ? lag->dev : NULL;
} else {
skb->dev = dsa_master_find_slave(dev, source_device,
source_port);
diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c
index f64b805303cd..846588c0070a 100644
--- a/net/dsa/tag_hellcreek.c
+++ b/net/dsa/tag_hellcreek.c
@@ -21,6 +21,14 @@ static struct sk_buff *hellcreek_xmit(struct sk_buff *skb,
struct dsa_port *dp = dsa_slave_to_port(dev);
u8 *tag;
+ /* Calculate checksums (if required) before adding the trailer tag to
+ * avoid including it in calculations. That would lead to wrong
+ * checksums after the switch strips the tag.
+ */
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_help(skb))
+ return NULL;
+
/* Tag encoding */
tag = skb_put(skb, HELLCREEK_TAG_LEN);
*tag = BIT(dp->index);
@@ -37,7 +45,7 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb,
skb->dev = dsa_master_find_slave(dev, 0, port);
if (!skb->dev) {
- netdev_warn(dev, "Failed to get source port: %d\n", port);
+ netdev_warn_once(dev, "Failed to get source port: %d\n", port);
return NULL;
}
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 3509fc967ca9..38fa19c1e2d5 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -193,10 +193,69 @@ static const struct dsa_device_ops ksz9893_netdev_ops = {
DSA_TAG_DRIVER(ksz9893_netdev_ops);
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ9893);
+/* For xmit, 2 bytes are added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag0 : represents tag override, lookup and valid
+ * tag1 : each bit represents port (e.g. 0x01=port1, 0x02=port2, 0x80=port8)
+ *
+ * For rcv, 1 byte is added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag0 : zero-based value represents port
+ * (e.g. 0x00=port1, 0x02=port3, 0x07=port8)
+ */
+#define LAN937X_EGRESS_TAG_LEN 2
+
+#define LAN937X_TAIL_TAG_BLOCKING_OVERRIDE BIT(11)
+#define LAN937X_TAIL_TAG_LOOKUP BIT(12)
+#define LAN937X_TAIL_TAG_VALID BIT(13)
+#define LAN937X_TAIL_TAG_PORT_MASK 7
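A worked example of the encoding above (editorial): a frame egressing port 2 towards a link-local destination gets the 16-bit tail tag BIT(13) | BIT(11) | BIT(2) = 0x2804, which put_unaligned_be16() in the xmit function below stores as the bytes 0x28 0x04.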
+
+static struct sk_buff *lan937x_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ const struct ethhdr *hdr = eth_hdr(skb);
+ __be16 *tag;
+ u16 val;
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
+ return NULL;
+
+ tag = skb_put(skb, LAN937X_EGRESS_TAG_LEN);
+
+ val = BIT(dp->index);
+
+ if (is_link_local_ether_addr(hdr->h_dest))
+ val |= LAN937X_TAIL_TAG_BLOCKING_OVERRIDE;
+
+ /* Tail tag valid bit - This bit should always be set by the CPU */
+ val |= LAN937X_TAIL_TAG_VALID;
+
+ put_unaligned_be16(val, tag);
+
+ return skb;
+}
+
+static const struct dsa_device_ops lan937x_netdev_ops = {
+ .name = "lan937x",
+ .proto = DSA_TAG_PROTO_LAN937X,
+ .xmit = lan937x_xmit,
+ .rcv = ksz9477_rcv,
+ .needed_tailroom = LAN937X_EGRESS_TAG_LEN,
+};
+
+DSA_TAG_DRIVER(lan937x_netdev_ops);
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_LAN937X);
+
static struct dsa_tag_driver *dsa_tag_driver_array[] = {
&DSA_TAG_DRIVER_NAME(ksz8795_netdev_ops),
&DSA_TAG_DRIVER_NAME(ksz9477_netdev_ops),
&DSA_TAG_DRIVER_NAME(ksz9893_netdev_ops),
+ &DSA_TAG_DRIVER_NAME(lan937x_netdev_ops),
};
module_dsa_tag_drivers(dsa_tag_driver_array);
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index cb548188f813..98d7d7120bab 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -77,7 +77,6 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
{
- __be16 *lan9303_tag;
u16 lan9303_tag1;
unsigned int source_port;
@@ -87,14 +86,15 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
return NULL;
}
- lan9303_tag = dsa_etype_header_pos_rx(skb);
-
- if (lan9303_tag[0] != htons(ETH_P_8021Q)) {
- dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid VLAN marker\n");
- return NULL;
+ if (skb_vlan_tag_present(skb)) {
+ lan9303_tag1 = skb_vlan_tag_get(skb);
+ __vlan_hwaccel_clear_tag(skb);
+ } else {
+ skb_push_rcsum(skb, ETH_HLEN);
+ __skb_vlan_pop(skb, &lan9303_tag1);
+ skb_pull_rcsum(skb, ETH_HLEN);
}
- lan9303_tag1 = ntohs(lan9303_tag[1]);
source_port = lan9303_tag1 & 0x3;
skb->dev = dsa_master_find_slave(dev, 0, source_port);
@@ -103,13 +103,6 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
return NULL;
}
- /* remove the special VLAN tag between the MAC addresses
- * and the current ethertype field.
- */
- skb_pull_rcsum(skb, 2 + 2);
-
- dsa_strip_etype_header(skb, LAN9303_TAG_LEN);
-
if (!(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU))
dsa_default_offload_fwd_mark(skb);
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 68982b2789a5..37ccf00404ea 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -32,6 +32,13 @@ static struct sk_buff *ocelot_defer_xmit(struct dsa_port *dp,
if (!xmit_work_fn || !xmit_worker)
return NULL;
+ /* PTP over IP packets need UDP checksumming. We may have inherited
+ * NETIF_F_HW_CSUM from the DSA master, but these packets are not sent
+ * through the DSA master, so calculate the checksum here.
+ */
+ if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
+ return NULL;
+
xmit_work = kzalloc(sizeof(*xmit_work), GFP_ATOMIC);
if (!xmit_work)
return NULL;
@@ -55,7 +62,7 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
struct dsa_port *dp = dsa_slave_to_port(netdev);
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
- u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
+ u16 tx_vid = dsa_tag_8021q_standalone_vid(dp);
struct ethhdr *hdr = eth_hdr(skb);
if (ocelot_ptp_rew_op(skb) || is_link_local_ether_addr(hdr->h_dest))
@@ -70,7 +77,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
{
int src_port, switch_id;
- dsa_8021q_rcv(skb, &src_port, &switch_id);
+ dsa_8021q_rcv(skb, &src_port, &switch_id, NULL);
skb->dev = dsa_master_find_slave(netdev, switch_id, src_port);
if (!skb->dev)
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 1ea9401b8ace..57d2e00f1e5d 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -4,30 +4,12 @@
*/
#include <linux/etherdevice.h>
+#include <linux/bitfield.h>
+#include <net/dsa.h>
+#include <linux/dsa/tag_qca.h>
#include "dsa_priv.h"
-#define QCA_HDR_LEN 2
-#define QCA_HDR_VERSION 0x2
-
-#define QCA_HDR_RECV_VERSION_MASK GENMASK(15, 14)
-#define QCA_HDR_RECV_VERSION_S 14
-#define QCA_HDR_RECV_PRIORITY_MASK GENMASK(13, 11)
-#define QCA_HDR_RECV_PRIORITY_S 11
-#define QCA_HDR_RECV_TYPE_MASK GENMASK(10, 6)
-#define QCA_HDR_RECV_TYPE_S 6
-#define QCA_HDR_RECV_FRAME_IS_TAGGED BIT(3)
-#define QCA_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0)
-
-#define QCA_HDR_XMIT_VERSION_MASK GENMASK(15, 14)
-#define QCA_HDR_XMIT_VERSION_S 14
-#define QCA_HDR_XMIT_PRIORITY_MASK GENMASK(13, 11)
-#define QCA_HDR_XMIT_PRIORITY_S 11
-#define QCA_HDR_XMIT_CONTROL_MASK GENMASK(10, 8)
-#define QCA_HDR_XMIT_CONTROL_S 8
-#define QCA_HDR_XMIT_FROM_CPU BIT(7)
-#define QCA_HDR_XMIT_DP_BIT_MASK GENMASK(6, 0)
-
static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
@@ -40,8 +22,9 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
phdr = dsa_etype_header_pos_tx(skb);
/* Set the version field, and set destination port information */
- hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S |
- QCA_HDR_XMIT_FROM_CPU | BIT(dp->index);
+ hdr = FIELD_PREP(QCA_HDR_XMIT_VERSION, QCA_HDR_VERSION);
+ hdr |= QCA_HDR_XMIT_FROM_CPU;
+ hdr |= FIELD_PREP(QCA_HDR_XMIT_DP_BIT, BIT(dp->index));
*phdr = htons(hdr);
@@ -50,10 +33,17 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
{
- u8 ver;
- u16 hdr;
- int port;
+ struct qca_tagger_data *tagger_data;
+ struct dsa_port *dp = dev->dsa_ptr;
+ struct dsa_switch *ds = dp->ds;
+ u8 ver, pk_type;
__be16 *phdr;
+ int port;
+ u16 hdr;
+
+ BUILD_BUG_ON(sizeof(struct qca_mgmt_ethhdr) != QCA_HDR_MGMT_HEADER_LEN + QCA_HDR_LEN);
+
+ tagger_data = ds->tagger_data;
if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN)))
return NULL;
@@ -62,16 +52,33 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
hdr = ntohs(*phdr);
/* Make sure the version is correct */
- ver = (hdr & QCA_HDR_RECV_VERSION_MASK) >> QCA_HDR_RECV_VERSION_S;
+ ver = FIELD_GET(QCA_HDR_RECV_VERSION, hdr);
if (unlikely(ver != QCA_HDR_VERSION))
return NULL;
+ /* Get pk type */
+ pk_type = FIELD_GET(QCA_HDR_RECV_TYPE, hdr);
+
+ /* Ethernet mgmt read/write packet */
+ if (pk_type == QCA_HDR_RECV_TYPE_RW_REG_ACK) {
+ if (likely(tagger_data->rw_reg_ack_handler))
+ tagger_data->rw_reg_ack_handler(ds, skb);
+ return NULL;
+ }
+
+ /* Ethernet MIB counter packet */
+ if (pk_type == QCA_HDR_RECV_TYPE_MIB) {
+ if (likely(tagger_data->mib_autocast_handler))
+ tagger_data->mib_autocast_handler(ds, skb);
+ return NULL;
+ }
+
/* Remove QCA tag and recalculate checksum */
skb_pull_rcsum(skb, QCA_HDR_LEN);
dsa_strip_etype_header(skb, QCA_HDR_LEN);
/* Get source port information */
- port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK);
+ port = FIELD_GET(QCA_HDR_RECV_SOURCE_PORT, hdr);
skb->dev = dsa_master_find_slave(dev, 0, port);
if (!skb->dev)
@@ -80,12 +87,34 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
return skb;
}
+static int qca_tag_connect(struct dsa_switch *ds)
+{
+ struct qca_tagger_data *tagger_data;
+
+ tagger_data = kzalloc(sizeof(*tagger_data), GFP_KERNEL);
+ if (!tagger_data)
+ return -ENOMEM;
+
+ ds->tagger_data = tagger_data;
+
+ return 0;
+}
+
+static void qca_tag_disconnect(struct dsa_switch *ds)
+{
+ kfree(ds->tagger_data);
+ ds->tagger_data = NULL;
+}
+
static const struct dsa_device_ops qca_netdev_ops = {
.name = "qca",
.proto = DSA_TAG_PROTO_QCA,
+ .connect = qca_tag_connect,
+ .disconnect = qca_tag_disconnect,
.xmit = qca_tag_xmit,
.rcv = qca_tag_rcv,
.needed_headroom = QCA_HDR_LEN,
+ .promisc_on_master = true,
};
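Hedged sketch of the driver side: the switch driver is expected to install its handlers into the shared tagger_data once the tagger is connected. The foo_* names are invented; in mainline this wiring happens from the driver's tag protocol connect path.

static void foo_rw_reg_ack_handler(struct dsa_switch *ds, struct sk_buff *skb);
static void foo_mib_autocast_handler(struct dsa_switch *ds, struct sk_buff *skb);

static void foo_connect_tag_protocol(struct dsa_switch *ds)
{
	struct qca_tagger_data *tagger_data = ds->tagger_data;

	tagger_data->rw_reg_ack_handler = foo_rw_reg_ack_handler;
	tagger_data->mib_autocast_handler = foo_mib_autocast_handler;
}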
MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c
index 02686ad4045d..a593ead7ff26 100644
--- a/net/dsa/tag_rtl8_4.c
+++ b/net/dsa/tag_rtl8_4.c
@@ -7,13 +7,8 @@
* NOTE: Currently only supports protocol "4" found in the RTL8365MB, hence
* named tag_rtl8_4.
*
- * This tag header has the following format:
+ * This tag has the following format:
*
- * -------------------------------------------
- * | MAC DA | MAC SA | 8 byte tag | Type | ...
- * -------------------------------------------
- * _______________/ \______________________________________
- * / \
* 0 7|8 15
* |-----------------------------------+-----------------------------------|---
* | (16-bit) | ^
@@ -58,6 +53,24 @@
* TX/RX | TX (switch->CPU): port number the packet was received on
* | RX (CPU->switch): forwarding port mask (if ALLOW=0)
* | allowance port mask (if ALLOW=1)
+ *
+ * The tag can be positioned before Ethertype, using tag "rtl8_4":
+ *
+ * +--------+--------+------------+------+-----
+ * | MAC DA | MAC SA | 8-byte tag | Type | ...
+ * +--------+--------+------------+------+-----
+ *
+ * The tag can also appear between the end of the payload and before the CRC,
+ * using tag "rtl8_4t":
+ *
+ * +--------+--------+------+-----+---------+------------+-----+
+ * | MAC DA | MAC SA | TYPE | ... | payload | 8-byte tag | CRC |
+ * +--------+--------+------+-----+---------+------------+-----+
+ *
+ * The added bytes after the payload will break most checksums, either in
+ * software or hardware. To avoid this issue, if the checksum is still pending,
+ * this tagger checksums the packet in software before adding the tag.
+ *
*/
#include <linux/bitfield.h>
@@ -84,87 +97,133 @@
#define RTL8_4_TX GENMASK(3, 0)
#define RTL8_4_RX GENMASK(10, 0)
-static struct sk_buff *rtl8_4_tag_xmit(struct sk_buff *skb,
- struct net_device *dev)
+static void rtl8_4_write_tag(struct sk_buff *skb, struct net_device *dev,
+ void *tag)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
- __be16 *tag;
-
- skb_push(skb, RTL8_4_TAG_LEN);
-
- dsa_alloc_etype_header(skb, RTL8_4_TAG_LEN);
- tag = dsa_etype_header_pos_tx(skb);
+ __be16 tag16[RTL8_4_TAG_LEN / 2];
/* Set Realtek EtherType */
- tag[0] = htons(ETH_P_REALTEK);
+ tag16[0] = htons(ETH_P_REALTEK);
/* Set Protocol; zero REASON */
- tag[1] = htons(FIELD_PREP(RTL8_4_PROTOCOL, RTL8_4_PROTOCOL_RTL8365MB));
+ tag16[1] = htons(FIELD_PREP(RTL8_4_PROTOCOL, RTL8_4_PROTOCOL_RTL8365MB));
/* Zero FID_EN, FID, PRI_EN, PRI, KEEP; set LEARN_DIS */
- tag[2] = htons(FIELD_PREP(RTL8_4_LEARN_DIS, 1));
+ tag16[2] = htons(FIELD_PREP(RTL8_4_LEARN_DIS, 1));
/* Zero ALLOW; set RX (CPU->switch) forwarding port mask */
- tag[3] = htons(FIELD_PREP(RTL8_4_RX, BIT(dp->index)));
+ tag16[3] = htons(FIELD_PREP(RTL8_4_RX, BIT(dp->index)));
+
+ memcpy(tag, tag16, RTL8_4_TAG_LEN);
+}
+
+static struct sk_buff *rtl8_4_tag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ skb_push(skb, RTL8_4_TAG_LEN);
+
+ dsa_alloc_etype_header(skb, RTL8_4_TAG_LEN);
+
+ rtl8_4_write_tag(skb, dev, dsa_etype_header_pos_tx(skb));
return skb;
}
-static struct sk_buff *rtl8_4_tag_rcv(struct sk_buff *skb,
- struct net_device *dev)
+static struct sk_buff *rtl8_4t_tag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ /* Calculate the checksum here if not done yet as trailing tags will
+ * break either software or hardware based checksum
+ */
+ if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
+ return NULL;
+
+ rtl8_4_write_tag(skb, dev, skb_put(skb, RTL8_4_TAG_LEN));
+
+ return skb;
+}
+
+static int rtl8_4_read_tag(struct sk_buff *skb, struct net_device *dev,
+ void *tag)
{
- __be16 *tag;
+ __be16 tag16[RTL8_4_TAG_LEN / 2];
u16 etype;
u8 reason;
u8 proto;
u8 port;
- if (unlikely(!pskb_may_pull(skb, RTL8_4_TAG_LEN)))
- return NULL;
-
- tag = dsa_etype_header_pos_rx(skb);
+ memcpy(tag16, tag, RTL8_4_TAG_LEN);
/* Parse Realtek EtherType */
- etype = ntohs(tag[0]);
+ etype = ntohs(tag16[0]);
if (unlikely(etype != ETH_P_REALTEK)) {
dev_warn_ratelimited(&dev->dev,
"non-realtek ethertype 0x%04x\n", etype);
- return NULL;
+ return -EPROTO;
}
/* Parse Protocol */
- proto = FIELD_GET(RTL8_4_PROTOCOL, ntohs(tag[1]));
+ proto = FIELD_GET(RTL8_4_PROTOCOL, ntohs(tag16[1]));
if (unlikely(proto != RTL8_4_PROTOCOL_RTL8365MB)) {
dev_warn_ratelimited(&dev->dev,
"unknown realtek protocol 0x%02x\n",
proto);
- return NULL;
+ return -EPROTO;
}
/* Parse REASON */
- reason = FIELD_GET(RTL8_4_REASON, ntohs(tag[1]));
+ reason = FIELD_GET(RTL8_4_REASON, ntohs(tag16[1]));
/* Parse TX (switch->CPU) */
- port = FIELD_GET(RTL8_4_TX, ntohs(tag[3]));
+ port = FIELD_GET(RTL8_4_TX, ntohs(tag16[3]));
skb->dev = dsa_master_find_slave(dev, 0, port);
if (!skb->dev) {
dev_warn_ratelimited(&dev->dev,
"could not find slave for port %d\n",
port);
- return NULL;
+ return -ENOENT;
}
+ if (reason != RTL8_4_REASON_TRAP)
+ dsa_default_offload_fwd_mark(skb);
+
+ return 0;
+}
+
+static struct sk_buff *rtl8_4_tag_rcv(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ if (unlikely(!pskb_may_pull(skb, RTL8_4_TAG_LEN)))
+ return NULL;
+
+ if (unlikely(rtl8_4_read_tag(skb, dev, dsa_etype_header_pos_rx(skb))))
+ return NULL;
+
/* Remove tag and recalculate checksum */
skb_pull_rcsum(skb, RTL8_4_TAG_LEN);
dsa_strip_etype_header(skb, RTL8_4_TAG_LEN);
- if (reason != RTL8_4_REASON_TRAP)
- dsa_default_offload_fwd_mark(skb);
+ return skb;
+}
+
+static struct sk_buff *rtl8_4t_tag_rcv(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ if (skb_linearize(skb))
+ return NULL;
+
+ if (unlikely(rtl8_4_read_tag(skb, dev, skb_tail_pointer(skb) - RTL8_4_TAG_LEN)))
+ return NULL;
+
+ if (pskb_trim_rcsum(skb, skb->len - RTL8_4_TAG_LEN))
+ return NULL;
return skb;
}
+/* Ethertype version */
static const struct dsa_device_ops rtl8_4_netdev_ops = {
.name = "rtl8_4",
.proto = DSA_TAG_PROTO_RTL8_4,
@@ -172,7 +231,28 @@ static const struct dsa_device_ops rtl8_4_netdev_ops = {
.rcv = rtl8_4_tag_rcv,
.needed_headroom = RTL8_4_TAG_LEN,
};
-module_dsa_tag_driver(rtl8_4_netdev_ops);
-MODULE_LICENSE("GPL");
+DSA_TAG_DRIVER(rtl8_4_netdev_ops);
+
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_RTL8_4);
+
+/* Tail version */
+static const struct dsa_device_ops rtl8_4t_netdev_ops = {
+ .name = "rtl8_4t",
+ .proto = DSA_TAG_PROTO_RTL8_4T,
+ .xmit = rtl8_4t_tag_xmit,
+ .rcv = rtl8_4t_tag_rcv,
+ .needed_tailroom = RTL8_4_TAG_LEN,
+};
+
+DSA_TAG_DRIVER(rtl8_4t_netdev_ops);
+
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_RTL8_4T);
+
+static struct dsa_tag_driver *dsa_tag_drivers[] = {
+ &DSA_TAG_DRIVER_NAME(rtl8_4_netdev_ops),
+ &DSA_TAG_DRIVER_NAME(rtl8_4t_netdev_ops),
+};
+module_dsa_tag_drivers(dsa_tag_drivers);
+
+MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_rzn1_a5psw.c b/net/dsa/tag_rzn1_a5psw.c
new file mode 100644
index 000000000000..e2a5ee6ae688
--- /dev/null
+++ b/net/dsa/tag_rzn1_a5psw.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 Schneider Electric
+ *
+ * Clément Léger <clement.leger@bootlin.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <net/dsa.h>
+
+#include "dsa_priv.h"
+
+/* To define the outgoing port and to discover the incoming port, a TAG is
+ * inserted after the Src MAC:
+ *
+ * Dest MAC Src MAC TAG Type
+ * ...| 1 2 3 4 5 6 | 1 2 3 4 5 6 | 1 2 3 4 5 6 7 8 | 1 2 |...
+ * |<--------------->|
+ *
+ * See struct a5psw_tag for layout
+ */
+
+#define ETH_P_DSA_A5PSW 0xE001
+#define A5PSW_TAG_LEN 8
+#define A5PSW_CTRL_DATA_FORCE_FORWARD BIT(0)
+/* This is used for both the xmit tag and rcv tagging */
+#define A5PSW_CTRL_DATA_PORT GENMASK(3, 0)
+
+struct a5psw_tag {
+ __be16 ctrl_tag;
+ __be16 ctrl_data;
+ __be16 ctrl_data2_hi;
+ __be16 ctrl_data2_lo;
+};
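A worked example of this layout (editorial): for a frame steered to port 1, the xmit path below emits ctrl_tag = 0xe001, ctrl_data = 0x0001 (FORCE_FORWARD), ctrl_data2_hi = 0x0000 and ctrl_data2_lo = 0x0002 (BIT(1) in the 4-bit port field).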
+
+static struct sk_buff *a5psw_tag_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct a5psw_tag *ptag;
+ u32 data2_val;
+
+ BUILD_BUG_ON(sizeof(*ptag) != A5PSW_TAG_LEN);
+
+ /* The Ethernet switch we are interfaced with needs packets to be at
+ * least 60 bytes otherwise they will be discarded when they enter the
+ * switch port logic.
+ */
+ if (__skb_put_padto(skb, ETH_ZLEN, false))
+ return NULL;
+
+ /* provide 'A5PSW_TAG_LEN' bytes of additional space */
+ skb_push(skb, A5PSW_TAG_LEN);
+
+ /* make room between MACs and Ether-Type to insert tag */
+ dsa_alloc_etype_header(skb, A5PSW_TAG_LEN);
+
+ ptag = dsa_etype_header_pos_tx(skb);
+
+ data2_val = FIELD_PREP(A5PSW_CTRL_DATA_PORT, BIT(dp->index));
+ ptag->ctrl_tag = htons(ETH_P_DSA_A5PSW);
+ ptag->ctrl_data = htons(A5PSW_CTRL_DATA_FORCE_FORWARD);
+ ptag->ctrl_data2_lo = htons(data2_val);
+ ptag->ctrl_data2_hi = 0;
+
+ return skb;
+}
+
+static struct sk_buff *a5psw_tag_rcv(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct a5psw_tag *tag;
+ int port;
+
+ if (unlikely(!pskb_may_pull(skb, A5PSW_TAG_LEN))) {
+ dev_warn_ratelimited(&dev->dev,
+ "Dropping packet, cannot pull\n");
+ return NULL;
+ }
+
+ tag = dsa_etype_header_pos_rx(skb);
+
+ if (tag->ctrl_tag != htons(ETH_P_DSA_A5PSW)) {
+ dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid TAG marker\n");
+ return NULL;
+ }
+
+ port = FIELD_GET(A5PSW_CTRL_DATA_PORT, ntohs(tag->ctrl_data));
+
+ skb->dev = dsa_master_find_slave(dev, 0, port);
+ if (!skb->dev)
+ return NULL;
+
+ skb_pull_rcsum(skb, A5PSW_TAG_LEN);
+ dsa_strip_etype_header(skb, A5PSW_TAG_LEN);
+
+ dsa_default_offload_fwd_mark(skb);
+
+ return skb;
+}
+
+static const struct dsa_device_ops a5psw_netdev_ops = {
+ .name = "a5psw",
+ .proto = DSA_TAG_PROTO_RZN1_A5PSW,
+ .xmit = a5psw_tag_xmit,
+ .rcv = a5psw_tag_rcv,
+ .needed_headroom = A5PSW_TAG_LEN,
+};
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_A5PSW);
+module_dsa_tag_driver(a5psw_netdev_ops);
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 72d5e0ef8dcf..83e4136516b0 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -226,7 +226,7 @@ static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb,
* TX VLAN that targets the bridge's entire broadcast domain,
* instead of just the specific port.
*/
- tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge_num);
+ tx_vid = dsa_tag_8021q_bridge_vid(bridge_num);
return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp), tx_vid);
}
@@ -267,7 +267,7 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
struct dsa_port *dp = dsa_slave_to_port(netdev);
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
- u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
+ u16 tx_vid = dsa_tag_8021q_standalone_vid(dp);
if (skb->offload_fwd_mark)
return sja1105_imprecise_xmit(skb, netdev);
@@ -295,7 +295,7 @@ static struct sk_buff *sja1110_xmit(struct sk_buff *skb,
struct dsa_port *dp = dsa_slave_to_port(netdev);
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
- u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
+ u16 tx_vid = dsa_tag_8021q_standalone_vid(dp);
__be32 *tx_trailer;
__be16 *tx_header;
int trailer_pos;
@@ -509,7 +509,7 @@ static bool sja1110_skb_has_inband_control_extension(const struct sk_buff *skb)
* packet.
*/
static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port,
- int *switch_id, u16 *vid)
+ int *switch_id, int *vbid, u16 *vid)
{
struct vlan_ethhdr *hdr = (struct vlan_ethhdr *)skb_mac_header(skb);
u16 vlan_tci;
@@ -519,8 +519,8 @@ static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port,
else
vlan_tci = ntohs(hdr->h_vlan_TCI);
- if (vid_is_dsa_8021q_rxvlan(vlan_tci & VLAN_VID_MASK))
- return dsa_8021q_rcv(skb, source_port, switch_id);
+ if (vid_is_dsa_8021q(vlan_tci & VLAN_VID_MASK))
+ return dsa_8021q_rcv(skb, source_port, switch_id, vbid);
/* Try our best with imprecise RX */
*vid = vlan_tci & VLAN_VID_MASK;
@@ -529,7 +529,7 @@ static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port,
static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
struct net_device *netdev)
{
- int source_port = -1, switch_id = -1;
+ int source_port = -1, switch_id = -1, vbid = -1;
struct sja1105_meta meta = {0};
struct ethhdr *hdr;
bool is_link_local;
@@ -542,7 +542,7 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
if (sja1105_skb_has_tag_8021q(skb)) {
/* Normal traffic path. */
- sja1105_vlan_rcv(skb, &source_port, &switch_id, &vid);
+ sja1105_vlan_rcv(skb, &source_port, &switch_id, &vbid, &vid);
} else if (is_link_local) {
/* Management traffic path. Switch embeds the switch ID and
* port ID into bytes of the destination MAC, courtesy of
@@ -561,7 +561,9 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
return NULL;
}
- if (source_port == -1 || switch_id == -1)
+ if (vbid >= 1)
+ skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid);
+ else if (source_port == -1 || switch_id == -1)
skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid);
else
skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
@@ -686,7 +688,7 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
struct net_device *netdev)
{
- int source_port = -1, switch_id = -1;
+ int source_port = -1, switch_id = -1, vbid = -1;
bool host_only = false;
u16 vid = 0;
@@ -700,9 +702,11 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
/* Packets with in-band control extensions might still have RX VLANs */
if (likely(sja1105_skb_has_tag_8021q(skb)))
- sja1105_vlan_rcv(skb, &source_port, &switch_id, &vid);
+ sja1105_vlan_rcv(skb, &source_port, &switch_id, &vbid, &vid);
- if (source_port == -1 || switch_id == -1)
+ if (vbid >= 1)
+ skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid);
+ else if (source_port == -1 || switch_id == -1)
skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid);
else
skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index ebcc812735a4..e02daa74e833 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -391,7 +391,7 @@ EXPORT_SYMBOL(ether_setup);
struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
unsigned int rxqs)
{
- return alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_UNKNOWN,
+ return alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_ENUM,
ether_setup, txqs, rxqs);
}
EXPORT_SYMBOL(alloc_etherdev_mqs);
@@ -414,12 +414,9 @@ struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb)
off_eth = skb_gro_offset(skb);
hlen = off_eth + sizeof(*eh);
- eh = skb_gro_header_fast(skb, off_eth);
- if (skb_gro_header_hard(skb, hlen)) {
- eh = skb_gro_header_slow(skb, hlen, off_eth);
- if (unlikely(!eh))
- goto out;
- }
+ eh = skb_gro_header(skb, hlen, off_eth);
+ if (unlikely(!eh))
+ goto out;
flush = 0;
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index b76432e70e6b..72ab0944262a 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -7,4 +7,5 @@ obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o
ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
linkstate.o debug.o wol.o features.o privflags.o rings.o \
channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
- tunnels.o fec.o eeprom.o stats.o phc_vclocks.o module.o
+ tunnels.o fec.o eeprom.o stats.o phc_vclocks.o module.o \
+ pse-pd.o
diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c
index 920aac02fe39..06a151165c31 100644
--- a/net/ethtool/cabletest.c
+++ b/net/ethtool/cabletest.c
@@ -356,7 +356,7 @@ out_dev_put:
ethnl_parse_header_dev_put(&req_info);
return ret;
}
-
+
int ethnl_cable_test_amplitude(struct phy_device *phydev,
u8 pair, s16 mV)
{
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 0c5210015911..566adf85e658 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -201,6 +201,7 @@ const char link_mode_names[][ETH_GSTRING_LEN] = {
__DEFINE_LINK_MODE_NAME(400000, CR4, Full),
__DEFINE_LINK_MODE_NAME(100, FX, Half),
__DEFINE_LINK_MODE_NAME(100, FX, Full),
+ __DEFINE_LINK_MODE_NAME(10, T1L, Full),
};
static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -236,6 +237,7 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
#define __LINK_MODE_LANES_T1 1
#define __LINK_MODE_LANES_X 1
#define __LINK_MODE_LANES_FX 1
+#define __LINK_MODE_LANES_T1L 1
#define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \
[ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \
@@ -349,6 +351,7 @@ const struct link_mode_info link_mode_params[] = {
__DEFINE_LINK_MODE_PARAMS(400000, CR4, Full),
__DEFINE_LINK_MODE_PARAMS(100, FX, Half),
__DEFINE_LINK_MODE_PARAMS(100, FX, Full),
+ __DEFINE_LINK_MODE_PARAMS(10, T1L, Full),
};
static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS);
diff --git a/net/ethtool/common.h b/net/ethtool/common.h
index 2dc2b80aea5f..c1779657e074 100644
--- a/net/ethtool/common.h
+++ b/net/ethtool/common.h
@@ -46,6 +46,7 @@ int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max);
int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info);
extern const struct ethtool_phy_ops *ethtool_phy_ops;
+extern const struct ethtool_pse_ops *ethtool_pse_ops;
int ethtool_get_module_info_call(struct net_device *dev,
struct ethtool_modinfo *modinfo);
diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
index 7e6b37a54add..49c0a2a77f02 100644
--- a/net/ethtool/eeprom.c
+++ b/net/ethtool/eeprom.c
@@ -36,7 +36,7 @@ static int fallback_set_params(struct eeprom_req_info *request,
if (request->page)
offset = request->page * ETH_MODULE_EEPROM_PAGE_LEN + offset;
- if (modinfo->type == ETH_MODULE_SFF_8079 &&
+ if (modinfo->type == ETH_MODULE_SFF_8472 &&
request->i2c_address == 0x51)
offset += ETH_MODULE_EEPROM_PAGE_LEN * 2;
@@ -124,7 +124,7 @@ static int eeprom_prepare_data(const struct ethnl_req_info *req_base,
if (ret)
goto err_free;
- ret = get_module_eeprom_by_page(dev, &page_data, info->extack);
+ ret = get_module_eeprom_by_page(dev, &page_data, info ? info->extack : NULL);
if (ret < 0)
goto err_ops;
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 326e14ee05db..57e7238a4136 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -369,22 +369,9 @@ EXPORT_SYMBOL(ethtool_convert_legacy_u32_to_link_mode);
bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
const unsigned long *src)
{
- bool retval = true;
-
- /* TODO: following test will soon always be true */
- if (__ETHTOOL_LINK_MODE_MASK_NBITS > 32) {
- __ETHTOOL_DECLARE_LINK_MODE_MASK(ext);
-
- linkmode_zero(ext);
- bitmap_fill(ext, 32);
- bitmap_complement(ext, ext, __ETHTOOL_LINK_MODE_MASK_NBITS);
- if (linkmode_intersects(ext, src)) {
- /* src mask goes beyond bit 31 */
- retval = false;
- }
- }
*legacy_u32 = src[0];
- return retval;
+ return find_next_bit(src, __ETHTOOL_LINK_MODE_MASK_NBITS, 32) ==
+ __ETHTOOL_LINK_MODE_MASK_NBITS;
}
EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32);
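A hedged usage sketch of the simplified conversion (link mode bit names are from the UAPI ethtool header; foo_legacy_u32_demo is illustrative): the call succeeds exactly when no mode at bit 32 or above is set.

static void foo_legacy_u32_demo(void)
{
	__ETHTOOL_DECLARE_LINK_MODE_MASK(modes);
	u32 legacy;

	linkmode_zero(modes);
	linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, modes);
	/* Fits in 32 bits: returns true, legacy == BIT(5) */
	WARN_ON(!ethtool_convert_link_mode_to_legacy_u32(&legacy, modes));

	linkmode_set_bit(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT, modes);
	/* Bit 35 is now set: the conversion is lossy and returns false */
	WARN_ON(ethtool_convert_link_mode_to_legacy_u32(&legacy, modes));
}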
@@ -584,6 +571,7 @@ static int ethtool_get_link_ksettings(struct net_device *dev,
= __ETHTOOL_LINK_MODE_MASK_NU32;
link_ksettings.base.master_slave_cfg = MASTER_SLAVE_CFG_UNSUPPORTED;
link_ksettings.base.master_slave_state = MASTER_SLAVE_STATE_UNSUPPORTED;
+ link_ksettings.base.rate_matching = RATE_MATCH_NONE;
return store_link_ksettings_for_user(useraddr, &link_ksettings);
}
@@ -727,16 +715,16 @@ ethtool_get_drvinfo(struct net_device *dev, struct ethtool_devlink_compat *rsp)
const struct ethtool_ops *ops = dev->ethtool_ops;
rsp->info.cmd = ETHTOOL_GDRVINFO;
- strlcpy(rsp->info.version, UTS_RELEASE, sizeof(rsp->info.version));
+ strscpy(rsp->info.version, UTS_RELEASE, sizeof(rsp->info.version));
if (ops->get_drvinfo) {
ops->get_drvinfo(dev, &rsp->info);
} else if (dev->dev.parent && dev->dev.parent->driver) {
- strlcpy(rsp->info.bus_info, dev_name(dev->dev.parent),
+ strscpy(rsp->info.bus_info, dev_name(dev->dev.parent),
sizeof(rsp->info.bus_info));
- strlcpy(rsp->info.driver, dev->dev.parent->driver->name,
+ strscpy(rsp->info.driver, dev->dev.parent->driver->name,
sizeof(rsp->info.driver));
} else if (dev->rtnl_link_ops) {
- strlcpy(rsp->info.driver, dev->rtnl_link_ops->kind,
+ strscpy(rsp->info.driver, dev->rtnl_link_ops->kind,
sizeof(rsp->info.driver));
} else {
return -EOPNOTSUPP;
@@ -2010,7 +1998,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
* removal of the device.
*/
busy = true;
- dev_hold_track(dev, &dev_tracker, GFP_KERNEL);
+ netdev_hold(dev, &dev_tracker, GFP_KERNEL);
rtnl_unlock();
if (rc == 0) {
@@ -2034,7 +2022,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
}
rtnl_lock();
- dev_put_track(dev, &dev_tracker);
+ netdev_put(dev, &dev_tracker);
busy = false;
(void) ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE);
diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
index 99b29b4fe947..126e06c713a3 100644
--- a/net/ethtool/linkmodes.c
+++ b/net/ethtool/linkmodes.c
@@ -70,6 +70,7 @@ static int linkmodes_reply_size(const struct ethnl_req_info *req_base,
+ nla_total_size(sizeof(u32)) /* LINKMODES_SPEED */
+ nla_total_size(sizeof(u32)) /* LINKMODES_LANES */
+ nla_total_size(sizeof(u8)) /* LINKMODES_DUPLEX */
+ + nla_total_size(sizeof(u8)) /* LINKMODES_RATE_MATCHING */
+ 0;
ret = ethnl_bitset_size(ksettings->link_modes.advertising,
ksettings->link_modes.supported,
@@ -143,6 +144,10 @@ static int linkmodes_fill_reply(struct sk_buff *skb,
lsettings->master_slave_state))
return -EMSGSIZE;
+ if (nla_put_u8(skb, ETHTOOL_A_LINKMODES_RATE_MATCHING,
+ lsettings->rate_matching))
+ return -EMSGSIZE;
+
return 0;
}
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 5fe8f4ae2ceb..1a4c11356c96 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -286,6 +286,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
[ETHTOOL_MSG_STATS_GET] = &ethnl_stats_request_ops,
[ETHTOOL_MSG_PHC_VCLOCKS_GET] = &ethnl_phc_vclocks_request_ops,
[ETHTOOL_MSG_MODULE_GET] = &ethnl_module_request_ops,
+ [ETHTOOL_MSG_PSE_GET] = &ethnl_pse_request_ops,
};
static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
@@ -361,6 +362,9 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
ops = ethnl_default_requests[cmd];
if (WARN_ONCE(!ops, "cmd %u has no ethnl_request_ops\n", cmd))
return -EOPNOTSUPP;
+ if (GENL_REQ_ATTR_CHECK(info, ops->hdr_attr))
+ return -EINVAL;
+
req_info = kzalloc(ops->req_info_size, GFP_KERNEL);
if (!req_info)
return -ENOMEM;
@@ -402,7 +406,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
ops->cleanup_data(reply_data);
genlmsg_end(rskb, reply_payload);
- dev_put_track(req_info->dev, &req_info->dev_tracker);
+ netdev_put(req_info->dev, &req_info->dev_tracker);
kfree(reply_data);
kfree(req_info);
return genlmsg_reply(rskb, info);
@@ -414,7 +418,7 @@ err_cleanup:
if (ops->cleanup_data)
ops->cleanup_data(reply_data);
err_dev:
- dev_put_track(req_info->dev, &req_info->dev_tracker);
+ netdev_put(req_info->dev, &req_info->dev_tracker);
kfree(reply_data);
kfree(req_info);
return ret;
@@ -550,7 +554,7 @@ static int ethnl_default_start(struct netlink_callback *cb)
* same parser as for non-dump (doit) requests is used, it
* would take reference to the device if it finds one
*/
- dev_put_track(req_info->dev, &req_info->dev_tracker);
+ netdev_put(req_info->dev, &req_info->dev_tracker);
req_info->dev = NULL;
}
if (ret < 0)
@@ -1020,6 +1024,22 @@ static const struct genl_ops ethtool_genl_ops[] = {
.policy = ethnl_module_set_policy,
.maxattr = ARRAY_SIZE(ethnl_module_set_policy) - 1,
},
+ {
+ .cmd = ETHTOOL_MSG_PSE_GET,
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ .policy = ethnl_pse_get_policy,
+ .maxattr = ARRAY_SIZE(ethnl_pse_get_policy) - 1,
+ },
+ {
+ .cmd = ETHTOOL_MSG_PSE_SET,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = ethnl_set_pse,
+ .policy = ethnl_pse_set_policy,
+ .maxattr = ARRAY_SIZE(ethnl_pse_set_policy) - 1,
+ },
};
static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
@@ -1033,6 +1053,7 @@ static struct genl_family ethtool_genl_family __ro_after_init = {
.parallel_ops = true,
.ops = ethtool_genl_ops,
.n_ops = ARRAY_SIZE(ethtool_genl_ops),
+ .resv_start_op = ETHTOOL_MSG_MODULE_GET + 1,
.mcgrps = ethtool_nl_mcgrps,
.n_mcgrps = ARRAY_SIZE(ethtool_nl_mcgrps),
};
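Two things happen in this file: the new PSE_GET/PSE_SET commands are wired into the generic ethnl dispatch (a GET command only needs an ethnl_request_ops entry plus the default doit/start/dumpit/done hooks), and the family gains .resv_start_op. Setting it to ETHTOOL_MSG_MODULE_GET + 1 means reserved-field validation applies only to commands newer than the pre-existing ones, so old binaries that left garbage in those bytes keep working. A hedged sketch of the rule the genetlink core enforces:

#include <net/genetlink.h>

/* Illustrative check only; the real enforcement lives in the
 * genetlink core. For cmd >= resv_start_op, the reserved field of
 * genlmsghdr must be zero so it can be given a meaning later.
 */
static bool example_resv_ok(const struct genlmsghdr *hdr, u8 resv_start_op)
{
        return hdr->cmd < resv_start_op || hdr->reserved == 0;
}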
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 75856db299e9..1bfd374f9718 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -237,7 +237,7 @@ struct ethnl_req_info {
static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info)
{
- dev_put_track(req_info->dev, &req_info->dev_tracker);
+ netdev_put(req_info->dev, &req_info->dev_tracker);
}
/**
@@ -345,6 +345,7 @@ extern const struct ethnl_request_ops ethnl_module_eeprom_request_ops;
extern const struct ethnl_request_ops ethnl_stats_request_ops;
extern const struct ethnl_request_ops ethnl_phc_vclocks_request_ops;
extern const struct ethnl_request_ops ethnl_module_request_ops;
+extern const struct ethnl_request_ops ethnl_pse_request_ops;
extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
@@ -363,7 +364,7 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT
extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1];
extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1];
extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1];
-extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_RX_BUF_LEN + 1];
+extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX_PUSH + 1];
extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1];
extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1];
extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1];
@@ -383,6 +384,8 @@ extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1
extern const struct nla_policy ethnl_phc_vclocks_get_policy[ETHTOOL_A_PHC_VCLOCKS_HEADER + 1];
extern const struct nla_policy ethnl_module_get_policy[ETHTOOL_A_MODULE_HEADER + 1];
extern const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1];
+extern const struct nla_policy ethnl_pse_get_policy[ETHTOOL_A_PSE_HEADER + 1];
+extern const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1];
int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);
int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);
@@ -402,6 +405,7 @@ int ethnl_tunnel_info_start(struct netlink_callback *cb);
int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info);
int ethnl_set_module(struct sk_buff *skb, struct genl_info *info);
+int ethnl_set_pse(struct sk_buff *skb, struct genl_info *info);
extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN];
extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN];
diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
new file mode 100644
index 000000000000..e8683e485dc9
--- /dev/null
+++ b/net/ethtool/pse-pd.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// ethtool interface for Ethernet PSE (Power Sourcing Equipment)
+// and PD (Powered Device)
+//
+// Copyright (c) 2022 Pengutronix, Oleksij Rempel <kernel@pengutronix.de>
+//
+
+#include "common.h"
+#include "linux/pse-pd/pse.h"
+#include "netlink.h"
+#include <linux/ethtool_netlink.h>
+#include <linux/ethtool.h>
+#include <linux/phy.h>
+
+struct pse_req_info {
+ struct ethnl_req_info base;
+};
+
+struct pse_reply_data {
+ struct ethnl_reply_data base;
+ struct pse_control_status status;
+};
+
+#define PSE_REPDATA(__reply_base) \
+ container_of(__reply_base, struct pse_reply_data, base)
+
+/* PSE_GET */
+
+const struct nla_policy ethnl_pse_get_policy[ETHTOOL_A_PSE_HEADER + 1] = {
+ [ETHTOOL_A_PSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+};
+
+static int pse_get_pse_attributes(struct net_device *dev,
+ struct netlink_ext_ack *extack,
+ struct pse_reply_data *data)
+{
+ struct phy_device *phydev = dev->phydev;
+
+ if (!phydev) {
+ NL_SET_ERR_MSG(extack, "No PHY is attached");
+ return -EOPNOTSUPP;
+ }
+
+ if (!phydev->psec) {
+ NL_SET_ERR_MSG(extack, "No PSE is attached");
+ return -EOPNOTSUPP;
+ }
+
+ memset(&data->status, 0, sizeof(data->status));
+
+ return pse_ethtool_get_status(phydev->psec, extack, &data->status);
+}
+
+static int pse_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base,
+ struct genl_info *info)
+{
+ struct pse_reply_data *data = PSE_REPDATA(reply_base);
+ struct net_device *dev = reply_base->dev;
+ int ret;
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ return ret;
+
+ ret = pse_get_pse_attributes(dev, info ? info->extack : NULL, data);
+
+ ethnl_ops_complete(dev);
+
+ return ret;
+}
+
+static int pse_reply_size(const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct pse_reply_data *data = PSE_REPDATA(reply_base);
+ const struct pse_control_status *st = &data->status;
+ int len = 0;
+
+ if (st->podl_admin_state > 0)
+ len += nla_total_size(sizeof(u32)); /* _PODL_PSE_ADMIN_STATE */
+ if (st->podl_pw_status > 0)
+ len += nla_total_size(sizeof(u32)); /* _PODL_PSE_PW_D_STATUS */
+
+ return len;
+}
+
+static int pse_fill_reply(struct sk_buff *skb,
+ const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct pse_reply_data *data = PSE_REPDATA(reply_base);
+ const struct pse_control_status *st = &data->status;
+
+ if (st->podl_admin_state > 0 &&
+ nla_put_u32(skb, ETHTOOL_A_PODL_PSE_ADMIN_STATE,
+ st->podl_admin_state))
+ return -EMSGSIZE;
+
+ if (st->podl_pw_status > 0 &&
+ nla_put_u32(skb, ETHTOOL_A_PODL_PSE_PW_D_STATUS,
+ st->podl_pw_status))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+const struct ethnl_request_ops ethnl_pse_request_ops = {
+ .request_cmd = ETHTOOL_MSG_PSE_GET,
+ .reply_cmd = ETHTOOL_MSG_PSE_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_PSE_HEADER,
+ .req_info_size = sizeof(struct pse_req_info),
+ .reply_data_size = sizeof(struct pse_reply_data),
+
+ .prepare_data = pse_prepare_data,
+ .reply_size = pse_reply_size,
+ .fill_reply = pse_fill_reply,
+};
+
+/* PSE_SET */
+
+const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
+ [ETHTOOL_A_PSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+ [ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] =
+ NLA_POLICY_RANGE(NLA_U32, ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED,
+ ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED),
+};
+
+static int pse_set_pse_config(struct net_device *dev,
+ struct netlink_ext_ack *extack,
+ struct nlattr **tb)
+{
+ struct phy_device *phydev = dev->phydev;
+ struct pse_control_config config = {};
+
+ /* Optional attribute. Do not return error if not set. */
+ if (!tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL])
+ return 0;
+
+ /* These values are already validated by ethnl_pse_set_policy */
+ config.admin_cotrol = nla_get_u32(tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]);
+
+ if (!phydev) {
+ NL_SET_ERR_MSG(extack, "No PHY is attached");
+ return -EOPNOTSUPP;
+ }
+
+ if (!phydev->psec) {
+ NL_SET_ERR_MSG(extack, "No PSE is attached");
+ return -EOPNOTSUPP;
+ }
+
+ return pse_ethtool_set_config(phydev->psec, extack, &config);
+}
+
+int ethnl_set_pse(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ethnl_req_info req_info = {};
+ struct nlattr **tb = info->attrs;
+ struct net_device *dev;
+ int ret;
+
+ ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_PSE_HEADER],
+ genl_info_net(info), info->extack,
+ true);
+ if (ret < 0)
+ return ret;
+
+ dev = req_info.dev;
+
+ rtnl_lock();
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ goto out_rtnl;
+
+ ret = pse_set_pse_config(dev, info->extack, tb);
+ ethnl_ops_complete(dev);
+out_rtnl:
+ rtnl_unlock();
+
+ ethnl_parse_header_dev_put(&req_info);
+
+ return ret;
+}
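pse-pd.c only translates netlink attributes into calls on the pse_control attached to the PHY (phydev->psec); the actual PoDL state machine lives in a PSE controller driver behind pse_ethtool_get_status() and pse_ethtool_set_config(). A hedged sketch of what the driver side of the get path might look like; the callback name and exact signature are assumptions inferred from the shape of this file, not taken from the patch:

#include <linux/pse-pd/pse.h>

/* Hypothetical PSE controller callback: report a port that is
 * administratively enabled and currently delivering power.
 */
static int example_pse_get_status(struct pse_controller_dev *pcdev,
                                  unsigned long id,
                                  struct netlink_ext_ack *extack,
                                  struct pse_control_status *status)
{
        status->podl_admin_state = ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED;
        status->podl_pw_status = ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING;
        return 0;
}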
diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c
index c1d5f5e0fdc9..fa3ec8d438f7 100644
--- a/net/ethtool/rings.c
+++ b/net/ethtool/rings.c
@@ -53,7 +53,10 @@ static int rings_reply_size(const struct ethnl_req_info *req_base,
nla_total_size(sizeof(u32)) + /* _RINGS_RX_MINI */
nla_total_size(sizeof(u32)) + /* _RINGS_RX_JUMBO */
nla_total_size(sizeof(u32)) + /* _RINGS_TX */
- nla_total_size(sizeof(u32)); /* _RINGS_RX_BUF_LEN */
+ nla_total_size(sizeof(u32)) + /* _RINGS_RX_BUF_LEN */
+ nla_total_size(sizeof(u8)) + /* _RINGS_TCP_DATA_SPLIT */
+ nla_total_size(sizeof(u32)) + /* _RINGS_CQE_SIZE */
+ nla_total_size(sizeof(u8)); /* _RINGS_TX_PUSH */
}
static int rings_fill_reply(struct sk_buff *skb,
@@ -61,9 +64,11 @@ static int rings_fill_reply(struct sk_buff *skb,
const struct ethnl_reply_data *reply_base)
{
const struct rings_reply_data *data = RINGS_REPDATA(reply_base);
- const struct kernel_ethtool_ringparam *kernel_ringparam = &data->kernel_ringparam;
+ const struct kernel_ethtool_ringparam *kr = &data->kernel_ringparam;
const struct ethtool_ringparam *ringparam = &data->ringparam;
+ WARN_ON(kr->tcp_data_split > ETHTOOL_TCP_DATA_SPLIT_ENABLED);
+
if ((ringparam->rx_max_pending &&
(nla_put_u32(skb, ETHTOOL_A_RINGS_RX_MAX,
ringparam->rx_max_pending) ||
@@ -84,9 +89,14 @@ static int rings_fill_reply(struct sk_buff *skb,
ringparam->tx_max_pending) ||
nla_put_u32(skb, ETHTOOL_A_RINGS_TX,
ringparam->tx_pending))) ||
- (kernel_ringparam->rx_buf_len &&
- (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_BUF_LEN,
- kernel_ringparam->rx_buf_len))))
+ (kr->rx_buf_len &&
+ (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_BUF_LEN, kr->rx_buf_len))) ||
+ (kr->tcp_data_split &&
+ (nla_put_u8(skb, ETHTOOL_A_RINGS_TCP_DATA_SPLIT,
+ kr->tcp_data_split))) ||
+ (kr->cqe_size &&
+ (nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size))) ||
+ nla_put_u8(skb, ETHTOOL_A_RINGS_TX_PUSH, !!kr->tx_push))
return -EMSGSIZE;
return 0;
@@ -114,6 +124,8 @@ const struct nla_policy ethnl_rings_set_policy[] = {
[ETHTOOL_A_RINGS_RX_JUMBO] = { .type = NLA_U32 },
[ETHTOOL_A_RINGS_TX] = { .type = NLA_U32 },
[ETHTOOL_A_RINGS_RX_BUF_LEN] = NLA_POLICY_MIN(NLA_U32, 1),
+ [ETHTOOL_A_RINGS_CQE_SIZE] = NLA_POLICY_MIN(NLA_U32, 1),
+ [ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1),
};
int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info)
@@ -140,6 +152,33 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info)
if (!ops->get_ringparam || !ops->set_ringparam)
goto out_dev;
+ if (tb[ETHTOOL_A_RINGS_RX_BUF_LEN] &&
+ !(ops->supported_ring_params & ETHTOOL_RING_USE_RX_BUF_LEN)) {
+ ret = -EOPNOTSUPP;
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_RINGS_RX_BUF_LEN],
+ "setting rx buf len not supported");
+ goto out_dev;
+ }
+
+ if (tb[ETHTOOL_A_RINGS_CQE_SIZE] &&
+ !(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) {
+ ret = -EOPNOTSUPP;
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_RINGS_CQE_SIZE],
+ "setting cqe size not supported");
+ goto out_dev;
+ }
+
+ if (tb[ETHTOOL_A_RINGS_TX_PUSH] &&
+ !(ops->supported_ring_params & ETHTOOL_RING_USE_TX_PUSH)) {
+ ret = -EOPNOTSUPP;
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_RINGS_TX_PUSH],
+ "setting tx push not supported");
+ goto out_dev;
+ }
+
rtnl_lock();
ret = ethnl_ops_begin(dev);
if (ret < 0)
@@ -154,6 +193,10 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info)
ethnl_update_u32(&ringparam.tx_pending, tb[ETHTOOL_A_RINGS_TX], &mod);
ethnl_update_u32(&kernel_ringparam.rx_buf_len,
tb[ETHTOOL_A_RINGS_RX_BUF_LEN], &mod);
+ ethnl_update_u32(&kernel_ringparam.cqe_size,
+ tb[ETHTOOL_A_RINGS_CQE_SIZE], &mod);
+ ethnl_update_u8(&kernel_ringparam.tx_push,
+ tb[ETHTOOL_A_RINGS_TX_PUSH], &mod);
ret = 0;
if (!mod)
goto out_ops;
@@ -176,15 +219,6 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info)
goto out_ops;
}
- if (kernel_ringparam.rx_buf_len != 0 &&
- !(ops->supported_ring_params & ETHTOOL_RING_USE_RX_BUF_LEN)) {
- ret = -EOPNOTSUPP;
- NL_SET_ERR_MSG_ATTR(info->extack,
- tb[ETHTOOL_A_RINGS_RX_BUF_LEN],
- "setting rx buf len not supported");
- goto out_ops;
- }
-
ret = dev->ethtool_ops->set_ringparam(dev, &ringparam,
&kernel_ringparam, info->extack);
if (ret < 0)
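Two changes here: the supported_ring_params checks move before rtnl_lock(), so unsupported requests fail fast without taking the lock, and the new CQE_SIZE/TX_PUSH attributes flow through the ethnl_update_* helpers. Those helpers only touch the target and set *mod when the attribute is present and actually changes the value, which is what lets an effectively empty SET return without calling the driver. A behavioral sketch, mirroring the helper in net/ethtool/netlink.h:

#include <net/netlink.h>

/* Copy a u32 attribute into dst only if present and different,
 * recording in *mod whether the driver needs to be called at all.
 */
static void example_update_u32(u32 *dst, const struct nlattr *attr, bool *mod)
{
        if (!attr || *dst == nla_get_u32(attr))
                return;
        *dst = nla_get_u32(attr);
        *mod = true;
}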
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
index 2d51b7ab4dc5..3f7de54d85fb 100644
--- a/net/ethtool/strset.c
+++ b/net/ethtool/strset.c
@@ -167,7 +167,7 @@ static int strset_get_id(const struct nlattr *nest, u32 *val,
get_stringset_policy, extack);
if (ret < 0)
return ret;
- if (!tb[ETHTOOL_A_STRINGSET_ID])
+ if (NL_REQ_ATTR_CHECK(extack, nest, tb, ETHTOOL_A_STRINGSET_ID))
return -EINVAL;
*val = nla_get_u32(tb[ETHTOOL_A_STRINGSET_ID]);
diff --git a/net/ethtool/tunnels.c b/net/ethtool/tunnels.c
index efde33536687..67fb414ca859 100644
--- a/net/ethtool/tunnels.c
+++ b/net/ethtool/tunnels.c
@@ -136,6 +136,8 @@ ethnl_tunnel_info_fill_reply(const struct ethnl_req_info *req_base,
goto err_cancel_table;
entry = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY);
+ if (!entry)
+ goto err_cancel_entry;
if (nla_put_be16(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT,
htons(IANA_VXLAN_UDP_PORT)) ||
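The added NULL check covers the one nla_nest_start() call in this function that was previously unchecked; nesting can fail once the skb runs out of tailroom, and an unfinished nest must be cancelled rather than ended. A hedged sketch of the full lifecycle around such an entry:

#include <linux/ethtool_netlink.h>
#include <net/netlink.h>
#include <net/vxlan.h>

/* Start a nest, put an attribute, and either end or cancel the nest:
 * a minimal sketch of the pattern the fix above completes.
 */
static int example_fill_entry(struct sk_buff *skb)
{
        struct nlattr *entry;

        entry = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY);
        if (!entry)
                return -EMSGSIZE;
        if (nla_put_be16(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT,
                         htons(IANA_VXLAN_UDP_PORT))) {
                nla_nest_cancel(skb, entry);
                return -EMSGSIZE;
        }
        nla_nest_end(skb, entry);
        return 0;
}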
diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c
index 99f3af1a9d4d..de476a417631 100644
--- a/net/hsr/hsr_debugfs.c
+++ b/net/hsr/hsr_debugfs.c
@@ -1,22 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* debugfs code for HSR & PRP
* Copyright (C) 2019 Texas Instruments Incorporated
*
* Author(s):
* Murali Karicheri <m-karicheri2@ti.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/debugfs.h>
+#include <linux/jhash.h>
#include "hsr_main.h"
#include "hsr_framereg.h"
@@ -28,6 +21,7 @@ hsr_node_table_show(struct seq_file *sfp, void *data)
{
struct hsr_priv *priv = (struct hsr_priv *)sfp->private;
struct hsr_node *node;
+ int i;
seq_printf(sfp, "Node Table entries for (%s) device\n",
(priv->prot_version == PRP_V1 ? "PRP" : "HSR"));
@@ -39,22 +33,28 @@ hsr_node_table_show(struct seq_file *sfp, void *data)
seq_puts(sfp, "DAN-H\n");
rcu_read_lock();
- list_for_each_entry_rcu(node, &priv->node_db, mac_list) {
- /* skip self node */
- if (hsr_addr_is_self(priv, node->macaddress_A))
- continue;
- seq_printf(sfp, "%pM ", &node->macaddress_A[0]);
- seq_printf(sfp, "%pM ", &node->macaddress_B[0]);
- seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_A]);
- seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_B]);
- seq_printf(sfp, "%14x, ", node->addr_B_port);
-
- if (priv->prot_version == PRP_V1)
- seq_printf(sfp, "%5x, %5x, %5x\n",
- node->san_a, node->san_b,
- (node->san_a == 0 && node->san_b == 0));
- else
- seq_printf(sfp, "%5x\n", 1);
+
+ for (i = 0; i < priv->hash_buckets; i++) {
+ hlist_for_each_entry_rcu(node, &priv->node_db[i], mac_list) {
+ /* skip self node */
+ if (hsr_addr_is_self(priv, node->macaddress_A))
+ continue;
+ seq_printf(sfp, "%pM ", &node->macaddress_A[0]);
+ seq_printf(sfp, "%pM ", &node->macaddress_B[0]);
+ seq_printf(sfp, "%10lx, ",
+ node->time_in[HSR_PT_SLAVE_A]);
+ seq_printf(sfp, "%10lx, ",
+ node->time_in[HSR_PT_SLAVE_B]);
+ seq_printf(sfp, "%14x, ", node->addr_B_port);
+
+ if (priv->prot_version == PRP_V1)
+ seq_printf(sfp, "%5x, %5x, %5x\n",
+ node->san_a, node->san_b,
+ (node->san_a == 0 &&
+ node->san_b == 0));
+ else
+ seq_printf(sfp, "%5x\n", 1);
+ }
}
rcu_read_unlock();
return 0;
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index e57fdad9ef94..6ffef47e9be5 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -221,7 +221,7 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
skb_reset_mac_len(skb);
hsr_forward_skb(skb, master);
} else {
- atomic_long_inc(&dev->tx_dropped);
+ dev_core_stats_tx_dropped_inc(dev);
dev_kfree_skb_any(skb);
}
return NETDEV_TX_OK;
@@ -485,12 +485,16 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
{
bool unregister = false;
struct hsr_priv *hsr;
- int res;
+ int res, i;
hsr = netdev_priv(hsr_dev);
INIT_LIST_HEAD(&hsr->ports);
- INIT_LIST_HEAD(&hsr->node_db);
- INIT_LIST_HEAD(&hsr->self_node_db);
+ INIT_HLIST_HEAD(&hsr->self_node_db);
+ hsr->hash_buckets = HSR_HSIZE;
+ get_random_bytes(&hsr->hash_seed, sizeof(hsr->hash_seed));
+ for (i = 0; i < hsr->hash_buckets; i++)
+ INIT_HLIST_HEAD(&hsr->node_db[i]);
+
spin_lock_init(&hsr->list_lock);
eth_hw_addr_set(hsr_dev, slave[0]->dev_addr);
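Besides switching the node database to hlist buckets seeded from get_random_bytes(), the xmit error path moves from the atomic tx_dropped counter on struct net_device to dev_core_stats_tx_dropped_inc(), which bumps a lazily allocated per-CPU counter and avoids bouncing a shared cache line on every drop. A hedged sketch of the drop path:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Per-CPU drop accounting: cheap on the fast path, folded back into
 * the device stats when they are read.
 */
static void example_drop(struct net_device *dev, struct sk_buff *skb)
{
        dev_core_stats_tx_dropped_inc(dev);
        dev_kfree_skb_any(skb);
}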
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index e59cbb4f0cd1..a50429a62f74 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -150,15 +150,15 @@ struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame,
struct hsr_port *port)
{
if (!frame->skb_std) {
- if (frame->skb_hsr) {
+ if (frame->skb_hsr)
frame->skb_std =
create_stripped_skb_hsr(frame->skb_hsr, frame);
- } else {
- /* Unexpected */
- WARN_ONCE(1, "%s:%d: Unexpected frame received (port_src %s)\n",
- __FILE__, __LINE__, port->dev->name);
+ else
+ netdev_warn_once(port->dev,
+ "Unexpected frame received in hsr_get_untagged_frame()\n");
+
+ if (!frame->skb_std)
return NULL;
- }
}
return skb_clone(frame->skb_std, GFP_ATOMIC);
@@ -570,20 +570,23 @@ static int fill_frame_info(struct hsr_frame_info *frame,
struct ethhdr *ethhdr;
__be16 proto;
int ret;
+ u32 hash;
/* Check if skb contains ethhdr */
if (skb->mac_len < sizeof(struct ethhdr))
return -EINVAL;
memset(frame, 0, sizeof(*frame));
+
+ ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ hash = hsr_mac_hash(port->hsr, ethhdr->h_source);
frame->is_supervision = is_supervision_frame(port->hsr, skb);
- frame->node_src = hsr_get_node(port, &hsr->node_db, skb,
+ frame->node_src = hsr_get_node(port, &hsr->node_db[hash], skb,
frame->is_supervision,
port->type);
if (!frame->node_src)
return -1; /* Unknown node and !is_supervision, or no mem */
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
frame->is_vlan = false;
proto = ethhdr->h_proto;
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 0775f0f95dbf..584e21788799 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -15,11 +15,36 @@
#include <linux/etherdevice.h>
#include <linux/slab.h>
#include <linux/rculist.h>
+#include <linux/jhash.h>
#include "hsr_main.h"
#include "hsr_framereg.h"
#include "hsr_netlink.h"
-/* TODO: use hash lists for mac addresses (linux/jhash.h)? */
+#ifdef CONFIG_LOCKDEP
+int lockdep_hsr_is_held(spinlock_t *lock)
+{
+ return lockdep_is_held(lock);
+}
+#endif
+
+u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr)
+{
+ u32 hash = jhash(addr, ETH_ALEN, hsr->hash_seed);
+
+ return reciprocal_scale(hash, hsr->hash_buckets);
+}
+
+struct hsr_node *hsr_node_get_first(struct hlist_head *head, spinlock_t *lock)
+{
+ struct hlist_node *first;
+
+ first = rcu_dereference_bh_check(hlist_first_rcu(head),
+ lockdep_hsr_is_held(lock));
+ if (first)
+ return hlist_entry(first, struct hsr_node, mac_list);
+
+ return NULL;
+}
/* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b,
* false otherwise.
@@ -42,8 +67,7 @@ bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr)
{
struct hsr_node *node;
- node = list_first_or_null_rcu(&hsr->self_node_db, struct hsr_node,
- mac_list);
+ node = hsr_node_get_first(&hsr->self_node_db, &hsr->list_lock);
if (!node) {
WARN_ONCE(1, "HSR: No self node\n");
return false;
@@ -59,12 +83,12 @@ bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr)
/* Search for mac entry. Caller must hold rcu read lock.
*/
-static struct hsr_node *find_node_by_addr_A(struct list_head *node_db,
+static struct hsr_node *find_node_by_addr_A(struct hlist_head *node_db,
const unsigned char addr[ETH_ALEN])
{
struct hsr_node *node;
- list_for_each_entry_rcu(node, node_db, mac_list) {
+ hlist_for_each_entry_rcu(node, node_db, mac_list) {
if (ether_addr_equal(node->macaddress_A, addr))
return node;
}
@@ -79,7 +103,7 @@ int hsr_create_self_node(struct hsr_priv *hsr,
const unsigned char addr_a[ETH_ALEN],
const unsigned char addr_b[ETH_ALEN])
{
- struct list_head *self_node_db = &hsr->self_node_db;
+ struct hlist_head *self_node_db = &hsr->self_node_db;
struct hsr_node *node, *oldnode;
node = kmalloc(sizeof(*node), GFP_KERNEL);
@@ -90,14 +114,13 @@ int hsr_create_self_node(struct hsr_priv *hsr,
ether_addr_copy(node->macaddress_B, addr_b);
spin_lock_bh(&hsr->list_lock);
- oldnode = list_first_or_null_rcu(self_node_db,
- struct hsr_node, mac_list);
+ oldnode = hsr_node_get_first(self_node_db, &hsr->list_lock);
if (oldnode) {
- list_replace_rcu(&oldnode->mac_list, &node->mac_list);
+ hlist_replace_rcu(&oldnode->mac_list, &node->mac_list);
spin_unlock_bh(&hsr->list_lock);
kfree_rcu(oldnode, rcu_head);
} else {
- list_add_tail_rcu(&node->mac_list, self_node_db);
+ hlist_add_tail_rcu(&node->mac_list, self_node_db);
spin_unlock_bh(&hsr->list_lock);
}
@@ -106,25 +129,25 @@ int hsr_create_self_node(struct hsr_priv *hsr,
void hsr_del_self_node(struct hsr_priv *hsr)
{
- struct list_head *self_node_db = &hsr->self_node_db;
+ struct hlist_head *self_node_db = &hsr->self_node_db;
struct hsr_node *node;
spin_lock_bh(&hsr->list_lock);
- node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list);
+ node = hsr_node_get_first(self_node_db, &hsr->list_lock);
if (node) {
- list_del_rcu(&node->mac_list);
+ hlist_del_rcu(&node->mac_list);
kfree_rcu(node, rcu_head);
}
spin_unlock_bh(&hsr->list_lock);
}
-void hsr_del_nodes(struct list_head *node_db)
+void hsr_del_nodes(struct hlist_head *node_db)
{
struct hsr_node *node;
- struct hsr_node *tmp;
+ struct hlist_node *tmp;
- list_for_each_entry_safe(node, tmp, node_db, mac_list)
- kfree(node);
+ hlist_for_each_entry_safe(node, tmp, node_db, mac_list)
+ kfree_rcu(node, rcu_head);
}
void prp_handle_san_frame(bool san, enum hsr_port_type port,
@@ -145,7 +168,7 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port,
* originating from the newly added node.
*/
static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
- struct list_head *node_db,
+ struct hlist_head *node_db,
unsigned char addr[],
u16 seq_out, bool san,
enum hsr_port_type rx_port)
@@ -175,14 +198,14 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
hsr->proto_ops->handle_san_frame(san, rx_port, new_node);
spin_lock_bh(&hsr->list_lock);
- list_for_each_entry_rcu(node, node_db, mac_list,
- lockdep_is_held(&hsr->list_lock)) {
+ hlist_for_each_entry_rcu(node, node_db, mac_list,
+ lockdep_hsr_is_held(&hsr->list_lock)) {
if (ether_addr_equal(node->macaddress_A, addr))
goto out;
if (ether_addr_equal(node->macaddress_B, addr))
goto out;
}
- list_add_tail_rcu(&new_node->mac_list, node_db);
+ hlist_add_tail_rcu(&new_node->mac_list, node_db);
spin_unlock_bh(&hsr->list_lock);
return new_node;
out:
@@ -202,7 +225,7 @@ void prp_update_san_info(struct hsr_node *node, bool is_sup)
/* Get the hsr_node from which 'skb' was sent.
*/
-struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
+struct hsr_node *hsr_get_node(struct hsr_port *port, struct hlist_head *node_db,
struct sk_buff *skb, bool is_sup,
enum hsr_port_type rx_port)
{
@@ -218,7 +241,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
ethhdr = (struct ethhdr *)skb_mac_header(skb);
- list_for_each_entry_rcu(node, node_db, mac_list) {
+ hlist_for_each_entry_rcu(node, node_db, mac_list) {
if (ether_addr_equal(node->macaddress_A, ethhdr->h_source)) {
if (hsr->proto_ops->update_san_info)
hsr->proto_ops->update_san_info(node, is_sup);
@@ -268,11 +291,12 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
struct hsr_sup_tlv *hsr_sup_tlv;
struct hsr_node *node_real;
struct sk_buff *skb = NULL;
- struct list_head *node_db;
+ struct hlist_head *node_db;
struct ethhdr *ethhdr;
int i;
unsigned int pull_size = 0;
unsigned int total_pull_size = 0;
+ u32 hash;
/* Here either frame->skb_hsr or frame->skb_prp should be
* valid as supervision frame always will have protocol
@@ -310,11 +334,13 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
hsr_sp = (struct hsr_sup_payload *)skb->data;
/* Merge node_curr (registered on macaddress_B) into node_real */
- node_db = &port_rcv->hsr->node_db;
- node_real = find_node_by_addr_A(node_db, hsr_sp->macaddress_A);
+ node_db = port_rcv->hsr->node_db;
+ hash = hsr_mac_hash(hsr, hsr_sp->macaddress_A);
+ node_real = find_node_by_addr_A(&node_db[hash], hsr_sp->macaddress_A);
if (!node_real)
/* No frame received from AddrA of this node yet */
- node_real = hsr_add_node(hsr, node_db, hsr_sp->macaddress_A,
+ node_real = hsr_add_node(hsr, &node_db[hash],
+ hsr_sp->macaddress_A,
HSR_SEQNR_START - 1, true,
port_rcv->type);
if (!node_real)
@@ -348,7 +374,8 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
hsr_sp = (struct hsr_sup_payload *)skb->data;
/* Check if redbox mac and node mac are equal. */
- if (!ether_addr_equal(node_real->macaddress_A, hsr_sp->macaddress_A)) {
+ if (!ether_addr_equal(node_real->macaddress_A,
+ hsr_sp->macaddress_A)) {
/* This is a redbox supervision frame for a VDAN! */
goto done;
}
@@ -368,7 +395,7 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
node_real->addr_B_port = port_rcv->type;
spin_lock_bh(&hsr->list_lock);
- list_del_rcu(&node_curr->mac_list);
+ hlist_del_rcu(&node_curr->mac_list);
spin_unlock_bh(&hsr->list_lock);
kfree_rcu(node_curr, rcu_head);
@@ -406,6 +433,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
struct hsr_port *port)
{
struct hsr_node *node_dst;
+ u32 hash;
if (!skb_mac_header_was_set(skb)) {
WARN_ONCE(1, "%s: Mac header not set\n", __func__);
@@ -415,7 +443,8 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
if (!is_unicast_ether_addr(eth_hdr(skb)->h_dest))
return;
- node_dst = find_node_by_addr_A(&port->hsr->node_db,
+ hash = hsr_mac_hash(port->hsr, eth_hdr(skb)->h_dest);
+ node_dst = find_node_by_addr_A(&port->hsr->node_db[hash],
eth_hdr(skb)->h_dest);
if (!node_dst) {
if (net_ratelimit())
@@ -491,59 +520,73 @@ static struct hsr_port *get_late_port(struct hsr_priv *hsr,
void hsr_prune_nodes(struct timer_list *t)
{
struct hsr_priv *hsr = from_timer(hsr, t, prune_timer);
+ struct hlist_node *tmp;
struct hsr_node *node;
- struct hsr_node *tmp;
struct hsr_port *port;
unsigned long timestamp;
unsigned long time_a, time_b;
+ int i;
spin_lock_bh(&hsr->list_lock);
- list_for_each_entry_safe(node, tmp, &hsr->node_db, mac_list) {
- /* Don't prune own node. Neither time_in[HSR_PT_SLAVE_A]
- * nor time_in[HSR_PT_SLAVE_B], will ever be updated for
- * the master port. Thus the master node will be repeatedly
- * pruned leading to packet loss.
- */
- if (hsr_addr_is_self(hsr, node->macaddress_A))
- continue;
-
- /* Shorthand */
- time_a = node->time_in[HSR_PT_SLAVE_A];
- time_b = node->time_in[HSR_PT_SLAVE_B];
-
- /* Check for timestamps old enough to risk wrap-around */
- if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET / 2))
- node->time_in_stale[HSR_PT_SLAVE_A] = true;
- if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET / 2))
- node->time_in_stale[HSR_PT_SLAVE_B] = true;
-
- /* Get age of newest frame from node.
- * At least one time_in is OK here; nodes get pruned long
- * before both time_ins can get stale
- */
- timestamp = time_a;
- if (node->time_in_stale[HSR_PT_SLAVE_A] ||
- (!node->time_in_stale[HSR_PT_SLAVE_B] &&
- time_after(time_b, time_a)))
- timestamp = time_b;
-
- /* Warn of ring error only as long as we get frames at all */
- if (time_is_after_jiffies(timestamp +
- msecs_to_jiffies(1.5 * MAX_SLAVE_DIFF))) {
- rcu_read_lock();
- port = get_late_port(hsr, node);
- if (port)
- hsr_nl_ringerror(hsr, node->macaddress_A, port);
- rcu_read_unlock();
- }
- /* Prune old entries */
- if (time_is_before_jiffies(timestamp +
- msecs_to_jiffies(HSR_NODE_FORGET_TIME))) {
- hsr_nl_nodedown(hsr, node->macaddress_A);
- list_del_rcu(&node->mac_list);
- /* Note that we need to free this entry later: */
- kfree_rcu(node, rcu_head);
+ for (i = 0; i < hsr->hash_buckets; i++) {
+ hlist_for_each_entry_safe(node, tmp, &hsr->node_db[i],
+ mac_list) {
+ /* Don't prune own node.
+ * Neither time_in[HSR_PT_SLAVE_A]
+ * nor time_in[HSR_PT_SLAVE_B], will ever be updated
+ * for the master port. Thus the master node will be
+ * repeatedly pruned leading to packet loss.
+ */
+ if (hsr_addr_is_self(hsr, node->macaddress_A))
+ continue;
+
+ /* Shorthand */
+ time_a = node->time_in[HSR_PT_SLAVE_A];
+ time_b = node->time_in[HSR_PT_SLAVE_B];
+
+ /* Check for timestamps old enough to
+ * risk wrap-around
+ */
+ if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET / 2))
+ node->time_in_stale[HSR_PT_SLAVE_A] = true;
+ if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET / 2))
+ node->time_in_stale[HSR_PT_SLAVE_B] = true;
+
+ /* Get age of newest frame from node.
+ * At least one time_in is OK here; nodes get pruned
+ * long before both time_ins can get stale
+ */
+ timestamp = time_a;
+ if (node->time_in_stale[HSR_PT_SLAVE_A] ||
+ (!node->time_in_stale[HSR_PT_SLAVE_B] &&
+ time_after(time_b, time_a)))
+ timestamp = time_b;
+
+ /* Warn of ring error only as long as we get
+ * frames at all
+ */
+ if (time_is_after_jiffies(timestamp +
+ msecs_to_jiffies(1.5 * MAX_SLAVE_DIFF))) {
+ rcu_read_lock();
+ port = get_late_port(hsr, node);
+ if (port)
+ hsr_nl_ringerror(hsr,
+ node->macaddress_A,
+ port);
+ rcu_read_unlock();
+ }
+
+ /* Prune old entries */
+ if (time_is_before_jiffies(timestamp +
+ msecs_to_jiffies(HSR_NODE_FORGET_TIME))) {
+ hsr_nl_nodedown(hsr, node->macaddress_A);
+ hlist_del_rcu(&node->mac_list);
+ /* Note that we need to free this
+ * entry later:
+ */
+ kfree_rcu(node, rcu_head);
+ }
}
}
spin_unlock_bh(&hsr->list_lock);
@@ -557,17 +600,20 @@ void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos,
unsigned char addr[ETH_ALEN])
{
struct hsr_node *node;
+ u32 hash;
+
+ hash = hsr_mac_hash(hsr, addr);
if (!_pos) {
- node = list_first_or_null_rcu(&hsr->node_db,
- struct hsr_node, mac_list);
+ node = hsr_node_get_first(&hsr->node_db[hash],
+ &hsr->list_lock);
if (node)
ether_addr_copy(addr, node->macaddress_A);
return node;
}
node = _pos;
- list_for_each_entry_continue_rcu(node, &hsr->node_db, mac_list) {
+ hlist_for_each_entry_continue_rcu(node, mac_list) {
ether_addr_copy(addr, node->macaddress_A);
return node;
}
@@ -587,8 +633,11 @@ int hsr_get_node_data(struct hsr_priv *hsr,
struct hsr_node *node;
struct hsr_port *port;
unsigned long tdiff;
+ u32 hash;
+
+ hash = hsr_mac_hash(hsr, addr);
- node = find_node_by_addr_A(&hsr->node_db, addr);
+ node = find_node_by_addr_A(&hsr->node_db[hash], addr);
if (!node)
return -ENOENT;
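The flat node list becomes a hash table of HSR_HSIZE (256) buckets, declared in hsr_main.h further down. hsr_mac_hash() mixes the MAC address with a per-device random seed via jhash(), then maps the 32-bit result onto the bucket range with reciprocal_scale(), a multiply-shift that avoids a division: reciprocal_scale(h, n) == (u32)(((u64)h * n) >> 32). A standalone sketch of the bucket computation:

#include <linux/if_ether.h>
#include <linux/jhash.h>
#include <linux/kernel.h>

/* Same computation as hsr_mac_hash(): seeded hash of the MAC, scaled
 * into [0, buckets) without a modulo.
 */
static u32 example_mac_bucket(const u8 *addr, u32 seed, u32 buckets)
{
        return reciprocal_scale(jhash(addr, ETH_ALEN, seed), buckets);
}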
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index bdbb8c822ba1..f3762e9e42b5 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -28,9 +28,17 @@ struct hsr_frame_info {
bool is_from_san;
};
+#ifdef CONFIG_LOCKDEP
+int lockdep_hsr_is_held(spinlock_t *lock);
+#else
+#define lockdep_hsr_is_held(lock) 1
+#endif
+
+u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr);
+struct hsr_node *hsr_node_get_first(struct hlist_head *head, spinlock_t *lock);
void hsr_del_self_node(struct hsr_priv *hsr);
-void hsr_del_nodes(struct list_head *node_db);
-struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
+void hsr_del_nodes(struct hlist_head *node_db);
+struct hsr_node *hsr_get_node(struct hsr_port *port, struct hlist_head *node_db,
struct sk_buff *skb, bool is_sup,
enum hsr_port_type rx_port);
void hsr_handle_sup_frame(struct hsr_frame_info *frame);
@@ -68,7 +76,7 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port,
void prp_update_san_info(struct hsr_node *node, bool is_sup);
struct hsr_node {
- struct list_head mac_list;
+ struct hlist_node mac_list;
unsigned char macaddress_A[ETH_ALEN];
unsigned char macaddress_B[ETH_ALEN];
/* Local slave through which AddrB frames are received from this node */
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 043e4e9a1694..b158ba409f9a 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -45,24 +45,11 @@
/* PRP V1 life redundancy box MAC address */
#define PRP_TLV_REDBOX_MAC 30
-/* HSR Tag.
- * As defined in IEC-62439-3:2010, the HSR tag is really { ethertype = 0x88FB,
- * path, LSDU_size, sequence Nr }. But we let eth_header() create { h_dest,
- * h_source, h_proto = 0x88FB }, and add { path, LSDU_size, sequence Nr,
- * encapsulated protocol } instead.
- *
- * Field names as defined in the IEC:2010 standard for HSR.
- */
-struct hsr_tag {
- __be16 path_and_LSDU_size;
- __be16 sequence_nr;
- __be16 encap_proto;
-} __packed;
-
-#define HSR_HLEN 6
-
#define HSR_V1_SUP_LSDUSIZE 52
+#define HSR_HSIZE_SHIFT 8
+#define HSR_HSIZE BIT(HSR_HSIZE_SHIFT)
+
/* The helper functions below assume that 'path' occupies the 4 most
* significant bits of the 16-bit field shared by 'path' and 'LSDU_size' (or
* equivalently, the 4 most significant bits of HSR tag byte 14).
@@ -201,8 +188,8 @@ struct hsr_proto_ops {
struct hsr_priv {
struct rcu_head rcu_head;
struct list_head ports;
- struct list_head node_db; /* Known HSR nodes */
- struct list_head self_node_db; /* MACs of slaves */
+ struct hlist_head node_db[HSR_HSIZE]; /* Known HSR nodes */
+ struct hlist_head self_node_db; /* MACs of slaves */
struct timer_list announce_timer; /* Supervision frame dispatch */
struct timer_list prune_timer;
int announce_count;
@@ -212,6 +199,8 @@ struct hsr_priv {
spinlock_t seqnr_lock; /* locking for sequence_nr */
spinlock_t list_lock; /* locking for node list */
struct hsr_proto_ops *proto_ops;
+ u32 hash_buckets;
+ u32 hash_seed;
#define PRP_LAN_ID 0x5 /* 0x1010 for A and 0x1011 for B. Bit 0 is set
* based on SLAVE_A or SLAVE_B
*/
@@ -259,11 +248,6 @@ static inline u16 prp_get_skb_sequence_nr(struct prp_rct *rct)
return ntohs(rct->sequence_nr);
}
-static inline u16 get_prp_lan_id(struct prp_rct *rct)
-{
- return ntohs(rct->lan_id_and_LSDU_size) >> 12;
-}
-
/* assume there is a valid rct */
static inline bool prp_check_lsdu_size(struct sk_buff *skb,
struct prp_rct *rct,
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index f3c8f91dbe2c..7174a9092900 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -105,6 +105,7 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
static void hsr_dellink(struct net_device *dev, struct list_head *head)
{
struct hsr_priv *hsr = netdev_priv(dev);
+ int i;
del_timer_sync(&hsr->prune_timer);
del_timer_sync(&hsr->announce_timer);
@@ -113,7 +114,8 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head)
hsr_del_ports(hsr);
hsr_del_self_node(hsr);
- hsr_del_nodes(&hsr->node_db);
+ for (i = 0; i < hsr->hash_buckets; i++)
+ hsr_del_nodes(&hsr->node_db[i]);
unregister_netdevice_queue(dev, head);
}
@@ -520,6 +522,7 @@ static struct genl_family hsr_genl_family __ro_after_init = {
.module = THIS_MODULE,
.small_ops = hsr_ops,
.n_small_ops = ARRAY_SIZE(hsr_ops),
+ .resv_start_op = HSR_C_SET_NODE_LIST + 1,
.mcgrps = hsr_mcgrps,
.n_mcgrps = ARRAY_SIZE(hsr_mcgrps),
};
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 2cf62718a282..2c087b7f17c5 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -47,6 +47,7 @@
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/ieee802154.h>
+#include <linux/if_arp.h>
#include <net/ipv6.h>
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index be6f06adefe0..a91283d1e5bf 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -130,6 +130,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
goto err;
fq->q.stamp = skb->tstamp;
+ fq->q.mono_delivery_time = skb->mono_delivery_time;
if (frag_type == LOWPAN_DISPATCH_FRAG1)
fq->q.flags |= INET_FRAG_FIRST_IN;
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index b07abc38b4b3..7d2de4ee6992 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -132,6 +132,7 @@ struct genl_family nl802154_family __ro_after_init = {
.module = THIS_MODULE,
.small_ops = ieee802154_ops,
.n_small_ops = ARRAY_SIZE(ieee802154_ops),
+ .resv_start_op = IEEE802154_LLSEC_DEL_SECLEVEL + 1,
.mcgrps = ieee802154_mcgrps,
.n_mcgrps = ARRAY_SIZE(ieee802154_mcgrps),
};
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index dd5a45f8a78a..359249ab77bf 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -30,7 +30,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid,
{
void *hdr;
int i, pages = 0;
- uint32_t *buf = kcalloc(32, sizeof(uint32_t), GFP_KERNEL);
+ u32 *buf = kcalloc(IEEE802154_MAX_PAGE + 1, sizeof(u32), GFP_KERNEL);
pr_debug("%s\n", __func__);
@@ -47,7 +47,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid,
nla_put_u8(msg, IEEE802154_ATTR_PAGE, phy->current_page) ||
nla_put_u8(msg, IEEE802154_ATTR_CHANNEL, phy->current_channel))
goto nla_put_failure;
- for (i = 0; i < 32; i++) {
+ for (i = 0; i <= IEEE802154_MAX_PAGE; i++) {
if (phy->supported.channels[i])
buf[pages++] = phy->supported.channels[i] | (i << 27);
}
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 277124f206e0..38c4f3cb010e 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -1441,7 +1441,7 @@ static int nl802154_send_key(struct sk_buff *msg, u32 cmd, u32 portid,
hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
- return -1;
+ return -ENOBUFS;
if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
@@ -1634,7 +1634,7 @@ static int nl802154_send_device(struct sk_buff *msg, u32 cmd, u32 portid,
hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
- return -1;
+ return -ENOBUFS;
if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
@@ -1812,7 +1812,7 @@ static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid,
hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
- return -1;
+ return -ENOBUFS;
if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
@@ -1988,7 +1988,7 @@ static int nl802154_send_seclevel(struct sk_buff *msg, u32 cmd, u32 portid,
hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
- return -1;
+ return -ENOBUFS;
if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
@@ -2500,6 +2500,7 @@ static struct genl_family nl802154_fam __ro_after_init = {
.module = THIS_MODULE,
.ops = nl802154_ops,
.n_ops = ARRAY_SIZE(nl802154_ops),
+ .resv_start_op = NL802154_CMD_DEL_SEC_LEVEL + 1,
.mcgrps = nl802154_mcgrps,
.n_mcgrps = ARRAY_SIZE(nl802154_mcgrps),
};
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index 3b2366a88c3c..1fa2fe041ec0 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -200,8 +200,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len)
int err = 0;
struct net_device *dev = NULL;
- if (len < sizeof(*uaddr))
- return -EINVAL;
+ err = ieee802154_sockaddr_check_size(uaddr, len);
+ if (err < 0)
+ return err;
uaddr = (struct sockaddr_ieee802154 *)_uaddr;
if (uaddr->family != AF_IEEE802154)
@@ -271,6 +272,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
err = -EMSGSIZE;
goto out_dev;
}
+ if (!size) {
+ err = 0;
+ goto out_dev;
+ }
hlen = LL_RESERVED_SPACE(dev);
tlen = dev->needed_tailroom;
@@ -308,13 +313,13 @@ out:
}
static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
size_t copied = 0;
int err = -EOPNOTSUPP;
struct sk_buff *skb;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
@@ -328,7 +333,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (err)
goto done;
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (flags & MSG_TRUNC)
copied = skb->len;
@@ -493,11 +498,14 @@ static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len)
ro->bound = 0;
- if (len < sizeof(*addr))
+ err = ieee802154_sockaddr_check_size(addr, len);
+ if (err < 0)
goto out;
- if (addr->family != AF_IEEE802154)
+ if (addr->family != AF_IEEE802154) {
+ err = -EINVAL;
goto out;
+ }
ieee802154_addr_from_sa(&haddr, &addr->addr);
dev = ieee802154_get_dev(sock_net(sk), &haddr);
@@ -564,8 +572,9 @@ static int dgram_connect(struct sock *sk, struct sockaddr *uaddr,
struct dgram_sock *ro = dgram_sk(sk);
int err = 0;
- if (len < sizeof(*addr))
- return -EINVAL;
+ err = ieee802154_sockaddr_check_size(addr, len);
+ if (err < 0)
+ return err;
if (addr->family != AF_IEEE802154)
return -EINVAL;
@@ -604,6 +613,7 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
struct ieee802154_mac_cb *cb;
struct dgram_sock *ro = dgram_sk(sk);
struct ieee802154_addr dst_addr;
+ DECLARE_SOCKADDR(struct sockaddr_ieee802154*, daddr, msg->msg_name);
int hlen, tlen;
int err;
@@ -612,10 +622,20 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
return -EOPNOTSUPP;
}
- if (!ro->connected && !msg->msg_name)
- return -EDESTADDRREQ;
- else if (ro->connected && msg->msg_name)
- return -EISCONN;
+ if (msg->msg_name) {
+ if (ro->connected)
+ return -EISCONN;
+ if (msg->msg_namelen < IEEE802154_MIN_NAMELEN)
+ return -EINVAL;
+ err = ieee802154_sockaddr_check_size(daddr, msg->msg_namelen);
+ if (err < 0)
+ return err;
+ ieee802154_addr_from_sa(&dst_addr, &daddr->addr);
+ } else {
+ if (!ro->connected)
+ return -EDESTADDRREQ;
+ dst_addr = ro->dst_addr;
+ }
if (!ro->bound)
dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154);
@@ -651,16 +671,6 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
cb = mac_cb_init(skb);
cb->type = IEEE802154_FC_TYPE_DATA;
cb->ackreq = ro->want_ack;
-
- if (msg->msg_name) {
- DECLARE_SOCKADDR(struct sockaddr_ieee802154*,
- daddr, msg->msg_name);
-
- ieee802154_addr_from_sa(&dst_addr, &daddr->addr);
- } else {
- dst_addr = ro->dst_addr;
- }
-
cb->secen = ro->secen;
cb->secen_override = ro->secen_override;
cb->seclevel = ro->seclevel;
@@ -695,7 +705,7 @@ out:
}
static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
size_t copied = 0;
int err = -EOPNOTSUPP;
@@ -703,7 +713,7 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
struct dgram_sock *ro = dgram_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name);
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
@@ -718,7 +728,7 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (err)
goto done;
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (saddr) {
/* Clear the implicit padding in struct sockaddr_ieee802154
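The bare `len < sizeof(*addr)` checks in bind/connect/sendmsg are replaced by ieee802154_sockaddr_check_size(), needed because the embedded address is variable length: a short (16-bit) address needs fewer trailing bytes than an extended (64-bit) one, so a single sizeof comparison both over- and under-checks. A hedged sketch of the shape such a check must have; the real helper is defined outside this hunk and its exact bounds are not reproduced here:

#include <linux/errno.h>
#include <net/af_ieee802154.h>

/* Reject anything shorter than the fixed prefix, then require a known
 * address type before trusting any of the address bytes.
 */
static int example_sockaddr_check(const struct sockaddr_ieee802154 *sa, int len)
{
        if (len < IEEE802154_MIN_NAMELEN)
                return -EINVAL;
        switch (sa->addr.addr_type) {
        case IEEE802154_ADDR_SHORT:
        case IEEE802154_ADDR_LONG:
                return 0; /* per-type length check omitted in this sketch */
        default:
                return -EINVAL;
        }
}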
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 87983e70f03f..e983bb0c5012 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -321,7 +321,6 @@ config NET_UDP_TUNNEL
config NET_FOU
tristate "IP: Foo (IP protocols) over UDP"
- select XFRM
select NET_UDP_TUNNEL
help
Foo over UDP allows any IP protocol to be directly encapsulated
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9c465bac1eb0..4728087c42a5 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -148,10 +148,10 @@ void inet_sock_destruct(struct sock *sk)
return;
}
- WARN_ON(atomic_read(&sk->sk_rmem_alloc));
- WARN_ON(refcount_read(&sk->sk_wmem_alloc));
- WARN_ON(sk->sk_wmem_queued);
- WARN_ON(sk_forward_alloc_get(sk));
+ WARN_ON_ONCE(atomic_read(&sk->sk_rmem_alloc));
+ WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
+ WARN_ON_ONCE(sk->sk_wmem_queued);
+ WARN_ON_ONCE(sk_forward_alloc_get(sk));
kfree(rcu_dereference_protected(inet->inet_opt, 1));
dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
@@ -217,7 +217,7 @@ int inet_listen(struct socket *sock, int backlog)
* because the socket was in TCP_LISTEN state previously but
* was shutdown() rather than close().
*/
- tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
(tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
@@ -335,7 +335,7 @@ lookup_protocol:
inet->hdrincl = 1;
}
- if (net->ipv4.sysctl_ip_no_pmtu_disc)
+ if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
@@ -558,22 +558,27 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
struct sock *sk = sock->sk;
+ const struct proto *prot;
int err;
if (addr_len < sizeof(uaddr->sa_family))
return -EINVAL;
+
+ /* IPV6_ADDRFORM can change sk->sk_prot under us. */
+ prot = READ_ONCE(sk->sk_prot);
+
if (uaddr->sa_family == AF_UNSPEC)
- return sk->sk_prot->disconnect(sk, flags);
+ return prot->disconnect(sk, flags);
if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
- err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
+ err = prot->pre_connect(sk, uaddr, addr_len);
if (err)
return err;
}
if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk))
return -EAGAIN;
- return sk->sk_prot->connect(sk, uaddr, addr_len);
+ return prot->connect(sk, uaddr, addr_len);
}
EXPORT_SYMBOL(inet_dgram_connect);
@@ -734,10 +739,11 @@ EXPORT_SYMBOL(inet_stream_connect);
int inet_accept(struct socket *sock, struct socket *newsock, int flags,
bool kern)
{
- struct sock *sk1 = sock->sk;
+ struct sock *sk1 = sock->sk, *sk2;
int err = -EINVAL;
- struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err, kern);
+ /* IPV6_ADDRFORM can change sk->sk_prot under us. */
+ sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, flags, &err, kern);
if (!sk2)
goto do_err;
@@ -748,6 +754,8 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags,
(TCPF_ESTABLISHED | TCPF_SYN_RECV |
TCPF_CLOSE_WAIT | TCPF_CLOSE)));
+ if (test_bit(SOCK_SUPPORT_ZC, &sock->flags))
+ set_bit(SOCK_SUPPORT_ZC, &newsock->flags);
sock_graft(sk2, newsock);
newsock->state = SS_CONNECTED;
@@ -825,18 +833,21 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags)
{
struct sock *sk = sock->sk;
+ const struct proto *prot;
if (unlikely(inet_send_prepare(sk)))
return -EAGAIN;
- if (sk->sk_prot->sendpage)
- return sk->sk_prot->sendpage(sk, page, offset, size, flags);
+ /* IPV6_ADDRFORM can change sk->sk_prot under us. */
+ prot = READ_ONCE(sk->sk_prot);
+ if (prot->sendpage)
+ return prot->sendpage(sk, page, offset, size, flags);
return sock_no_sendpage(sock, page, offset, size, flags);
}
EXPORT_SYMBOL(inet_sendpage);
INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *, struct msghdr *,
- size_t, int, int, int *));
+ size_t, int, int *));
int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
@@ -848,8 +859,7 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
sock_rps_record_flow(sk);
err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg,
- sk, msg, size, flags & MSG_DONTWAIT,
- flags & ~MSG_DONTWAIT, &addr_len);
+ sk, msg, size, flags, &addr_len);
if (err >= 0)
msg->msg_namelen = addr_len;
return err;
@@ -1041,6 +1051,7 @@ const struct proto_ops inet_stream_ops = {
.sendpage = inet_sendpage,
.splice_read = tcp_splice_read,
.read_sock = tcp_read_sock,
+ .read_skb = tcp_read_skb,
.sendmsg_locked = tcp_sendmsg_locked,
.sendpage_locked = tcp_sendpage_locked,
.peek_len = tcp_peek_len,
@@ -1068,7 +1079,7 @@ const struct proto_ops inet_dgram_ops = {
.setsockopt = sock_common_setsockopt,
.getsockopt = sock_common_getsockopt,
.sendmsg = inet_sendmsg,
- .read_sock = udp_read_sock,
+ .read_skb = udp_read_skb,
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
.sendpage = inet_sendpage,
@@ -1219,6 +1230,7 @@ EXPORT_SYMBOL(inet_unregister_protosw);
static int inet_sk_reselect_saddr(struct sock *sk)
{
+ struct inet_bind_hashbucket *prev_addr_hashbucket;
struct inet_sock *inet = inet_sk(sk);
__be32 old_saddr = inet->inet_saddr;
__be32 daddr = inet->inet_daddr;
@@ -1226,6 +1238,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
struct rtable *rt;
__be32 new_saddr;
struct ip_options_rcu *inet_opt;
+ int err;
inet_opt = rcu_dereference_protected(inet->inet_opt,
lockdep_sock_is_held(sk));
@@ -1234,26 +1247,40 @@ static int inet_sk_reselect_saddr(struct sock *sk)
/* Query new route. */
fl4 = &inet->cork.fl.u.ip4;
- rt = ip_route_connect(fl4, daddr, 0, RT_CONN_FLAGS(sk),
- sk->sk_bound_dev_if, sk->sk_protocol,
- inet->inet_sport, inet->inet_dport, sk);
+ rt = ip_route_connect(fl4, daddr, 0, sk->sk_bound_dev_if,
+ sk->sk_protocol, inet->inet_sport,
+ inet->inet_dport, sk);
if (IS_ERR(rt))
return PTR_ERR(rt);
- sk_setup_caps(sk, &rt->dst);
-
new_saddr = fl4->saddr;
- if (new_saddr == old_saddr)
+ if (new_saddr == old_saddr) {
+ sk_setup_caps(sk, &rt->dst);
return 0;
+ }
+
+ prev_addr_hashbucket =
+ inet_bhashfn_portaddr(tcp_or_dccp_get_hashinfo(sk), sk,
+ sock_net(sk), inet->inet_num);
+
+ inet->inet_saddr = inet->inet_rcv_saddr = new_saddr;
+
+ err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
+ if (err) {
+ inet->inet_saddr = old_saddr;
+ inet->inet_rcv_saddr = old_saddr;
+ ip_rt_put(rt);
+ return err;
+ }
+
+ sk_setup_caps(sk, &rt->dst);
- if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) {
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) {
pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n",
__func__, &old_saddr, &new_saddr);
}
- inet->inet_saddr = inet->inet_rcv_saddr = new_saddr;
-
/*
* XXX The only one ugly spot where we need to
* XXX really change the sockets identity after
@@ -1302,7 +1329,7 @@ int inet_sk_rebuild_header(struct sock *sk)
* Other protocols have to map its equivalent state to TCP_SYN_SENT.
* DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
*/
- if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr ||
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) ||
sk->sk_state != TCP_SYN_SENT ||
(sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
(err = inet_sk_reselect_saddr(sk)) != 0)
@@ -1376,8 +1403,11 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
}
ops = rcu_dereference(inet_offloads[proto]);
- if (likely(ops && ops->callbacks.gso_segment))
+ if (likely(ops && ops->callbacks.gso_segment)) {
segs = ops->callbacks.gso_segment(skb, features);
+ if (!segs)
+ skb->network_header = skb_mac_header(skb) + nhoff - skb->head;
+ }
if (IS_ERR_OR_NULL(segs))
goto out;
@@ -1445,12 +1475,9 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
off = skb_gro_offset(skb);
hlen = off + sizeof(*iph);
- iph = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, hlen)) {
- iph = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!iph))
- goto out;
- }
+ iph = skb_gro_header(skb, hlen, off);
+ if (unlikely(!iph))
+ goto out;
proto = iph->protocol;
@@ -1708,24 +1735,14 @@ static const struct net_protocol igmp_protocol = {
};
#endif
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol tcp_protocol = {
- .early_demux = tcp_v4_early_demux,
- .early_demux_handler = tcp_v4_early_demux,
+static const struct net_protocol tcp_protocol = {
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
.no_policy = 1,
.icmp_strict_tag_validation = 1,
};
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol udp_protocol = {
- .early_demux = udp_v4_early_demux,
- .early_demux_handler = udp_v4_early_demux,
+static const struct net_protocol udp_protocol = {
.handler = udp_rcv,
.err_handler = udp_err,
.no_policy = 1,
@@ -1927,6 +1944,8 @@ static int __init inet_init(void)
sock_skb_cb_check_size(sizeof(struct inet_skb_parm));
+ raw_hashinfo_init(&raw_v4_hashinfo);
+
rc = proto_register(&tcp_prot, 1);
if (rc)
goto out;
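A recurring pattern in this file: sysctl reads gain READ_ONCE() because the values can change at any time through /proc writers running on other CPUs. The annotation pairs with WRITE_ONCE() on the store side, guarantees a single untorn load, and documents the intentional data race for KCSAN. A minimal sketch of the pairing:

#include <linux/compiler.h>
#include <net/net_namespace.h>

/* Lockless reader and writer agree on one-shot accesses; neither side
 * takes a lock for a simple integer sysctl.
 */
static int example_read_dynaddr(const struct net *net)
{
        return READ_ONCE(net->ipv4.sysctl_ip_dynaddr);
}

static void example_write_dynaddr(struct net *net, u8 val)
{
        WRITE_ONCE(net->ipv4.sysctl_ip_dynaddr, val);
}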
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 6eea1e9e998d..ee4e578c7f20 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -471,30 +471,38 @@ static int ah4_err(struct sk_buff *skb, u32 info)
return 0;
}
-static int ah_init_state(struct xfrm_state *x)
+static int ah_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack)
{
struct ah_data *ahp = NULL;
struct xfrm_algo_desc *aalg_desc;
struct crypto_ahash *ahash;
- if (!x->aalg)
+ if (!x->aalg) {
+ NL_SET_ERR_MSG(extack, "AH requires a state with an AUTH algorithm");
goto error;
+ }
- if (x->encap)
+ if (x->encap) {
+ NL_SET_ERR_MSG(extack, "AH is not compatible with encapsulation");
goto error;
+ }
ahp = kzalloc(sizeof(*ahp), GFP_KERNEL);
if (!ahp)
return -ENOMEM;
ahash = crypto_alloc_ahash(x->aalg->alg_name, 0, 0);
- if (IS_ERR(ahash))
+ if (IS_ERR(ahash)) {
+ NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations");
goto error;
+ }
ahp->ahash = ahash;
if (crypto_ahash_setkey(ahash, x->aalg->alg_key,
- (x->aalg->alg_key_len + 7) / 8))
+ (x->aalg->alg_key_len + 7) / 8)) {
+ NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations");
goto error;
+ }
/*
* Lookup the algorithm description maintained by xfrm_algo,
@@ -507,10 +515,7 @@ static int ah_init_state(struct xfrm_state *x)
if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
crypto_ahash_digestsize(ahash)) {
- pr_info("%s: %s digestsize %u != %hu\n",
- __func__, x->aalg->alg_name,
- crypto_ahash_digestsize(ahash),
- aalg_desc->uinfo.auth.icv_fullbits / 8);
+ NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations");
goto error;
}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 4db0325f6e1a..4f7237661afb 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -168,6 +168,7 @@ struct neigh_table arp_tbl = {
[NEIGH_VAR_RETRANS_TIME] = 1 * HZ,
[NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,
[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
+ [NEIGH_VAR_INTERVAL_PROBE_TIME_MS] = 5 * HZ,
[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
[NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
[NEIGH_VAR_PROXY_QLEN] = 64,
@@ -293,7 +294,7 @@ static int arp_constructor(struct neighbour *neigh)
static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
dst_link_failure(skb);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
}
/* Create and send an arp packet. */
@@ -428,6 +429,26 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
return !inet_confirm_addr(net, in_dev, sip, tip, scope);
}
+static int arp_accept(struct in_device *in_dev, __be32 sip)
+{
+ struct net *net = dev_net(in_dev->dev);
+ int scope = RT_SCOPE_LINK;
+
+ switch (IN_DEV_ARP_ACCEPT(in_dev)) {
+ case 0: /* Don't create new entries from garp */
+ return 0;
+ case 1: /* Create new entries from garp */
+ return 1;
+ case 2: /* Create a neighbor in the arp table only if sip
+ * is in the same subnet as an address configured
+ * on the interface that received the garp message
+ */
+ return !!inet_confirm_addr(net, in_dev, sip, 0, scope);
+ default:
+ return 0;
+ }
+}
+
static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
{
struct rtable *rt;
@@ -867,12 +888,12 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
addr_type = -1;
- if (n || IN_DEV_ARP_ACCEPT(in_dev)) {
+ if (n || arp_accept(in_dev, sip)) {
is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op,
sip, tip, sha, tha);
}
- if (IN_DEV_ARP_ACCEPT(in_dev)) {
+ if (arp_accept(in_dev, sip)) {
/* Unsolicited ARP is not accepted by default.
It is possible, that this option should be enabled for some
devices (strip is candidate)
@@ -1108,7 +1129,7 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
r->arp_flags = arp_state_to_flags(neigh);
read_unlock_bh(&neigh->lock);
r->arp_ha.sa_family = dev->type;
- strlcpy(r->arp_dev, dev->name, sizeof(r->arp_dev));
+ strscpy(r->arp_dev, dev->name, sizeof(r->arp_dev));
err = 0;
}
neigh_release(neigh);
@@ -1116,13 +1137,18 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
return err;
}
-static int arp_invalidate(struct net_device *dev, __be32 ip)
+int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
{
struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev);
int err = -ENXIO;
struct neigh_table *tbl = &arp_tbl;
if (neigh) {
+ if ((neigh->nud_state & NUD_VALID) && !force) {
+ neigh_release(neigh);
+ return 0;
+ }
+
if (neigh->nud_state & ~NUD_NOARP)
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE|
@@ -1169,7 +1195,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
if (!dev)
return -EINVAL;
}
- return arp_invalidate(dev, ip);
+ return arp_invalidate(dev, ip, true);
}
/*
@@ -1299,9 +1325,9 @@ static struct packet_type arp_packet_type __read_mostly = {
.func = arp_rcv,
};
+#ifdef CONFIG_PROC_FS
#if IS_ENABLED(CONFIG_AX25)
-/* ------------------------------------------------------------------------ */
/*
* ax25 -> ASCII conversion
*/
@@ -1407,16 +1433,13 @@ static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
return neigh_seq_start(seq, pos, &arp_tbl, NEIGH_SEQ_SKIP_NOARP);
}
-/* ------------------------------------------------------------------------ */
-
static const struct seq_operations arp_seq_ops = {
.start = arp_seq_start,
.next = neigh_seq_next,
.stop = neigh_seq_stop,
.show = arp_seq_show,
};
-
-/* ------------------------------------------------------------------------ */
+#endif /* CONFIG_PROC_FS */
static int __net_init arp_net_init(struct net *net)
{
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index de610cb83694..6da16ae6a962 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
+#include <linux/init.h>
#include <linux/types.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf.h>
@@ -13,18 +14,6 @@
/* "extern" is to avoid sparse warning. It is only used in bpf_struct_ops.c. */
extern struct bpf_struct_ops bpf_tcp_congestion_ops;
-static u32 optional_ops[] = {
- offsetof(struct tcp_congestion_ops, init),
- offsetof(struct tcp_congestion_ops, release),
- offsetof(struct tcp_congestion_ops, set_state),
- offsetof(struct tcp_congestion_ops, cwnd_event),
- offsetof(struct tcp_congestion_ops, in_ack_event),
- offsetof(struct tcp_congestion_ops, pkts_acked),
- offsetof(struct tcp_congestion_ops, min_tso_segs),
- offsetof(struct tcp_congestion_ops, sndbuf_expand),
- offsetof(struct tcp_congestion_ops, cong_control),
-};
-
static u32 unsupported_ops[] = {
offsetof(struct tcp_congestion_ops, get_info),
};
@@ -50,18 +39,6 @@ static int bpf_tcp_ca_init(struct btf *btf)
return 0;
}
-static bool is_optional(u32 member_offset)
-{
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(optional_ops); i++) {
- if (member_offset == optional_ops[i])
- return true;
- }
-
- return false;
-}
-
static bool is_unsupported(u32 member_offset)
{
unsigned int i;
@@ -95,12 +72,14 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
const struct btf *btf,
const struct btf_type *t, int off,
int size, enum bpf_access_type atype,
- u32 *next_btf_id)
+ u32 *next_btf_id,
+ enum bpf_type_flag *flag)
{
size_t end;
if (atype == BPF_READ)
- return btf_struct_access(log, btf, t, off, size, atype, next_btf_id);
+ return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
+ flag);
if (t != tcp_sock_type) {
bpf_log(log, "only read is supported\n");
@@ -108,6 +87,12 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
}
switch (off) {
+ case offsetof(struct sock, sk_pacing_rate):
+ end = offsetofend(struct sock, sk_pacing_rate);
+ break;
+ case offsetof(struct sock, sk_pacing_status):
+ end = offsetofend(struct sock, sk_pacing_status);
+ break;
case bpf_ctx_range(struct inet_connection_sock, icsk_ca_priv):
end = offsetofend(struct inet_connection_sock, icsk_ca_priv);
break;
@@ -139,7 +124,7 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
return -EACCES;
}
- return NOT_INIT;
+ return 0;
}
BPF_CALL_2(bpf_tcp_send_ack, struct tcp_sock *, tp, u32, rcv_nxt)
@@ -212,26 +197,23 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
}
}
-BTF_SET_START(bpf_tcp_ca_kfunc_ids)
-BTF_ID(func, tcp_reno_ssthresh)
-BTF_ID(func, tcp_reno_cong_avoid)
-BTF_ID(func, tcp_reno_undo_cwnd)
-BTF_ID(func, tcp_slow_start)
-BTF_ID(func, tcp_cong_avoid_ai)
-BTF_SET_END(bpf_tcp_ca_kfunc_ids)
-
-static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner)
-{
- if (btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id))
- return true;
- return bpf_check_mod_kfunc_call(&bpf_tcp_ca_kfunc_list, kfunc_btf_id, owner);
-}
+BTF_SET8_START(bpf_tcp_ca_check_kfunc_ids)
+BTF_ID_FLAGS(func, tcp_reno_ssthresh)
+BTF_ID_FLAGS(func, tcp_reno_cong_avoid)
+BTF_ID_FLAGS(func, tcp_reno_undo_cwnd)
+BTF_ID_FLAGS(func, tcp_slow_start)
+BTF_ID_FLAGS(func, tcp_cong_avoid_ai)
+BTF_SET8_END(bpf_tcp_ca_check_kfunc_ids)
+
+static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_tcp_ca_check_kfunc_ids,
+};
static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = {
.get_func_proto = bpf_tcp_ca_get_func_proto,
.is_valid_access = bpf_tcp_ca_is_valid_access,
.btf_struct_access = bpf_tcp_ca_btf_struct_access,
- .check_kfunc_call = bpf_tcp_ca_check_kfunc_call,
};
static int bpf_tcp_ca_init_member(const struct btf_type *t,
@@ -240,7 +222,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
{
const struct tcp_congestion_ops *utcp_ca;
struct tcp_congestion_ops *tcp_ca;
- int prog_fd;
u32 moff;
utcp_ca = (const struct tcp_congestion_ops *)udata;
@@ -262,14 +243,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
return 1;
}
- if (!btf_type_resolve_func_ptr(btf_vmlinux, member->type, NULL))
- return 0;
-
- /* Ensure bpf_prog is provided for compulsory func ptr */
- prog_fd = (int)(*(unsigned long *)(udata + moff));
- if (!prog_fd && !is_optional(moff) && !is_unsupported(moff))
- return -EINVAL;
-
return 0;
}
@@ -300,3 +273,9 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = {
.init = bpf_tcp_ca_init,
.name = "tcp_congestion_ops",
};
+
+static int __init bpf_tcp_ca_kfunc_init(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set);
+}
+late_initcall(bpf_tcp_ca_kfunc_init);
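
With check_kfunc_call gone, the set of kernel functions callable from STRUCT_OPS programs lives in the generic BTF kfunc registry. A sketch of how another congestion-control implementation would expose a helper the same way; my_ca_ssthresh is a hypothetical module-defined function, and a loadable module would typically hook this from module_init() rather than late_initcall():

#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/init.h>
#include <linux/module.h>

/* BTF id set naming the functions BPF programs may call. */
BTF_SET8_START(my_ca_kfunc_ids)
BTF_ID_FLAGS(func, my_ca_ssthresh)	/* hypothetical helper */
BTF_SET8_END(my_ca_kfunc_ids)

static const struct btf_kfunc_id_set my_ca_kfunc_set = {
	.owner = THIS_MODULE,
	.set   = &my_ca_kfunc_ids,
};

static int __init my_ca_kfunc_init(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
					 &my_ca_kfunc_set);
}
late_initcall(my_ca_kfunc_init);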
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 62d5f99760aa..6cd3b6c559f0 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -239,7 +239,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
struct cipso_v4_map_cache_entry *prev_entry = NULL;
u32 hash;
- if (!cipso_v4_cache_enabled)
+ if (!READ_ONCE(cipso_v4_cache_enabled))
return -ENOENT;
hash = cipso_v4_map_cache_hash(key, key_len);
@@ -296,13 +296,14 @@ static int cipso_v4_cache_check(const unsigned char *key,
int cipso_v4_cache_add(const unsigned char *cipso_ptr,
const struct netlbl_lsm_secattr *secattr)
{
+ int bkt_size = READ_ONCE(cipso_v4_cache_bucketsize);
int ret_val = -EPERM;
u32 bkt;
struct cipso_v4_map_cache_entry *entry = NULL;
struct cipso_v4_map_cache_entry *old_entry = NULL;
u32 cipso_ptr_len;
- if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0)
+ if (!READ_ONCE(cipso_v4_cache_enabled) || bkt_size <= 0)
return 0;
cipso_ptr_len = cipso_ptr[1];
@@ -322,7 +323,7 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr,
bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1);
spin_lock_bh(&cipso_v4_cache[bkt].lock);
- if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
+ if (cipso_v4_cache[bkt].size < bkt_size) {
list_add(&entry->list, &cipso_v4_cache[bkt].list);
cipso_v4_cache[bkt].size += 1;
} else {
@@ -1199,7 +1200,8 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
/* This will send packets using the "optimized" format when
* possible as specified in section 3.4.2.6 of the
* CIPSO draft. */
- if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10)
+ if (READ_ONCE(cipso_v4_rbm_optfmt) && ret_val > 0 &&
+ ret_val <= 10)
tag_len = 14;
else
tag_len = 4 + ret_val;
@@ -1603,7 +1605,7 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
* all the CIPSO validations here but it doesn't
* really specify _exactly_ what we need to validate
* ... so, just make it a sysctl tunable. */
- if (cipso_v4_rbm_strictvalid) {
+ if (READ_ONCE(cipso_v4_rbm_strictvalid)) {
if (cipso_v4_map_lvl_valid(doi_def,
tag[3]) < 0) {
err_offset = opt_iter + 3;
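
These cipso sysctls are written locklessly by the sysctl handler while the data path reads them, so the reads gain READ_ONCE() annotations; cipso_v4_cache_add() additionally snapshots the bucket size once so the enable check and the later room comparison cannot observe two different values. A standalone model of the snapshot idiom (READ_ONCE() here is the usual volatile-cast definition, not the kernel header):

#include <stdio.h>

#define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

static int cache_enabled = 1;
static int cache_bucketsize = 10;

static int bucket_has_room(int cur_size)
{
	int bkt_size = READ_ONCE(cache_bucketsize); /* read exactly once */

	if (!READ_ONCE(cache_enabled) || bkt_size <= 0)
		return 0;
	/* later uses reuse bkt_size, so a concurrent sysctl write cannot
	 * make the check and the use disagree */
	return cur_size < bkt_size;
}

int main(void)
{
	printf("%d\n", bucket_has_room(3)); /* 1 */
	return 0;
}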
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 48f337ccf949..4d1af0cd7d99 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -42,12 +42,13 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
oif = inet->mc_index;
if (!saddr)
saddr = inet->mc_addr;
+ } else if (!oif) {
+ oif = inet->uc_index;
}
fl4 = &inet->cork.fl.u.ip4;
- rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr,
- RT_CONN_FLAGS(sk), oif,
- sk->sk_protocol,
- inet->inet_sport, usin->sin_port, sk);
+ rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr, oif,
+ sk->sk_protocol, inet->inet_sport,
+ usin->sin_port, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
if (err == -ENETUNREACH)
@@ -69,10 +70,10 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
}
inet->inet_daddr = fl4->daddr;
inet->inet_dport = usin->sin_port;
- reuseport_has_conns(sk, true);
+ reuseport_has_conns_set(sk);
sk->sk_state = TCP_ESTABLISHED;
sk_set_txhash(sk);
- inet->inet_id = prandom_u32();
+ inet->inet_id = get_random_u16();
sk_dst_set(sk, &rt->dst);
err = 0;
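
The __ip4_datagram_connect() change means a socket bound to a device with IP_UNICAST_IF (inet->uc_index) now feeds that ifindex into the connected route lookup when nothing else chose one; multicast destinations still prefer mc_index. A standalone model of the selection order, with field names borrowed from the hunk and is_multicast() standing in for ipv4_is_multicast():

#include <stdio.h>
#include <stdint.h>

struct conn { int bound_oif, mc_index, uc_index; };

static int is_multicast(uint32_t daddr)
{
	return (daddr & 0xF0000000u) == 0xE0000000u; /* 224.0.0.0/4 */
}

static int pick_oif(const struct conn *c, uint32_t daddr)
{
	int oif = c->bound_oif;		/* SO_BINDTODEVICE wins */

	if (is_multicast(daddr)) {
		if (!oif)
			oif = c->mc_index; /* IP_MULTICAST_IF */
	} else if (!oif) {
		oif = c->uc_index;	/* new: IP_UNICAST_IF */
	}
	return oif;
}

int main(void)
{
	struct conn c = { .bound_oif = 0, .mc_index = 3, .uc_index = 7 };

	printf("%d\n", pick_oif(&c, 0x08080808)); /* 8.8.8.8 -> 7 */
	return 0;
}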
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index fba2bffd65f7..e8b9a9202fec 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -104,6 +104,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
[IFA_FLAGS] = { .type = NLA_U32 },
[IFA_RT_PRIORITY] = { .type = NLA_U32 },
[IFA_TARGET_NETNSID] = { .type = NLA_S32 },
+ [IFA_PROTO] = { .type = NLA_U8 },
};
struct inet_fill_args {
@@ -243,7 +244,7 @@ void in_dev_finish_destroy(struct in_device *idev)
#ifdef NET_REFCNT_DEBUG
pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
#endif
- dev_put_track(dev, &idev->dev_tracker);
+ netdev_put(dev, &idev->dev_tracker);
if (!idev->dead)
pr_err("Freeing alive in_device %p\n", idev);
else
@@ -271,7 +272,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
dev_disable_lro(dev);
/* Reference in_dev->dev */
- dev_hold_track(dev, &in_dev->dev_tracker, GFP_KERNEL);
+ netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
/* Account for reference dev->ip_ptr (below) */
refcount_set(&in_dev->refcnt, 1);
@@ -535,10 +536,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
return ret;
}
- if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
- prandom_seed((__force u32) ifa->ifa_local);
+ if (!(ifa->ifa_flags & IFA_F_SECONDARY))
ifap = last_primary;
- }
rcu_assign_pointer(ifa->ifa_next, *ifap);
rcu_assign_pointer(*ifap, ifa);
@@ -889,6 +888,9 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
if (tb[IFA_RT_PRIORITY])
ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
+ if (tb[IFA_PROTO])
+ ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
+
if (tb[IFA_CACHEINFO]) {
struct ifa_cacheinfo *ci;
@@ -1625,6 +1627,7 @@ static size_t inet_nlmsg_size(void)
+ nla_total_size(4) /* IFA_BROADCAST */
+ nla_total_size(IFNAMSIZ) /* IFA_LABEL */
+ nla_total_size(4) /* IFA_FLAGS */
+ + nla_total_size(1) /* IFA_PROTO */
+ nla_total_size(4) /* IFA_RT_PRIORITY */
+ nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
}
@@ -1699,6 +1702,8 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
(ifa->ifa_label[0] &&
nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
+ (ifa->ifa_proto &&
+ nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
(ifa->ifa_rt_priority &&
nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
@@ -2566,7 +2571,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
struct devinet_sysctl_table *t;
char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
- t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
+ t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
if (!t)
goto out;
@@ -2677,23 +2682,27 @@ static __net_init int devinet_init_net(struct net *net)
#endif
if (!net_eq(net, &init_net)) {
- if (IS_ENABLED(CONFIG_SYSCTL) &&
- sysctl_devconf_inherit_init_net == 3) {
+ switch (net_inherit_devconf()) {
+ case 3:
/* copy from the current netns */
memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
sizeof(ipv4_devconf));
memcpy(dflt,
current->nsproxy->net_ns->ipv4.devconf_dflt,
sizeof(ipv4_devconf_dflt));
- } else if (!IS_ENABLED(CONFIG_SYSCTL) ||
- sysctl_devconf_inherit_init_net != 2) {
- /* inherit == 0 or 1: copy from init_net */
+ break;
+ case 0:
+ case 1:
+ /* copy from init_net */
memcpy(all, init_net.ipv4.devconf_all,
sizeof(ipv4_devconf));
memcpy(dflt, init_net.ipv4.devconf_dflt,
sizeof(ipv4_devconf_dflt));
+ break;
+ case 2:
+ /* use compiled values */
+ break;
}
- /* else inherit == 2: use compiled values */
}
#ifdef CONFIG_SYSCTL
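
IFA_PROTO is a new one-byte address attribute recording which entity installed the address, mirroring the protocol field on routes; the devinet hunks validate, store, size, and dump it. For reference, a standalone sketch of appending such a u8 attribute to a netlink message buffer; ATTR_IFA_PROTO below is a placeholder value, real code takes IFA_PROTO from <linux/if_addr.h>:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define NLA_HDRLEN 4
#define NLA_ALIGN(n) (((n) + 3) & ~3)
#define ATTR_IFA_PROTO 11	/* placeholder; use IFA_PROTO from uapi */

struct nlattr { uint16_t nla_len; uint16_t nla_type; };

static size_t put_u8(void *buf, uint16_t type, uint8_t val)
{
	struct nlattr *nla = buf;

	nla->nla_len = NLA_HDRLEN + sizeof(val);
	nla->nla_type = type;
	memcpy((char *)buf + NLA_HDRLEN, &val, sizeof(val));
	return NLA_ALIGN(nla->nla_len);	/* space consumed incl. padding */
}

int main(void)
{
	unsigned char buf[8] = { 0 };
	size_t used = put_u8(buf, ATTR_IFA_PROTO, 3);

	printf("attribute occupies %zu bytes\n", used); /* 8 */
	return 0;
}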
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 851f542928a3..52c8047efedb 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -134,6 +134,7 @@ static void esp_free_tcp_sk(struct rcu_head *head)
static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
{
struct xfrm_encap_tmpl *encap = x->encap;
+ struct net *net = xs_net(x);
struct esp_tcp_sk *esk;
__be16 sport, dport;
struct sock *nsk;
@@ -160,7 +161,7 @@ static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
}
spin_unlock_bh(&x->lock);
- sk = inet_lookup_established(xs_net(x), &tcp_hashinfo, x->id.daddr.a4,
+ sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, x->id.daddr.a4,
dport, x->props.saddr.a4, sport, 0);
if (!sk)
return ERR_PTR(-ENOENT);
@@ -455,6 +456,10 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
return err;
}
+ if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
+ ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE)
+ goto cow;
+
if (!skb_cloned(skb)) {
if (tailen <= skb_tailroom(skb)) {
nfrags = 1;
@@ -498,9 +503,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
nfrags++;
- skb->len += tailen;
- skb->data_len += tailen;
- skb->truesize += tailen;
+ skb_len_add(skb, tailen);
if (sk && sk_fullsock(sk))
refcount_add(tailen, &sk->sk_wmem_alloc);
@@ -671,7 +674,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
u32 padto;
- padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached));
+ padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
if (skb->len < padto)
esp.tfclen = padto - skb->len;
}
@@ -701,7 +704,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
static inline int esp_remove_trailer(struct sk_buff *skb)
{
struct xfrm_state *x = xfrm_input_state(skb);
- struct xfrm_offload *xo = xfrm_offload(skb);
struct crypto_aead *aead = x->data;
int alen, hlen, elen;
int padlen, trimlen;
@@ -713,11 +715,6 @@ static inline int esp_remove_trailer(struct sk_buff *skb)
hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
elen = skb->len - hlen;
- if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) {
- ret = xo->proto;
- goto out;
- }
-
if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
BUG();
@@ -1011,16 +1008,17 @@ static void esp_destroy(struct xfrm_state *x)
crypto_free_aead(aead);
}
-static int esp_init_aead(struct xfrm_state *x)
+static int esp_init_aead(struct xfrm_state *x, struct netlink_ext_ack *extack)
{
char aead_name[CRYPTO_MAX_ALG_NAME];
struct crypto_aead *aead;
int err;
- err = -ENAMETOOLONG;
if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
- x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
- goto error;
+ x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME) {
+ NL_SET_ERR_MSG(extack, "Algorithm name is too long");
+ return -ENAMETOOLONG;
+ }
aead = crypto_alloc_aead(aead_name, 0, 0);
err = PTR_ERR(aead);
@@ -1038,11 +1036,15 @@ static int esp_init_aead(struct xfrm_state *x)
if (err)
goto error;
+ return 0;
+
error:
+ NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations");
return err;
}
-static int esp_init_authenc(struct xfrm_state *x)
+static int esp_init_authenc(struct xfrm_state *x,
+ struct netlink_ext_ack *extack)
{
struct crypto_aead *aead;
struct crypto_authenc_key_param *param;
@@ -1053,10 +1055,6 @@ static int esp_init_authenc(struct xfrm_state *x)
unsigned int keylen;
int err;
- err = -EINVAL;
- if (!x->ealg)
- goto error;
-
err = -ENAMETOOLONG;
if ((x->props.flags & XFRM_STATE_ESN)) {
@@ -1065,22 +1063,28 @@ static int esp_init_authenc(struct xfrm_state *x)
x->geniv ?: "", x->geniv ? "(" : "",
x->aalg ? x->aalg->alg_name : "digest_null",
x->ealg->alg_name,
- x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME)
+ x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME) {
+ NL_SET_ERR_MSG(extack, "Algorithm name is too long");
goto error;
+ }
} else {
if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
"%s%sauthenc(%s,%s)%s",
x->geniv ?: "", x->geniv ? "(" : "",
x->aalg ? x->aalg->alg_name : "digest_null",
x->ealg->alg_name,
- x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME)
+ x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME) {
+ NL_SET_ERR_MSG(extack, "Algorithm name is too long");
goto error;
+ }
}
aead = crypto_alloc_aead(authenc_name, 0, 0);
err = PTR_ERR(aead);
- if (IS_ERR(aead))
+ if (IS_ERR(aead)) {
+ NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations");
goto error;
+ }
x->data = aead;
@@ -1110,17 +1114,16 @@ static int esp_init_authenc(struct xfrm_state *x)
err = -EINVAL;
if (aalg_desc->uinfo.auth.icv_fullbits / 8 !=
crypto_aead_authsize(aead)) {
- pr_info("ESP: %s digestsize %u != %hu\n",
- x->aalg->alg_name,
- crypto_aead_authsize(aead),
- aalg_desc->uinfo.auth.icv_fullbits / 8);
+ NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations");
goto free_key;
}
err = crypto_aead_setauthsize(
aead, x->aalg->alg_trunc_len / 8);
- if (err)
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations");
goto free_key;
+ }
}
param->enckeylen = cpu_to_be32((x->ealg->alg_key_len + 7) / 8);
@@ -1135,7 +1138,7 @@ error:
return err;
}
-static int esp_init_state(struct xfrm_state *x)
+static int esp_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack)
{
struct crypto_aead *aead;
u32 align;
@@ -1143,10 +1146,14 @@ static int esp_init_state(struct xfrm_state *x)
x->data = NULL;
- if (x->aead)
- err = esp_init_aead(x);
- else
- err = esp_init_authenc(x);
+ if (x->aead) {
+ err = esp_init_aead(x, extack);
+ } else if (x->ealg) {
+ err = esp_init_authenc(x, extack);
+ } else {
+ NL_SET_ERR_MSG(extack, "ESP: AEAD or CRYPT must be provided");
+ err = -EINVAL;
+ }
if (err)
goto error;
@@ -1164,6 +1171,7 @@ static int esp_init_state(struct xfrm_state *x)
switch (encap->encap_type) {
default:
+ NL_SET_ERR_MSG(extack, "Unsupported encapsulation type for ESP");
err = -EINVAL;
goto error;
case UDP_ENCAP_ESPINUDP:
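
esp_init_authenc() assembles the crypto template name at runtime from the state's algorithms, with an optional geniv wrapper, and the extack hunks above report when the result overflows CRYPTO_MAX_ALG_NAME. A standalone demo of that construction for an HMAC-SHA256 + CBC-AES state (non-ESN case):

#include <stdio.h>

#define CRYPTO_MAX_ALG_NAME 128

int main(void)
{
	const char *geniv = "echainiv";	/* may be NULL */
	const char *aalg = "hmac(sha256)";
	const char *ealg = "cbc(aes)";
	char name[CRYPTO_MAX_ALG_NAME];
	int n;

	n = snprintf(name, sizeof(name), "%s%sauthenc(%s,%s)%s",
		     geniv ? geniv : "", geniv ? "(" : "",
		     aalg, ealg, geniv ? ")" : "");
	if (n >= (int)sizeof(name))
		return 1;		/* the -ENAMETOOLONG path */
	puts(name); /* echainiv(authenc(hmac(sha256),cbc(aes))) */
	return 0;
}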
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index d87f02a6e934..170152772d33 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -110,8 +110,10 @@ static struct sk_buff *xfrm4_tunnel_gso_segment(struct xfrm_state *x,
struct sk_buff *skb,
netdev_features_t features)
{
- __skb_push(skb, skb->mac_len);
- return skb_mac_gso_segment(skb, features);
+ __be16 type = x->inner_mode.family == AF_INET6 ? htons(ETH_P_IPV6)
+ : htons(ETH_P_IP);
+
+ return skb_eth_gso_segment(skb, features, type);
}
static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x,
@@ -160,6 +162,9 @@ static struct sk_buff *xfrm4_beet_gso_segment(struct xfrm_state *x,
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
}
+ if (proto == IPPROTO_IPV6)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4;
+
__skb_pull(skb, skb_transport_offset(skb));
ops = rcu_dereference(inet_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment))
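
Instead of pushing the MAC header back and re-running L2 segmentation, xfrm4_tunnel_gso_segment() now hands skb_eth_gso_segment() an ethertype chosen from the state's inner mode family. The mapping reduces to this (standalone model; the ETH_P_* values match <linux/if_ether.h>):

#include <stdio.h>
#include <sys/socket.h>
#include <arpa/inet.h>

#define ETH_P_IP   0x0800
#define ETH_P_IPV6 0x86DD

static unsigned short inner_proto(int family)
{
	return htons(family == AF_INET6 ? ETH_P_IPV6 : ETH_P_IP);
}

int main(void)
{
	printf("0x%04x\n", ntohs(inner_proto(AF_INET6))); /* 0x86dd */
	return 0;
}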
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4d61ddd8a0ec..f361d3d56be2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -32,6 +32,7 @@
#include <linux/list.h>
#include <linux/slab.h>
+#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
@@ -290,7 +291,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev);
struct flowi4 fl4 = {
.flowi4_iif = LOOPBACK_IFINDEX,
- .flowi4_oif = l3mdev_master_ifindex_rcu(dev),
+ .flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev),
.daddr = ip_hdr(skb)->saddr,
.flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK,
.flowi4_scope = scope,
@@ -352,9 +353,8 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
bool dev_match;
fl4.flowi4_oif = 0;
- fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev);
- if (!fl4.flowi4_iif)
- fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
+ fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev);
+ fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
fl4.daddr = src;
fl4.saddr = dst;
fl4.flowi4_tos = tos;
@@ -436,6 +436,9 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
if (net->ipv4.fib_has_custom_local_routes ||
fib4_has_custom_rules(net))
goto full_check;
+ /* A source address owned by this netns (container) is regarded as
+ * martian; the same address owned by a different container on the
+ * same host is not.
+ */
if (inet_lookup_ifaddr_rcu(net, src))
return -EINVAL;
@@ -735,8 +738,16 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
memset(cfg, 0, sizeof(*cfg));
rtm = nlmsg_data(nlh);
+
+ if (!inet_validate_dscp(rtm->rtm_tos)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid dsfield (tos): ECN bits must be 0");
+ err = -EINVAL;
+ goto errout;
+ }
+ cfg->fc_dscp = inet_dsfield_to_dscp(rtm->rtm_tos);
+
cfg->fc_dst_len = rtm->rtm_dst_len;
- cfg->fc_tos = rtm->rtm_tos;
cfg->fc_table = rtm->rtm_table;
cfg->fc_protocol = rtm->rtm_protocol;
cfg->fc_scope = rtm->rtm_scope;
@@ -1112,9 +1123,11 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
return;
/* Add broadcast address, if it is explicitly assigned. */
- if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
+ if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) {
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
prim, 0);
+ arp_invalidate(dev, ifa->ifa_broadcast, false);
+ }
if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
(prefix != addr || ifa->ifa_prefixlen < 32)) {
@@ -1128,6 +1141,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
if (ifa->ifa_prefixlen < 31) {
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
32, prim, 0);
+ arp_invalidate(dev, prefix | ~mask, false);
}
}
}
@@ -1370,7 +1384,7 @@ static void nl_fib_input(struct sk_buff *skb)
return;
nlh = nlmsg_hdr(skb);
- frn = (struct fib_result_nl *) nlmsg_data(nlh);
+ frn = nlmsg_data(nlh);
nl_fib_lookup(net, frn);
portid = NETLINK_CB(skb).portid; /* netlink portid */
@@ -1411,7 +1425,7 @@ static void fib_disable_ip(struct net_device *dev, unsigned long event,
static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
- struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
+ struct in_ifaddr *ifa = ptr;
struct net_device *dev = ifa->ifa_dev->dev;
struct net *net = dev_net(dev);
@@ -1547,7 +1561,7 @@ static void ip_fib_net_exit(struct net *net)
{
int i;
- rtnl_lock();
+ ASSERT_RTNL();
#ifdef CONFIG_IP_MULTIPLE_TABLES
RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
@@ -1572,7 +1586,7 @@ static void ip_fib_net_exit(struct net *net)
#ifdef CONFIG_IP_MULTIPLE_TABLES
fib4_rules_exit(net);
#endif
- rtnl_unlock();
+
kfree(net->ipv4.fib_table_hash);
fib4_notifier_exit(net);
}
@@ -1599,7 +1613,9 @@ out:
out_proc:
nl_fib_lookup_exit(net);
out_nlfl:
+ rtnl_lock();
ip_fib_net_exit(net);
+ rtnl_unlock();
goto out;
}
@@ -1607,12 +1623,23 @@ static void __net_exit fib_net_exit(struct net *net)
{
fib_proc_exit(net);
nl_fib_lookup_exit(net);
- ip_fib_net_exit(net);
+}
+
+static void __net_exit fib_net_exit_batch(struct list_head *net_list)
+{
+ struct net *net;
+
+ rtnl_lock();
+ list_for_each_entry(net, net_list, exit_list)
+ ip_fib_net_exit(net);
+
+ rtnl_unlock();
}
static struct pernet_operations fib_net_ops = {
.init = fib_net_init,
.exit = fib_net_exit,
+ .exit_batch = fib_net_exit_batch,
};
void __init ip_fib_init(void)
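
ip_fib_net_exit() now asserts the RTNL instead of taking it, and the new exit_batch handler acquires rtnl_lock() once for an entire batch of dying namespaces, which is far cheaper than one lock round-trip per netns during mass teardown. The general shape of the pattern, as a kernel-style sketch (the foo_* names are hypothetical; not a standalone program):

static void __net_exit foo_net_exit(struct net *net)
{
	/* per-net cleanup that does not need RTNL */
}

static void __net_exit foo_net_exit_batch(struct list_head *net_list)
{
	struct net *net;

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list)
		foo_exit_rtnl_part(net);	/* asserts RTNL internally */
	rtnl_unlock();
}

static struct pernet_operations foo_net_ops = {
	.exit       = foo_net_exit,
	.exit_batch = foo_net_exit_batch,
};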
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index e184bcb19943..f9b9e26c32c1 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -4,22 +4,22 @@
#include <linux/types.h>
#include <linux/list.h>
+#include <net/inet_dscp.h>
#include <net/ip_fib.h>
#include <net/nexthop.h>
struct fib_alias {
struct hlist_node fa_list;
struct fib_info *fa_info;
- u8 fa_tos;
+ dscp_t fa_dscp;
u8 fa_type;
u8 fa_state;
u8 fa_slen;
u32 tb_id;
s16 fa_default;
- u8 offload:1,
- trap:1,
- offload_failed:1,
- unused:5;
+ u8 offload;
+ u8 trap;
+ u8 offload_failed;
struct rcu_head rcu;
};
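
fib_alias now stores a dscp_t, a sparse-annotated type holding the dsfield with the two low ECN bits guaranteed clear (the flags also change from bitfields to plain bytes so the READ_ONCE()/WRITE_ONCE() pairs elsewhere in the series can address them individually). A standalone model of the conversion helpers used throughout the series:

#include <stdio.h>
#include <stdint.h>

#define ECN_MASK 0x03

static int validate_dscp(uint8_t dsfield)	/* inet_validate_dscp() */
{
	return (dsfield & ECN_MASK) == 0;	/* ECN bits must be clear */
}

static uint8_t to_dscp(uint8_t dsfield)		/* inet_dsfield_to_dscp() */
{
	return dsfield & ~ECN_MASK;
}

int main(void)
{
	uint8_t tos = 0xB8;			/* EF (46) << 2, ECN 0 */

	printf("valid=%d dscp=0x%02x\n", validate_dscp(tos), to_dscp(tos));
	return 0;
}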
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index e0b6c8b6de57..513f475c6a53 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -23,6 +23,7 @@
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/export.h>
+#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/tcp.h>
@@ -35,7 +36,7 @@ struct fib4_rule {
struct fib_rule common;
u8 dst_len;
u8 src_len;
- u8 tos;
+ dscp_t dscp;
__be32 src;
__be32 srcmask;
__be32 dst;
@@ -49,7 +50,7 @@ static bool fib4_rule_matchall(const struct fib_rule *rule)
{
struct fib4_rule *r = container_of(rule, struct fib4_rule, common);
- if (r->dst_len || r->src_len || r->tos)
+ if (r->dst_len || r->src_len || r->dscp)
return false;
return fib_rule_matchall(rule);
}
@@ -144,7 +145,7 @@ INDIRECT_CALLABLE_SCOPE bool fib4_rule_suppress(struct fib_rule *rule,
int flags,
struct fib_lookup_arg *arg)
{
- struct fib_result *result = (struct fib_result *) arg->result;
+ struct fib_result *result = arg->result;
struct net_device *dev = NULL;
if (result->fi) {
@@ -185,7 +186,7 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
((daddr ^ r->dst) & r->dstmask))
return 0;
- if (r->tos && (r->tos != fl4->flowi4_tos))
+ if (r->dscp && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos))
return 0;
if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
@@ -225,10 +226,17 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
int err = -EINVAL;
struct fib4_rule *rule4 = (struct fib4_rule *) rule;
+ if (!inet_validate_dscp(frh->tos)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid dsfield (tos): ECN bits must be 0");
+ goto errout;
+ }
+ /* IPv4 currently doesn't handle high order DSCP bits correctly */
if (frh->tos & ~IPTOS_TOS_MASK) {
NL_SET_ERR_MSG(extack, "Invalid tos");
goto errout;
}
+ rule4->dscp = inet_dsfield_to_dscp(frh->tos);
/* split local/main if they are not already split */
err = fib_unmerge(net);
@@ -270,7 +278,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule4->srcmask = inet_make_mask(rule4->src_len);
rule4->dst_len = frh->dst_len;
rule4->dstmask = inet_make_mask(rule4->dst_len);
- rule4->tos = frh->tos;
net->ipv4.fib_has_custom_rules = true;
@@ -313,7 +320,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->dst_len && (rule4->dst_len != frh->dst_len))
return 0;
- if (frh->tos && (rule4->tos != frh->tos))
+ if (frh->tos && inet_dscp_to_dsfield(rule4->dscp) != frh->tos)
return 0;
#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -337,7 +344,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->dst_len = rule4->dst_len;
frh->src_len = rule4->src_len;
- frh->tos = rule4->tos;
+ frh->tos = inet_dscp_to_dsfield(rule4->dscp);
if ((rule4->dst_len &&
nla_put_in_addr(skb, FRA_DST, rule4->dst)) ||
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 828de171708f..f721c308248b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -29,8 +29,10 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/netlink.h>
+#include <linux/hash.h>
#include <net/arp.h>
+#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
@@ -51,6 +53,7 @@ static DEFINE_SPINLOCK(fib_info_lock);
static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_info_hash_size;
+static unsigned int fib_info_hash_bits;
static unsigned int fib_info_cnt;
#define DEVINDEX_HASHBITS 8
@@ -208,7 +211,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
void fib_nh_common_release(struct fib_nh_common *nhc)
{
- dev_put_track(nhc->nhc_dev, &nhc->nhc_dev_tracker);
+ netdev_put(nhc->nhc_dev, &nhc->nhc_dev_tracker);
lwtstate_put(nhc->nhc_lwtstate);
rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
rt_fibinfo_free(&nhc->nhc_rth_input);
@@ -249,7 +252,6 @@ void free_fib_info(struct fib_info *fi)
pr_warn("Freeing alive fib_info %p\n", fi);
return;
}
- fib_info_cnt--;
call_rcu(&fi->rcu, free_fib_info_rcu);
}
@@ -260,6 +262,10 @@ void fib_release_info(struct fib_info *fi)
spin_lock_bh(&fib_info_lock);
if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
hlist_del(&fi->fib_hash);
+
+ /* Paired with READ_ONCE() in fib_create_info(). */
+ WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1);
+
if (fi->fib_prefsrc)
hlist_del(&fi->fib_lhash);
if (fi->nh) {
@@ -316,11 +322,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
static inline unsigned int fib_devindex_hashfn(unsigned int val)
{
- unsigned int mask = DEVINDEX_HASHSIZE - 1;
+ return hash_32(val, DEVINDEX_HASHBITS);
+}
+
+static struct hlist_head *
+fib_info_devhash_bucket(const struct net_device *dev)
+{
+ u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex;
- return (val ^
- (val >> DEVINDEX_HASHBITS) ^
- (val >> (DEVINDEX_HASHBITS * 2))) & mask;
+ return &fib_info_devhash[fib_devindex_hashfn(val)];
}
static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
@@ -430,12 +440,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
struct hlist_head *head;
struct fib_nh *nh;
- unsigned int hash;
spin_lock(&fib_info_lock);
- hash = fib_devindex_hashfn(dev->ifindex);
- head = &fib_info_devhash[hash];
+ head = fib_info_devhash_bucket(dev);
+
hlist_for_each_entry(nh, head, nh_hash) {
if (nh->fib_nh_dev == dev &&
nh->fib_nh_gw4 == gw &&
@@ -515,11 +524,11 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
fri.tb_id = tb_id;
fri.dst = key;
fri.dst_len = dst_len;
- fri.tos = fa->fa_tos;
+ fri.dscp = fa->fa_dscp;
fri.type = fa->fa_type;
- fri.offload = fa->offload;
- fri.trap = fa->trap;
- fri.offload_failed = fa->offload_failed;
+ fri.offload = READ_ONCE(fa->offload);
+ fri.trap = READ_ONCE(fa->trap);
+ fri.offload_failed = READ_ONCE(fa->offload_failed);
err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
@@ -879,9 +888,14 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi,
return 1;
}
+ /* cannot match on nexthop object attributes */
+ if (fi->nh)
+ return 1;
+
if (cfg->fc_oif || cfg->fc_gw_family) {
- struct fib_nh *nh = fib_info_nh(fi, 0);
+ struct fib_nh *nh;
+ nh = fib_info_nh(fi, 0);
if (cfg->fc_encap) {
if (fib_encap_match(net, cfg->fc_encap_type,
cfg->fc_encap, nh, cfg, extack))
@@ -1043,7 +1057,8 @@ static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh,
err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack);
if (!err) {
nh->fib_nh_dev = fib6_nh.fib_nh_dev;
- dev_hold_track(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_KERNEL);
+ netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker,
+ GFP_KERNEL);
nh->fib_nh_oif = nh->fib_nh_dev->ifindex;
nh->fib_nh_scope = RT_SCOPE_LINK;
@@ -1127,7 +1142,7 @@ static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table,
if (!netif_carrier_ok(dev))
nh->fib_nh_flags |= RTNH_F_LINKDOWN;
nh->fib_nh_dev = dev;
- dev_hold_track(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC);
+ netdev_hold(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC);
nh->fib_nh_scope = RT_SCOPE_LINK;
return 0;
}
@@ -1181,7 +1196,7 @@ static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table,
"No egress device for nexthop gateway");
goto out;
}
- dev_hold_track(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC);
+ netdev_hold(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC);
if (!netif_carrier_ok(dev))
nh->fib_nh_flags |= RTNH_F_LINKDOWN;
err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
@@ -1215,7 +1230,7 @@ static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh,
}
nh->fib_nh_dev = in_dev->dev;
- dev_hold_track(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC);
+ netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC);
nh->fib_nh_scope = RT_SCOPE_HOST;
if (!netif_carrier_ok(nh->fib_nh_dev))
nh->fib_nh_flags |= RTNH_F_LINKDOWN;
@@ -1240,34 +1255,13 @@ int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
return err;
}
-static inline unsigned int fib_laddr_hashfn(__be32 val)
-{
- unsigned int mask = (fib_info_hash_size - 1);
-
- return ((__force u32)val ^
- ((__force u32)val >> 7) ^
- ((__force u32)val >> 14)) & mask;
-}
-
-static struct hlist_head *fib_info_hash_alloc(int bytes)
-{
- if (bytes <= PAGE_SIZE)
- return kzalloc(bytes, GFP_KERNEL);
- else
- return (struct hlist_head *)
- __get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(bytes));
-}
-
-static void fib_info_hash_free(struct hlist_head *hash, int bytes)
+static struct hlist_head *
+fib_info_laddrhash_bucket(const struct net *net, __be32 val)
{
- if (!hash)
- return;
+ u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val,
+ fib_info_hash_bits);
- if (bytes <= PAGE_SIZE)
- kfree(hash);
- else
- free_pages((unsigned long) hash, get_order(bytes));
+ return &fib_info_laddrhash[slot];
}
static void fib_info_hash_move(struct hlist_head *new_info_hash,
@@ -1276,12 +1270,13 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
{
struct hlist_head *old_info_hash, *old_laddrhash;
unsigned int old_size = fib_info_hash_size;
- unsigned int i, bytes;
+ unsigned int i;
spin_lock_bh(&fib_info_lock);
old_info_hash = fib_info_hash;
old_laddrhash = fib_info_laddrhash;
fib_info_hash_size = new_size;
+ fib_info_hash_bits = ilog2(new_size);
for (i = 0; i < old_size; i++) {
struct hlist_head *head = &fib_info_hash[i];
@@ -1299,27 +1294,25 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
}
fib_info_hash = new_info_hash;
+ fib_info_laddrhash = new_laddrhash;
for (i = 0; i < old_size; i++) {
- struct hlist_head *lhead = &fib_info_laddrhash[i];
+ struct hlist_head *lhead = &old_laddrhash[i];
struct hlist_node *n;
struct fib_info *fi;
hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
struct hlist_head *ldest;
- unsigned int new_hash;
- new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
- ldest = &new_laddrhash[new_hash];
+ ldest = fib_info_laddrhash_bucket(fi->fib_net,
+ fi->fib_prefsrc);
hlist_add_head(&fi->fib_lhash, ldest);
}
}
- fib_info_laddrhash = new_laddrhash;
spin_unlock_bh(&fib_info_lock);
- bytes = old_size * sizeof(struct hlist_head *);
- fib_info_hash_free(old_info_hash, bytes);
- fib_info_hash_free(old_laddrhash, bytes);
+ kvfree(old_info_hash);
+ kvfree(old_laddrhash);
}
__be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
@@ -1430,23 +1423,25 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
#endif
err = -ENOBUFS;
- if (fib_info_cnt >= fib_info_hash_size) {
+
+ /* Paired with WRITE_ONCE() in fib_release_info() */
+ if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) {
unsigned int new_size = fib_info_hash_size << 1;
struct hlist_head *new_info_hash;
struct hlist_head *new_laddrhash;
- unsigned int bytes;
+ size_t bytes;
if (!new_size)
new_size = 16;
- bytes = new_size * sizeof(struct hlist_head *);
- new_info_hash = fib_info_hash_alloc(bytes);
- new_laddrhash = fib_info_hash_alloc(bytes);
+ bytes = (size_t)new_size * sizeof(struct hlist_head *);
+ new_info_hash = kvzalloc(bytes, GFP_KERNEL);
+ new_laddrhash = kvzalloc(bytes, GFP_KERNEL);
if (!new_info_hash || !new_laddrhash) {
- fib_info_hash_free(new_info_hash, bytes);
- fib_info_hash_free(new_laddrhash, bytes);
- } else
+ kvfree(new_info_hash);
+ kvfree(new_laddrhash);
+ } else {
fib_info_hash_move(new_info_hash, new_laddrhash, new_size);
-
+ }
if (!fib_info_hash_size)
goto failure;
}
@@ -1462,7 +1457,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
return ERR_PTR(err);
}
- fib_info_cnt++;
fi->fib_net = net;
fi->fib_protocol = cfg->fc_protocol;
fi->fib_scope = cfg->fc_scope;
@@ -1591,12 +1585,13 @@ link_it:
refcount_set(&fi->fib_treeref, 1);
refcount_set(&fi->fib_clntref, 1);
spin_lock_bh(&fib_info_lock);
+ fib_info_cnt++;
hlist_add_head(&fi->fib_hash,
&fib_info_hash[fib_info_hashfn(fi)]);
if (fi->fib_prefsrc) {
struct hlist_head *head;
- head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
+ head = fib_info_laddrhash_bucket(net, fi->fib_prefsrc);
hlist_add_head(&fi->fib_lhash, head);
}
if (fi->nh) {
@@ -1604,12 +1599,10 @@ link_it:
} else {
change_nexthops(fi) {
struct hlist_head *head;
- unsigned int hash;
if (!nexthop_nh->fib_nh_dev)
continue;
- hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
- head = &fib_info_devhash[hash];
+ head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev);
hlist_add_head(&nexthop_nh->nh_hash, head);
} endfor_nexthops(fi)
}
@@ -1789,7 +1782,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
rtm->rtm_family = AF_INET;
rtm->rtm_dst_len = fri->dst_len;
rtm->rtm_src_len = 0;
- rtm->rtm_tos = fri->tos;
+ rtm->rtm_tos = inet_dscp_to_dsfield(fri->dscp);
if (tb_id < 256)
rtm->rtm_table = tb_id;
else
@@ -1819,7 +1812,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
goto nla_put_failure;
if (nexthop_is_blackhole(fi->nh))
rtm->rtm_type = RTN_BLACKHOLE;
- if (!fi->fib_net->ipv4.sysctl_nexthop_compat_mode)
+ if (!READ_ONCE(fi->fib_net->ipv4.sysctl_nexthop_compat_mode))
goto offload;
}
@@ -1870,16 +1863,16 @@ nla_put_failure:
*/
int fib_sync_down_addr(struct net_device *dev, __be32 local)
{
- int ret = 0;
- unsigned int hash = fib_laddr_hashfn(local);
- struct hlist_head *head = &fib_info_laddrhash[hash];
int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
struct net *net = dev_net(dev);
+ struct hlist_head *head;
struct fib_info *fi;
+ int ret = 0;
if (!fib_info_laddrhash || local == 0)
return 0;
+ head = fib_info_laddrhash_bucket(net, local);
hlist_for_each_entry(fi, head, fib_lhash) {
if (!net_eq(fi->fib_net, net) ||
fi->fib_tb_id != tb_id)
@@ -1961,8 +1954,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
{
- unsigned int hash = fib_devindex_hashfn(dev->ifindex);
- struct hlist_head *head = &fib_info_devhash[hash];
+ struct hlist_head *head = fib_info_devhash_bucket(dev);
struct fib_nh *nh;
hlist_for_each_entry(nh, head, nh_hash) {
@@ -1981,12 +1973,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
*/
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
- int ret = 0;
- int scope = RT_SCOPE_NOWHERE;
+ struct hlist_head *head = fib_info_devhash_bucket(dev);
struct fib_info *prev_fi = NULL;
- unsigned int hash = fib_devindex_hashfn(dev->ifindex);
- struct hlist_head *head = &fib_info_devhash[hash];
+ int scope = RT_SCOPE_NOWHERE;
struct fib_nh *nh;
+ int ret = 0;
if (force)
scope = -1;
@@ -2055,7 +2046,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
int order = -1, last_idx = -1;
struct fib_alias *fa, *fa1 = NULL;
u32 last_prio = res->fi->fib_priority;
- u8 last_tos = 0;
+ dscp_t last_dscp = 0;
hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
struct fib_info *next_fi = fa->fa_info;
@@ -2063,19 +2054,20 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
if (fa->fa_slen != slen)
continue;
- if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
+ if (fa->fa_dscp &&
+ fa->fa_dscp != inet_dsfield_to_dscp(flp->flowi4_tos))
continue;
if (fa->tb_id != tb->tb_id)
continue;
if (next_fi->fib_priority > last_prio &&
- fa->fa_tos == last_tos) {
- if (last_tos)
+ fa->fa_dscp == last_dscp) {
+ if (last_dscp)
continue;
break;
}
if (next_fi->fib_flags & RTNH_F_DEAD)
continue;
- last_tos = fa->fa_tos;
+ last_dscp = fa->fa_dscp;
last_prio = next_fi->fib_priority;
if (next_fi->fib_scope != res->scope ||
@@ -2131,7 +2123,6 @@ out:
int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
{
struct fib_info *prev_fi;
- unsigned int hash;
struct hlist_head *head;
struct fib_nh *nh;
int ret;
@@ -2147,8 +2138,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
}
prev_fi = NULL;
- hash = fib_devindex_hashfn(dev->ifindex);
- head = &fib_info_devhash[hash];
+ head = fib_info_devhash_bucket(dev);
ret = 0;
hlist_for_each_entry(nh, head, nh_hash) {
@@ -2227,7 +2217,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
}
change_nexthops(fi) {
- if (net->ipv4.sysctl_fib_multipath_use_neigh) {
+ if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) {
if (!fib_good_nh(nexthop_nh))
continue;
if (!first) {
@@ -2250,7 +2240,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
void fib_select_path(struct net *net, struct fib_result *res,
struct flowi4 *fl4, const struct sk_buff *skb)
{
- if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
+ if (fl4->flowi4_oif)
goto check_saddr;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
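
Both fib hash tables move from ad-hoc bit folding to hash_32() and mix net_hash_mix(net) into the key, so identical ifindexes or preferred-source addresses in different namespaces no longer pile into the same buckets. A standalone model of the bucket computation; the per-netns salt value below is made up for the demo:

#include <stdio.h>
#include <stdint.h>

#define GOLDEN_RATIO_32 0x61C88647u

static uint32_t hash_32(uint32_t val, unsigned int bits)
{
	/* the kernel's multiplicative hash from <linux/hash.h> */
	return (val * GOLDEN_RATIO_32) >> (32 - bits);
}

int main(void)
{
	uint32_t net_hash_mix = 0x9E3779B9u;	/* per-netns salt (example) */
	uint32_t ifindex = 2;

	/* devhash bucket, 8 bits as DEVINDEX_HASHBITS in the hunk */
	printf("bucket=%u\n", hash_32(net_hash_mix ^ ifindex, 8));
	return 0;
}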
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 8060524f4256..452ff177e4da 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -61,6 +61,7 @@
#include <linux/vmalloc.h>
#include <linux/notifier.h>
#include <net/net_namespace.h>
+#include <net/inet_dscp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
@@ -81,7 +82,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb,
.dst = dst,
.dst_len = dst_len,
.fi = fa->fa_info,
- .tos = fa->fa_tos,
+ .dscp = fa->fa_dscp,
.type = fa->fa_type,
.tb_id = fa->tb_id,
};
@@ -98,7 +99,7 @@ static int call_fib_entry_notifiers(struct net *net,
.dst = dst,
.dst_len = dst_len,
.fi = fa->fa_info,
- .tos = fa->fa_tos,
+ .dscp = fa->fa_dscp,
.type = fa->fa_type,
.tb_id = fa->tb_id,
};
@@ -497,7 +498,7 @@ static void tnode_free(struct key_vector *tn)
tn = container_of(head, struct tnode, rcu)->kv;
}
- if (tnode_free_size >= sysctl_fib_sync_mem) {
+ if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) {
tnode_free_size = 0;
synchronize_rcu();
}
@@ -973,13 +974,13 @@ static struct key_vector *fib_find_node(struct trie *t,
return n;
}
-/* Return the first fib alias matching TOS with
+/* Return the first fib alias matching DSCP with
* priority less than or equal to PRIO.
* If 'find_first' is set, return the first matching
- * fib alias, regardless of TOS and priority.
+ * fib alias, regardless of DSCP and priority.
*/
static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
- u8 tos, u32 prio, u32 tb_id,
+ dscp_t dscp, u32 prio, u32 tb_id,
bool find_first)
{
struct fib_alias *fa;
@@ -988,6 +989,10 @@ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
return NULL;
hlist_for_each_entry(fa, fah, fa_list) {
+ /* Avoid Sparse warning when using dscp_t in inequalities */
+ u8 __fa_dscp = inet_dscp_to_dsfield(fa->fa_dscp);
+ u8 __dscp = inet_dscp_to_dsfield(dscp);
+
if (fa->fa_slen < slen)
continue;
if (fa->fa_slen != slen)
@@ -998,9 +1003,9 @@ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
break;
if (find_first)
return fa;
- if (fa->fa_tos > tos)
+ if (__fa_dscp > __dscp)
continue;
- if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos)
+ if (fa->fa_info->fib_priority >= prio || __fa_dscp < __dscp)
return fa;
}
@@ -1027,7 +1032,7 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
if (fa->fa_slen == slen && fa->tb_id == fri->tb_id &&
- fa->fa_tos == fri->tos && fa->fa_info == fri->fi &&
+ fa->fa_dscp == fri->dscp && fa->fa_info == fri->fi &&
fa->fa_type == fri->type)
return fa;
}
@@ -1037,6 +1042,7 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
{
+ u8 fib_notify_on_flag_change;
struct fib_alias *fa_match;
struct sk_buff *skb;
int err;
@@ -1047,21 +1053,27 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
if (!fa_match)
goto out;
- if (fa_match->offload == fri->offload && fa_match->trap == fri->trap &&
- fa_match->offload_failed == fri->offload_failed)
+ /* These are paired with the WRITE_ONCE() happening in this function.
+ * The reason is that we are only protected by RCU at this point.
+ */
+ if (READ_ONCE(fa_match->offload) == fri->offload &&
+ READ_ONCE(fa_match->trap) == fri->trap &&
+ READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
goto out;
- fa_match->offload = fri->offload;
- fa_match->trap = fri->trap;
+ WRITE_ONCE(fa_match->offload, fri->offload);
+ WRITE_ONCE(fa_match->trap, fri->trap);
+
+ fib_notify_on_flag_change = READ_ONCE(net->ipv4.sysctl_fib_notify_on_flag_change);
/* 2 means send notifications only if offload_failed was changed. */
- if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 &&
- fa_match->offload_failed == fri->offload_failed)
+ if (fib_notify_on_flag_change == 2 &&
+ READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
goto out;
- fa_match->offload_failed = fri->offload_failed;
+ WRITE_ONCE(fa_match->offload_failed, fri->offload_failed);
- if (!net->ipv4.sysctl_fib_notify_on_flag_change)
+ if (!fib_notify_on_flag_change)
goto out;
skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC);
@@ -1210,7 +1222,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
struct fib_info *fi;
u8 plen = cfg->fc_dst_len;
u8 slen = KEYLENGTH - plen;
- u8 tos = cfg->fc_tos;
+ dscp_t dscp;
u32 key;
int err;
@@ -1227,12 +1239,13 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
goto err;
}
+ dscp = cfg->fc_dscp;
l = fib_find_node(t, &tp, key);
- fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority,
+ fa = l ? fib_find_alias(&l->leaf, slen, dscp, fi->fib_priority,
tb->tb_id, false) : NULL;
/* Now fa, if non-NULL, points to the first fib alias
- * with the same keys [prefix,tos,priority], if such key already
+ * with the same keys [prefix,dscp,priority], if such key already
* exists or to the node before which we will insert new one.
*
* If fa is NULL, we will need to allocate a new one and
@@ -1240,7 +1253,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
* of the new alias.
*/
- if (fa && fa->fa_tos == tos &&
+ if (fa && fa->fa_dscp == dscp &&
fa->fa_info->fib_priority == fi->fib_priority) {
struct fib_alias *fa_first, *fa_match;
@@ -1260,7 +1273,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
hlist_for_each_entry_from(fa, fa_list) {
if ((fa->fa_slen != slen) ||
(fa->tb_id != tb->tb_id) ||
- (fa->fa_tos != tos))
+ (fa->fa_dscp != dscp))
break;
if (fa->fa_info->fib_priority != fi->fib_priority)
break;
@@ -1288,7 +1301,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
goto out;
fi_drop = fa->fa_info;
- new_fa->fa_tos = fa->fa_tos;
+ new_fa->fa_dscp = fa->fa_dscp;
new_fa->fa_info = fi;
new_fa->fa_type = cfg->fc_type;
state = fa->fa_state;
@@ -1351,7 +1364,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
goto out;
new_fa->fa_info = fi;
- new_fa->fa_tos = tos;
+ new_fa->fa_dscp = dscp;
new_fa->fa_type = cfg->fc_type;
new_fa->fa_state = 0;
new_fa->fa_slen = slen;
@@ -1419,11 +1432,8 @@ bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags,
!(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
return false;
- if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
- if (flp->flowi4_oif &&
- flp->flowi4_oif != nhc->nhc_oif)
- return false;
- }
+ if (flp->flowi4_oif && flp->flowi4_oif != nhc->nhc_oif)
+ return false;
return true;
}
@@ -1567,7 +1577,8 @@ found:
if (index >= (1ul << fa->fa_slen))
continue;
}
- if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
+ if (fa->fa_dscp &&
+ inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
continue;
if (fi->fib_dead)
continue;
@@ -1703,7 +1714,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
struct key_vector *l, *tp;
u8 plen = cfg->fc_dst_len;
u8 slen = KEYLENGTH - plen;
- u8 tos = cfg->fc_tos;
+ dscp_t dscp;
u32 key;
key = ntohl(cfg->fc_dst);
@@ -1715,11 +1726,13 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
if (!l)
return -ESRCH;
- fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id, false);
+ dscp = cfg->fc_dscp;
+ fa = fib_find_alias(&l->leaf, slen, dscp, 0, tb->tb_id, false);
if (!fa)
return -ESRCH;
- pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
+ pr_debug("Deleting %08x/%d dsfield=0x%02x t=%p\n", key, plen,
+ inet_dscp_to_dsfield(dscp), t);
fa_to_delete = NULL;
hlist_for_each_entry_from(fa, fa_list) {
@@ -1727,7 +1740,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
if ((fa->fa_slen != slen) ||
(fa->tb_id != tb->tb_id) ||
- (fa->fa_tos != tos))
+ (fa->fa_dscp != dscp))
break;
if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
@@ -2295,11 +2308,11 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
fri.tb_id = tb->tb_id;
fri.dst = xkey;
fri.dst_len = KEYLENGTH - fa->fa_slen;
- fri.tos = fa->fa_tos;
+ fri.dscp = fa->fa_dscp;
fri.type = fa->fa_type;
- fri.offload = fa->offload;
- fri.trap = fa->trap;
- fri.offload_failed = fa->offload_failed;
+ fri.offload = READ_ONCE(fa->offload);
+ fri.trap = READ_ONCE(fa->trap);
+ fri.offload_failed = READ_ONCE(fa->offload_failed);
err = fib_dump_info(skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
@@ -2615,7 +2628,7 @@ static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
static int fib_triestat_seq_show(struct seq_file *seq, void *v)
{
- struct net *net = (struct net *)seq->private;
+ struct net *net = seq->private;
unsigned int h;
seq_printf(seq,
@@ -2807,8 +2820,9 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
fa->fa_info->fib_scope),
rtn_type(buf2, sizeof(buf2),
fa->fa_type));
- if (fa->fa_tos)
- seq_printf(seq, " tos=%d", fa->fa_tos);
+ if (fa->fa_dscp)
+ seq_printf(seq, " tos=%d",
+ inet_dscp_to_dsfield(fa->fa_dscp));
seq_putc(seq, '\n');
}
}
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 0d085cc8d96c..0c3c6d0cee29 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -16,7 +16,6 @@
#include <net/protocol.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
-#include <net/xfrm.h>
#include <uapi/linux/fou.h>
#include <uapi/linux/genetlink.h>
@@ -324,12 +323,9 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
off = skb_gro_offset(skb);
len = off + sizeof(*guehdr);
- guehdr = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, len)) {
- guehdr = skb_gro_header_slow(skb, len, off);
- if (unlikely(!guehdr))
- goto out;
- }
+ guehdr = skb_gro_header(skb, len, off);
+ if (unlikely(!guehdr))
+ goto out;
switch (guehdr->version) {
case 0:
@@ -932,6 +928,7 @@ static struct genl_family fou_nl_family __ro_after_init = {
.module = THIS_MODULE,
.small_ops = fou_nl_ops,
.n_small_ops = ARRAY_SIZE(fou_nl_ops),
+ .resv_start_op = FOU_CMD_GET + 1,
};
size_t fou_encap_hlen(struct ip_tunnel_encap *e)
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 07073fa35205..2b9cb5398335 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -137,12 +137,9 @@ static struct sk_buff *gre_gro_receive(struct list_head *head,
off = skb_gro_offset(skb);
hlen = off + sizeof(*greh);
- greh = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, hlen)) {
- greh = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!greh))
- goto out;
- }
+ greh = skb_gro_header(skb, hlen, off);
+ if (unlikely(!greh))
+ goto out;
/* Only support version 0 and K (key), C (csum) flags. Note that
* although the support for the S (seq#) flag can be added easily
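
Both gue_gro_receive() and gre_gro_receive() collapse the fast/slow header-lookup dance into a single skb_gro_header() call. Inferred from these call sites, the helper is presumably a thin wrapper like the following kernel-style sketch (the in-tree definition may differ in detail):

static inline void *skb_gro_header(struct sk_buff *skb,
				   unsigned int hlen, unsigned int offset)
{
	void *ptr;

	/* try the fast linear lookup first */
	ptr = skb_gro_header_fast(skb, offset);
	/* fall back to the slow path when the header straddles frags */
	if (skb_gro_header_hard(skb, hlen))
		ptr = skb_gro_header_slow(skb, hlen, offset);
	return ptr;
}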
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index b7e277d8a84d..d5d745c3e345 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -186,30 +186,20 @@ EXPORT_SYMBOL(icmp_err_convert);
*/
struct icmp_control {
- bool (*handler)(struct sk_buff *skb);
+ enum skb_drop_reason (*handler)(struct sk_buff *skb);
short error; /* This ICMP is classed as an error message */
};
static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
-/*
- * The ICMP socket(s). This is the most convenient way to flow control
- * our ICMP output as well as maintain a clean interface throughout
- * all layers. All Socketless IP sends will soon be gone.
- *
- * On SMP we have one ICMP socket per-cpu.
- */
-static struct sock *icmp_sk(struct net *net)
-{
- return this_cpu_read(*net->ipv4.icmp_sk);
-}
+static DEFINE_PER_CPU(struct sock *, ipv4_icmp_sk);
/* Called with BH disabled */
static inline struct sock *icmp_xmit_lock(struct net *net)
{
struct sock *sk;
- sk = icmp_sk(net);
+ sk = this_cpu_read(ipv4_icmp_sk);
if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
/* This can happen if the output path signals a
@@ -217,11 +207,13 @@ static inline struct sock *icmp_xmit_lock(struct net *net)
*/
return NULL;
}
+ sock_net_set(sk, net);
return sk;
}
static inline void icmp_xmit_unlock(struct sock *sk)
{
+ sock_net_set(sk, &init_net);
spin_unlock(&sk->sk_lock.slock);
}
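
The per-netns arrays of per-cpu ICMP sockets become one global per-cpu socket; because a single socket now serves every namespace, icmp_xmit_lock() must retarget it with sock_net_set() for the duration of the transmit and icmp_xmit_unlock() parks it back on init_net. A kernel-style sketch of the required pairing in a caller (icmp_send_something is a hypothetical name):

static void icmp_send_something(struct net *net, struct sk_buff *skb)
{
	struct sock *sk;

	local_bh_disable();
	sk = icmp_xmit_lock(net);	/* points the per-cpu sk at 'net' */
	if (sk) {
		/* ... build and push the reply using sk ... */
		icmp_xmit_unlock(sk);	/* parks sk back on init_net */
	}
	local_bh_enable();
}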
@@ -261,11 +253,12 @@ bool icmp_global_allow(void)
spin_lock(&icmp_global.lock);
delta = min_t(u32, now - icmp_global.stamp, HZ);
if (delta >= HZ / 50) {
- incr = sysctl_icmp_msgs_per_sec * delta / HZ ;
+ incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
if (incr)
WRITE_ONCE(icmp_global.stamp, now);
}
- credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst);
+ credit = min_t(u32, icmp_global.credit + incr,
+ READ_ONCE(sysctl_icmp_msgs_burst));
if (credit) {
/* We want to use a credit of one in average, but need to randomize
* it for security reasons.
@@ -289,7 +282,7 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code)
return true;
/* Limit if icmp type is enabled in ratemask. */
- if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask))
+ if (!((1 << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask)))
return true;
return false;
@@ -327,7 +320,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
vif = l3mdev_master_ifindex(dst->dev);
peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
- rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit);
+ rc = inet_peer_xrlim_allow(peer,
+ READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
if (peer)
inet_putpeer(peer);
out:
@@ -350,7 +344,7 @@ void icmp_out_count(struct net *net, unsigned char type)
static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
struct sk_buff *skb)
{
- struct icmp_bxm *icmp_param = (struct icmp_bxm *)from;
+ struct icmp_bxm *icmp_param = from;
__wsum csum;
csum = skb_copy_and_csum_bits(icmp_param->skb,
@@ -363,14 +357,13 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
return 0;
}
-static void icmp_push_reply(struct icmp_bxm *icmp_param,
+static void icmp_push_reply(struct sock *sk,
+ struct icmp_bxm *icmp_param,
struct flowi4 *fl4,
struct ipcm_cookie *ipc, struct rtable **rt)
{
- struct sock *sk;
struct sk_buff *skb;
- sk = icmp_sk(dev_net((*rt)->dst.dev));
if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
icmp_param->data_len+icmp_param->head_len,
icmp_param->head_len,
@@ -452,7 +445,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
if (IS_ERR(rt))
goto out_unlock;
if (icmpv4_xrlim_allow(net, rt, &fl4, type, code))
- icmp_push_reply(icmp_param, &fl4, &ipc, &rt);
+ icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt);
ip_rt_put(rt);
out_unlock:
icmp_xmit_unlock(sk);
@@ -701,7 +694,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
rcu_read_lock();
if (rt_is_input_route(rt) &&
- net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
+ READ_ONCE(net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr))
dev = dev_get_by_index_rcu(net, inet_iif(skb_in));
if (dev)
@@ -766,7 +759,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
if (!fl4.saddr)
fl4.saddr = htonl(INADDR_DUMMY);
- icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
+ icmp_push_reply(sk, &icmp_param, &fl4, &ipc, &rt);
ende:
ip_rt_put(rt);
out_unlock:
@@ -848,8 +841,9 @@ static bool icmp_tag_validation(int proto)
* ICMP_PARAMETERPROB.
*/
-static bool icmp_unreach(struct sk_buff *skb)
+static enum skb_drop_reason icmp_unreach(struct sk_buff *skb)
{
+ enum skb_drop_reason reason = SKB_NOT_DROPPED_YET;
const struct iphdr *iph;
struct icmphdr *icmph;
struct net *net;
@@ -869,8 +863,10 @@ static bool icmp_unreach(struct sk_buff *skb)
icmph = icmp_hdr(skb);
iph = (const struct iphdr *)skb->data;
- if (iph->ihl < 5) /* Mangled header, drop. */
+ if (iph->ihl < 5) { /* Mangled header, drop. */
+ reason = SKB_DROP_REASON_IP_INHDR;
goto out_err;
+ }
switch (icmph->type) {
case ICMP_DEST_UNREACH:
@@ -885,7 +881,7 @@ static bool icmp_unreach(struct sk_buff *skb)
* values please see
* Documentation/networking/ip-sysctl.rst
*/
- switch (net->ipv4.sysctl_ip_no_pmtu_disc) {
+ switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) {
default:
net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n",
&iph->daddr);
@@ -938,7 +934,7 @@ static bool icmp_unreach(struct sk_buff *skb)
* get the other vendor to fix their kit.
*/
- if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses &&
+ if (!READ_ONCE(net->ipv4.sysctl_icmp_ignore_bogus_error_responses) &&
inet_addr_type_dev_table(net, skb->dev, iph->daddr) == RTN_BROADCAST) {
net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n",
&ip_hdr(skb)->saddr,
@@ -950,10 +946,10 @@ static bool icmp_unreach(struct sk_buff *skb)
icmp_socket_deliver(skb, info);
out:
- return true;
+ return reason;
out_err:
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
- return false;
+ return reason ?: SKB_DROP_REASON_NOT_SPECIFIED;
}
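
Every converted handler keeps the same contract: return
SKB_NOT_DROPPED_YET (numerically 0) when the packet was consumed, any
other enum value to name the drop cause. The "reason ?: ..." on the
error path covers handlers that jump to out_err before picking a
specific reason. A condensed sketch; bad_header() is a hypothetical
check:

	static enum skb_drop_reason handler(struct sk_buff *skb)
	{
		enum skb_drop_reason reason = SKB_NOT_DROPPED_YET;

		if (bad_header(skb)) {
			reason = SKB_DROP_REASON_IP_INHDR;
			goto out_err;
		}
		return reason;			/* 0: consumed OK */
	out_err:
		/* keep a specific reason if one was set, else a generic one */
		return reason ?: SKB_DROP_REASON_NOT_SPECIFIED;
	}
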
@@ -961,20 +957,20 @@ out_err:
* Handle ICMP_REDIRECT.
*/
-static bool icmp_redirect(struct sk_buff *skb)
+static enum skb_drop_reason icmp_redirect(struct sk_buff *skb)
{
if (skb->len < sizeof(struct iphdr)) {
__ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
- return false;
+ return SKB_DROP_REASON_PKT_TOO_SMALL;
}
if (!pskb_may_pull(skb, sizeof(struct iphdr))) {
/* there ought to be a stat */
- return false;
+ return SKB_DROP_REASON_NOMEM;
}
icmp_socket_deliver(skb, ntohl(icmp_hdr(skb)->un.gateway));
- return true;
+ return SKB_NOT_DROPPED_YET;
}
/*
@@ -991,15 +987,15 @@ static bool icmp_redirect(struct sk_buff *skb)
* See also WRT handling of options once they are done and working.
*/
-static bool icmp_echo(struct sk_buff *skb)
+static enum skb_drop_reason icmp_echo(struct sk_buff *skb)
{
struct icmp_bxm icmp_param;
struct net *net;
net = dev_net(skb_dst(skb)->dev);
/* should there be an ICMP stat for ignored echoes? */
- if (net->ipv4.sysctl_icmp_echo_ignore_all)
- return true;
+ if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all))
+ return SKB_NOT_DROPPED_YET;
icmp_param.data.icmph = *icmp_hdr(skb);
icmp_param.skb = skb;
@@ -1010,10 +1006,10 @@ static bool icmp_echo(struct sk_buff *skb)
if (icmp_param.data.icmph.type == ICMP_ECHO)
icmp_param.data.icmph.type = ICMP_ECHOREPLY;
else if (!icmp_build_probe(skb, &icmp_param.data.icmph))
- return true;
+ return SKB_NOT_DROPPED_YET;
icmp_reply(&icmp_param, skb);
- return true;
+ return SKB_NOT_DROPPED_YET;
}
/* Helper for icmp_echo and icmpv6_echo_reply.
@@ -1033,7 +1029,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
u16 ident_len;
u8 status;
- if (!net->ipv4.sysctl_icmp_echo_enable_probe)
+ if (!READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
return false;
/* We currently only support probing interfaces on the proxy node
@@ -1131,7 +1127,7 @@ EXPORT_SYMBOL_GPL(icmp_build_probe);
* MUST be accurate to a few minutes.
* MUST be updated at least at 15Hz.
*/
-static bool icmp_timestamp(struct sk_buff *skb)
+static enum skb_drop_reason icmp_timestamp(struct sk_buff *skb)
{
struct icmp_bxm icmp_param;
/*
@@ -1156,17 +1152,17 @@ static bool icmp_timestamp(struct sk_buff *skb)
icmp_param.data_len = 0;
icmp_param.head_len = sizeof(struct icmphdr) + 12;
icmp_reply(&icmp_param, skb);
- return true;
+ return SKB_NOT_DROPPED_YET;
out_err:
__ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
- return false;
+ return SKB_DROP_REASON_PKT_TOO_SMALL;
}
-static bool icmp_discard(struct sk_buff *skb)
+static enum skb_drop_reason icmp_discard(struct sk_buff *skb)
{
/* pretend it was a success */
- return true;
+ return SKB_NOT_DROPPED_YET;
}
/*
@@ -1174,18 +1170,20 @@ static bool icmp_discard(struct sk_buff *skb)
*/
int icmp_rcv(struct sk_buff *skb)
{
- struct icmphdr *icmph;
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct rtable *rt = skb_rtable(skb);
struct net *net = dev_net(rt->dst.dev);
- bool success;
+ struct icmphdr *icmph;
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
struct sec_path *sp = skb_sec_path(skb);
int nh;
if (!(sp && sp->xvec[sp->len - 1]->props.flags &
- XFRM_STATE_ICMP))
+ XFRM_STATE_ICMP)) {
+ reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop;
+ }
if (!pskb_may_pull(skb, sizeof(*icmph) + sizeof(struct iphdr)))
goto drop;
@@ -1193,8 +1191,11 @@ int icmp_rcv(struct sk_buff *skb)
nh = skb_network_offset(skb);
skb_set_network_header(skb, sizeof(*icmph));
- if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
+ if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN,
+ skb)) {
+ reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop;
+ }
skb_set_network_header(skb, nh);
}
@@ -1216,13 +1217,13 @@ int icmp_rcv(struct sk_buff *skb)
/* We can't use icmp_pointers[].handler() because it is an array of
* size NR_ICMP_TYPES + 1 (19 elements) and PROBE has type 42.
*/
- success = icmp_echo(skb);
- goto success_check;
+ reason = icmp_echo(skb);
+ goto reason_check;
}
if (icmph->type == ICMP_EXT_ECHOREPLY) {
- success = ping_rcv(skb);
- goto success_check;
+ reason = ping_rcv(skb);
+ goto reason_check;
}
/*
@@ -1231,8 +1232,10 @@ int icmp_rcv(struct sk_buff *skb)
* RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
* discarded.
*/
- if (icmph->type > NR_ICMP_TYPES)
+ if (icmph->type > NR_ICMP_TYPES) {
+ reason = SKB_DROP_REASON_UNHANDLED_PROTO;
goto error;
+ }
/*
* Parse the ICMP message
@@ -1247,28 +1250,31 @@ int icmp_rcv(struct sk_buff *skb)
*/
if ((icmph->type == ICMP_ECHO ||
icmph->type == ICMP_TIMESTAMP) &&
- net->ipv4.sysctl_icmp_echo_ignore_broadcasts) {
+ READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_broadcasts)) {
+ reason = SKB_DROP_REASON_INVALID_PROTO;
goto error;
}
if (icmph->type != ICMP_ECHO &&
icmph->type != ICMP_TIMESTAMP &&
icmph->type != ICMP_ADDRESS &&
icmph->type != ICMP_ADDRESSREPLY) {
+ reason = SKB_DROP_REASON_INVALID_PROTO;
goto error;
}
}
- success = icmp_pointers[icmph->type].handler(skb);
-success_check:
- if (success) {
+ reason = icmp_pointers[icmph->type].handler(skb);
+reason_check:
+ if (!reason) {
consume_skb(skb);
return NET_RX_SUCCESS;
}
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
csum_error:
+ reason = SKB_DROP_REASON_ICMP_CSUM;
__ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS);
error:
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
@@ -1434,46 +1440,8 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
},
};
-static void __net_exit icmp_sk_exit(struct net *net)
-{
- int i;
-
- for_each_possible_cpu(i)
- inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i));
- free_percpu(net->ipv4.icmp_sk);
- net->ipv4.icmp_sk = NULL;
-}
-
static int __net_init icmp_sk_init(struct net *net)
{
- int i, err;
-
- net->ipv4.icmp_sk = alloc_percpu(struct sock *);
- if (!net->ipv4.icmp_sk)
- return -ENOMEM;
-
- for_each_possible_cpu(i) {
- struct sock *sk;
-
- err = inet_ctl_sock_create(&sk, PF_INET,
- SOCK_RAW, IPPROTO_ICMP, net);
- if (err < 0)
- goto fail;
-
- *per_cpu_ptr(net->ipv4.icmp_sk, i) = sk;
-
- /* Enough space for 2 64K ICMP packets, including
- * sk_buff/skb_shared_info struct overhead.
- */
- sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
-
- /*
- * Speedup sock_wfree()
- */
- sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
- inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
- }
-
/* Control parameters for ECHO replies. */
net->ipv4.sysctl_icmp_echo_ignore_all = 0;
net->ipv4.sysctl_icmp_echo_enable_probe = 0;
@@ -1499,18 +1467,36 @@ static int __net_init icmp_sk_init(struct net *net)
net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
return 0;
-
-fail:
- icmp_sk_exit(net);
- return err;
}
static struct pernet_operations __net_initdata icmp_sk_ops = {
.init = icmp_sk_init,
- .exit = icmp_sk_exit,
};
int __init icmp_init(void)
{
+ int err, i;
+
+ for_each_possible_cpu(i) {
+ struct sock *sk;
+
+ err = inet_ctl_sock_create(&sk, PF_INET,
+ SOCK_RAW, IPPROTO_ICMP, &init_net);
+ if (err < 0)
+ return err;
+
+ per_cpu(ipv4_icmp_sk, i) = sk;
+
+ /* Enough space for 2 64K ICMP packets, including
+ * sk_buff/skb_shared_info struct overhead.
+ */
+ sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
+
+ /*
+ * Speedup sock_wfree()
+ */
+ sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+ inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
+ }
return register_pernet_subsys(&icmp_sk_ops);
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 2ad3c7b42d6d..81be3e0f0e70 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -213,7 +213,7 @@ static void igmp_stop_timer(struct ip_mc_list *im)
/* It must be called with locked im->lock */
static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
{
- int tv = prandom_u32() % max_delay;
+ int tv = prandom_u32_max(max_delay);
im->tm_running = 1;
if (!mod_timer(&im->timer, jiffies+tv+2))
@@ -222,7 +222,7 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
static void igmp_gq_start_timer(struct in_device *in_dev)
{
- int tv = prandom_u32() % in_dev->mr_maxdelay;
+ int tv = prandom_u32_max(in_dev->mr_maxdelay);
unsigned long exp = jiffies + tv + 2;
if (in_dev->mr_gq_running &&
@@ -236,7 +236,7 @@ static void igmp_gq_start_timer(struct in_device *in_dev)
static void igmp_ifc_start_timer(struct in_device *in_dev, int delay)
{
- int tv = prandom_u32() % delay;
+ int tv = prandom_u32_max(delay);
if (!mod_timer(&in_dev->mr_ifc_timer, jiffies+tv+2))
in_dev_hold(in_dev);
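
prandom_u32_max(ceil) returns a value in [0, ceil) via a 32x32->64
multiply and shift, avoiding the division behind "prandom_u32() % ceil";
the three timer helpers above all reduce to the same shape:

	static void start_random_timer(struct timer_list *t, u32 max_delay)
	{
		u32 tv = prandom_u32_max(max_delay);	/* in [0, max_delay) */

		mod_timer(t, jiffies + tv + 2);
	}
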
@@ -467,7 +467,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
if (pmc->multiaddr == IGMP_ALL_HOSTS)
return skb;
- if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(pmc->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return skb;
mtu = READ_ONCE(dev->mtu);
@@ -593,7 +594,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
if (pmc->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(pmc->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
spin_lock_bh(&pmc->lock);
if (pmc->sfcount[MCAST_EXCLUDE])
@@ -736,7 +737,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
return igmpv3_send_report(in_dev, pmc);
- if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(group) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return 0;
if (type == IGMP_HOST_LEAVE_MESSAGE)
@@ -825,7 +827,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
struct net *net = dev_net(in_dev->dev);
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
return;
- WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv);
+ WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv));
igmp_ifc_start_timer(in_dev, 1);
}
@@ -920,7 +922,8 @@ static bool igmp_heard_report(struct in_device *in_dev, __be32 group)
if (group == IGMP_ALL_HOSTS)
return false;
- if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(group) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return false;
rcu_read_lock();
@@ -1006,7 +1009,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
* received value was zero, use the default or statically
* configured value.
*/
- in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
+ in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
/* RFC3376, 8.3. Query Response Interval:
@@ -1045,7 +1048,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
if (im->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(im->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
spin_lock_bh(&im->lock);
if (im->tm_running)
@@ -1186,7 +1189,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im,
pmc->interface = im->interface;
in_dev_hold(in_dev);
pmc->multiaddr = im->multiaddr;
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
pmc->sfmode = im->sfmode;
if (pmc->sfmode == MCAST_INCLUDE) {
struct ip_sf_list *psf;
@@ -1237,9 +1240,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
swap(im->tomb, pmc->tomb);
swap(im->sources, pmc->sources);
for (psf = im->sources; psf; psf = psf->sf_next)
- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ psf->sf_crcount = in_dev->mr_qrv ?:
+ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
} else {
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ im->crcount = in_dev->mr_qrv ?:
+ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
}
in_dev_put(pmc->interface);
kfree_pmc(pmc);
@@ -1296,7 +1301,8 @@ static void __igmp_group_dropped(struct ip_mc_list *im, gfp_t gfp)
#ifdef CONFIG_IP_MULTICAST
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
- if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(im->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return;
reporter = im->reporter;
@@ -1338,13 +1344,14 @@ static void igmp_group_added(struct ip_mc_list *im)
#ifdef CONFIG_IP_MULTICAST
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
- if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(im->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return;
if (in_dev->dead)
return;
- im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
+ im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
spin_lock_bh(&im->lock);
igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
@@ -1358,7 +1365,7 @@ static void igmp_group_added(struct ip_mc_list *im)
* IN() to IN(A).
*/
if (im->sfmode == MCAST_EXCLUDE)
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
igmp_ifc_event(in_dev);
#endif
@@ -1642,7 +1649,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev)
if (im->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(im->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
/* a failover is happening and switches
@@ -1749,7 +1756,7 @@ static void ip_mc_reset(struct in_device *in_dev)
in_dev->mr_qi = IGMP_QUERY_INTERVAL;
in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
+ in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
}
#else
static void ip_mc_reset(struct in_device *in_dev)
@@ -1883,7 +1890,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
#ifdef CONFIG_IP_MULTICAST
if (psf->sf_oldin &&
!IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
psf->sf_next = pmc->tomb;
pmc->tomb = psf;
rv = 1;
@@ -1947,7 +1954,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
/* filter mode change */
pmc->sfmode = MCAST_INCLUDE;
#ifdef CONFIG_IP_MULTICAST
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
@@ -2126,7 +2133,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
#ifdef CONFIG_IP_MULTICAST
/* else no filters; keep old mode for reports */
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
@@ -2192,7 +2199,7 @@ static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr,
count++;
}
err = -ENOBUFS;
- if (count >= net->ipv4.sysctl_igmp_max_memberships)
+ if (count >= READ_ONCE(net->ipv4.sysctl_igmp_max_memberships))
goto done;
iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL);
if (!iml)
@@ -2379,7 +2386,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
}
/* else, add a new source to the filter */
- if (psl && psl->sl_count >= net->ipv4.sysctl_igmp_max_msf) {
+ if (psl && psl->sl_count >= READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) {
err = -ENOBUFS;
goto done;
}
@@ -2403,9 +2410,10 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
/* decrease mem now to avoid the memleak warning */
atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
&sk->sk_omem_alloc);
- kfree_rcu(psl, rcu);
}
rcu_assign_pointer(pmc->sflist, newpsl);
+ if (psl)
+ kfree_rcu(psl, rcu);
psl = newpsl;
}
rv = 1; /* > 0 for insert logic below if sl_count is 0 */
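
The reordering is the point of this hunk (and of the ip_mc_msfilter one
below): an object must be unpublished before it is handed to
kfree_rcu(), otherwise a reader that fetches pmc->sflist after the call
can still hold the old pointer once the grace period expires. The safe
sequence:

	/* 1. switch readers over to the new list */
	rcu_assign_pointer(pmc->sflist, newpsl);
	/* 2. only now schedule the unreachable old list for freeing */
	if (psl)
		kfree_rcu(psl, rcu);
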
@@ -2507,11 +2515,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
/* decrease mem now to avoid the memleak warning */
atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
&sk->sk_omem_alloc);
- kfree_rcu(psl, rcu);
- } else
+ } else {
(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
0, NULL, 0);
+ }
rcu_assign_pointer(pmc->sflist, newpsl);
+ if (psl)
+ kfree_rcu(psl, rcu);
pmc->sfmode = msf->imsf_fmode;
err = 0;
done:
@@ -2519,11 +2529,10 @@ done:
err = ip_mc_leave_group(sk, &imr);
return err;
}
-
int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
- struct ip_msfilter __user *optval, int __user *optlen)
+ sockptr_t optval, sockptr_t optlen)
{
- int err, len, count, copycount;
+ int err, len, count, copycount, msf_size;
struct ip_mreqn imr;
__be32 addr = msf->imsf_multiaddr;
struct ip_mc_socklist *pmc;
@@ -2565,12 +2574,15 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
copycount = count < msf->imsf_numsrc ? count : msf->imsf_numsrc;
len = flex_array_size(psl, sl_addr, copycount);
msf->imsf_numsrc = count;
- if (put_user(IP_MSFILTER_SIZE(copycount), optlen) ||
- copy_to_user(optval, msf, IP_MSFILTER_SIZE(0))) {
+ msf_size = IP_MSFILTER_SIZE(copycount);
+ if (copy_to_sockptr(optlen, &msf_size, sizeof(int)) ||
+ copy_to_sockptr(optval, msf, IP_MSFILTER_SIZE(0))) {
return -EFAULT;
}
if (len &&
- copy_to_user(&optval->imsf_slist_flex[0], psl->sl_addr, len))
+ copy_to_sockptr_offset(optval,
+ offsetof(struct ip_msfilter, imsf_slist_flex),
+ psl->sl_addr, len))
return -EFAULT;
return 0;
done:
@@ -2578,7 +2590,7 @@ done:
}
int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
- struct sockaddr_storage __user *p)
+ sockptr_t optval, size_t ss_offset)
{
int i, count, copycount;
struct sockaddr_in *psin;
@@ -2608,15 +2620,17 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
count = psl ? psl->sl_count : 0;
copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
gsf->gf_numsrc = count;
- for (i = 0; i < copycount; i++, p++) {
+ for (i = 0; i < copycount; i++) {
struct sockaddr_storage ss;
psin = (struct sockaddr_in *)&ss;
memset(&ss, 0, sizeof(ss));
psin->sin_family = AF_INET;
psin->sin_addr.s_addr = psl->sl_addr[i];
- if (copy_to_user(p, &ss, sizeof(ss)))
+ if (copy_to_sockptr_offset(optval, ss_offset,
+ &ss, sizeof(ss)))
return -EFAULT;
+ ss_offset += sizeof(ss);
}
return 0;
}
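
sockptr_t lets one getsockopt path serve both user and kernel
destination buffers, with explicit byte offsets replacing arithmetic on
__user pointers. A sketch of the copy loop above; fill_entry() is a
hypothetical helper:

	for (i = 0; i < copycount; i++) {
		struct sockaddr_storage ss = {};

		fill_entry(&ss, psl->sl_addr[i]);
		if (copy_to_sockptr_offset(optval, ss_offset, &ss, sizeof(ss)))
			return -EFAULT;
		ss_offset += sizeof(ss);	/* advance, no pointer math */
	}
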
@@ -2836,7 +2850,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
seq_puts(seq,
"Idx\tDevice : Count Querier\tGroup Users Timer\tReporter\n");
else {
- struct ip_mc_list *im = (struct ip_mc_list *)v;
+ struct ip_mc_list *im = v;
struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
char *querier;
long delta;
@@ -2980,7 +2994,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v)
static int igmp_mcf_seq_show(struct seq_file *seq, void *v)
{
- struct ip_sf_list *psf = (struct ip_sf_list *)v;
+ struct ip_sf_list *psf = v;
struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
if (v == SEQ_START_TOKEN) {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index fc2a985f6064..4e84ed21d16f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -130,14 +130,75 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
}
EXPORT_SYMBOL(inet_get_local_port_range);
+static bool inet_use_bhash2_on_bind(const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6) {
+ int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
+
+ return addr_type != IPV6_ADDR_ANY &&
+ addr_type != IPV6_ADDR_MAPPED;
+ }
+#endif
+ return sk->sk_rcv_saddr != htonl(INADDR_ANY);
+}
+
+static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2,
+ kuid_t sk_uid, bool relax,
+ bool reuseport_cb_ok, bool reuseport_ok)
+{
+ int bound_dev_if2;
+
+ if (sk == sk2)
+ return false;
+
+ bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
+
+ if (!sk->sk_bound_dev_if || !bound_dev_if2 ||
+ sk->sk_bound_dev_if == bound_dev_if2) {
+ if (sk->sk_reuse && sk2->sk_reuse &&
+ sk2->sk_state != TCP_LISTEN) {
+ if (!relax || (!reuseport_ok && sk->sk_reuseport &&
+ sk2->sk_reuseport && reuseport_cb_ok &&
+ (sk2->sk_state == TCP_TIME_WAIT ||
+ uid_eq(sk_uid, sock_i_uid(sk2)))))
+ return true;
+ } else if (!reuseport_ok || !sk->sk_reuseport ||
+ !sk2->sk_reuseport || !reuseport_cb_ok ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
+ !uid_eq(sk_uid, sock_i_uid(sk2)))) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool inet_bhash2_conflict(const struct sock *sk,
+ const struct inet_bind2_bucket *tb2,
+ kuid_t sk_uid,
+ bool relax, bool reuseport_cb_ok,
+ bool reuseport_ok)
+{
+ struct sock *sk2;
+
+ sk_for_each_bound_bhash2(sk2, &tb2->owners) {
+ if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
+ continue;
+
+ if (inet_bind_conflict(sk, sk2, sk_uid, relax,
+ reuseport_cb_ok, reuseport_ok))
+ return true;
+ }
+ return false;
+}
+
+/* This should be called only when the tb and tb2 hashbuckets' locks are held */
static int inet_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb,
+ const struct inet_bind2_bucket *tb2, /* may be null */
bool relax, bool reuseport_ok)
{
- struct sock *sk2;
bool reuseport_cb_ok;
- bool reuse = sk->sk_reuse;
- bool reuseport = !!sk->sk_reuseport;
struct sock_reuseport *reuseport_cb;
kuid_t uid = sock_i_uid((struct sock *)sk);
@@ -150,54 +211,88 @@ static int inet_csk_bind_conflict(const struct sock *sk,
/*
* Unlike other sk lookup places we do not check
* for sk_net here, since _all_ the socks listed
- * in tb->owners list belong to the same net - the
- * one this bucket belongs to.
+ * in tb->owners and tb2->owners lists belong
+ * to the same net - the one this bucket belongs to.
*/
- sk_for_each_bound(sk2, &tb->owners) {
- if (sk != sk2 &&
- (!sk->sk_bound_dev_if ||
- !sk2->sk_bound_dev_if ||
- sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
- if (reuse && sk2->sk_reuse &&
- sk2->sk_state != TCP_LISTEN) {
- if ((!relax ||
- (!reuseport_ok &&
- reuseport && sk2->sk_reuseport &&
- reuseport_cb_ok &&
- (sk2->sk_state == TCP_TIME_WAIT ||
- uid_eq(uid, sock_i_uid(sk2))))) &&
- inet_rcv_saddr_equal(sk, sk2, true))
- break;
- } else if (!reuseport_ok ||
- !reuseport || !sk2->sk_reuseport ||
- !reuseport_cb_ok ||
- (sk2->sk_state != TCP_TIME_WAIT &&
- !uid_eq(uid, sock_i_uid(sk2)))) {
- if (inet_rcv_saddr_equal(sk, sk2, true))
- break;
- }
- }
+ if (!inet_use_bhash2_on_bind(sk)) {
+ struct sock *sk2;
+
+ sk_for_each_bound(sk2, &tb->owners)
+ if (inet_bind_conflict(sk, sk2, uid, relax,
+ reuseport_cb_ok, reuseport_ok) &&
+ inet_rcv_saddr_equal(sk, sk2, true))
+ return true;
+
+ return false;
+ }
+
+ /* Conflicts with an existing IPV6_ADDR_ANY (if ipv6) or INADDR_ANY (if
+ * ipv4) should have been checked already. We need to do these two
+ * checks separately because their spinlocks have to be acquired/released
+ * independently of each other, to prevent possible deadlocks.
+ */
+ return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
+ reuseport_ok);
+}
+
+/* Determine if there is a bind conflict with an existing IPV6_ADDR_ANY (if ipv6) or
+ * INADDR_ANY (if ipv4) socket.
+ *
+ * Caller must hold bhash hashbucket lock with local bh disabled, to protect
+ * against concurrent binds on the port for addr any.
+ */
+static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l3mdev,
+ bool relax, bool reuseport_ok)
+{
+ kuid_t uid = sock_i_uid((struct sock *)sk);
+ const struct net *net = sock_net(sk);
+ struct sock_reuseport *reuseport_cb;
+ struct inet_bind_hashbucket *head2;
+ struct inet_bind2_bucket *tb2;
+ bool reuseport_cb_ok;
+
+ rcu_read_lock();
+ reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
+ /* paired with WRITE_ONCE() in __reuseport_(add|detach)_closed_sock */
+ reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks);
+ rcu_read_unlock();
+
+ head2 = inet_bhash2_addr_any_hashbucket(sk, net, port);
+
+ spin_lock(&head2->lock);
+
+ inet_bind_bucket_for_each(tb2, &head2->chain)
+ if (inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk))
+ break;
+
+ if (tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
+ reuseport_ok)) {
+ spin_unlock(&head2->lock);
+ return true;
}
- return sk2 != NULL;
+
+ spin_unlock(&head2->lock);
+ return false;
}
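
The comments above describe a lock-ordering discipline: the addr-any
bhash2 bucket lock is taken and dropped inside the helper before the
specific-address bucket lock is acquired. Condensed, the bind path now
locks in this order (names as above):

	spin_lock_bh(&head->lock);		/* 1: per-port bhash bucket */

	if (inet_use_bhash2_on_bind(sk) &&
	    inet_bhash2_addr_any_conflict(sk, port, l3mdev, relax, false))
		goto next_port;			/* 2: any-addr bhash2 bucket,
						 *    locked inside the helper */

	head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
	spin_lock(&head2->lock);		/* 3: specific-addr bucket */
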
/*
* Find an open port number for the socket. Returns with the
- * inet_bind_hashbucket lock held.
+ * inet_bind_hashbucket locks held if successful.
*/
static struct inet_bind_hashbucket *
-inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret)
+inet_csk_find_open_port(const struct sock *sk, struct inet_bind_bucket **tb_ret,
+ struct inet_bind2_bucket **tb2_ret,
+ struct inet_bind_hashbucket **head2_ret, int *port_ret)
{
- struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- int port = 0;
- struct inet_bind_hashbucket *head;
+ struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
+ int i, low, high, attempt_half, port, l3mdev;
+ struct inet_bind_hashbucket *head, *head2;
struct net *net = sock_net(sk);
- bool relax = false;
- int i, low, high, attempt_half;
+ struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
u32 remaining, offset;
- int l3mdev;
+ bool relax = false;
l3mdev = inet_sk_bound_l3mdev(sk);
ports_exhausted:
@@ -219,7 +314,7 @@ other_half_scan:
if (likely(remaining > 1))
remaining &= ~1U;
- offset = prandom_u32() % remaining;
+ offset = prandom_u32_max(remaining);
/* __inet_hash_connect() favors ports having @low parity
* We do the opposite to not pollute connect() users.
*/
@@ -235,11 +330,20 @@ other_parity_scan:
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
spin_lock_bh(&head->lock);
+ if (inet_use_bhash2_on_bind(sk)) {
+ if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, relax, false))
+ goto next_port;
+ }
+
+ head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
+ spin_lock(&head2->lock);
+ tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
inet_bind_bucket_for_each(tb, &head->chain)
- if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
- tb->port == port) {
- if (!inet_csk_bind_conflict(sk, tb, relax, false))
+ if (inet_bind_bucket_match(tb, net, port, l3mdev)) {
+ if (!inet_csk_bind_conflict(sk, tb, tb2,
+ relax, false))
goto success;
+ spin_unlock(&head2->lock);
goto next_port;
}
tb = NULL;
@@ -259,7 +363,7 @@ next_port:
goto other_half_scan;
}
- if (net->ipv4.sysctl_ip_autobind_reuse && !relax) {
+ if (READ_ONCE(net->ipv4.sysctl_ip_autobind_reuse) && !relax) {
/* We still have a chance to connect to different destinations */
relax = true;
goto ports_exhausted;
@@ -268,6 +372,8 @@ next_port:
success:
*port_ret = port;
*tb_ret = tb;
+ *tb2_ret = tb2;
+ *head2_ret = head2;
return head;
}
@@ -361,56 +467,97 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
*/
int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
+ struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
- struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- int ret = 1, port = snum;
- struct inet_bind_hashbucket *head;
- struct net *net = sock_net(sk);
+ bool found_port = false, check_bind_conflict = true;
+ bool bhash_created = false, bhash2_created = false;
+ struct inet_bind_hashbucket *head, *head2;
+ struct inet_bind2_bucket *tb2 = NULL;
struct inet_bind_bucket *tb = NULL;
- int l3mdev;
+ bool head2_lock_acquired = false;
+ int ret = 1, port = snum, l3mdev;
+ struct net *net = sock_net(sk);
l3mdev = inet_sk_bound_l3mdev(sk);
if (!port) {
- head = inet_csk_find_open_port(sk, &tb, &port);
+ head = inet_csk_find_open_port(sk, &tb, &tb2, &head2, &port);
if (!head)
return ret;
+
+ head2_lock_acquired = true;
+
+ if (tb && tb2)
+ goto success;
+ found_port = true;
+ } else {
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
+ spin_lock_bh(&head->lock);
+ inet_bind_bucket_for_each(tb, &head->chain)
+ if (inet_bind_bucket_match(tb, net, port, l3mdev))
+ break;
+ }
+
+ if (!tb) {
+ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net,
+ head, port, l3mdev);
if (!tb)
- goto tb_not_found;
- goto success;
+ goto fail_unlock;
+ bhash_created = true;
}
- head = &hinfo->bhash[inet_bhashfn(net, port,
- hinfo->bhash_size)];
- spin_lock_bh(&head->lock);
- inet_bind_bucket_for_each(tb, &head->chain)
- if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
- tb->port == port)
- goto tb_found;
-tb_not_found:
- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
- net, head, port, l3mdev);
- if (!tb)
- goto fail_unlock;
-tb_found:
- if (!hlist_empty(&tb->owners)) {
- if (sk->sk_reuse == SK_FORCE_REUSE)
- goto success;
- if ((tb->fastreuse > 0 && reuse) ||
- sk_reuseport_match(tb, sk))
- goto success;
- if (inet_csk_bind_conflict(sk, tb, true, true))
+ if (!found_port) {
+ if (!hlist_empty(&tb->owners)) {
+ if (sk->sk_reuse == SK_FORCE_REUSE ||
+ (tb->fastreuse > 0 && reuse) ||
+ sk_reuseport_match(tb, sk))
+ check_bind_conflict = false;
+ }
+
+ if (check_bind_conflict && inet_use_bhash2_on_bind(sk)) {
+ if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, true, true))
+ goto fail_unlock;
+ }
+
+ head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
+ spin_lock(&head2->lock);
+ head2_lock_acquired = true;
+ tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
+ }
+
+ if (!tb2) {
+ tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep,
+ net, head2, port, l3mdev, sk);
+ if (!tb2)
+ goto fail_unlock;
+ bhash2_created = true;
+ }
+
+ if (!found_port && check_bind_conflict) {
+ if (inet_csk_bind_conflict(sk, tb, tb2, true, true))
goto fail_unlock;
}
+
success:
inet_csk_update_fastreuse(tb, sk);
if (!inet_csk(sk)->icsk_bind_hash)
- inet_bind_hash(sk, tb, port);
+ inet_bind_hash(sk, tb, tb2, port);
WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
+ WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2);
ret = 0;
fail_unlock:
+ if (ret) {
+ if (bhash_created)
+ inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
+ if (bhash2_created)
+ inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,
+ tb2);
+ }
+ if (head2_lock_acquired)
+ spin_unlock(&head2->lock);
spin_unlock_bh(&head->lock);
return ret;
}
@@ -759,14 +906,15 @@ static void reqsk_migrate_reset(struct request_sock *req)
/* return true if req was found in the ehash table */
static bool reqsk_queue_unlink(struct request_sock *req)
{
- struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
+ struct sock *sk = req_to_sk(req);
bool found = false;
- if (sk_hashed(req_to_sk(req))) {
+ if (sk_hashed(sk)) {
+ struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash);
spin_lock(lock);
- found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
+ found = __sk_nulls_del_node_init_rcu(sk);
spin_unlock(lock);
}
if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
@@ -829,7 +977,8 @@ static void reqsk_timer_handler(struct timer_list *t)
icsk = inet_csk(sk_listener);
net = sock_net(sk_listener);
- max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
+ max_syn_ack_retries = icsk->icsk_syn_retries ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_synack_retries);
/* Normally all the openreqs are young and become mature
* (i.e. converted to established socket) for first timeout.
* If synack was not acknowledged for 1 second, it means
@@ -866,12 +1015,9 @@ static void reqsk_timer_handler(struct timer_list *t)
(!resend ||
!inet_rtx_syn_ack(sk_listener, req) ||
inet_rsk(req)->acked)) {
- unsigned long timeo;
-
if (req->num_timeout++ == 0)
atomic_dec(&queue->young);
- timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
- mod_timer(&req->rsk_timer, jiffies + timeo);
+ mod_timer(&req->rsk_timer, jiffies + reqsk_timeout(req, TCP_RTO_MAX));
if (!nreq)
return;
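
The open-coded exponential backoff moves into reqsk_timeout(); a sketch
of what that helper is assumed to compute (the base req->timeout can
differ from TCP_TIMEOUT_INIT, e.g. when supplied by BPF):

	static unsigned long reqsk_timeout(struct request_sock *req,
					   unsigned long max_timeout)
	{
		u64 timeout = (u64)req->timeout << req->num_timeout;

		return (unsigned long)min_t(u64, timeout, max_timeout);
	}
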
@@ -960,6 +1106,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;
+ newicsk->icsk_bind2_hash = NULL;
inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
@@ -1046,6 +1193,9 @@ int inet_csk_listen_start(struct sock *sk)
sk->sk_ack_backlog = 0;
inet_csk_delack_init(sk);
+ if (sk->sk_txrehash == SOCK_TXREHASH_DEFAULT)
+ sk->sk_txrehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
+
/* There is race window here: we announce ourselves listening,
* but this transition is still not validated by get_port().
* It is OK, because this socket enters to hash table only
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 581b5b2d72a5..b812eb36f0e3 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -1028,12 +1028,13 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport)
goto skip_listen_ht;
- for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
+ for (i = s_i; i <= hashinfo->lhash2_mask; i++) {
struct inet_listen_hashbucket *ilb;
struct hlist_nulls_node *node;
num = 0;
- ilb = &hashinfo->listening_hash[i];
+ ilb = &hashinfo->lhash2[i];
+
spin_lock(&ilb->lock);
sk_nulls_for_each(sk, node, &ilb->nulls_head) {
struct inet_sock *inet = inet_sk(sk);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 05cd198d7a6b..c9f9ac5013a7 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -235,9 +235,9 @@ void inet_frag_kill(struct inet_frag_queue *fq)
/* The RCU read lock provides a memory barrier
* guaranteeing that if fqdir->dead is false then
* the hash table destruction will not start until
- * after we unlock. Paired with inet_frags_exit_net().
+ * after we unlock. Paired with fqdir_pre_exit().
*/
- if (!fqdir->dead) {
+ if (!READ_ONCE(fqdir->dead)) {
rhashtable_remove_fast(&fqdir->rhashtable, &fq->node,
fqdir->f->rhash_params);
refcount_dec(&fq->refcnt);
@@ -352,9 +352,11 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
{
+ /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */
+ long high_thresh = READ_ONCE(fqdir->high_thresh);
struct inet_frag_queue *fq = NULL, *prev;
- if (!fqdir->high_thresh || frag_mem_limit(fqdir) > fqdir->high_thresh)
+ if (!high_thresh || frag_mem_limit(fqdir) > high_thresh)
return NULL;
rcu_read_lock();
@@ -508,7 +510,7 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare);
void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
void *reasm_data, bool try_coalesce)
{
- struct sk_buff **nextp = (struct sk_buff **)reasm_data;
+ struct sk_buff **nextp = reasm_data;
struct rb_node *rbn;
struct sk_buff *fp;
int sum_truesize;
@@ -570,6 +572,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
skb_mark_not_on_list(head);
head->prev = NULL;
head->tstamp = q->stamp;
+ head->mono_delivery_time = q->mono_delivery_time;
}
EXPORT_SYMBOL(inet_frag_reasm_finish);
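
The READ_ONCE() loads added above pair with annotated stores on the
teardown side; the assumed shape of fqdir_pre_exit():

	void fqdir_pre_exit(struct fqdir *fqdir)
	{
		/* stop new frag queues from being created ... */
		WRITE_ONCE(fqdir->high_thresh, 0);
		/* ... and tell inet_frag_kill() the table is going away */
		WRITE_ONCE(fqdir->dead, true);
	}
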
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 30ab717ff1b8..d3dc28156622 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -92,12 +92,79 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
}
}
+bool inet_bind_bucket_match(const struct inet_bind_bucket *tb, const struct net *net,
+ unsigned short port, int l3mdev)
+{
+ return net_eq(ib_net(tb), net) && tb->port == port &&
+ tb->l3mdev == l3mdev;
+}
+
+static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb,
+ struct net *net,
+ struct inet_bind_hashbucket *head,
+ unsigned short port, int l3mdev,
+ const struct sock *sk)
+{
+ write_pnet(&tb->ib_net, net);
+ tb->l3mdev = l3mdev;
+ tb->port = port;
+#if IS_ENABLED(CONFIG_IPV6)
+ tb->family = sk->sk_family;
+ if (sk->sk_family == AF_INET6)
+ tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr;
+ else
+#endif
+ tb->rcv_saddr = sk->sk_rcv_saddr;
+ INIT_HLIST_HEAD(&tb->owners);
+ hlist_add_head(&tb->node, &head->chain);
+}
+
+struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,
+ struct net *net,
+ struct inet_bind_hashbucket *head,
+ unsigned short port,
+ int l3mdev,
+ const struct sock *sk)
+{
+ struct inet_bind2_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
+
+ if (tb)
+ inet_bind2_bucket_init(tb, net, head, port, l3mdev, sk);
+
+ return tb;
+}
+
+/* Caller must hold hashbucket lock for this tb with local BH disabled */
+void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)
+{
+ if (hlist_empty(&tb->owners)) {
+ __hlist_del(&tb->node);
+ kmem_cache_free(cachep, tb);
+ }
+}
+
+static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2,
+ const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family != tb2->family)
+ return false;
+
+ if (sk->sk_family == AF_INET6)
+ return ipv6_addr_equal(&tb2->v6_rcv_saddr,
+ &sk->sk_v6_rcv_saddr);
+#endif
+ return tb2->rcv_saddr == sk->sk_rcv_saddr;
+}
+
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
- const unsigned short snum)
+ struct inet_bind2_bucket *tb2, unsigned short port)
{
- inet_sk(sk)->inet_num = snum;
+ inet_sk(sk)->inet_num = port;
sk_add_bind_node(sk, &tb->owners);
inet_csk(sk)->icsk_bind_hash = tb;
+ sk_add_bind2_node(sk, &tb2->owners);
+ inet_csk(sk)->icsk_bind2_hash = tb2;
}
/*
@@ -105,11 +172,15 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
*/
static void __inet_put_port(struct sock *sk)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
- const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
- hashinfo->bhash_size);
- struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
+ struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
+ struct inet_bind_hashbucket *head, *head2;
+ struct net *net = sock_net(sk);
struct inet_bind_bucket *tb;
+ int bhash;
+
+ bhash = inet_bhashfn(net, inet_sk(sk)->inet_num, hashinfo->bhash_size);
+ head = &hashinfo->bhash[bhash];
+ head2 = inet_bhashfn_portaddr(hashinfo, sk, net, inet_sk(sk)->inet_num);
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
@@ -117,6 +188,17 @@ static void __inet_put_port(struct sock *sk)
inet_csk(sk)->icsk_bind_hash = NULL;
inet_sk(sk)->inet_num = 0;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
+
+ spin_lock(&head2->lock);
+ if (inet_csk(sk)->icsk_bind2_hash) {
+ struct inet_bind2_bucket *tb2 = inet_csk(sk)->icsk_bind2_hash;
+
+ __sk_del_bind2_node(sk);
+ inet_csk(sk)->icsk_bind2_hash = NULL;
+ inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
+ }
+ spin_unlock(&head2->lock);
+
spin_unlock(&head->lock);
}
@@ -130,17 +212,26 @@ EXPORT_SYMBOL(inet_put_port);
int __inet_inherit_port(const struct sock *sk, struct sock *child)
{
- struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
+ struct inet_hashinfo *table = tcp_or_dccp_get_hashinfo(sk);
unsigned short port = inet_sk(child)->inet_num;
- const int bhash = inet_bhashfn(sock_net(sk), port,
- table->bhash_size);
- struct inet_bind_hashbucket *head = &table->bhash[bhash];
+ struct inet_bind_hashbucket *head, *head2;
+ bool created_inet_bind_bucket = false;
+ struct net *net = sock_net(sk);
+ bool update_fastreuse = false;
+ struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
- int l3mdev;
+ int bhash, l3mdev;
+
+ bhash = inet_bhashfn(net, port, table->bhash_size);
+ head = &table->bhash[bhash];
+ head2 = inet_bhashfn_portaddr(table, child, net, port);
spin_lock(&head->lock);
+ spin_lock(&head2->lock);
tb = inet_csk(sk)->icsk_bind_hash;
- if (unlikely(!tb)) {
+ tb2 = inet_csk(sk)->icsk_bind2_hash;
+ if (unlikely(!tb || !tb2)) {
+ spin_unlock(&head2->lock);
spin_unlock(&head->lock);
return -ENOENT;
}
@@ -153,25 +244,49 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
* as that of the child socket. We have to look up or
* create a new bind bucket for the child here. */
inet_bind_bucket_for_each(tb, &head->chain) {
- if (net_eq(ib_net(tb), sock_net(sk)) &&
- tb->l3mdev == l3mdev && tb->port == port)
+ if (inet_bind_bucket_match(tb, net, port, l3mdev))
break;
}
if (!tb) {
tb = inet_bind_bucket_create(table->bind_bucket_cachep,
- sock_net(sk), head, port,
- l3mdev);
+ net, head, port, l3mdev);
if (!tb) {
+ spin_unlock(&head2->lock);
spin_unlock(&head->lock);
return -ENOMEM;
}
+ created_inet_bind_bucket = true;
+ }
+ update_fastreuse = true;
+
+ goto bhash2_find;
+ } else if (!inet_bind2_bucket_addr_match(tb2, child)) {
+ l3mdev = inet_sk_bound_l3mdev(sk);
+
+bhash2_find:
+ tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, child);
+ if (!tb2) {
+ tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep,
+ net, head2, port,
+ l3mdev, child);
+ if (!tb2)
+ goto error;
}
- inet_csk_update_fastreuse(tb, child);
}
- inet_bind_hash(child, tb, port);
+ if (update_fastreuse)
+ inet_csk_update_fastreuse(tb, child);
+ inet_bind_hash(child, tb, tb2, port);
+ spin_unlock(&head2->lock);
spin_unlock(&head->lock);
return 0;
+
+error:
+ if (created_inet_bind_bucket)
+ inet_bind_bucket_destroy(table->bind_bucket_cachep, tb);
+ spin_unlock(&head2->lock);
+ spin_unlock(&head->lock);
+ return -ENOMEM;
}
EXPORT_SYMBOL_GPL(__inet_inherit_port);
@@ -193,42 +308,6 @@ inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk)
return inet_lhash2_bucket(h, hash);
}
-static void inet_hash2(struct inet_hashinfo *h, struct sock *sk)
-{
- struct inet_listen_hashbucket *ilb2;
-
- if (!h->lhash2)
- return;
-
- ilb2 = inet_lhash2_bucket_sk(h, sk);
-
- spin_lock(&ilb2->lock);
- if (sk->sk_reuseport && sk->sk_family == AF_INET6)
- hlist_add_tail_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
- &ilb2->head);
- else
- hlist_add_head_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
- &ilb2->head);
- ilb2->count++;
- spin_unlock(&ilb2->lock);
-}
-
-static void inet_unhash2(struct inet_hashinfo *h, struct sock *sk)
-{
- struct inet_listen_hashbucket *ilb2;
-
- if (!h->lhash2 ||
- WARN_ON_ONCE(hlist_unhashed(&inet_csk(sk)->icsk_listen_portaddr_node)))
- return;
-
- ilb2 = inet_lhash2_bucket_sk(h, sk);
-
- spin_lock(&ilb2->lock);
- hlist_del_init_rcu(&inet_csk(sk)->icsk_listen_portaddr_node);
- ilb2->count--;
- spin_unlock(&ilb2->lock);
-}
-
static inline int compute_score(struct sock *sk, struct net *net,
const unsigned short hnum, const __be32 daddr,
const int dif, const int sdif)
@@ -282,12 +361,11 @@ static struct sock *inet_lhash2_lookup(struct net *net,
const __be32 daddr, const unsigned short hnum,
const int dif, const int sdif)
{
- struct inet_connection_sock *icsk;
struct sock *sk, *result = NULL;
+ struct hlist_nulls_node *node;
int score, hiscore = 0;
- inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
- sk = (struct sock *)icsk;
+ sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) {
score = compute_score(sk, net, hnum, daddr, dif, sdif);
if (score > hiscore) {
result = lookup_reuseport(net, sk, skb, doff,
@@ -312,7 +390,7 @@ static inline struct sock *inet_lookup_run_bpf(struct net *net,
struct sock *sk, *reuse_sk;
bool no_reuseport;
- if (hashinfo != &tcp_hashinfo)
+ if (hashinfo != net->ipv4.tcp_death_row.hashinfo)
return NULL; /* only TCP is supported */
no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, saddr, sport,
@@ -410,13 +488,11 @@ begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (sk->sk_hash != hash)
continue;
- if (likely(INET_MATCH(sk, net, acookie,
- saddr, daddr, ports, dif, sdif))) {
+ if (likely(inet_match(net, sk, acookie, ports, dif, sdif))) {
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
goto out;
- if (unlikely(!INET_MATCH(sk, net, acookie,
- saddr, daddr, ports,
- dif, sdif))) {
+ if (unlikely(!inet_match(net, sk, acookie,
+ ports, dif, sdif))) {
sock_gen_put(sk);
goto begin;
}
@@ -465,8 +541,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
if (sk2->sk_hash != hash)
continue;
- if (likely(INET_MATCH(sk2, net, acookie,
- saddr, daddr, ports, dif, sdif))) {
+ if (likely(inet_match(net, sk2, acookie, ports, dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
if (twsk_unique(sk, sk2, twp))
@@ -504,7 +579,7 @@ not_unique:
return -EADDRNOTAVAIL;
}
-static u32 inet_sk_port_offset(const struct sock *sk)
+static u64 inet_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
@@ -532,16 +607,14 @@ static bool inet_ehash_lookup_by_sk(struct sock *sk,
if (esk->sk_hash != sk->sk_hash)
continue;
if (sk->sk_family == AF_INET) {
- if (unlikely(INET_MATCH(esk, net, acookie,
- sk->sk_daddr,
- sk->sk_rcv_saddr,
+ if (unlikely(inet_match(net, esk, acookie,
ports, dif, sdif))) {
return true;
}
}
#if IS_ENABLED(CONFIG_IPV6)
else if (sk->sk_family == AF_INET6) {
- if (unlikely(INET6_MATCH(esk, net,
+ if (unlikely(inet6_match(net, esk,
&sk->sk_v6_daddr,
&sk->sk_v6_rcv_saddr,
ports, dif, sdif))) {
@@ -560,9 +633,9 @@ static bool inet_ehash_lookup_by_sk(struct sock *sk,
*/
bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
- struct hlist_nulls_head *list;
+ struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
struct inet_ehash_bucket *head;
+ struct hlist_nulls_head *list;
spinlock_t *lock;
bool ret = true;
@@ -632,34 +705,34 @@ static int inet_reuseport_add_sock(struct sock *sk,
int __inet_hash(struct sock *sk, struct sock *osk)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
- struct inet_listen_hashbucket *ilb;
+ struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
+ struct inet_listen_hashbucket *ilb2;
int err = 0;
if (sk->sk_state != TCP_LISTEN) {
+ local_bh_disable();
inet_ehash_nolisten(sk, osk, NULL);
+ local_bh_enable();
return 0;
}
WARN_ON(!sk_unhashed(sk));
- ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+ ilb2 = inet_lhash2_bucket_sk(hashinfo, sk);
- spin_lock(&ilb->lock);
+ spin_lock(&ilb2->lock);
if (sk->sk_reuseport) {
- err = inet_reuseport_add_sock(sk, ilb);
+ err = inet_reuseport_add_sock(sk, ilb2);
if (err)
goto unlock;
}
if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
sk->sk_family == AF_INET6)
- __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head);
+ __sk_nulls_add_node_tail_rcu(sk, &ilb2->nulls_head);
else
- __sk_nulls_add_node_rcu(sk, &ilb->nulls_head);
- inet_hash2(hashinfo, sk);
- ilb->count++;
+ __sk_nulls_add_node_rcu(sk, &ilb2->nulls_head);
sock_set_flag(sk, SOCK_RCU_FREE);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
unlock:
- spin_unlock(&ilb->lock);
+ spin_unlock(&ilb2->lock);
return err;
}
@@ -669,11 +742,8 @@ int inet_hash(struct sock *sk)
{
int err = 0;
- if (sk->sk_state != TCP_CLOSE) {
- local_bh_disable();
+ if (sk->sk_state != TCP_CLOSE)
err = __inet_hash(sk, NULL);
- local_bh_enable();
- }
return err;
}
@@ -681,58 +751,183 @@ EXPORT_SYMBOL_GPL(inet_hash);
void inet_unhash(struct sock *sk)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
- struct inet_listen_hashbucket *ilb = NULL;
- spinlock_t *lock;
+ struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
if (sk_unhashed(sk))
return;
if (sk->sk_state == TCP_LISTEN) {
- ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
- lock = &ilb->lock;
+ struct inet_listen_hashbucket *ilb2;
+
+ ilb2 = inet_lhash2_bucket_sk(hashinfo, sk);
+ /* Don't disable bottom halves while acquiring the lock to
+ * avoid circular locking dependency on PREEMPT_RT.
+ */
+ spin_lock(&ilb2->lock);
+ if (sk_unhashed(sk)) {
+ spin_unlock(&ilb2->lock);
+ return;
+ }
+
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ reuseport_stop_listen_sock(sk);
+
+ __sk_nulls_del_node_init_rcu(sk);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ spin_unlock(&ilb2->lock);
} else {
- lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
- }
- spin_lock_bh(lock);
- if (sk_unhashed(sk))
- goto unlock;
+ spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
- if (rcu_access_pointer(sk->sk_reuseport_cb))
- reuseport_stop_listen_sock(sk);
- if (ilb) {
- inet_unhash2(hashinfo, sk);
- ilb->count--;
+ spin_lock_bh(lock);
+ if (sk_unhashed(sk)) {
+ spin_unlock_bh(lock);
+ return;
+ }
+ __sk_nulls_del_node_init_rcu(sk);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ spin_unlock_bh(lock);
}
- __sk_nulls_del_node_init_rcu(sk);
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-unlock:
- spin_unlock_bh(lock);
}
EXPORT_SYMBOL_GPL(inet_unhash);
+static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb,
+ const struct net *net, unsigned short port,
+ int l3mdev, const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family != tb->family)
+ return false;
+
+ if (sk->sk_family == AF_INET6)
+ return net_eq(ib2_net(tb), net) && tb->port == port &&
+ tb->l3mdev == l3mdev &&
+ ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
+ else
+#endif
+ return net_eq(ib2_net(tb), net) && tb->port == port &&
+ tb->l3mdev == l3mdev && tb->rcv_saddr == sk->sk_rcv_saddr;
+}
+
+bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net,
+ unsigned short port, int l3mdev, const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr addr_any = {};
+
+ if (sk->sk_family != tb->family)
+ return false;
+
+ if (sk->sk_family == AF_INET6)
+ return net_eq(ib2_net(tb), net) && tb->port == port &&
+ tb->l3mdev == l3mdev &&
+ ipv6_addr_equal(&tb->v6_rcv_saddr, &addr_any);
+ else
+#endif
+ return net_eq(ib2_net(tb), net) && tb->port == port &&
+ tb->l3mdev == l3mdev && tb->rcv_saddr == 0;
+}
+
+/* The socket's bhash2 hashbucket spinlock must be held when this is called */
+struct inet_bind2_bucket *
+inet_bind2_bucket_find(const struct inet_bind_hashbucket *head, const struct net *net,
+ unsigned short port, int l3mdev, const struct sock *sk)
+{
+ struct inet_bind2_bucket *bhash2 = NULL;
+
+ inet_bind_bucket_for_each(bhash2, &head->chain)
+ if (inet_bind2_bucket_match(bhash2, net, port, l3mdev, sk))
+ break;
+
+ return bhash2;
+}
+
+struct inet_bind_hashbucket *
+inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port)
+{
+ struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
+ u32 hash;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr addr_any = {};
+
+ if (sk->sk_family == AF_INET6)
+ hash = ipv6_portaddr_hash(net, &addr_any, port);
+ else
+#endif
+ hash = ipv4_portaddr_hash(net, 0, port);
+
+ return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
+}
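
The specific-address counterpart used throughout this patch,
inet_bhashfn_portaddr(), is assumed to differ only in hashing the
socket's bound address rather than the wildcard. An IPv4-only sketch
under a hypothetical name:

	static struct inet_bind_hashbucket *
	bhash2_bucket_v4(struct inet_hashinfo *hinfo, const struct net *net,
			 const struct sock *sk, unsigned short port)
	{
		u32 hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port);

		return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
	}
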
+
+int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk)
+{
+ struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
+ struct inet_bind2_bucket *tb2, *new_tb2;
+ int l3mdev = inet_sk_bound_l3mdev(sk);
+ struct inet_bind_hashbucket *head2;
+ int port = inet_sk(sk)->inet_num;
+ struct net *net = sock_net(sk);
+
+ /* Allocate a bind2 bucket ahead of time to avoid permanently putting
+ * the bhash2 table in an inconsistent state if a new tb2 bucket
+ * allocation fails.
+ */
+ new_tb2 = kmem_cache_alloc(hinfo->bind2_bucket_cachep, GFP_ATOMIC);
+ if (!new_tb2)
+ return -ENOMEM;
+
+ head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
+
+ if (prev_saddr) {
+ spin_lock_bh(&prev_saddr->lock);
+ __sk_del_bind2_node(sk);
+ inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,
+ inet_csk(sk)->icsk_bind2_hash);
+ spin_unlock_bh(&prev_saddr->lock);
+ }
+
+ spin_lock_bh(&head2->lock);
+ tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
+ if (!tb2) {
+ tb2 = new_tb2;
+ inet_bind2_bucket_init(tb2, net, head2, port, l3mdev, sk);
+ }
+ sk_add_bind2_node(sk, &tb2->owners);
+ inet_csk(sk)->icsk_bind2_hash = tb2;
+ spin_unlock_bh(&head2->lock);
+
+ if (tb2 != new_tb2)
+ kmem_cache_free(hinfo->bind2_bucket_cachep, new_tb2);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(inet_bhash2_update_saddr);
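
The pattern in isolation: reserve memory before taking any lock so the
locked section cannot fail halfway through, then return the spare if a
matching bucket already existed:

	new_tb2 = kmem_cache_alloc(hinfo->bind2_bucket_cachep, GFP_ATOMIC);
	if (!new_tb2)
		return -ENOMEM;			/* fail before touching state */

	spin_lock_bh(&head2->lock);
	tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
	if (!tb2) {
		tb2 = new_tb2;			/* consume the reserve */
		inet_bind2_bucket_init(tb2, net, head2, port, l3mdev, sk);
	}
	/* ... link sk into tb2 ... */
	spin_unlock_bh(&head2->lock);

	if (tb2 != new_tb2)			/* reserve unused, return it */
		kmem_cache_free(hinfo->bind2_bucket_cachep, new_tb2);
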
+
/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
* Note that we use 32bit integers (vs RFC 'short integers')
* because 2^16 is not a multiple of num_ephemeral and this
* property might be used by clever attacker.
- * RFC claims using TABLE_LENGTH=10 buckets gives an improvement,
- * we use 256 instead to really give more isolation and
- * privacy, this only consumes 1 KB of kernel memory.
+ * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though
+ * attacks have since been demonstrated, so we use 65536 instead to give
+ * much more isolation and privacy, at the expense of 256 kB of kernel
+ * memory.
*/
-#define INET_TABLE_PERTURB_SHIFT 8
-static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT];
+#define INET_TABLE_PERTURB_SHIFT 16
+#define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT)
+static u32 *table_perturb;
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
- struct sock *sk, u32 port_offset,
+ struct sock *sk, u64 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
struct sock *, __u16, struct inet_timewait_sock **))
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
+ struct inet_bind_hashbucket *head, *head2;
struct inet_timewait_sock *tw = NULL;
- struct inet_bind_hashbucket *head;
int port = inet_sk(sk)->inet_num;
struct net *net = sock_net(sk);
+ struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
+ bool tb_created = false;
u32 remaining, offset;
int ret, i, low, high;
int l3mdev;
@@ -763,10 +958,13 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
if (likely(remaining > 1))
remaining &= ~1U;
- net_get_random_once(table_perturb, sizeof(table_perturb));
- index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT);
+ get_random_sleepable_once(table_perturb,
+ INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb));
+ index = port_offset & (INET_TABLE_PERTURB_SIZE - 1);
+
+ offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32);
+ offset %= remaining;
- offset = (READ_ONCE(table_perturb[index]) + port_offset) % remaining;
/* In first pass we try ports of @low parity.
* inet_csk_get_port() does the opposite choice.
*/
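
With a 64-bit siphash-based port_offset there are enough bits to split
the roles: the low bits select the perturbation slot directly (no more
hash_32() folding), while the high 32 bits seed the starting offset.
The derivation, restated:

	index = port_offset & (INET_TABLE_PERTURB_SIZE - 1);	/* low bits */
	offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32);
	offset %= remaining;		/* fold into the ephemeral port range */
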
@@ -786,8 +984,7 @@ other_parity_scan:
* the established check is already unique enough.
*/
inet_bind_bucket_for_each(tb, &head->chain) {
- if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
- tb->port == port) {
+ if (inet_bind_bucket_match(tb, net, port, l3mdev)) {
if (tb->fastreuse >= 0 ||
tb->fastreuseport >= 0)
goto next_port;
@@ -805,6 +1002,7 @@ other_parity_scan:
spin_unlock_bh(&head->lock);
return -ENOMEM;
}
+ tb_created = true;
tb->fastreuse = -1;
tb->fastreuseport = -1;
goto ok;
@@ -820,15 +1018,33 @@ next_port:
return -EADDRNOTAVAIL;
ok:
- /* If our first attempt found a candidate, skip next candidate
- * in 1/16 of cases to add some noise.
+ /* Find the corresponding tb2 bucket since we need to
+ * add the socket to the bhash2 table as well
+ */
+ head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
+ spin_lock(&head2->lock);
+
+ tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
+ if (!tb2) {
+ tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net,
+ head2, port, l3mdev, sk);
+ if (!tb2)
+ goto error;
+ }
+
+ /* Here we want to add a little bit of randomness to the next source
+ * port that will be chosen. We use a max() with a random value here so
+ * that on low contention the randomness is maximal and on high contention
+ * it may be nonexistent.
*/
- if (!i && !(prandom_u32() % 16))
- i = 2;
+ i = max_t(int, i, prandom_u32_max(8) * 2);
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
- inet_bind_hash(sk, tb, port);
+ inet_bind_hash(sk, tb, tb2, port);
+
+ spin_unlock(&head2->lock);
+
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
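
The max()-based jitter above replaces the old "skip one candidate in 1/16 of cases" heuristic: even when the very first port wins (i == 0), the stored perturbation still advances by a random even stride. A userspace sketch of the update rule, with rand() standing in for prandom_u32_max() (illustrative only):

#include <stdio.h>
#include <stdlib.h>

/* Sketch: compute the perturbation update for the winning index i. */
static unsigned int next_perturb(unsigned int cur, int i)
{
	int jitter = (rand() % 8) * 2;   /* random even stride, 0..14 */

	if (jitter > i)
		i = jitter;              /* max_t(int, i, ...) from the patch */
	return cur + i + 2;              /* always advance by at least 2 */
}

int main(void)
{
	unsigned int p = 0;

	for (int k = 0; k < 4; k++) {
		p = next_perturb(p, 0);  /* first candidate won each time */
		printf("perturb=%u\n", p);
	}
	return 0;
}
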
@@ -840,6 +1056,13 @@ ok:
inet_twsk_deschedule_put(tw);
local_bh_enable();
return 0;
+
+error:
+ spin_unlock(&head2->lock);
+ if (tb_created)
+ inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
+ spin_unlock_bh(&head->lock);
+ return -ENOMEM;
}
/*
@@ -848,7 +1071,7 @@ ok:
int inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk)
{
- u32 port_offset = 0;
+ u64 port_offset = 0;
if (!inet_sk(sk)->inet_num)
port_offset = inet_sk_port_offset(sk);
@@ -857,29 +1080,14 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
}
EXPORT_SYMBOL_GPL(inet_hash_connect);
-void inet_hashinfo_init(struct inet_hashinfo *h)
-{
- int i;
-
- for (i = 0; i < INET_LHTABLE_SIZE; i++) {
- spin_lock_init(&h->listening_hash[i].lock);
- INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head,
- i + LISTENING_NULLS_BASE);
- h->listening_hash[i].count = 0;
- }
-
- h->lhash2 = NULL;
-}
-EXPORT_SYMBOL_GPL(inet_hashinfo_init);
-
static void init_hashinfo_lhash2(struct inet_hashinfo *h)
{
int i;
for (i = 0; i <= h->lhash2_mask; i++) {
spin_lock_init(&h->lhash2[i].lock);
- INIT_HLIST_HEAD(&h->lhash2[i].head);
- h->lhash2[i].count = 0;
+ INIT_HLIST_NULLS_HEAD(&h->lhash2[i].nulls_head,
+ i + LISTENING_NULLS_BASE);
}
}
@@ -898,6 +1106,14 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
low_limit,
high_limit);
init_hashinfo_lhash2(h);
+
+ /* this one is used for source ports of outgoing connections */
+ table_perturb = alloc_large_system_hash("Table-perturb",
+ sizeof(*table_perturb),
+ INET_TABLE_PERTURB_SIZE,
+ 0, 0, NULL, NULL,
+ INET_TABLE_PERTURB_SIZE,
+ INET_TABLE_PERTURB_SIZE);
}
int inet_hashinfo2_init_mod(struct inet_hashinfo *h)
@@ -939,3 +1155,50 @@ int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
return 0;
}
EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc);
+
+struct inet_hashinfo *inet_pernet_hashinfo_alloc(struct inet_hashinfo *hashinfo,
+ unsigned int ehash_entries)
+{
+ struct inet_hashinfo *new_hashinfo;
+ int i;
+
+ new_hashinfo = kmemdup(hashinfo, sizeof(*hashinfo), GFP_KERNEL);
+ if (!new_hashinfo)
+ goto err;
+
+ new_hashinfo->ehash = vmalloc_huge(ehash_entries * sizeof(struct inet_ehash_bucket),
+ GFP_KERNEL_ACCOUNT);
+ if (!new_hashinfo->ehash)
+ goto free_hashinfo;
+
+ new_hashinfo->ehash_mask = ehash_entries - 1;
+
+ if (inet_ehash_locks_alloc(new_hashinfo))
+ goto free_ehash;
+
+ for (i = 0; i < ehash_entries; i++)
+ INIT_HLIST_NULLS_HEAD(&new_hashinfo->ehash[i].chain, i);
+
+ new_hashinfo->pernet = true;
+
+ return new_hashinfo;
+
+free_ehash:
+ vfree(new_hashinfo->ehash);
+free_hashinfo:
+ kfree(new_hashinfo);
+err:
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(inet_pernet_hashinfo_alloc);
+
+void inet_pernet_hashinfo_free(struct inet_hashinfo *hashinfo)
+{
+ if (!hashinfo->pernet)
+ return;
+
+ inet_ehash_locks_free(hashinfo);
+ vfree(hashinfo->ehash);
+ kfree(hashinfo);
+}
+EXPORT_SYMBOL_GPL(inet_pernet_hashinfo_free);
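
inet_pernet_hashinfo_alloc() above uses the classic goto-unwind idiom: each failure label releases exactly what the earlier steps acquired, in reverse order. A self-contained userspace sketch of the same shape, with hypothetical names:

#include <stdlib.h>
#include <string.h>

struct table {
	unsigned int *buckets;
	unsigned int  mask;
};

static struct table *table_clone(const struct table *tmpl, unsigned int n)
{
	struct table *t = malloc(sizeof(*t));

	if (!t)
		goto err;
	memcpy(t, tmpl, sizeof(*t));

	t->buckets = calloc(n, sizeof(*t->buckets));
	if (!t->buckets)
		goto free_table;
	t->mask = n - 1;                 /* n must be a power of two */
	return t;

free_table:
	free(t);
err:
	return NULL;
}

int main(void)
{
	struct table tmpl = { 0 };
	struct table *t = table_clone(&tmpl, 16);

	if (t) {
		free(t->buckets);
		free(t);
	}
	return 0;
}
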
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 437afe392e66..66fc940f9521 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -59,7 +59,7 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
inet_twsk_bind_unhash(tw, hashinfo);
spin_unlock(&bhead->lock);
- atomic_dec(&tw->tw_dr->tw_count);
+ refcount_dec(&tw->tw_dr->tw_refcount);
inet_twsk_put(tw);
}
@@ -145,10 +145,6 @@ static void tw_timer_handler(struct timer_list *t)
{
struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer);
- if (tw->tw_kill)
- __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
- else
- __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITED);
inet_twsk_kill(tw);
}
@@ -158,7 +154,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
{
struct inet_timewait_sock *tw;
- if (atomic_read(&dr->tw_count) >= dr->sysctl_max_tw_buckets)
+ if (refcount_read(&dr->tw_refcount) - 1 >=
+ READ_ONCE(dr->sysctl_max_tw_buckets))
return NULL;
tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
@@ -244,10 +241,13 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
* of PAWS.
*/
- tw->tw_kill = timeo <= 4*HZ;
if (!rearm) {
+ bool kill = timeo <= 4*HZ;
+
+ __NET_INC_STATS(twsk_net(tw), kill ? LINUX_MIB_TIMEWAITKILLED :
+ LINUX_MIB_TIMEWAITED);
BUG_ON(mod_timer(&tw->tw_timer, jiffies + timeo));
- atomic_inc(&tw->tw_dr->tw_count);
+ refcount_inc(&tw->tw_dr->tw_refcount);
} else {
mod_timer_pending(&tw->tw_timer, jiffies + timeo);
}
@@ -268,8 +268,21 @@ restart_rcu:
rcu_read_lock();
restart:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
- if (sk->sk_state != TCP_TIME_WAIT)
+ if (sk->sk_state != TCP_TIME_WAIT) {
+ /* A kernel listener socket might not hold a refcount on the net,
+ * so reqsk_timer_handler() could fire after the net is freed.
+ * A userspace listener and its reqsk never appear here.
+ */
+ if (unlikely(sk->sk_state == TCP_NEW_SYN_RECV &&
+ hashinfo->pernet)) {
+ struct request_sock *req = inet_reqsk(sk);
+
+ inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req);
+ }
+
continue;
+ }
+
tw = inet_twsk(sk);
if ((tw->tw_family != family) ||
refcount_read(&twsk_net(tw)->ns.count))
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index da21dfce24d7..e9fed83e9b3c 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -141,16 +141,20 @@ static void inet_peer_gc(struct inet_peer_base *base,
struct inet_peer *gc_stack[],
unsigned int gc_cnt)
{
+ int peer_threshold, peer_maxttl, peer_minttl;
struct inet_peer *p;
__u32 delta, ttl;
int i;
- if (base->total >= inet_peer_threshold)
+ peer_threshold = READ_ONCE(inet_peer_threshold);
+ peer_maxttl = READ_ONCE(inet_peer_maxttl);
+ peer_minttl = READ_ONCE(inet_peer_minttl);
+
+ if (base->total >= peer_threshold)
ttl = 0; /* be aggressive */
else
- ttl = inet_peer_maxttl
- - (inet_peer_maxttl - inet_peer_minttl) / HZ *
- base->total / inet_peer_threshold * HZ;
+ ttl = peer_maxttl - (peer_maxttl - peer_minttl) / HZ *
+ base->total / peer_threshold * HZ;
for (i = 0; i < gc_cnt; i++) {
p = gc_stack[i];
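
Snapshotting the three sysctls with READ_ONCE() keeps the interpolation self-consistent even if an admin rewrites them mid-GC. A worked userspace example of the TTL formula, with illustrative values and HZ=100 assumed:

#include <stdio.h>

#define HZ 100

int main(void)
{
	int peer_maxttl = 600 * HZ, peer_minttl = 120 * HZ;
	int peer_threshold = 65536, total = 32768;   /* pool half full */

	unsigned int ttl = peer_maxttl -
		(peer_maxttl - peer_minttl) / HZ * total / peer_threshold * HZ;

	/* prints 36000 jiffies, i.e. 360 s: halfway between 600 s and 120 s */
	printf("ttl=%u jiffies (~%u s)\n", ttl, ttl / HZ);
	return 0;
}
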
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 00ec819f949b..e18931a6d153 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -79,7 +79,7 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s
if (unlikely(opt->optlen))
ip_forward_options(skb);
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
return dst_output(net, sk, skb);
}
@@ -90,6 +90,7 @@ int ip_forward(struct sk_buff *skb)
struct rtable *rt; /* Route we use */
struct ip_options *opt = &(IPCB(skb)->opt);
struct net *net;
+ SKB_DR(reason);
/* that should never happen */
if (skb->pkt_type != PACKET_HOST)
@@ -101,8 +102,10 @@ int ip_forward(struct sk_buff *skb)
if (skb_warn_if_lro(skb))
goto drop;
- if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb))
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
+ SKB_DR_SET(reason, XFRM_POLICY);
goto drop;
+ }
if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb))
return NET_RX_SUCCESS;
@@ -118,8 +121,10 @@ int ip_forward(struct sk_buff *skb)
if (ip_hdr(skb)->ttl <= 1)
goto too_many_hops;
- if (!xfrm4_route_forward(skb))
+ if (!xfrm4_route_forward(skb)) {
+ SKB_DR_SET(reason, XFRM_POLICY);
goto drop;
+ }
rt = skb_rtable(skb);
@@ -132,6 +137,7 @@ int ip_forward(struct sk_buff *skb)
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
+ SKB_DR_SET(reason, PKT_TOO_BIG);
goto drop;
}
@@ -151,7 +157,7 @@ int ip_forward(struct sk_buff *skb)
!skb_sec_path(skb))
ip_rt_send_redirect(skb);
- if (net->ipv4.sysctl_ip_fwd_update_priority)
+ if (READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority))
skb->priority = rt_tos2priority(iph->tos);
return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
@@ -169,7 +175,8 @@ too_many_hops:
/* Tell the sender its packet died... */
__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
+ SKB_DR_SET(reason, IP_INHDR);
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
}
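
The SKB_DR()/SKB_DR_SET() pattern used above keeps one local drop reason, refined at each failure site and consumed once at the shared drop label. A userspace sketch of that control flow; the enum values are illustrative, not the kernel's:

#include <stdio.h>

enum drop_reason { DR_NOT_SPECIFIED, DR_XFRM_POLICY, DR_PKT_TOO_BIG };

static int forward(int policy_ok, int fits_mtu)
{
	enum drop_reason reason = DR_NOT_SPECIFIED;

	if (!policy_ok) {
		reason = DR_XFRM_POLICY;
		goto drop;
	}
	if (!fits_mtu) {
		reason = DR_PKT_TOO_BIG;
		goto drop;
	}
	return 0;
drop:
	printf("dropped, reason=%d\n", reason);
	return -1;
}

int main(void)
{
	forward(1, 0);   /* drops with DR_PKT_TOO_BIG */
	return 0;
}
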
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cfeb8890f94e..fb153569889e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -144,7 +144,8 @@ static void ip_expire(struct timer_list *t)
rcu_read_lock();
- if (qp->q.fqdir->dead)
+ /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */
+ if (READ_ONCE(qp->q.fqdir->dead))
goto out_rcu_unlock;
spin_lock(&qp->q.lock);
@@ -348,6 +349,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
qp->iif = dev->ifindex;
qp->q.stamp = skb->tstamp;
+ qp->q.mono_delivery_time = skb->mono_delivery_time;
qp->q.meat += skb->len;
qp->ecn |= ecn;
add_frag_mem_limit(qp->q.fqdir, skb->truesize);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2ac2b95c5694..f866d6282b2b 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -459,14 +459,12 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
-
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
+ __be16 flags = tunnel->parms.o_flags;
/* Push GRE header. */
gre_build_header(skb, tunnel->tun_hlen,
- tunnel->parms.o_flags, proto, tunnel->parms.o_key,
- htonl(tunnel->o_seqno));
+ flags, proto, tunnel->parms.o_key,
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
@@ -504,7 +502,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
gre_build_header(skb, tunnel_hlen, flags, proto,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
@@ -526,7 +524,6 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
int tunnel_hlen;
int version;
int nhoff;
- int thoff;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
@@ -560,10 +557,16 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
(ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
truncate = true;
- thoff = skb_transport_header(skb) - skb_mac_header(skb);
- if (skb->protocol == htons(ETH_P_IPV6) &&
- (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
- truncate = true;
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ int thoff;
+
+ if (skb_transport_header_was_set(skb))
+ thoff = skb_transport_header(skb) - skb_mac_header(skb);
+ else
+ thoff = nhoff + sizeof(struct ipv6hdr);
+ if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
+ truncate = true;
+ }
if (version == 1) {
erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
@@ -581,7 +584,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
}
gre_build_header(skb, 8, TUNNEL_SEQ,
- proto, 0, htonl(tunnel->o_seqno++));
+ proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));
ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
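
With lockless transmit (LLTX) possible on these paths, the per-tunnel sequence number must be updated atomically; atomic_fetch_inc() returns the pre-increment value, preserving the semantics of the old o_seqno++. A userspace sketch with C11 atomics:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_uint o_seqno;

static uint32_t next_seqno(void)
{
	/* atomic_fetch_add returns the old value, matching seqno++ */
	return atomic_fetch_add(&o_seqno, 1);
}

int main(void)
{
	uint32_t a = next_seqno();
	uint32_t b = next_seqno();

	printf("%u %u\n", (unsigned)a, (unsigned)b);   /* 0 1 */
	return 0;
}
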
@@ -604,8 +607,9 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
key = &info->key;
ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
- tunnel_id_to_key32(key->tun_id), key->tos, 0,
- skb->mark, skb_get_hash(skb));
+ tunnel_id_to_key32(key->tun_id),
+ key->tos & ~INET_ECN_MASK, dev_net(dev), 0,
+ skb->mark, skb_get_hash(skb), key->flow_flags);
rt = ip_route_output_key(dev_net(dev), &fl4);
if (IS_ERR(rt))
return PTR_ERR(rt);
@@ -630,21 +634,20 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
}
if (dev->header_ops) {
- const int pull_len = tunnel->hlen + sizeof(struct iphdr);
-
if (skb_cow_head(skb, 0))
goto free_skb;
tnl_params = (const struct iphdr *)skb->data;
- if (pull_len > skb_transport_offset(skb))
- goto free_skb;
-
/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
* to gre header.
*/
- skb_pull(skb, pull_len);
+ skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
skb_reset_mac_header(skb);
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_start(skb) < skb->data)
+ goto free_skb;
} else {
if (skb_cow_head(skb, dev->needed_headroom))
goto free_skb;
@@ -749,6 +752,7 @@ free_skb:
static void ipgre_link_update(struct net_device *dev, bool set_mtu)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
+ __be16 flags;
int len;
len = tunnel->tun_hlen;
@@ -764,19 +768,15 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
if (set_mtu)
dev->mtu = max_t(int, dev->mtu - len, 68);
- if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
- if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
- tunnel->encap.type == TUNNEL_ENCAP_NONE) {
- dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- } else {
- dev->features &= ~NETIF_F_GSO_SOFTWARE;
- dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
- }
- dev->features |= NETIF_F_LLTX;
- } else {
+ flags = tunnel->parms.o_flags;
+
+ if (flags & TUNNEL_SEQ ||
+ (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
+ dev->features &= ~NETIF_F_GSO_SOFTWARE;
dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
- dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
+ } else {
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
}
}
@@ -950,6 +950,7 @@ static void ipgre_tunnel_setup(struct net_device *dev)
static void __gre_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel;
+ __be16 flags;
tunnel = netdev_priv(dev);
tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
@@ -958,25 +959,21 @@ static void __gre_tunnel_init(struct net_device *dev)
tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
- dev->features |= GRE_FEATURES;
+ dev->features |= GRE_FEATURES | NETIF_F_LLTX;
dev->hw_features |= GRE_FEATURES;
- if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
- /* TCP offload with GRE SEQ is not supported, nor
- * can we support 2 levels of outer headers requiring
- * an update.
- */
- if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
- (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
- dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- }
+ flags = tunnel->parms.o_flags;
- /* Can use a lockless transmit, unless we generate
- * output sequences
- */
- dev->features |= NETIF_F_LLTX;
- }
+ /* TCP offload with GRE SEQ is not supported, nor can we support 2
+ * levels of outer headers requiring an update.
+ */
+ if (flags & TUNNEL_SEQ)
+ return;
+ if (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)
+ return;
+
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
}
static int ipgre_tunnel_init(struct net_device *dev)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 3a025c011971..1b512390b3cf 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -196,7 +196,8 @@ resubmit:
if (ipprot) {
if (!ipprot->no_policy) {
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- kfree_skb(skb);
+ kfree_skb_reason(skb,
+ SKB_DROP_REASON_XFRM_POLICY);
return;
}
nf_reset_ct(skb);
@@ -215,7 +216,7 @@ resubmit:
icmp_send(skb, ICMP_DEST_UNREACH,
ICMP_PROT_UNREACH, 0);
}
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_NOPROTO);
} else {
__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
consume_skb(skb);
@@ -225,6 +226,7 @@ resubmit:
static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ skb_clear_delivery_time(skb);
__skb_pull(skb, skb_network_header_len(skb));
rcu_read_lock();
@@ -310,16 +312,17 @@ static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph,
ip_hdr(hint)->tos == iph->tos;
}
-INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
+int tcp_v4_early_demux(struct sk_buff *skb);
+int udp_v4_early_demux(struct sk_buff *skb);
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb, struct net_device *dev,
const struct sk_buff *hint)
{
const struct iphdr *iph = ip_hdr(skb);
- int (*edemux)(struct sk_buff *skb);
+ int err, drop_reason;
struct rtable *rt;
- int err;
+
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (ip_can_use_hint(skb, iph, hint)) {
err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
@@ -328,21 +331,29 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
goto drop_error;
}
- if (net->ipv4.sysctl_ip_early_demux &&
+ if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
!skb_dst(skb) &&
!skb->sk &&
!ip_is_fragment(iph)) {
- const struct net_protocol *ipprot;
- int protocol = iph->protocol;
-
- ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
- err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
- udp_v4_early_demux, skb);
- if (unlikely(err))
- goto drop_error;
- /* must reload iph, skb->head might have changed */
- iph = ip_hdr(skb);
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) {
+ tcp_v4_early_demux(skb);
+
+ /* must reload iph, skb->head might have changed */
+ iph = ip_hdr(skb);
+ }
+ break;
+ case IPPROTO_UDP:
+ if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
+ err = udp_v4_early_demux(skb);
+ if (unlikely(err))
+ goto drop_error;
+
+ /* must reload iph, skb->head might have changed */
+ iph = ip_hdr(skb);
+ }
+ break;
}
}
@@ -396,19 +407,23 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
* so-called "hole-196" attack) so do it for both.
*/
if (in_dev &&
- IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST))
+ IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) {
+ drop_reason = SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST;
goto drop;
+ }
}
return NET_RX_SUCCESS;
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
return NET_RX_DROP;
drop_error:
- if (err == -EXDEV)
+ if (err == -EXDEV) {
+ drop_reason = SKB_DROP_REASON_IP_RPFILTER;
__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
+ }
goto drop;
}
@@ -436,13 +451,17 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
{
const struct iphdr *iph;
+ int drop_reason;
u32 len;
/* When the interface is in promisc. mode, drop all the crap
* that it receives, do not try to analyse it.
*/
- if (skb->pkt_type == PACKET_OTHERHOST)
+ if (skb->pkt_type == PACKET_OTHERHOST) {
+ dev_core_stats_rx_otherhost_dropped_inc(skb->dev);
+ drop_reason = SKB_DROP_REASON_OTHERHOST;
goto drop;
+ }
__IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
@@ -452,6 +471,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
goto out;
}
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto inhdr_error;
@@ -488,6 +508,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
len = ntohs(iph->tot_len);
if (skb->len < len) {
+ drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
} else if (len < (iph->ihl*4))
@@ -516,11 +537,14 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
return skb;
csum_error:
+ drop_reason = SKB_DROP_REASON_IP_CSUM;
__IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
inhdr_error:
+ if (drop_reason == SKB_DROP_REASON_NOT_SPECIFIED)
+ drop_reason = SKB_DROP_REASON_IP_INHDR;
__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
out:
return NULL;
}
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index da1b5038bdfd..a9e22a098872 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -42,7 +42,7 @@
*/
void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
- __be32 daddr, struct rtable *rt, int is_frag)
+ __be32 daddr, struct rtable *rt)
{
unsigned char *iph = skb_network_header(skb);
@@ -53,28 +53,15 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
if (opt->srr)
memcpy(iph + opt->srr + iph[opt->srr + 1] - 4, &daddr, 4);
- if (!is_frag) {
- if (opt->rr_needaddr)
- ip_rt_get_source(iph + opt->rr + iph[opt->rr + 2] - 5, skb, rt);
- if (opt->ts_needaddr)
- ip_rt_get_source(iph + opt->ts + iph[opt->ts + 2] - 9, skb, rt);
- if (opt->ts_needtime) {
- __be32 midtime;
+ if (opt->rr_needaddr)
+ ip_rt_get_source(iph + opt->rr + iph[opt->rr + 2] - 5, skb, rt);
+ if (opt->ts_needaddr)
+ ip_rt_get_source(iph + opt->ts + iph[opt->ts + 2] - 9, skb, rt);
+ if (opt->ts_needtime) {
+ __be32 midtime;
- midtime = inet_current_timestamp();
- memcpy(iph + opt->ts + iph[opt->ts + 2] - 5, &midtime, 4);
- }
- return;
- }
- if (opt->rr) {
- memset(iph + opt->rr, IPOPT_NOP, iph[opt->rr + 1]);
- opt->rr = 0;
- opt->rr_needaddr = 0;
- }
- if (opt->ts) {
- memset(iph + opt->ts, IPOPT_NOP, iph[opt->ts + 1]);
- opt->ts = 0;
- opt->ts_needaddr = opt->ts_needtime = 0;
+ midtime = inet_current_timestamp();
+ memcpy(iph + opt->ts + iph[opt->ts + 2] - 5, &midtime, 4);
}
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 57c1d8431386..922c87ef1ab5 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -162,17 +162,24 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
iph->saddr = saddr;
iph->protocol = sk->sk_protocol;
- if (ip_dont_fragment(sk, &rt->dst)) {
+ /* Do not bother generating IPID for small packets (eg SYNACK) */
+ if (skb->len <= IPV4_MIN_MTU || ip_dont_fragment(sk, &rt->dst)) {
iph->frag_off = htons(IP_DF);
iph->id = 0;
} else {
iph->frag_off = 0;
- __ip_select_ident(net, iph, 1);
+ /* TCP packets here are SYNACKs with fat IPv4/TCP options.
+ * Avoid using the hashed IP ident generator.
+ */
+ if (sk->sk_protocol == IPPROTO_TCP)
+ iph->id = (__force __be16)get_random_u16();
+ else
+ __ip_select_ident(net, iph, 1);
}
if (opt && opt->opt.optlen) {
iph->ihl += opt->opt.optlen>>2;
- ip_options_build(skb, &opt->opt, daddr, rt, 0);
+ ip_options_build(skb, &opt->opt, daddr, rt);
}
skb->priority = sk->sk_priority;
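
The IPID policy after this hunk, roughly: small or DF packets get id 0, oversized TCP replies get a pure random 16-bit id, and everything else keeps the hashed generator. A userspace sketch of the decision; hashed_ident() is a hypothetical placeholder for __ip_select_ident():

#include <stdint.h>
#include <stdlib.h>

#define IPV4_MIN_MTU 68

static uint16_t hashed_ident(void) { return 0; /* placeholder */ }

static uint16_t select_ipid(unsigned int len, int dont_frag, int is_tcp)
{
	if (len <= IPV4_MIN_MTU || dont_frag)
		return 0;                  /* DF set, no id needed */
	if (is_tcp)
		return (uint16_t)rand();   /* stands in for get_random_u16() */
	return hashed_ident();
}

int main(void)
{
	return select_ipid(1500, 0, 1) == 0;   /* exercises the TCP branch */
}
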
@@ -226,7 +233,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
net_dbg_ratelimited("%s: No header cache and no neighbour!\n",
__func__);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
return -EINVAL;
}
@@ -310,7 +317,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
case NET_XMIT_CN:
return __ip_finish_output(net, sk, skb) ? : ret;
default:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
return ret;
}
}
@@ -330,7 +337,7 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk,
case NET_XMIT_SUCCESS:
break;
default:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
return ret;
}
@@ -512,7 +519,7 @@ packet_routed:
if (inet_opt && inet_opt->opt.optlen) {
iph->ihl += inet_opt->opt.optlen >> 2;
- ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
+ ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt);
}
ip_select_ident_segs(net, skb, sk,
@@ -529,7 +536,7 @@ packet_routed:
no_route:
rcu_read_unlock();
IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_OUTNOROUTES);
return -EHOSTUNREACH;
}
EXPORT_SYMBOL(__ip_queue_xmit);
@@ -754,6 +761,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
{
struct iphdr *iph;
struct sk_buff *skb2;
+ bool mono_delivery_time = skb->mono_delivery_time;
struct rtable *rt = skb_rtable(skb);
unsigned int mtu, hlen, ll_rs;
struct ip_fraglist_iter iter;
@@ -825,18 +833,27 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
/* Everything is OK. Generate! */
ip_fraglist_init(skb, iph, hlen, &iter);
- if (iter.frag)
- ip_options_fragment(iter.frag);
-
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
if (iter.frag) {
+ bool first_frag = (iter.offset == 0);
+
IPCB(iter.frag)->flags = IPCB(skb)->flags;
ip_fraglist_prepare(skb, &iter);
+ if (first_frag && IPCB(skb)->opt.optlen) {
+ /* ipcb->opt is not populated for frags
+ * coming from __ip_make_skb(), but
+ * ip_options_fragment() needs optlen
+ */
+ IPCB(iter.frag)->opt.optlen =
+ IPCB(skb)->opt.optlen;
+ ip_options_fragment(iter.frag);
+ ip_send_check(iter.iph);
+ }
}
- skb->tstamp = tstamp;
+ skb_set_delivery_time(skb, tstamp, mono_delivery_time);
err = output(net, sk, skb);
if (!err)
@@ -892,7 +909,7 @@ slow_path:
/*
* Put this fragment into the sending queue.
*/
- skb2->tstamp = tstamp;
+ skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
err = output(net, sk, skb2);
if (err)
goto fail;
@@ -952,7 +969,6 @@ static int __ip_append_data(struct sock *sk,
struct inet_sock *inet = inet_sk(sk);
struct ubuf_info *uarg = NULL;
struct sk_buff *skb;
-
struct ip_options *opt = cork->opt;
int hh_len;
int exthdrlen;
@@ -960,6 +976,7 @@ static int __ip_append_data(struct sock *sk,
int copy;
int err;
int offset = 0;
+ bool zc = false;
unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE;
struct rtable *rt = (struct rtable *)cork->dst;
@@ -975,7 +992,7 @@ static int __ip_append_data(struct sock *sk,
if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
- tskey = sk->sk_tskey++;
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@@ -1000,17 +1017,35 @@ static int __ip_append_data(struct sock *sk,
(!exthdrlen || (rt->dst.dev->features & NETIF_F_HW_ESP_TX_CSUM)))
csummode = CHECKSUM_PARTIAL;
- if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
- uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
- if (!uarg)
- return -ENOBUFS;
- extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
- if (rt->dst.dev->features & NETIF_F_SG &&
- csummode == CHECKSUM_PARTIAL) {
- paged = true;
- } else {
- uarg->zerocopy = 0;
- skb_zcopy_set(skb, uarg, &extra_uref);
+ if ((flags & MSG_ZEROCOPY) && length) {
+ struct msghdr *msg = from;
+
+ if (getfrag == ip_generic_getfrag && msg->msg_ubuf) {
+ if (skb_zcopy(skb) && msg->msg_ubuf != skb_zcopy(skb))
+ return -EINVAL;
+
+ /* Leave uarg NULL if we can't do zerocopy; callers should
+ * be able to handle it.
+ */
+ if ((rt->dst.dev->features & NETIF_F_SG) &&
+ csummode == CHECKSUM_PARTIAL) {
+ paged = true;
+ zc = true;
+ uarg = msg->msg_ubuf;
+ }
+ } else if (sock_flag(sk, SOCK_ZEROCOPY)) {
+ uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
+ if (!uarg)
+ return -ENOBUFS;
+ extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
+ if (rt->dst.dev->features & NETIF_F_SG &&
+ csummode == CHECKSUM_PARTIAL) {
+ paged = true;
+ zc = true;
+ } else {
+ uarg_to_msgzc(uarg)->zerocopy = 0;
+ skb_zcopy_set(skb, uarg, &extra_uref);
+ }
}
}
@@ -1075,8 +1110,8 @@ alloc_new_skb:
!(rt->dst.dev->features & NETIF_F_SG)))
alloclen = fraglen;
else {
- alloclen = min_t(int, fraglen, MAX_HEADER);
- pagedlen = fraglen - alloclen;
+ alloclen = fragheaderlen + transhdrlen;
+ pagedlen = datalen - transhdrlen;
}
alloclen += alloc_extra;
@@ -1171,13 +1206,14 @@ alloc_new_skb:
err = -EFAULT;
goto error;
}
- } else if (!uarg || !uarg->zerocopy) {
+ } else if (!zc) {
int i = skb_shinfo(skb)->nr_frags;
err = -ENOMEM;
if (!sk_page_frag_refill(sk, pfrag))
goto error;
+ skb_zcopy_downgrade_managed(skb);
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
err = -EMSGSIZE;
@@ -1197,9 +1233,7 @@ alloc_new_skb:
pfrag->offset += copy;
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
- skb->len += copy;
- skb->data_len += copy;
- skb->truesize += copy;
+ skb_len_add(skb, copy);
wmem_alloc_delta += copy;
} else {
err = skb_zerocopy_iter_dgram(skb, from, copy);
@@ -1426,9 +1460,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
skb->csum = csum_block_add(skb->csum, csum, skb->len);
}
- skb->len += len;
- skb->data_len += len;
- skb->truesize += len;
+ skb_len_add(skb, len);
refcount_add(len, &sk->sk_wmem_alloc);
offset += len;
size -= len;
@@ -1525,7 +1557,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
if (opt) {
iph->ihl += opt->optlen >> 2;
- ip_options_build(skb, opt, cork->addr, rt, 0);
+ ip_options_build(skb, opt, cork->addr, rt);
}
skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority;
@@ -1687,7 +1719,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
tcp_hdr(skb)->source, tcp_hdr(skb)->dest,
arg->uid);
security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4));
- rt = ip_route_output_key(net, &fl4);
+ rt = ip_route_output_flow(net, &fl4, sk);
if (IS_ERR(rt))
return;
@@ -1695,7 +1727,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if;
- sk->sk_sndbuf = sysctl_wmem_default;
+ sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default);
ipc.sockc.mark = fl4.flowi4_mark;
err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
len, 0, &ipc, &rt, MSG_DONTWAIT);
@@ -1711,6 +1743,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
arg->csumoffset) = csum_fold(csum_add(nskb->csum,
arg->csum));
nskb->ip_summed = CHECKSUM_NONE;
+ nskb->mono_delivery_time = !!transmit_time;
ip_push_pending_frames(sk, &fl4);
}
out:
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 445a9ecaefa1..6e19cad154f5 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -772,7 +772,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
if (optlen < GROUP_FILTER_SIZE(0))
return -EINVAL;
- if (optlen > sysctl_optmem_max)
+ if (optlen > READ_ONCE(sysctl_optmem_max))
return -ENOBUFS;
gsf = memdup_sockptr(optval, optlen);
@@ -782,7 +782,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
/* numsrc >= (4G-140)/128 overflow in 32 bits */
err = -ENOBUFS;
if (gsf->gf_numsrc >= 0x1ffffff ||
- gsf->gf_numsrc > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
+ gsf->gf_numsrc > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
goto out_free_gsf;
err = -EINVAL;
@@ -808,7 +808,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < size0)
return -EINVAL;
- if (optlen > sysctl_optmem_max - 4)
+ if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
return -ENOBUFS;
p = kmalloc(optlen + 4, GFP_KERNEL);
@@ -832,7 +832,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
/* numsrc >= (4G-140)/128 overflow in 32 bits */
err = -ENOBUFS;
- if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
+ if (n > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
goto out_free_gsf;
err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
&gf32->gf_group, gf32->gf_slist_flex);
@@ -888,8 +888,8 @@ static int compat_ip_mcast_join_leave(struct sock *sk, int optname,
DEFINE_STATIC_KEY_FALSE(ip4_min_ttl);
-static int do_ip_setsockopt(struct sock *sk, int level, int optname,
- sockptr_t optval, unsigned int optlen)
+int do_ip_setsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
@@ -944,7 +944,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
err = 0;
if (needs_rtnl)
rtnl_lock();
- lock_sock(sk);
+ sockopt_lock_sock(sk);
switch (optname) {
case IP_OPTIONS:
@@ -1233,7 +1233,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
if (optlen < IP_MSFILTER_SIZE(0))
goto e_inval;
- if (optlen > sysctl_optmem_max) {
+ if (optlen > READ_ONCE(sysctl_optmem_max)) {
err = -ENOBUFS;
break;
}
@@ -1244,7 +1244,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
}
/* numsrc >= (1G-4) overflow in 32 bits */
if (msf->imsf_numsrc >= 0x3ffffffcU ||
- msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
+ msf->imsf_numsrc > READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) {
kfree(msf);
err = -ENOBUFS;
break;
@@ -1333,14 +1333,14 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
case IP_IPSEC_POLICY:
case IP_XFRM_POLICY:
err = -EPERM;
- if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
break;
err = xfrm_user_policy(sk, optname, optval, optlen);
break;
case IP_TRANSPARENT:
- if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
- !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+ if (!!val && !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
+ !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
err = -EPERM;
break;
}
@@ -1368,13 +1368,13 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
err = -ENOPROTOOPT;
break;
}
- release_sock(sk);
+ sockopt_release_sock(sk);
if (needs_rtnl)
rtnl_unlock();
return err;
e_inval:
- release_sock(sk);
+ sockopt_release_sock(sk);
if (needs_rtnl)
rtnl_unlock();
return -EINVAL;
@@ -1462,37 +1462,37 @@ static bool getsockopt_needs_rtnl(int optname)
return false;
}
-static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
- int __user *optlen, int len)
+static int ip_get_mcast_msfilter(struct sock *sk, sockptr_t optval,
+ sockptr_t optlen, int len)
{
const int size0 = offsetof(struct group_filter, gf_slist_flex);
- struct group_filter __user *p = optval;
struct group_filter gsf;
- int num;
+ int num, gsf_size;
int err;
if (len < size0)
return -EINVAL;
- if (copy_from_user(&gsf, p, size0))
+ if (copy_from_sockptr(&gsf, optval, size0))
return -EFAULT;
num = gsf.gf_numsrc;
- err = ip_mc_gsfget(sk, &gsf, p->gf_slist_flex);
+ err = ip_mc_gsfget(sk, &gsf, optval,
+ offsetof(struct group_filter, gf_slist_flex));
if (err)
return err;
if (gsf.gf_numsrc < num)
num = gsf.gf_numsrc;
- if (put_user(GROUP_FILTER_SIZE(num), optlen) ||
- copy_to_user(p, &gsf, size0))
+ gsf_size = GROUP_FILTER_SIZE(num);
+ if (copy_to_sockptr(optlen, &gsf_size, sizeof(int)) ||
+ copy_to_sockptr(optval, &gsf, size0))
return -EFAULT;
return 0;
}
-static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
- int __user *optlen, int len)
+static int compat_ip_get_mcast_msfilter(struct sock *sk, sockptr_t optval,
+ sockptr_t optlen, int len)
{
const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
- struct compat_group_filter __user *p = optval;
struct compat_group_filter gf32;
struct group_filter gf;
int num;
@@ -1500,7 +1500,7 @@ static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
if (len < size0)
return -EINVAL;
- if (copy_from_user(&gf32, p, size0))
+ if (copy_from_sockptr(&gf32, optval, size0))
return -EFAULT;
gf.gf_interface = gf32.gf_interface;
@@ -1508,21 +1508,24 @@ static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
num = gf.gf_numsrc = gf32.gf_numsrc;
gf.gf_group = gf32.gf_group;
- err = ip_mc_gsfget(sk, &gf, p->gf_slist_flex);
+ err = ip_mc_gsfget(sk, &gf, optval,
+ offsetof(struct compat_group_filter, gf_slist_flex));
if (err)
return err;
if (gf.gf_numsrc < num)
num = gf.gf_numsrc;
len = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32));
- if (put_user(len, optlen) ||
- put_user(gf.gf_fmode, &p->gf_fmode) ||
- put_user(gf.gf_numsrc, &p->gf_numsrc))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)) ||
+ copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_fmode),
+ &gf.gf_fmode, sizeof(gf.gf_fmode)) ||
+ copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_numsrc),
+ &gf.gf_numsrc, sizeof(gf.gf_numsrc)))
return -EFAULT;
return 0;
}
-static int do_ip_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
+int do_ip_getsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, sockptr_t optlen)
{
struct inet_sock *inet = inet_sk(sk);
bool needs_rtnl = getsockopt_needs_rtnl(optname);
@@ -1535,14 +1538,14 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
if (ip_mroute_opt(optname))
return ip_mroute_getsockopt(sk, optname, optval, optlen);
- if (get_user(len, optlen))
+ if (copy_from_sockptr(&len, optlen, sizeof(int)))
return -EFAULT;
if (len < 0)
return -EINVAL;
if (needs_rtnl)
rtnl_lock();
- lock_sock(sk);
+ sockopt_lock_sock(sk);
switch (optname) {
case IP_OPTIONS:
@@ -1558,17 +1561,19 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
memcpy(optbuf, &inet_opt->opt,
sizeof(struct ip_options) +
inet_opt->opt.optlen);
- release_sock(sk);
+ sockopt_release_sock(sk);
- if (opt->optlen == 0)
- return put_user(0, optlen);
+ if (opt->optlen == 0) {
+ len = 0;
+ return copy_to_sockptr(optlen, &len, sizeof(int));
+ }
ip_options_undo(opt);
len = min_t(unsigned int, len, opt->optlen);
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, opt->__data, len))
+ if (copy_to_sockptr(optval, opt->__data, len))
return -EFAULT;
return 0;
}
@@ -1606,7 +1611,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
{
struct net *net = sock_net(sk);
val = (inet->uc_ttl == -1 ?
- net->ipv4.sysctl_ip_default_ttl :
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl) :
inet->uc_ttl);
break;
}
@@ -1632,7 +1637,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
dst_release(dst);
}
if (!val) {
- release_sock(sk);
+ sockopt_release_sock(sk);
return -ENOTCONN;
}
break;
@@ -1657,11 +1662,11 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
struct in_addr addr;
len = min_t(unsigned int, len, sizeof(struct in_addr));
addr.s_addr = inet->mc_addr;
- release_sock(sk);
+ sockopt_release_sock(sk);
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, &addr, len))
+ if (copy_to_sockptr(optval, &addr, len))
return -EFAULT;
return 0;
}
@@ -1673,12 +1678,11 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
err = -EINVAL;
goto out;
}
- if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
+ if (copy_from_sockptr(&msf, optval, IP_MSFILTER_SIZE(0))) {
err = -EFAULT;
goto out;
}
- err = ip_mc_msfget(sk, &msf,
- (struct ip_msfilter __user *)optval, optlen);
+ err = ip_mc_msfget(sk, &msf, optval, optlen);
goto out;
}
case MCAST_MSFILTER:
@@ -1695,13 +1699,18 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
{
struct msghdr msg;
- release_sock(sk);
+ sockopt_release_sock(sk);
if (sk->sk_type != SOCK_STREAM)
return -ENOPROTOOPT;
- msg.msg_control_is_user = true;
- msg.msg_control_user = optval;
+ if (optval.is_kernel) {
+ msg.msg_control_is_user = false;
+ msg.msg_control = optval.kernel;
+ } else {
+ msg.msg_control_is_user = true;
+ msg.msg_control_user = optval.user;
+ }
msg.msg_controllen = len;
msg.msg_flags = in_compat_syscall() ? MSG_CMSG_COMPAT : 0;
@@ -1722,7 +1731,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
}
len -= msg.msg_controllen;
- return put_user(len, optlen);
+ return copy_to_sockptr(optlen, &len, sizeof(int));
}
case IP_FREEBIND:
val = inet->freebind;
@@ -1734,29 +1743,29 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
val = inet->min_ttl;
break;
default:
- release_sock(sk);
+ sockopt_release_sock(sk);
return -ENOPROTOOPT;
}
- release_sock(sk);
+ sockopt_release_sock(sk);
if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
unsigned char ucval = (unsigned char)val;
len = 1;
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, &ucval, 1))
+ if (copy_to_sockptr(optval, &ucval, 1))
return -EFAULT;
} else {
len = min_t(unsigned int, sizeof(int), len);
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, &val, len))
+ if (copy_to_sockptr(optval, &val, len))
return -EFAULT;
}
return 0;
out:
- release_sock(sk);
+ sockopt_release_sock(sk);
if (needs_rtnl)
rtnl_unlock();
return err;
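
The sockptr_t conversions above let one getsockopt path serve both kernel and user callers: the pointer carries an is_kernel tag and the copy helpers dispatch on it. A minimal userspace analogue of the idea (memcpy stands in for copy_to_user()):

#include <stdbool.h>
#include <stddef.h>
#include <string.h>

typedef struct {
	union { void *kernel; void *user; };
	bool is_kernel;
} sockptr_t;

static int copy_to_sockptr(sockptr_t dst, const void *src, size_t n)
{
	if (dst.is_kernel)
		memcpy(dst.kernel, src, n);
	else
		memcpy(dst.user, src, n);   /* stands in for copy_to_user() */
	return 0;
}

int main(void)
{
	int val = 64, out = 0;
	sockptr_t dst = { .kernel = &out, .is_kernel = true };

	copy_to_sockptr(dst, &val, sizeof(val));
	return out != 64;
}
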
@@ -1767,7 +1776,8 @@ int ip_getsockopt(struct sock *sk, int level,
{
int err;
- err = do_ip_getsockopt(sk, level, optname, optval, optlen);
+ err = do_ip_getsockopt(sk, level, optname,
+ USER_SOCKPTR(optval), USER_SOCKPTR(optlen));
#if IS_ENABLED(CONFIG_BPFILTER_UMH)
if (optname >= BPFILTER_IPT_SO_GET_INFO &&
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 5a473319d3a5..019f3b0839c5 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -242,7 +242,7 @@ static struct net_device *__ip_tunnel_create(struct net *net,
if (parms->name[0]) {
if (!dev_valid_name(parms->name))
goto failed;
- strlcpy(name, parms->name, IFNAMSIZ);
+ strscpy(name, parms->name, IFNAMSIZ);
} else {
if (strlen(ops->kind) > (IFNAMSIZ - 3))
goto failed;
@@ -294,8 +294,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
iph->saddr, tunnel->parms.o_key,
- RT_TOS(iph->tos), tunnel->parms.link,
- tunnel->fwmark, 0);
+ RT_TOS(iph->tos), dev_net(dev),
+ tunnel->parms.link, tunnel->fwmark, 0, 0);
rt = ip_route_output_key(tunnel->net, &fl4);
if (!IS_ERR(rt)) {
@@ -570,7 +570,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
- 0, skb->mark, skb_get_hash(skb));
+ dev_net(dev), 0, skb->mark, skb_get_hash(skb),
+ key->flow_flags);
if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
goto tx_error;
@@ -641,6 +642,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *inner_iph;
unsigned int max_headroom; /* The extra header space needed */
struct rtable *rt = NULL; /* Route to the other host */
+ __be16 payload_protocol;
bool use_cache = false;
struct flowi4 fl4;
bool md = false;
@@ -651,6 +653,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
connected = (tunnel->parms.iph.daddr != 0);
+ payload_protocol = skb_protocol(skb, true);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -670,13 +673,12 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
dst = tun_info->key.u.ipv4.dst;
md = true;
connected = true;
- }
- else if (skb->protocol == htons(ETH_P_IP)) {
+ } else if (payload_protocol == htons(ETH_P_IP)) {
rt = skb_rtable(skb);
dst = rt_nexthop(rt, inner_iph->daddr);
}
#if IS_ENABLED(CONFIG_IPV6)
- else if (skb->protocol == htons(ETH_P_IPV6)) {
+ else if (payload_protocol == htons(ETH_P_IPV6)) {
const struct in6_addr *addr6;
struct neighbour *neigh;
bool do_tx_error_icmp;
@@ -716,18 +718,19 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
tos = tnl_params->tos;
if (tos & 0x1) {
tos &= ~0x1;
- if (skb->protocol == htons(ETH_P_IP)) {
+ if (payload_protocol == htons(ETH_P_IP)) {
tos = inner_iph->tos;
connected = false;
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ } else if (payload_protocol == htons(ETH_P_IPV6)) {
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
connected = false;
}
}
ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- tunnel->fwmark, skb_get_hash(skb));
+ tunnel->parms.o_key, RT_TOS(tos),
+ dev_net(dev), tunnel->parms.link,
+ tunnel->fwmark, skb_get_hash(skb), 0);
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error;
@@ -764,7 +767,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
df = tnl_params->frag_off;
- if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
+ if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
df |= (inner_iph->frag_off & htons(IP_DF));
if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
@@ -785,10 +788,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
ttl = tnl_params->ttl;
if (ttl == 0) {
- if (skb->protocol == htons(ETH_P_IP))
+ if (payload_protocol == htons(ETH_P_IP))
ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
- else if (skb->protocol == htons(ETH_P_IPV6))
+ else if (payload_protocol == htons(ETH_P_IPV6))
ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
else
@@ -1064,7 +1067,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
memset(&parms, 0, sizeof(parms));
if (devname)
- strlcpy(parms.name, devname, IFNAMSIZ);
+ strscpy(parms.name, devname, IFNAMSIZ);
rtnl_lock();
itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 6b2dc7b2b612..92c02c886fe7 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -410,7 +410,7 @@ int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst,
u32 mtu = dst_mtu(encap_dst) - headroom;
if ((skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) ||
- (!skb_is_gso(skb) && (skb->len - skb_mac_header_len(skb)) <= mtu))
+ (!skb_is_gso(skb) && (skb->len - skb_network_offset(skb)) <= mtu))
return 0;
skb_dst_update_pmtu_no_confirm(skb, mtu);
@@ -1079,3 +1079,70 @@ EXPORT_SYMBOL(ip_tunnel_parse_protocol);
const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tunnel_parse_protocol };
EXPORT_SYMBOL(ip_tunnel_header_ops);
+
+/* This function returns true when ENCAP attributes are present in the nl msg */
+bool ip_tunnel_netlink_encap_parms(struct nlattr *data[],
+ struct ip_tunnel_encap *encap)
+{
+ bool ret = false;
+
+ memset(encap, 0, sizeof(*encap));
+
+ if (!data)
+ return ret;
+
+ if (data[IFLA_IPTUN_ENCAP_TYPE]) {
+ ret = true;
+ encap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
+ ret = true;
+ encap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_SPORT]) {
+ ret = true;
+ encap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_DPORT]) {
+ ret = true;
+ encap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_netlink_encap_parms);
+
+void ip_tunnel_netlink_parms(struct nlattr *data[],
+ struct ip_tunnel_parm *parms)
+{
+ if (data[IFLA_IPTUN_LINK])
+ parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
+
+ if (data[IFLA_IPTUN_LOCAL])
+ parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
+
+ if (data[IFLA_IPTUN_REMOTE])
+ parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
+
+ if (data[IFLA_IPTUN_TTL]) {
+ parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
+ if (parms->iph.ttl)
+ parms->iph.frag_off = htons(IP_DF);
+ }
+
+ if (data[IFLA_IPTUN_TOS])
+ parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
+
+ if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
+ parms->iph.frag_off = htons(IP_DF);
+
+ if (data[IFLA_IPTUN_FLAGS])
+ parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
+
+ if (data[IFLA_IPTUN_PROTO])
+ parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_netlink_parms);
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 366094c1ce6c..5a4fb2539b08 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -117,7 +117,8 @@ out:
return err;
}
-static int ipcomp4_init_state(struct xfrm_state *x)
+static int ipcomp4_init_state(struct xfrm_state *x,
+ struct netlink_ext_ack *extack)
{
int err = -EINVAL;
@@ -129,17 +130,20 @@ static int ipcomp4_init_state(struct xfrm_state *x)
x->props.header_len += sizeof(struct iphdr);
break;
default:
+ NL_SET_ERR_MSG(extack, "Unsupported XFRM mode for IPcomp");
goto out;
}
- err = ipcomp_init_state(x);
+ err = ipcomp_init_state(x, extack);
if (err)
goto out;
if (x->props.mode == XFRM_MODE_TUNNEL) {
err = ipcomp_tunnel_attach(x);
- if (err)
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Kernel error: failed to initialize the associated state");
goto out;
+ }
}
err = 0;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 9d41d5d5cd1e..e90bc0aa85c7 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1434,6 +1434,7 @@ __be32 __init root_nfs_parse_addr(char *name)
static int __init wait_for_devices(void)
{
int i;
+ bool try_init_devs = true;
for (i = 0; i < DEVICE_WAIT_MAX; i++) {
struct net_device *dev;
@@ -1452,6 +1453,11 @@ static int __init wait_for_devices(void)
rtnl_unlock();
if (found)
return 0;
+ if (try_init_devs &&
+ (ROOT_DEV == Root_NFS || ROOT_DEV == Root_CIFS)) {
+ try_init_devs = false;
+ wait_for_init_devices_probe();
+ }
ssleep(1);
}
return -ENODEV;
@@ -1759,15 +1765,15 @@ static int __init ip_auto_config_setup(char *addrs)
case 4:
if ((dp = strchr(ip, '.'))) {
*dp++ = '\0';
- strlcpy(utsname()->domainname, dp,
+ strscpy(utsname()->domainname, dp,
sizeof(utsname()->domainname));
}
- strlcpy(utsname()->nodename, ip,
+ strscpy(utsname()->nodename, ip,
sizeof(utsname()->nodename));
ic_host_name_set = 1;
break;
case 5:
- strlcpy(user_dev_name, ip, sizeof(user_dev_name));
+ strscpy(user_dev_name, ip, sizeof(user_dev_name));
break;
case 6:
if (ic_proto_name(ip) == 0 &&
@@ -1814,7 +1820,7 @@ __setup("nfsaddrs=", nfsaddrs_config_setup);
static int __init vendor_class_identifier_setup(char *addrs)
{
- if (strlcpy(vendor_class_identifier, addrs,
+ if (strscpy(vendor_class_identifier, addrs,
sizeof(vendor_class_identifier))
>= sizeof(vendor_class_identifier))
pr_warn("DHCP: vendorclass too long, truncated to \"%s\"\n",
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 123ea63a04cb..180f9daf5bec 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -417,29 +417,7 @@ static void ipip_netlink_parms(struct nlattr *data[],
if (!data)
return;
- if (data[IFLA_IPTUN_LINK])
- parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
-
- if (data[IFLA_IPTUN_LOCAL])
- parms->iph.saddr = nla_get_in_addr(data[IFLA_IPTUN_LOCAL]);
-
- if (data[IFLA_IPTUN_REMOTE])
- parms->iph.daddr = nla_get_in_addr(data[IFLA_IPTUN_REMOTE]);
-
- if (data[IFLA_IPTUN_TTL]) {
- parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
- if (parms->iph.ttl)
- parms->iph.frag_off = htons(IP_DF);
- }
-
- if (data[IFLA_IPTUN_TOS])
- parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
-
- if (data[IFLA_IPTUN_PROTO])
- parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
-
- if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
- parms->iph.frag_off = htons(IP_DF);
+ ip_tunnel_netlink_parms(data, parms);
if (data[IFLA_IPTUN_COLLECT_METADATA])
*collect_md = true;
@@ -448,40 +426,6 @@ static void ipip_netlink_parms(struct nlattr *data[],
*fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
}
-/* This function returns true when ENCAP attributes are present in the nl msg */
-static bool ipip_netlink_encap_parms(struct nlattr *data[],
- struct ip_tunnel_encap *ipencap)
-{
- bool ret = false;
-
- memset(ipencap, 0, sizeof(*ipencap));
-
- if (!data)
- return ret;
-
- if (data[IFLA_IPTUN_ENCAP_TYPE]) {
- ret = true;
- ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
- }
-
- if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
- ret = true;
- ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
- }
-
- if (data[IFLA_IPTUN_ENCAP_SPORT]) {
- ret = true;
- ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
- }
-
- if (data[IFLA_IPTUN_ENCAP_DPORT]) {
- ret = true;
- ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
- }
-
- return ret;
-}
-
static int ipip_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
@@ -491,7 +435,7 @@ static int ipip_newlink(struct net *src_net, struct net_device *dev,
struct ip_tunnel_encap ipencap;
__u32 fwmark = 0;
- if (ipip_netlink_encap_parms(data, &ipencap)) {
+ if (ip_tunnel_netlink_encap_parms(data, &ipencap)) {
int err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0)
@@ -512,7 +456,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
bool collect_md;
__u32 fwmark = t->fwmark;
- if (ipip_netlink_encap_parms(data, &ipencap)) {
+ if (ip_tunnel_netlink_encap_parms(data, &ipencap)) {
int err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 07274619b9ea..e04544ac4b45 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -77,7 +77,12 @@ struct ipmr_result {
* Note that the changes are semaphored via rtnl_lock.
*/
-static DEFINE_RWLOCK(mrt_lock);
+static DEFINE_SPINLOCK(mrt_lock);
+
+static struct net_device *vif_dev_read(const struct vif_device *vif)
+{
+ return rcu_dereference(vif->dev);
+}
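
vif->dev is now RCU-managed: writers publish with rcu_assign_pointer() under the new spinlock, while readers fetch it with rcu_dereference() and no lock at all. A userspace sketch of the publish/subscribe ordering, using C11 atomics in place of the RCU primitives:

#include <stdatomic.h>
#include <stddef.h>

struct net_device;

static _Atomic(struct net_device *) vif_dev;

static void vif_publish(struct net_device *dev)
{
	/* release: stands in for rcu_assign_pointer() */
	atomic_store_explicit(&vif_dev, dev, memory_order_release);
}

static struct net_device *vif_dev_read_sketch(void)
{
	/* consume: stands in for rcu_dereference() */
	return atomic_load_explicit(&vif_dev, memory_order_consume);
}

int main(void)
{
	vif_publish(NULL);
	return vif_dev_read_sketch() != NULL;
}
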
/* Multicast router control variables */
@@ -100,11 +105,11 @@ static void ipmr_free_table(struct mr_table *mrt);
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct net_device *dev, struct sk_buff *skb,
struct mfc_cache *cache, int local);
-static int ipmr_cache_report(struct mr_table *mrt,
+static int ipmr_cache_report(const struct mr_table *mrt,
struct sk_buff *pkt, vifi_t vifi, int assert);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
int cmd);
-static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
+static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
static void mroute_clean_tables(struct mr_table *mrt, int flags);
static void ipmr_expire_process(struct timer_list *t);
@@ -256,7 +261,9 @@ static int __net_init ipmr_rules_init(struct net *net)
return 0;
err2:
+ rtnl_lock();
ipmr_free_table(mrt);
+ rtnl_unlock();
err1:
fib_rules_unregister(ops);
return err;
@@ -266,13 +273,12 @@ static void __net_exit ipmr_rules_exit(struct net *net)
{
struct mr_table *mrt, *next;
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
list_del(&mrt->list);
ipmr_free_table(mrt);
}
fib_rules_unregister(net->ipv4.mr_rules_ops);
- rtnl_unlock();
}
static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
@@ -328,10 +334,9 @@ static int __net_init ipmr_rules_init(struct net *net)
static void __net_exit ipmr_rules_exit(struct net *net)
{
- rtnl_lock();
+ ASSERT_RTNL();
ipmr_free_table(net->ipv4.mrt);
net->ipv4.mrt = NULL;
- rtnl_unlock();
}
static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
@@ -356,7 +361,7 @@ static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
const void *ptr)
{
const struct mfc_cache_cmp_arg *cmparg = arg->key;
- struct mfc_cache *c = (struct mfc_cache *)ptr;
+ const struct mfc_cache *c = ptr;
return cmparg->mfc_mcastgrp != c->mfc_mcastgrp ||
cmparg->mfc_origin != c->mfc_origin;
@@ -501,11 +506,15 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
return err;
}
- read_lock(&mrt_lock);
dev->stats.tx_bytes += skb->len;
dev->stats.tx_packets++;
- ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
- read_unlock(&mrt_lock);
+ rcu_read_lock();
+
+ /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */
+ ipmr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
+ IGMPMSG_WHOLEPKT);
+
+ rcu_read_unlock();
kfree_skb(skb);
return NETDEV_TX_OK;
}
@@ -572,6 +581,7 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
{
struct net_device *reg_dev = NULL;
struct iphdr *encap;
+ int vif_num;
encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
/* Check that:
@@ -584,11 +594,10 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
ntohs(encap->tot_len) + pimlen > skb->len)
return 1;
- read_lock(&mrt_lock);
- if (mrt->mroute_reg_vif_num >= 0)
- reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
- read_unlock(&mrt_lock);
-
+ /* Pairs with WRITE_ONCE() in vif_add()/vif_delete() */
+ vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
+ if (vif_num >= 0)
+ reg_dev = vif_dev_read(&mrt->vif_table[vif_num]);
if (!reg_dev)
return 1;
@@ -614,10 +623,11 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
static int call_ipmr_vif_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct vif_device *vif,
+ struct net_device *vif_dev,
vifi_t vif_index, u32 tb_id)
{
return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type,
- vif, vif_index, tb_id,
+ vif, vif_dev, vif_index, tb_id,
&net->ipv4.ipmr_seq);
}
@@ -649,22 +659,19 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
v = &mrt->vif_table[vifi];
- if (VIF_EXISTS(mrt, vifi))
- call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
- mrt->id);
-
- write_lock_bh(&mrt_lock);
- dev = v->dev;
- v->dev = NULL;
-
- if (!dev) {
- write_unlock_bh(&mrt_lock);
+ dev = rtnl_dereference(v->dev);
+ if (!dev)
return -EADDRNOTAVAIL;
- }
- if (vifi == mrt->mroute_reg_vif_num)
- mrt->mroute_reg_vif_num = -1;
+ spin_lock(&mrt_lock);
+ call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, dev,
+ vifi, mrt->id);
+ RCU_INIT_POINTER(v->dev, NULL);
+ if (vifi == mrt->mroute_reg_vif_num) {
+ /* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */
+ WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
+ }
if (vifi + 1 == mrt->maxvif) {
int tmp;
@@ -672,10 +679,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
if (VIF_EXISTS(mrt, tmp))
break;
}
- mrt->maxvif = tmp+1;
+ WRITE_ONCE(mrt->maxvif, tmp + 1);
}
- write_unlock_bh(&mrt_lock);
+ spin_unlock(&mrt_lock);
dev_set_allmulti(dev, -1);
@@ -691,7 +698,7 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
unregister_netdevice_queue(dev, head);
- dev_put_track(dev, &v->dev_tracker);
+ netdev_put(dev, &v->dev_tracker);
return 0;
}
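vif_delete() now retires the device pointer RCU-style: clear it under the spinlock, let readers observe NULL, then drop the tracked reference with netdev_put(). A sketch of that teardown order, assuming a hypothetical demo_vif holding an __rcu device pointer:

#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>
#include <linux/netdevice.h>

struct demo_vif {
        struct net_device __rcu *dev;
        netdevice_tracker dev_tracker;
};

/* Hypothetical teardown helper; caller holds RTNL. */
static int demo_vif_release(struct demo_vif *v)
{
        struct net_device *dev = rtnl_dereference(v->dev);

        if (!dev)
                return -EADDRNOTAVAIL;
        RCU_INIT_POINTER(v->dev, NULL);   /* readers now see NULL */
        netdev_put(dev, &v->dev_tracker); /* release tracked ref */
        return 0;
}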
@@ -777,7 +784,7 @@ out:
spin_unlock(&mfc_unres_lock);
}
-/* Fill oifs list. It is called under write locked mrt_lock. */
+/* Fill oifs list. Called with mrt_lock held. */
static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
unsigned char *ttls)
{
@@ -889,15 +896,18 @@ static int vif_add(struct net *net, struct mr_table *mrt,
v->remote = vifc->vifc_rmt_addr.s_addr;
/* And finish update writing critical data */
- write_lock_bh(&mrt_lock);
- v->dev = dev;
+ spin_lock(&mrt_lock);
+ rcu_assign_pointer(v->dev, dev);
netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
- if (v->flags & VIFF_REGISTER)
- mrt->mroute_reg_vif_num = vifi;
+ if (v->flags & VIFF_REGISTER) {
+ /* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */
+ WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
+ }
if (vifi+1 > mrt->maxvif)
- mrt->maxvif = vifi+1;
- write_unlock_bh(&mrt_lock);
- call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
+ WRITE_ONCE(mrt->maxvif, vifi + 1);
+ spin_unlock(&mrt_lock);
+ call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, dev,
+ vifi, mrt->id);
return 0;
}
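The add side mirrors it: initialise everything first, then make the vif visible with rcu_assign_pointer(), whose release semantics order the earlier stores before publication. A sketch reusing the hypothetical demo_vif from the previous sketch:

/* Hypothetical publish helper; a reference on dev is already held. */
static void demo_vif_install(struct demo_vif *v, struct net_device *dev)
{
        netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
        /* All prior initialisation is ordered before this store */
        rcu_assign_pointer(v->dev, dev);
}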
@@ -994,16 +1004,18 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
} else {
+ rcu_read_lock();
ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
+ rcu_read_unlock();
}
}
}
/* Bounce a cache query up to mrouted and netlink.
*
- * Called under mrt_lock.
+ * Called under rcu_read_lock().
*/
-static int ipmr_cache_report(struct mr_table *mrt,
+static int ipmr_cache_report(const struct mr_table *mrt,
struct sk_buff *pkt, vifi_t vifi, int assert)
{
const int ihl = ip_hdrlen(pkt);
@@ -1038,8 +1050,11 @@ static int ipmr_cache_report(struct mr_table *mrt,
msg->im_vif = vifi;
msg->im_vif_hi = vifi >> 8;
} else {
- msg->im_vif = mrt->mroute_reg_vif_num;
- msg->im_vif_hi = mrt->mroute_reg_vif_num >> 8;
+ /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */
+ int vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
+
+ msg->im_vif = vif_num;
+ msg->im_vif_hi = vif_num >> 8;
}
ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
@@ -1064,10 +1079,8 @@ static int ipmr_cache_report(struct mr_table *mrt,
skb->transport_header = skb->network_header;
}
- rcu_read_lock();
mroute_sk = rcu_dereference(mrt->mroute_sk);
if (!mroute_sk) {
- rcu_read_unlock();
kfree_skb(skb);
return -EINVAL;
}
@@ -1076,7 +1089,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
/* Deliver to mrouted */
ret = sock_queue_rcv_skb(mroute_sk, skb);
- rcu_read_unlock();
+
if (ret < 0) {
net_warn_ratelimited("mroute: pending queue full, dropping entries\n");
kfree_skb(skb);
@@ -1086,6 +1099,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
}
/* Queue a packet for resolution. It gets locked cache entry! */
+/* Called under rcu_read_lock() */
static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
struct sk_buff *skb, struct net_device *dev)
{
@@ -1198,12 +1212,12 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
mfc->mfcc_mcastgrp.s_addr, parent);
rcu_read_unlock();
if (c) {
- write_lock_bh(&mrt_lock);
+ spin_lock(&mrt_lock);
c->_c.mfc_parent = mfc->mfcc_parent;
ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
if (!mrtsock)
c->_c.mfc_flags |= MFC_STATIC;
- write_unlock_bh(&mrt_lock);
+ spin_unlock(&mrt_lock);
call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
mrt->id);
mroute_netlink_event(mrt, c, RTM_NEWROUTE);
@@ -1534,7 +1548,8 @@ out:
}
/* Getsockopt support for the multicast routing system. */
-int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
+int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
+ sockptr_t optlen)
{
int olr;
int val;
@@ -1565,14 +1580,14 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
return -ENOPROTOOPT;
}
- if (get_user(olr, optlen))
+ if (copy_from_sockptr(&olr, optlen, sizeof(int)))
return -EFAULT;
olr = min_t(unsigned int, olr, sizeof(int));
if (olr < 0)
return -EINVAL;
- if (put_user(olr, optlen))
+ if (copy_to_sockptr(optlen, &olr, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, &val, olr))
+ if (copy_to_sockptr(optval, &val, olr))
return -EFAULT;
return 0;
}
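ip_mroute_getsockopt() switches from raw __user pointers to sockptr_t, which lets one handler serve both user and in-kernel callers. A sketch of the calling convention with a hypothetical handler returning a single int:

#include <linux/sockptr.h>
#include <linux/minmax.h>

/* Hypothetical getsockopt body; mirrors the hunk above. */
static int demo_getsockopt(sockptr_t optval, sockptr_t optlen, int val)
{
        int len;

        if (copy_from_sockptr(&len, optlen, sizeof(int)))
                return -EFAULT;
        len = min_t(unsigned int, len, sizeof(int));
        if (len < 0)
                return -EINVAL;
        if (copy_to_sockptr(optlen, &len, sizeof(int)))
                return -EFAULT;
        return copy_to_sockptr(optval, &val, len) ? -EFAULT : 0;
}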
@@ -1598,20 +1613,20 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (vr.vifi >= mrt->maxvif)
return -EINVAL;
vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
- read_lock(&mrt_lock);
+ rcu_read_lock();
vif = &mrt->vif_table[vr.vifi];
if (VIF_EXISTS(mrt, vr.vifi)) {
- vr.icount = vif->pkt_in;
- vr.ocount = vif->pkt_out;
- vr.ibytes = vif->bytes_in;
- vr.obytes = vif->bytes_out;
- read_unlock(&mrt_lock);
+ vr.icount = READ_ONCE(vif->pkt_in);
+ vr.ocount = READ_ONCE(vif->pkt_out);
+ vr.ibytes = READ_ONCE(vif->bytes_in);
+ vr.obytes = READ_ONCE(vif->bytes_out);
+ rcu_read_unlock();
if (copy_to_user(arg, &vr, sizeof(vr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
case SIOCGETSGCNT:
if (copy_from_user(&sr, arg, sizeof(sr)))
@@ -1673,20 +1688,20 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (vr.vifi >= mrt->maxvif)
return -EINVAL;
vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
- read_lock(&mrt_lock);
+ rcu_read_lock();
vif = &mrt->vif_table[vr.vifi];
if (VIF_EXISTS(mrt, vr.vifi)) {
- vr.icount = vif->pkt_in;
- vr.ocount = vif->pkt_out;
- vr.ibytes = vif->bytes_in;
- vr.obytes = vif->bytes_out;
- read_unlock(&mrt_lock);
+ vr.icount = READ_ONCE(vif->pkt_in);
+ vr.ocount = READ_ONCE(vif->pkt_out);
+ vr.ibytes = READ_ONCE(vif->bytes_in);
+ vr.obytes = READ_ONCE(vif->bytes_out);
+ rcu_read_unlock();
if (copy_to_user(arg, &vr, sizeof(vr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
case SIOCGETSGCNT:
if (copy_from_user(&sr, arg, sizeof(sr)))
@@ -1726,7 +1741,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
ipmr_for_each_table(mrt, net) {
v = &mrt->vif_table[0];
for (ct = 0; ct < mrt->maxvif; ct++, v++) {
- if (v->dev == dev)
+ if (rcu_access_pointer(v->dev) == dev)
vif_delete(mrt, ct, 1, NULL);
}
}
@@ -1804,26 +1819,28 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
}
#endif
-/* Processing handlers for ipmr_forward */
+/* Processing handlers for ipmr_forward, under rcu_read_lock() */
static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
int in_vifi, struct sk_buff *skb, int vifi)
{
const struct iphdr *iph = ip_hdr(skb);
struct vif_device *vif = &mrt->vif_table[vifi];
+ struct net_device *vif_dev;
struct net_device *dev;
struct rtable *rt;
struct flowi4 fl4;
int encap = 0;
- if (!vif->dev)
+ vif_dev = vif_dev_read(vif);
+ if (!vif_dev)
goto out_free;
if (vif->flags & VIFF_REGISTER) {
- vif->pkt_out++;
- vif->bytes_out += skb->len;
- vif->dev->stats.tx_bytes += skb->len;
- vif->dev->stats.tx_packets++;
+ WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
+ WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
+ vif_dev->stats.tx_bytes += skb->len;
+ vif_dev->stats.tx_packets++;
ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
goto out_free;
}
@@ -1868,8 +1885,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
goto out_free;
}
- vif->pkt_out++;
- vif->bytes_out += skb->len;
+ WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
+ WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
@@ -1881,8 +1898,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
if (vif->flags & VIFF_TUNNEL) {
ip_encap(net, skb, vif->local, vif->remote);
/* FIXME: extra output firewall step used to be here. --RR */
- vif->dev->stats.tx_packets++;
- vif->dev->stats.tx_bytes += skb->len;
+ vif_dev->stats.tx_packets++;
+ vif_dev->stats.tx_bytes += skb->len;
}
IPCB(skb)->flags |= IPSKB_FORWARDED;
@@ -1906,18 +1923,20 @@ out_free:
kfree_skb(skb);
}
-static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
+/* Called with mrt_lock or rcu_read_lock() */
+static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev)
{
int ct;
-
- for (ct = mrt->maxvif-1; ct >= 0; ct--) {
- if (mrt->vif_table[ct].dev == dev)
+ /* Pairs with WRITE_ONCE() in vif_delete()/vif_add() */
+ for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
+ if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
break;
}
return ct;
}
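ipmr_find_vif() only compares the device pointer, so rcu_access_pointer() suffices; rcu_dereference() is required only when the pointer will actually be dereferenced. A one-function sketch of the distinction (demo_vif as above):

/* Compare only: no dereference, hence no RCU read-side requirement. */
static bool demo_vif_uses(const struct demo_vif *v,
                          const struct net_device *dev)
{
        return rcu_access_pointer(v->dev) == dev;
}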
/* "local" means that we should preserve one skb (for local delivery) */
+/* Called under rcu_read_lock() */
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct net_device *dev, struct sk_buff *skb,
struct mfc_cache *c, int local)
@@ -1944,7 +1963,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
}
/* Wrong interface: drop packet and (maybe) send PIM assert. */
- if (mrt->vif_table[vif].dev != dev) {
+ if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
if (rt_is_output_route(skb_rtable(skb))) {
/* It is our own packet, looped back.
* Very complicated situation...
@@ -1983,8 +2002,10 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
}
forward:
- mrt->vif_table[vif].pkt_in++;
- mrt->vif_table[vif].bytes_in += skb->len;
+ WRITE_ONCE(mrt->vif_table[vif].pkt_in,
+ mrt->vif_table[vif].pkt_in + 1);
+ WRITE_ONCE(mrt->vif_table[vif].bytes_in,
+ mrt->vif_table[vif].bytes_in + skb->len);
/* Forward the frame */
if (c->mfc_origin == htonl(INADDR_ANY) &&
@@ -2140,22 +2161,14 @@ int ip_mr_input(struct sk_buff *skb)
skb = skb2;
}
- read_lock(&mrt_lock);
vif = ipmr_find_vif(mrt, dev);
- if (vif >= 0) {
- int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev);
- read_unlock(&mrt_lock);
-
- return err2;
- }
- read_unlock(&mrt_lock);
+ if (vif >= 0)
+ return ipmr_cache_unresolved(mrt, vif, skb, dev);
kfree_skb(skb);
return -ENODEV;
}
- read_lock(&mrt_lock);
ip_mr_forward(net, mrt, dev, skb, cache, local);
- read_unlock(&mrt_lock);
if (local)
return ip_local_deliver(skb);
@@ -2252,18 +2265,15 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
int vif = -1;
dev = skb->dev;
- read_lock(&mrt_lock);
if (dev)
vif = ipmr_find_vif(mrt, dev);
if (vif < 0) {
- read_unlock(&mrt_lock);
rcu_read_unlock();
return -ENODEV;
}
skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr));
if (!skb2) {
- read_unlock(&mrt_lock);
rcu_read_unlock();
return -ENOMEM;
}
@@ -2277,14 +2287,11 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
iph->daddr = daddr;
iph->version = 0;
err = ipmr_cache_unresolved(mrt, vif, skb2, dev);
- read_unlock(&mrt_lock);
rcu_read_unlock();
return err;
}
- read_lock(&mrt_lock);
err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
- read_unlock(&mrt_lock);
rcu_read_unlock();
return err;
}
@@ -2404,7 +2411,7 @@ static size_t igmpmsg_netlink_msgsize(size_t payloadlen)
return len;
}
-static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
+static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
{
struct net *net = read_pnet(&mrt->net);
struct nlmsghdr *nlh;
@@ -2744,18 +2751,21 @@ static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb)
{
+ struct net_device *vif_dev;
struct nlattr *vif_nest;
struct vif_device *vif;
+ vif = &mrt->vif_table[vifid];
+ vif_dev = rtnl_dereference(vif->dev);
/* if the VIF doesn't exist just continue */
- if (!VIF_EXISTS(mrt, vifid))
+ if (!vif_dev)
return true;
- vif = &mrt->vif_table[vifid];
vif_nest = nla_nest_start_noflag(skb, IPMRA_VIF);
if (!vif_nest)
return false;
- if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) ||
+
+ if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif_dev->ifindex) ||
nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) ||
nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) ||
nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in,
@@ -2887,7 +2897,7 @@ out:
*/
static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(mrt_lock)
+ __acquires(RCU)
{
struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
@@ -2899,14 +2909,14 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
iter->mrt = mrt;
- read_lock(&mrt_lock);
+ rcu_read_lock();
return mr_vif_seq_start(seq, pos);
}
static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
- __releases(mrt_lock)
+ __releases(RCU)
{
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
}
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
@@ -2919,9 +2929,11 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
"Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
} else {
const struct vif_device *vif = v;
- const char *name = vif->dev ?
- vif->dev->name : "none";
+ const struct net_device *vif_dev;
+ const char *name;
+ vif_dev = vif_dev_read(vif);
+ name = vif_dev ? vif_dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
vif - mrt->vif_table,
@@ -3017,7 +3029,7 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack)
{
return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump,
- ipmr_mr_table_iter, &mrt_lock, extack);
+ ipmr_mr_table_iter, extack);
}
static const struct fib_notifier_ops ipmr_notifier_ops_template = {
@@ -3075,7 +3087,9 @@ static int __net_init ipmr_net_init(struct net *net)
proc_cache_fail:
remove_proc_entry("ip_mr_vif", net->proc_net);
proc_vif_fail:
+ rtnl_lock();
ipmr_rules_exit(net);
+ rtnl_unlock();
#endif
ipmr_rules_fail:
ipmr_notifier_exit(net);
@@ -3090,12 +3104,22 @@ static void __net_exit ipmr_net_exit(struct net *net)
remove_proc_entry("ip_mr_vif", net->proc_net);
#endif
ipmr_notifier_exit(net);
- ipmr_rules_exit(net);
+}
+
+static void __net_exit ipmr_net_exit_batch(struct list_head *net_list)
+{
+ struct net *net;
+
+ rtnl_lock();
+ list_for_each_entry(net, net_list, exit_list)
+ ipmr_rules_exit(net);
+ rtnl_unlock();
}
static struct pernet_operations ipmr_net_ops = {
.init = ipmr_net_init,
.exit = ipmr_net_exit,
+ .exit_batch = ipmr_net_exit_batch,
};
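ipmr now frees its tables from a .exit_batch handler so a single rtnl_lock()/rtnl_unlock() round covers every namespace being dismantled, instead of one lock cycle per netns. A sketch of the batched pernet pattern with hypothetical callbacks:

#include <linux/list.h>
#include <linux/rtnetlink.h>
#include <net/net_namespace.h>

static void __net_exit demo_net_exit_batch(struct list_head *net_list)
{
        struct net *net;

        rtnl_lock();
        list_for_each_entry(net, net_list, exit_list) {
                /* per-netns teardown that needs RTNL goes here */
        }
        rtnl_unlock();
}

static struct pernet_operations demo_net_ops = {
        .exit_batch = demo_net_exit_batch,
};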
int __init ip_mr_init(void)
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index aa8738a91210..271dc03fc6db 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -13,7 +13,7 @@ void vif_device_init(struct vif_device *v,
unsigned short flags,
unsigned short get_iflink_mask)
{
- v->dev = NULL;
+ RCU_INIT_POINTER(v->dev, NULL);
v->bytes_in = 0;
v->bytes_out = 0;
v->pkt_in = 0;
@@ -208,6 +208,7 @@ EXPORT_SYMBOL(mr_mfc_seq_next);
int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
struct mr_mfc *c, struct rtmsg *rtm)
{
+ struct net_device *vif_dev;
struct rta_mfc_stats mfcs;
struct nlattr *mp_attr;
struct rtnexthop *nhp;
@@ -220,10 +221,13 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
return -ENOENT;
}
- if (VIF_EXISTS(mrt, c->mfc_parent) &&
- nla_put_u32(skb, RTA_IIF,
- mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
+ rcu_read_lock();
+ vif_dev = rcu_dereference(mrt->vif_table[c->mfc_parent].dev);
+ if (vif_dev && nla_put_u32(skb, RTA_IIF, vif_dev->ifindex) < 0) {
+ rcu_read_unlock();
return -EMSGSIZE;
+ }
+ rcu_read_unlock();
if (c->mfc_flags & MFC_OFFLOAD)
rtm->rtm_flags |= RTNH_F_OFFLOAD;
@@ -232,23 +236,27 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
if (!mp_attr)
return -EMSGSIZE;
+ rcu_read_lock();
for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- struct vif_device *vif;
+ struct vif_device *vif = &mrt->vif_table[ct];
+
+ vif_dev = rcu_dereference(vif->dev);
+ if (vif_dev && c->mfc_un.res.ttls[ct] < 255) {
nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
if (!nhp) {
+ rcu_read_unlock();
nla_nest_cancel(skb, mp_attr);
return -EMSGSIZE;
}
nhp->rtnh_flags = 0;
nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- vif = &mrt->vif_table[ct];
- nhp->rtnh_ifindex = vif->dev->ifindex;
+ nhp->rtnh_ifindex = vif_dev->ifindex;
nhp->rtnh_len = sizeof(*nhp);
}
}
+ rcu_read_unlock();
nla_nest_end(skb, mp_attr);
@@ -275,13 +283,14 @@ static bool mr_mfc_uses_dev(const struct mr_table *mrt,
int ct;
for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- const struct vif_device *vif;
-
- vif = &mrt->vif_table[ct];
- if (vif->dev == dev)
- return true;
- }
+ const struct net_device *vif_dev;
+ const struct vif_device *vif;
+
+ vif = &mrt->vif_table[ct];
+ vif_dev = rcu_access_pointer(vif->dev);
+ if (vif_dev && c->mfc_un.res.ttls[ct] < 255 &&
+ vif_dev == dev)
+ return true;
}
return false;
}
@@ -390,7 +399,6 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
struct netlink_ext_ack *extack),
struct mr_table *(*mr_iter)(struct net *net,
struct mr_table *mrt),
- rwlock_t *mrt_lock,
struct netlink_ext_ack *extack)
{
struct mr_table *mrt;
@@ -402,22 +410,25 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
for (mrt = mr_iter(net, NULL); mrt; mrt = mr_iter(net, mrt)) {
struct vif_device *v = &mrt->vif_table[0];
+ struct net_device *vif_dev;
struct mr_mfc *mfc;
int vifi;
/* Notify on table VIF entries */
- read_lock(mrt_lock);
+ rcu_read_lock();
for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
- if (!v->dev)
+ vif_dev = rcu_dereference(v->dev);
+ if (!vif_dev)
continue;
err = mr_call_vif_notifier(nb, family,
- FIB_EVENT_VIF_ADD,
- v, vifi, mrt->id, extack);
+ FIB_EVENT_VIF_ADD, v,
+ vif_dev, vifi,
+ mrt->id, extack);
if (err)
break;
}
- read_unlock(mrt_lock);
+ rcu_read_unlock();
if (err)
return err;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index aff707988e23..bd135165482a 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -45,8 +45,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
fl4.saddr = saddr;
fl4.flowi4_tos = RT_TOS(iph->tos);
fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0;
- if (!fl4.flowi4_oif)
- fl4.flowi4_oif = l3mdev_master_ifindex(dev);
+ fl4.flowi4_l3mdev = l3mdev_master_ifindex(dev);
fl4.flowi4_mark = skb->mark;
fl4.flowi4_flags = flags;
fib4_rules_early_flow_dissect(net, skb, &fl4, &flkeys);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 67087f95579f..aab384126f61 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -58,10 +58,6 @@ config NF_TABLES_ARP
endif # NF_TABLES
-config NF_FLOW_TABLE_IPV4
- tristate
- select NF_FLOW_TABLE_INET
-
config NF_DUP_IPV4
tristate "Netfilter IPv4 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index b518f20c9a24..f8e176c77d1c 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -776,7 +776,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
if (!ret) {
struct seq_file *sf = file->private_data;
- struct clusterip_config *c = PDE_DATA(inode);
+ struct clusterip_config *c = pde_data(inode);
sf->private = c;
@@ -788,7 +788,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
static int clusterip_proc_release(struct inode *inode, struct file *file)
{
- struct clusterip_config *c = PDE_DATA(inode);
+ struct clusterip_config *c = pde_data(inode);
int ret;
ret = seq_release(inode, file);
@@ -802,7 +802,7 @@ static int clusterip_proc_release(struct inode *inode, struct file *file)
static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
size_t size, loff_t *ofs)
{
- struct clusterip_config *c = PDE_DATA(file_inode(file));
+ struct clusterip_config *c = pde_data(file_inode(file));
#define PROC_WRITELEN 10
char buffer[PROC_WRITELEN+1];
unsigned long nodenum;
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 8cd3224d913e..ded5bef02f77 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -33,7 +33,6 @@ static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4,
const struct net_device *dev, u8 flags)
{
struct fib_result res;
- int ret __maybe_unused;
if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE))
return false;
@@ -78,7 +77,8 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
flow.flowi4_tos = iph->tos & IPTOS_RT_MASK;
flow.flowi4_scope = RT_SCOPE_UNIVERSE;
- flow.flowi4_oif = l3mdev_master_ifindex_rcu(xt_in(par));
+ flow.flowi4_l3mdev = l3mdev_master_ifindex_rcu(xt_in(par));
+ flow.flowi4_uid = sock_net_uid(xt_net(par), NULL);
return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert;
}
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ /dev/null
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 3e2685c120c7..faee20af4856 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -291,20 +291,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
exp->expectfn = nf_nat_follow_master;
exp->dir = !dir;
- /* Try to get same port: if not, try to change it. */
- for (; nated_port != 0; nated_port++) {
- int ret;
-
- exp->tuple.dst.u.tcp.port = htons(nated_port);
- ret = nf_ct_expect_related(exp, 0);
- if (ret == 0)
- break;
- else if (ret != -EBUSY) {
- nated_port = 0;
- break;
- }
- }
-
+ nated_port = nf_nat_exp_find_port(exp, nated_port);
if (nated_port == 0) { /* No port available */
net_notice_ratelimited("nf_nat_h323: out of TCP ports\n");
return 0;
@@ -347,20 +334,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
if (info->sig_port[dir] == port)
nated_port = ntohs(info->sig_port[!dir]);
- /* Try to get same port: if not, try to change it. */
- for (; nated_port != 0; nated_port++) {
- int ret;
-
- exp->tuple.dst.u.tcp.port = htons(nated_port);
- ret = nf_ct_expect_related(exp, 0);
- if (ret == 0)
- break;
- else if (ret != -EBUSY) {
- nated_port = 0;
- break;
- }
- }
-
+ nated_port = nf_nat_exp_find_port(exp, nated_port);
if (nated_port == 0) { /* No port available */
net_notice_ratelimited("nf_nat_q931: out of TCP ports\n");
return 0;
@@ -439,20 +413,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
if (info->sig_port[dir] == port)
nated_port = ntohs(info->sig_port[!dir]);
- /* Try to get same port: if not, try to change it. */
- for (; nated_port != 0; nated_port++) {
- int ret;
-
- exp->tuple.dst.u.tcp.port = htons(nated_port);
- ret = nf_ct_expect_related(exp, 0);
- if (ret == 0)
- break;
- else if (ret != -EBUSY) {
- nated_port = 0;
- break;
- }
- }
-
+ nated_port = nf_nat_exp_find_port(exp, nated_port);
if (nated_port == 0) { /* No port available */
net_notice_ratelimited("nf_nat_ras: out of TCP ports\n");
return 0;
@@ -532,20 +493,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
exp->expectfn = ip_nat_callforwarding_expect;
exp->dir = !dir;
- /* Try to get same port: if not, try to change it. */
- for (nated_port = ntohs(port); nated_port != 0; nated_port++) {
- int ret;
-
- exp->tuple.dst.u.tcp.port = htons(nated_port);
- ret = nf_ct_expect_related(exp, 0);
- if (ret == 0)
- break;
- else if (ret != -EBUSY) {
- nated_port = 0;
- break;
- }
- }
-
+ nated_port = nf_nat_exp_find_port(exp, ntohs(port));
if (nated_port == 0) { /* No port available */
net_notice_ratelimited("nf_nat_q931: out of TCP ports\n");
return 0;
@@ -579,53 +527,39 @@ static struct nf_ct_helper_expectfn callforwarding_nat = {
.expectfn = ip_nat_callforwarding_expect,
};
+static const struct nfct_h323_nat_hooks nathooks = {
+ .set_h245_addr = set_h245_addr,
+ .set_h225_addr = set_h225_addr,
+ .set_sig_addr = set_sig_addr,
+ .set_ras_addr = set_ras_addr,
+ .nat_rtp_rtcp = nat_rtp_rtcp,
+ .nat_t120 = nat_t120,
+ .nat_h245 = nat_h245,
+ .nat_callforwarding = nat_callforwarding,
+ .nat_q931 = nat_q931,
+};
+
/****************************************************************************/
-static int __init init(void)
+static int __init nf_nat_h323_init(void)
{
- BUG_ON(set_h245_addr_hook != NULL);
- BUG_ON(set_h225_addr_hook != NULL);
- BUG_ON(set_sig_addr_hook != NULL);
- BUG_ON(set_ras_addr_hook != NULL);
- BUG_ON(nat_rtp_rtcp_hook != NULL);
- BUG_ON(nat_t120_hook != NULL);
- BUG_ON(nat_h245_hook != NULL);
- BUG_ON(nat_callforwarding_hook != NULL);
- BUG_ON(nat_q931_hook != NULL);
-
- RCU_INIT_POINTER(set_h245_addr_hook, set_h245_addr);
- RCU_INIT_POINTER(set_h225_addr_hook, set_h225_addr);
- RCU_INIT_POINTER(set_sig_addr_hook, set_sig_addr);
- RCU_INIT_POINTER(set_ras_addr_hook, set_ras_addr);
- RCU_INIT_POINTER(nat_rtp_rtcp_hook, nat_rtp_rtcp);
- RCU_INIT_POINTER(nat_t120_hook, nat_t120);
- RCU_INIT_POINTER(nat_h245_hook, nat_h245);
- RCU_INIT_POINTER(nat_callforwarding_hook, nat_callforwarding);
- RCU_INIT_POINTER(nat_q931_hook, nat_q931);
+ RCU_INIT_POINTER(nfct_h323_nat_hook, &nathooks);
nf_ct_helper_expectfn_register(&q931_nat);
nf_ct_helper_expectfn_register(&callforwarding_nat);
return 0;
}
/****************************************************************************/
-static void __exit fini(void)
+static void __exit nf_nat_h323_fini(void)
{
- RCU_INIT_POINTER(set_h245_addr_hook, NULL);
- RCU_INIT_POINTER(set_h225_addr_hook, NULL);
- RCU_INIT_POINTER(set_sig_addr_hook, NULL);
- RCU_INIT_POINTER(set_ras_addr_hook, NULL);
- RCU_INIT_POINTER(nat_rtp_rtcp_hook, NULL);
- RCU_INIT_POINTER(nat_t120_hook, NULL);
- RCU_INIT_POINTER(nat_h245_hook, NULL);
- RCU_INIT_POINTER(nat_callforwarding_hook, NULL);
- RCU_INIT_POINTER(nat_q931_hook, NULL);
+ RCU_INIT_POINTER(nfct_h323_nat_hook, NULL);
nf_ct_helper_expectfn_unregister(&q931_nat);
nf_ct_helper_expectfn_unregister(&callforwarding_nat);
synchronize_rcu();
}
/****************************************************************************/
-module_init(init);
-module_exit(fini);
+module_init(nf_nat_h323_init);
+module_exit(nf_nat_h323_fini);
MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
MODULE_DESCRIPTION("H.323 NAT helper");
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 3f248a19faa3..fab357cc8559 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -295,28 +295,24 @@ pptp_inbound_pkt(struct sk_buff *skb,
return NF_ACCEPT;
}
+static const struct nf_nat_pptp_hook pptp_hooks = {
+ .outbound = pptp_outbound_pkt,
+ .inbound = pptp_inbound_pkt,
+ .exp_gre = pptp_exp_gre,
+ .expectfn = pptp_nat_expected,
+};
+
static int __init nf_nat_helper_pptp_init(void)
{
- BUG_ON(nf_nat_pptp_hook_outbound != NULL);
- RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
-
- BUG_ON(nf_nat_pptp_hook_inbound != NULL);
- RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, pptp_inbound_pkt);
-
- BUG_ON(nf_nat_pptp_hook_exp_gre != NULL);
- RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, pptp_exp_gre);
+ WARN_ON(nf_nat_pptp_hook != NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook, &pptp_hooks);
- BUG_ON(nf_nat_pptp_hook_expectfn != NULL);
- RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, pptp_nat_expected);
return 0;
}
static void __exit nf_nat_helper_pptp_fini(void)
{
- RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, NULL);
- RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, NULL);
- RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, NULL);
- RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook, NULL);
synchronize_rcu();
}
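Both the H.323 and PPTP helpers replace a bundle of individual RCU hook pointers with one struct of function pointers, so registration becomes a single store and BUG_ON() gives way to WARN_ON(). A sketch of the consolidated-hook pattern with hypothetical ops:

#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/skbuff.h>

struct demo_nat_hooks {
        int (*inbound)(struct sk_buff *skb);
        int (*outbound)(struct sk_buff *skb);
};

static const struct demo_nat_hooks __rcu *demo_nat_hook __read_mostly;

static int demo_in(struct sk_buff *skb) { return 0; }
static int demo_out(struct sk_buff *skb) { return 0; }

static const struct demo_nat_hooks demo_ops = {
        .inbound  = demo_in,
        .outbound = demo_out,
};

static int __init demo_init(void)
{
        WARN_ON(rcu_access_pointer(demo_nat_hook));
        RCU_INIT_POINTER(demo_nat_hook, &demo_ops);
        return 0;
}

static void __exit demo_fini(void)
{
        RCU_INIT_POINTER(demo_nat_hook, NULL);
        synchronize_rcu();      /* readers done before module unload */
}

module_init(demo_init);
module_exit(demo_fini);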
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 4eed5afca392..d640adcaf1b1 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -62,7 +62,7 @@ struct sk_buff *nf_reject_skb_v4_tcp_reset(struct net *net,
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
- net->ipv4.sysctl_ip_default_ttl);
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
@@ -80,6 +80,7 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
struct iphdr *niph;
struct icmphdr *icmph;
unsigned int len;
+ int dataoff;
__wsum csum;
u8 proto;
@@ -99,10 +100,11 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
if (pskb_trim_rcsum(oldskb, ntohs(ip_hdr(oldskb)->tot_len)))
return NULL;
+ dataoff = ip_hdrlen(oldskb);
proto = ip_hdr(oldskb)->protocol;
if (!skb_csum_unnecessary(oldskb) &&
- nf_reject_verify_csum(proto) &&
+ nf_reject_verify_csum(oldskb, dataoff, proto) &&
nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto))
return NULL;
@@ -115,7 +117,7 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP,
- net->ipv4.sysctl_ip_default_ttl);
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
skb_reset_transport_header(nskb);
icmph = skb_put_zero(nskb, sizeof(struct icmphdr));
@@ -311,6 +313,7 @@ EXPORT_SYMBOL_GPL(nf_send_reset);
void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
{
struct iphdr *iph = ip_hdr(skb_in);
+ int dataoff = ip_hdrlen(skb_in);
u8 proto = iph->protocol;
if (iph->frag_off & htons(IP_OFFSET))
@@ -320,12 +323,13 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
nf_reject_fill_skb_dst(skb_in) < 0)
return;
- if (skb_csum_unnecessary(skb_in) || !nf_reject_verify_csum(proto)) {
+ if (skb_csum_unnecessary(skb_in) ||
+ !nf_reject_verify_csum(skb_in, dataoff, proto)) {
icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
return;
}
- if (nf_ip_checksum(skb_in, hook, ip_hdrlen(skb_in), proto) == 0)
+ if (nf_ip_checksum(skb_in, hook, dataoff, proto) == 0)
icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
}
EXPORT_SYMBOL_GPL(nf_send_unreach);
diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c
index 2d42e4c35a20..a1350fc25838 100644
--- a/net/ipv4/netfilter/nf_socket_ipv4.c
+++ b/net/ipv4/netfilter/nf_socket_ipv4.c
@@ -71,8 +71,8 @@ nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
{
switch (protocol) {
case IPPROTO_TCP:
- return inet_lookup(net, &tcp_hashinfo, skb, doff,
- saddr, sport, daddr, dport,
+ return inet_lookup(net, net->ipv4.tcp_death_row.hashinfo,
+ skb, doff, saddr, sport, daddr, dport,
in->ifindex);
case IPPROTO_UDP:
return udp4_lib_lookup(net, saddr, sport, daddr, dport,
diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c
index b2bae0b0e42a..b22b2c745c76 100644
--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c
+++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c
@@ -79,6 +79,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
+ struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo;
struct sock *sk;
switch (protocol) {
@@ -92,12 +93,10 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb,
switch (lookup_type) {
case NF_TPROXY_LOOKUP_LISTENER:
- sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
- ip_hdrlen(skb) +
- __tcp_hdrlen(hp),
- saddr, sport,
- daddr, dport,
- in->ifindex, 0);
+ sk = inet_lookup_listener(net, hinfo, skb,
+ ip_hdrlen(skb) + __tcp_hdrlen(hp),
+ saddr, sport, daddr, dport,
+ in->ifindex, 0);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
@@ -108,9 +107,8 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb,
*/
break;
case NF_TPROXY_LOOKUP_ESTABLISHED:
- sk = inet_lookup_established(net, &tcp_hashinfo,
- saddr, sport, daddr, dport,
- in->ifindex);
+ sk = inet_lookup_established(net, hinfo, saddr, sport,
+ daddr, dport, in->ifindex);
break;
default:
BUG();
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
index aeb631760eb9..0bcd6aee6000 100644
--- a/net/ipv4/netfilter/nft_dup_ipv4.c
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -75,6 +75,7 @@ static const struct nft_expr_ops nft_dup_ipv4_ops = {
.eval = nft_dup_ipv4_eval,
.init = nft_dup_ipv4_init,
.dump = nft_dup_ipv4_dump,
+ .reduce = NFT_REDUCE_READONLY,
};
static const struct nla_policy nft_dup_ipv4_policy[NFTA_DUP_MAX + 1] = {
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 03df986217b7..fc65d69f23e1 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -65,6 +65,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
struct flowi4 fl4 = {
.flowi4_scope = RT_SCOPE_UNIVERSE,
.flowi4_iif = LOOPBACK_IFINDEX,
+ .flowi4_uid = sock_net_uid(nft_net(pkt), NULL),
};
const struct net_device *oif;
const struct net_device *found;
@@ -83,6 +84,9 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
else
oif = NULL;
+ if (priv->flags & NFTA_FIB_F_IIF)
+ fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(oif);
+
if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
nft_fib_store_result(dest, priv, nft_in(pkt));
@@ -112,6 +116,10 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
fl4.daddr = iph->daddr;
fl4.saddr = get_saddr(iph->saddr);
} else {
+ if (nft_hook(pkt) == NF_INET_FORWARD &&
+ priv->flags & NFTA_FIB_F_IIF)
+ fl4.flowi4_iif = nft_out(pkt)->ifindex;
+
fl4.daddr = iph->saddr;
fl4.saddr = get_saddr(iph->daddr);
}
@@ -152,6 +160,7 @@ static const struct nft_expr_ops nft_fib4_type_ops = {
.init = nft_fib_init,
.dump = nft_fib_dump,
.validate = nft_fib_validate,
+ .reduce = nft_fib_reduce,
};
static const struct nft_expr_ops nft_fib4_ops = {
@@ -161,6 +170,7 @@ static const struct nft_expr_ops nft_fib4_ops = {
.init = nft_fib_init,
.dump = nft_fib_dump,
.validate = nft_fib_validate,
+ .reduce = nft_fib_reduce,
};
static const struct nft_expr_ops *
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index 55fc23a8f7a7..6cb213bb7256 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -45,6 +45,7 @@ static const struct nft_expr_ops nft_reject_ipv4_ops = {
.init = nft_reject_init,
.dump = nft_reject_dump,
.validate = nft_reject_validate,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_reject_ipv4_type __read_mostly = {
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index eeafeccebb8d..d8ef05347fd9 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1858,7 +1858,7 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
/* __ip6_del_rt does a release, so do a hold here */
fib6_info_hold(f6i);
ipv6_stub->ip6_del_rt(net, f6i,
- !net->ipv4.sysctl_nexthop_compat_mode);
+ !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode));
}
}
@@ -2361,7 +2361,8 @@ out:
if (!rc) {
nh_base_seq_inc(net);
nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
- if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode)
+ if (replace_notify &&
+ READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode))
nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
}
@@ -2533,7 +2534,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
if (!err) {
nh->nh_flags = fib_nh->fib_nh_flags;
fib_info_update_nhc_saddr(net, &fib_nh->nh_common,
- fib_nh->fib_nh_scope);
+ !fib_nh->fib_nh_scope ? 0 : fib_nh->fib_nh_scope - 1);
} else {
fib_nh_release(net, fib_nh);
}
@@ -3733,12 +3734,16 @@ out:
}
EXPORT_SYMBOL(nexthop_res_grp_activity_update);
-static void __net_exit nexthop_net_exit(struct net *net)
+static void __net_exit nexthop_net_exit_batch(struct list_head *net_list)
{
+ struct net *net;
+
rtnl_lock();
- flush_all_nexthops(net);
+ list_for_each_entry(net, net_list, exit_list) {
+ flush_all_nexthops(net);
+ kfree(net->nexthop.devhash);
+ }
rtnl_unlock();
- kfree(net->nexthop.devhash);
}
static int __net_init nexthop_net_init(struct net *net)
@@ -3756,7 +3761,7 @@ static int __net_init nexthop_net_init(struct net *net)
static struct pernet_operations nexthop_net_ops = {
.init = nexthop_net_init,
- .exit = nexthop_net_exit,
+ .exit_batch = nexthop_net_exit_batch,
};
static int __init nexthop_init(void)
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 0e56df3a45e2..bde333b24837 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -33,6 +33,7 @@
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/export.h>
+#include <linux/bpf-cgroup.h>
#include <net/sock.h>
#include <net/ping.h>
#include <net/udp.h>
@@ -50,7 +51,7 @@
struct ping_table {
struct hlist_nulls_head hash[PING_HTABLE_SIZE];
- rwlock_t lock;
+ spinlock_t lock;
};
static struct ping_table ping_table;
@@ -82,7 +83,7 @@ int ping_get_port(struct sock *sk, unsigned short ident)
struct sock *sk2 = NULL;
isk = inet_sk(sk);
- write_lock_bh(&ping_table.lock);
+ spin_lock(&ping_table.lock);
if (ident == 0) {
u32 i;
u16 result = ping_port_rover + 1;
@@ -128,14 +129,15 @@ next_port:
if (sk_unhashed(sk)) {
pr_debug("was not hashed\n");
sock_hold(sk);
- hlist_nulls_add_head(&sk->sk_nulls_node, hlist);
+ sock_set_flag(sk, SOCK_RCU_FREE);
+ hlist_nulls_add_head_rcu(&sk->sk_nulls_node, hlist);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
}
- write_unlock_bh(&ping_table.lock);
+ spin_unlock(&ping_table.lock);
return 0;
fail:
- write_unlock_bh(&ping_table.lock);
+ spin_unlock(&ping_table.lock);
return 1;
}
EXPORT_SYMBOL_GPL(ping_get_port);
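ping_get_port() converts the table to a writer-side spinlock plus RCU readers: insertions use the nulls-aware list helper, and the socket is flagged SOCK_RCU_FREE so its memory is reclaimed only after a grace period. A writer-side sketch with a hypothetical hash:

#include <linux/rculist_nulls.h>
#include <linux/spinlock.h>
#include <net/sock.h>

static DEFINE_SPINLOCK(demo_hash_lock);

/* Writer: plain spinlock among writers, RCU-safe publish for readers. */
static void demo_hash_add(struct sock *sk, struct hlist_nulls_head *head)
{
        spin_lock(&demo_hash_lock);
        sock_set_flag(sk, SOCK_RCU_FREE);       /* free via RCU */
        hlist_nulls_add_head_rcu(&sk->sk_nulls_node, head);
        spin_unlock(&demo_hash_lock);
}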
@@ -153,39 +155,43 @@ void ping_unhash(struct sock *sk)
struct inet_sock *isk = inet_sk(sk);
pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
- write_lock_bh(&ping_table.lock);
+ spin_lock(&ping_table.lock);
if (sk_hashed(sk)) {
- hlist_nulls_del(&sk->sk_nulls_node);
- sk_nulls_node_init(&sk->sk_nulls_node);
+ hlist_nulls_del_init_rcu(&sk->sk_nulls_node);
sock_put(sk);
isk->inet_num = 0;
isk->inet_sport = 0;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
}
- write_unlock_bh(&ping_table.lock);
+ spin_unlock(&ping_table.lock);
}
EXPORT_SYMBOL_GPL(ping_unhash);
+/* Called under rcu_read_lock() */
static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
{
struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident);
struct sock *sk = NULL;
struct inet_sock *isk;
struct hlist_nulls_node *hnode;
- int dif = skb->dev->ifindex;
+ int dif, sdif;
if (skb->protocol == htons(ETH_P_IP)) {
+ dif = inet_iif(skb);
+ sdif = inet_sdif(skb);
pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n",
(int)ident, &ip_hdr(skb)->daddr, dif);
#if IS_ENABLED(CONFIG_IPV6)
} else if (skb->protocol == htons(ETH_P_IPV6)) {
+ dif = inet6_iif(skb);
+ sdif = inet6_sdif(skb);
pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n",
(int)ident, &ipv6_hdr(skb)->daddr, dif);
#endif
+ } else {
+ return NULL;
}
- read_lock_bh(&ping_table.lock);
-
ping_portaddr_for_each_entry(sk, hnode, hslot) {
isk = inet_sk(sk);
@@ -220,16 +226,15 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
continue;
}
- if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
+ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
+ sk->sk_bound_dev_if != sdif)
continue;
- sock_hold(sk);
goto exit;
}
sk = NULL;
exit:
- read_unlock_bh(&ping_table.lock);
return sk;
}
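The matching reader then walks the nulls list under rcu_read_lock() alone. A reader-side sketch with a hypothetical match predicate (the restart-on-wrong-nulls check that a full lookup would do after the loop is omitted here):

/* Caller holds rcu_read_lock(); match() is hypothetical. */
static struct sock *demo_lookup(struct hlist_nulls_head *head,
                                bool (*match)(const struct sock *sk))
{
        struct hlist_nulls_node *hnode;
        struct sock *sk;

        sk_nulls_for_each_rcu(sk, hnode, head) {
                if (match(sk))
                        return sk;
        }
        return NULL;
}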
@@ -291,6 +296,19 @@ void ping_close(struct sock *sk, long timeout)
}
EXPORT_SYMBOL_GPL(ping_close);
+static int ping_pre_connect(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len)
+{
+ /* This check is replicated from __ip4_datagram_connect() and
+ * intended to prevent BPF program called below from accessing bytes
+ * that are out of the bound specified by user in addr_len.
+ */
+ if (addr_len < sizeof(struct sockaddr_in))
+ return -EINVAL;
+
+ return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
+}
+
/* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */
static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
struct sockaddr *uaddr, int addr_len)
@@ -298,6 +316,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
struct net *net = sock_net(sk);
if (sk->sk_family == AF_INET) {
struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
+ u32 tb_id = RT_TABLE_LOCAL;
int chk_addr_ret;
if (addr_len < sizeof(*addr))
@@ -311,11 +330,16 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
- chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
+ if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
+ return 0;
- if (!inet_addr_valid_or_nonlocal(net, inet_sk(sk),
- addr->sin_addr.s_addr,
- chk_addr_ret))
+ tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
+ chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
+
+ if (chk_addr_ret == RTN_MULTICAST ||
+ chk_addr_ret == RTN_BROADCAST ||
+ (chk_addr_ret != RTN_LOCAL &&
+ !inet_can_nonlocal_bind(net, isk)))
return -EADDRNOTAVAIL;
#if IS_ENABLED(CONFIG_IPV6)
@@ -348,6 +372,14 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
return -ENODEV;
}
}
+
+ if (!dev && sk->sk_bound_dev_if) {
+ dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+ if (!dev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+ }
has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev,
scoped);
rcu_read_unlock();
@@ -571,7 +603,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info)
sk->sk_err = err;
sk_error_report(sk);
out:
- sock_put(sk);
+ return;
}
EXPORT_SYMBOL_GPL(ping_err);
@@ -583,23 +615,11 @@ EXPORT_SYMBOL_GPL(ping_err);
int ping_getfrag(void *from, char *to,
int offset, int fraglen, int odd, struct sk_buff *skb)
{
- struct pingfakehdr *pfh = (struct pingfakehdr *)from;
-
- if (offset == 0) {
- fraglen -= sizeof(struct icmphdr);
- if (fraglen < 0)
- BUG();
- if (!csum_and_copy_from_iter_full(to + sizeof(struct icmphdr),
- fraglen, &pfh->wcheck,
- &pfh->msg->msg_iter))
- return -EFAULT;
- } else if (offset < sizeof(struct icmphdr)) {
- BUG();
- } else {
- if (!csum_and_copy_from_iter_full(to, fraglen, &pfh->wcheck,
- &pfh->msg->msg_iter))
- return -EFAULT;
- }
+ struct pingfakehdr *pfh = from;
+
+ if (!csum_and_copy_from_iter_full(to, fraglen, &pfh->wcheck,
+ &pfh->msg->msg_iter))
+ return -EFAULT;
#if IS_ENABLED(CONFIG_IPV6)
/* For IPv6, checksum each skb as we go along, as expected by
@@ -607,7 +627,7 @@ int ping_getfrag(void *from, char *to,
* wcheck, it will be finalized in ping_v4_push_pending_frames.
*/
if (pfh->family == AF_INET6) {
- skb->csum = pfh->wcheck;
+ skb->csum = csum_block_add(skb->csum, pfh->wcheck, odd);
skb->ip_summed = CHECKSUM_NONE;
pfh->wcheck = 0;
}
@@ -810,7 +830,8 @@ back_from_confirm:
pfh.family = AF_INET;
err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len,
- 0, &ipc, &rt, msg->msg_flags);
+ sizeof(struct icmphdr), &ipc, &rt,
+ msg->msg_flags);
if (err)
ip_flush_pending_frames(sk);
else
@@ -837,8 +858,8 @@ do_confirm:
goto out;
}
-int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len)
+int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len)
{
struct inet_sock *isk = inet_sk(sk);
int family = sk->sk_family;
@@ -854,7 +875,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
if (flags & MSG_ERRQUEUE)
return inet_recv_error(sk, msg, len, addr_len);
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
@@ -927,16 +948,24 @@ out:
}
EXPORT_SYMBOL_GPL(ping_recvmsg);
-int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static enum skb_drop_reason __ping_queue_rcv_skb(struct sock *sk,
+ struct sk_buff *skb)
{
+ enum skb_drop_reason reason;
+
pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n",
inet_sk(sk), inet_sk(sk)->inet_num, skb);
- if (sock_queue_rcv_skb(sk, skb) < 0) {
- kfree_skb(skb);
+ if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
+ kfree_skb_reason(skb, reason);
pr_debug("ping_queue_rcv_skb -> failed\n");
- return -1;
+ return reason;
}
- return 0;
+ return SKB_NOT_DROPPED_YET;
+}
+
+int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+ return __ping_queue_rcv_skb(sk, skb) ? -1 : 0;
}
EXPORT_SYMBOL_GPL(ping_queue_rcv_skb);
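ping_rcv() and the queueing path now return enum skb_drop_reason; because SKB_NOT_DROPPED_YET is zero, the enum still tests cleanly as a boolean. A sketch of the convention:

#include <linux/skbuff.h>
#include <net/sock.h>

/* Hypothetical wrapper showing the drop-reason plumbing. */
static enum skb_drop_reason demo_queue(struct sock *sk, struct sk_buff *skb)
{
        enum skb_drop_reason reason;

        if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
                kfree_skb_reason(skb, reason);  /* reason visible to tracing */
                return reason;
        }
        return SKB_NOT_DROPPED_YET;
}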
@@ -945,12 +974,12 @@ EXPORT_SYMBOL_GPL(ping_queue_rcv_skb);
* All we need to do is get the socket.
*/
-bool ping_rcv(struct sk_buff *skb)
+enum skb_drop_reason ping_rcv(struct sk_buff *skb)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NO_SOCKET;
struct sock *sk;
struct net *net = dev_net(skb->dev);
struct icmphdr *icmph = icmp_hdr(skb);
- bool rc = false;
/* We assume the packet has already been checked by icmp_rcv */
@@ -965,15 +994,16 @@ bool ping_rcv(struct sk_buff *skb)
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
pr_debug("rcv on socket %p\n", sk);
- if (skb2 && !ping_queue_rcv_skb(sk, skb2))
- rc = true;
- sock_put(sk);
+ if (skb2)
+ reason = __ping_queue_rcv_skb(sk, skb2);
+ else
+ reason = SKB_DROP_REASON_NOMEM;
}
- if (!rc)
+ if (reason)
pr_debug("no socket, dropping\n");
- return rc;
+ return reason;
}
EXPORT_SYMBOL_GPL(ping_rcv);
@@ -982,6 +1012,7 @@ struct proto ping_prot = {
.owner = THIS_MODULE,
.init = ping_init_sock,
.close = ping_close,
+ .pre_connect = ping_pre_connect,
.connect = ip4_datagram_connect,
.disconnect = __udp_disconnect,
.setsockopt = ip_setsockopt,
@@ -1053,13 +1084,13 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos)
}
void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family)
- __acquires(ping_table.lock)
+ __acquires(RCU)
{
struct ping_iter_state *state = seq->private;
state->bucket = 0;
state->family = family;
- read_lock_bh(&ping_table.lock);
+ rcu_read_lock();
return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
}
@@ -1085,9 +1116,9 @@ void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
EXPORT_SYMBOL_GPL(ping_seq_next);
void ping_seq_stop(struct seq_file *seq, void *v)
- __releases(ping_table.lock)
+ __releases(RCU)
{
- read_unlock_bh(&ping_table.lock);
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ping_seq_stop);
@@ -1171,5 +1202,5 @@ void __init ping_init(void)
for (i = 0; i < PING_HTABLE_SIZE; i++)
INIT_HLIST_NULLS_HEAD(&ping_table.hash[i], i);
- rwlock_init(&ping_table.lock);
+ spin_lock_init(&ping_table.lock);
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index f30273afb539..5386f460bd20 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -59,8 +59,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
socket_seq_show(seq);
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
sock_prot_inuse_get(net, &tcp_prot), orphans,
- atomic_read(&net->ipv4.tcp_death_row.tw_count), sockets,
- proto_memory_allocated(&tcp_prot));
+ refcount_read(&net->ipv4.tcp_death_row.tw_refcount) - 1,
+ sockets, proto_memory_allocated(&tcp_prot));
seq_printf(seq, "UDP: inuse %d mem %ld\n",
sock_prot_inuse_get(net, &udp_prot),
proto_memory_allocated(&udp_prot));
@@ -387,7 +387,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)
seq_printf(seq, "\nIp: %d %d",
IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
- net->ipv4.sysctl_ip_default_ttl);
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list,
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index a53f256bf9d3..006c1f0ed8b4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -85,21 +85,20 @@ struct raw_frag_vec {
int hlen;
};
-struct raw_hashinfo raw_v4_hashinfo = {
- .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
-};
+struct raw_hashinfo raw_v4_hashinfo;
EXPORT_SYMBOL_GPL(raw_v4_hashinfo);
int raw_hash_sk(struct sock *sk)
{
struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
- struct hlist_head *head;
+ struct hlist_nulls_head *hlist;
- head = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)];
+ hlist = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)];
- write_lock_bh(&h->lock);
- sk_add_node(sk, head);
- write_unlock_bh(&h->lock);
+ spin_lock(&h->lock);
+ __sk_nulls_add_node_rcu(sk, hlist);
+ sock_set_flag(sk, SOCK_RCU_FREE);
+ spin_unlock(&h->lock);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
return 0;
@@ -110,31 +109,26 @@ void raw_unhash_sk(struct sock *sk)
{
struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
- write_lock_bh(&h->lock);
- if (sk_del_node_init(sk))
+ spin_lock(&h->lock);
+ if (__sk_nulls_del_node_init_rcu(sk))
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
- write_unlock_bh(&h->lock);
+ spin_unlock(&h->lock);
}
EXPORT_SYMBOL_GPL(raw_unhash_sk);
-struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
- unsigned short num, __be32 raddr, __be32 laddr,
- int dif, int sdif)
+bool raw_v4_match(struct net *net, struct sock *sk, unsigned short num,
+ __be32 raddr, __be32 laddr, int dif, int sdif)
{
- sk_for_each_from(sk) {
- struct inet_sock *inet = inet_sk(sk);
-
- if (net_eq(sock_net(sk), net) && inet->inet_num == num &&
- !(inet->inet_daddr && inet->inet_daddr != raddr) &&
- !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
- raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
- goto found; /* gotcha */
- }
- sk = NULL;
-found:
- return sk;
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (net_eq(sock_net(sk), net) && inet->inet_num == num &&
+ !(inet->inet_daddr && inet->inet_daddr != raddr) &&
+ !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
+ raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
+ return true;
+ return false;
}
-EXPORT_SYMBOL_GPL(__raw_v4_lookup);
+EXPORT_SYMBOL_GPL(raw_v4_match);
/*
* 0 - deliver
@@ -168,23 +162,20 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
*/
static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
{
+ struct net *net = dev_net(skb->dev);
+ struct hlist_nulls_head *hlist;
+ struct hlist_nulls_node *hnode;
int sdif = inet_sdif(skb);
int dif = inet_iif(skb);
- struct sock *sk;
- struct hlist_head *head;
int delivered = 0;
- struct net *net;
-
- read_lock(&raw_v4_hashinfo.lock);
- head = &raw_v4_hashinfo.ht[hash];
- if (hlist_empty(head))
- goto out;
-
- net = dev_net(skb->dev);
- sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
- iph->saddr, iph->daddr, dif, sdif);
+ struct sock *sk;
- while (sk) {
+ hlist = &raw_v4_hashinfo.ht[hash];
+ rcu_read_lock();
+ sk_nulls_for_each(sk, hnode, hlist) {
+ if (!raw_v4_match(net, sk, iph->protocol,
+ iph->saddr, iph->daddr, dif, sdif))
+ continue;
delivered = 1;
if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) &&
ip_mc_sf_allow(sk, iph->daddr, iph->saddr,
@@ -195,31 +186,16 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
if (clone)
raw_rcv(sk, clone);
}
- sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
- iph->saddr, iph->daddr,
- dif, sdif);
}
-out:
- read_unlock(&raw_v4_hashinfo.lock);
+ rcu_read_unlock();
return delivered;
}
int raw_local_deliver(struct sk_buff *skb, int protocol)
{
- int hash;
- struct sock *raw_sk;
-
- hash = protocol & (RAW_HTABLE_SIZE - 1);
- raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
-
- /* If there maybe a raw socket we must check - if not we
- * don't care less
- */
- if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
- raw_sk = NULL;
-
- return raw_sk != NULL;
+ int hash = protocol & (RAW_HTABLE_SIZE - 1);
+ return raw_v4_input(skb, ip_hdr(skb), hash);
}
static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
@@ -286,31 +262,27 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
{
- int hash;
- struct sock *raw_sk;
+ struct net *net = dev_net(skb->dev);
+ struct hlist_nulls_head *hlist;
+ struct hlist_nulls_node *hnode;
+ int dif = skb->dev->ifindex;
+ int sdif = inet_sdif(skb);
const struct iphdr *iph;
- struct net *net;
+ struct sock *sk;
+ int hash;
hash = protocol & (RAW_HTABLE_SIZE - 1);
+ hlist = &raw_v4_hashinfo.ht[hash];
- read_lock(&raw_v4_hashinfo.lock);
- raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
- if (raw_sk) {
- int dif = skb->dev->ifindex;
- int sdif = inet_sdif(skb);
-
+ rcu_read_lock();
+ sk_nulls_for_each(sk, hnode, hlist) {
iph = (const struct iphdr *)skb->data;
- net = dev_net(skb->dev);
-
- while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol,
- iph->daddr, iph->saddr,
- dif, sdif)) != NULL) {
- raw_err(raw_sk, skb, info);
- raw_sk = sk_next(raw_sk);
- iph = (const struct iphdr *)skb->data;
- }
+ if (!raw_v4_match(net, sk, iph->protocol,
+ iph->daddr, iph->saddr, dif, sdif))
+ continue;
+ raw_err(sk, skb, info);
}
- read_unlock(&raw_v4_hashinfo.lock);
+ rcu_read_unlock();
}
static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
@@ -722,6 +694,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
int ret = -EINVAL;
int chk_addr_ret;
+ lock_sock(sk);
if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
goto out;
@@ -741,7 +714,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_saddr = 0; /* Use device */
sk_dst_reset(sk);
ret = 0;
-out: return ret;
+out:
+ release_sock(sk);
+ return ret;
}
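raw_bind() gains lock_sock()/release_sock() so the TCP_CLOSE check and the address stores cannot race with a concurrent connect() or another bind(). The shape of that guard, sketched with a hypothetical bind body:

#include <linux/in.h>
#include <net/sock.h>
#include <net/tcp_states.h>

static int demo_bind(struct sock *sk, int addr_len)
{
        int ret = -EINVAL;

        lock_sock(sk);
        if (sk->sk_state != TCP_CLOSE ||
            addr_len < (int)sizeof(struct sockaddr_in))
                goto out;
        /* ... validate and store the address here ... */
        ret = 0;
out:
        release_sock(sk);
        return ret;
}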
/*
@@ -750,7 +725,7 @@ out: return ret;
*/
static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
size_t copied = 0;
@@ -766,7 +741,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
goto out;
}
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
@@ -780,7 +755,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (err)
goto done;
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
/* Copy the address. */
if (sin) {
@@ -968,44 +943,41 @@ struct proto raw_prot = {
};
#ifdef CONFIG_PROC_FS
-static struct sock *raw_get_first(struct seq_file *seq)
+static struct sock *raw_get_first(struct seq_file *seq, int bucket)
{
- struct sock *sk;
- struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
+ struct raw_hashinfo *h = pde_data(file_inode(seq->file));
struct raw_iter_state *state = raw_seq_private(seq);
+ struct hlist_nulls_head *hlist;
+ struct hlist_nulls_node *hnode;
+ struct sock *sk;
- for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;
+ for (state->bucket = bucket; state->bucket < RAW_HTABLE_SIZE;
++state->bucket) {
- sk_for_each(sk, &h->ht[state->bucket])
+ hlist = &h->ht[state->bucket];
+ sk_nulls_for_each(sk, hnode, hlist) {
if (sock_net(sk) == seq_file_net(seq))
- goto found;
+ return sk;
+ }
}
- sk = NULL;
-found:
- return sk;
+ return NULL;
}
static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
{
- struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
struct raw_iter_state *state = raw_seq_private(seq);
do {
- sk = sk_next(sk);
-try_again:
- ;
+ sk = sk_nulls_next(sk);
} while (sk && sock_net(sk) != seq_file_net(seq));
- if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
- sk = sk_head(&h->ht[state->bucket]);
- goto try_again;
- }
+ if (!sk)
+ return raw_get_first(seq, state->bucket + 1);
return sk;
}
static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos)
{
- struct sock *sk = raw_get_first(seq);
+ struct sock *sk = raw_get_first(seq, 0);
if (sk)
while (pos && (sk = raw_get_next(seq, sk)) != NULL)
@@ -1014,11 +986,9 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos)
}
void *raw_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(&h->lock)
+ __acquires(RCU)
{
- struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
-
- read_lock(&h->lock);
+ rcu_read_lock();
return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL_GPL(raw_seq_start);
@@ -1028,7 +998,7 @@ void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos)
struct sock *sk;
if (v == SEQ_START_TOKEN)
- sk = raw_get_first(seq);
+ sk = raw_get_first(seq, 0);
else
sk = raw_get_next(seq, v);
++*pos;
@@ -1037,11 +1007,9 @@ void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos)
EXPORT_SYMBOL_GPL(raw_seq_next);
void raw_seq_stop(struct seq_file *seq, void *v)
- __releases(&h->lock)
+ __releases(RCU)
{
- struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
-
- read_unlock(&h->lock);
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(raw_seq_stop);
@@ -1103,6 +1071,7 @@ static __net_initdata struct pernet_operations raw_net_ops = {
int __init raw_proc_init(void)
{
+
return register_pernet_subsys(&raw_net_ops);
}
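raw_get_first() now takes a starting bucket so raw_get_next() can fall through to the next bucket without the old goto-based retry. A sketch of that resume-from-bucket iteration over a plain hash of singly linked lists (all types here are stand-ins, not the kernel's nulls lists):

```c
#include <stddef.h>
#include <stdio.h>

#define HTABLE_SIZE 4

struct node { int id; struct node *next; };

struct iter_state { int bucket; };

/* Start (or restart) the walk from a given bucket, mirroring the new
 * raw_get_first(seq, bucket) signature. */
static struct node *table_get_first(struct node *ht[HTABLE_SIZE],
				    struct iter_state *st, int bucket)
{
	for (st->bucket = bucket; st->bucket < HTABLE_SIZE; st->bucket++) {
		if (ht[st->bucket])
			return ht[st->bucket];
	}
	return NULL;
}

/* Advance within a bucket; on exhaustion, resume from the next bucket. */
static struct node *table_get_next(struct node *ht[HTABLE_SIZE],
				   struct iter_state *st, struct node *n)
{
	n = n->next;
	if (!n)
		return table_get_first(ht, st, st->bucket + 1);
	return n;
}

int main(void)
{
	struct node c = { 3, NULL }, b = { 2, NULL }, a = { 1, &b };
	struct node *ht[HTABLE_SIZE] = { &a, NULL, &c, NULL };
	struct iter_state st;

	for (struct node *n = table_get_first(ht, &st, 0); n;
	     n = table_get_next(ht, &st, n))
		printf("%d\n", n->id);	/* prints 1 2 3 */
	return 0;
}
```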
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index ccacbde30a2c..999321834b94 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -34,57 +34,57 @@ raw_get_hashinfo(const struct inet_diag_req_v2 *r)
* use helper to figure it out.
*/
-static struct sock *raw_lookup(struct net *net, struct sock *from,
- const struct inet_diag_req_v2 *req)
+static bool raw_lookup(struct net *net, struct sock *sk,
+ const struct inet_diag_req_v2 *req)
{
struct inet_diag_req_raw *r = (void *)req;
- struct sock *sk = NULL;
if (r->sdiag_family == AF_INET)
- sk = __raw_v4_lookup(net, from, r->sdiag_raw_protocol,
- r->id.idiag_dst[0],
- r->id.idiag_src[0],
- r->id.idiag_if, 0);
+ return raw_v4_match(net, sk, r->sdiag_raw_protocol,
+ r->id.idiag_dst[0],
+ r->id.idiag_src[0],
+ r->id.idiag_if, 0);
#if IS_ENABLED(CONFIG_IPV6)
else
- sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol,
- (const struct in6_addr *)r->id.idiag_src,
- (const struct in6_addr *)r->id.idiag_dst,
- r->id.idiag_if, 0);
+ return raw_v6_match(net, sk, r->sdiag_raw_protocol,
+ (const struct in6_addr *)r->id.idiag_src,
+ (const struct in6_addr *)r->id.idiag_dst,
+ r->id.idiag_if, 0);
#endif
- return sk;
+ return false;
}
static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r)
{
struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
- struct sock *sk = NULL, *s;
+ struct hlist_nulls_head *hlist;
+ struct hlist_nulls_node *hnode;
+ struct sock *sk;
int slot;
if (IS_ERR(hashinfo))
return ERR_CAST(hashinfo);
- read_lock(&hashinfo->lock);
+ rcu_read_lock();
for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) {
- sk_for_each(s, &hashinfo->ht[slot]) {
- sk = raw_lookup(net, s, r);
- if (sk) {
+ hlist = &hashinfo->ht[slot];
+ sk_nulls_for_each(sk, hnode, hlist) {
+ if (raw_lookup(net, sk, r)) {
/*
* Grab it and keep until we fill
- * diag meaage to be reported, so
+ * diag message to be reported, so
* caller should call sock_put then.
- * We can do that because we're keeping
- * hashinfo->lock here.
*/
- sock_hold(sk);
- goto out_unlock;
+ if (refcount_inc_not_zero(&sk->sk_refcnt))
+ goto out_unlock;
}
}
}
+ sk = ERR_PTR(-ENOENT);
out_unlock:
- read_unlock(&hashinfo->lock);
+ rcu_read_unlock();
- return sk ? sk : ERR_PTR(-ENOENT);
+ return sk;
}
static int raw_diag_dump_one(struct netlink_callback *cb,
@@ -142,6 +142,8 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
struct net *net = sock_net(skb->sk);
struct inet_diag_dump_data *cb_data;
+ struct hlist_nulls_head *hlist;
+ struct hlist_nulls_node *hnode;
int num, s_num, slot, s_slot;
struct sock *sk = NULL;
struct nlattr *bc;
@@ -154,11 +156,12 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
s_slot = cb->args[0];
num = s_num = cb->args[1];
- read_lock(&hashinfo->lock);
+ rcu_read_lock();
for (slot = s_slot; slot < RAW_HTABLE_SIZE; s_num = 0, slot++) {
num = 0;
- sk_for_each(sk, &hashinfo->ht[slot]) {
+ hlist = &hashinfo->ht[slot];
+ sk_nulls_for_each(sk, hnode, hlist) {
struct inet_sock *inet = inet_sk(sk);
if (!net_eq(sock_net(sk), net))
@@ -181,7 +184,7 @@ next:
}
out_unlock:
- read_unlock(&hashinfo->lock);
+ rcu_read_unlock();
cb->args[0] = slot;
cb->args[1] = num;
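Without hashinfo->lock, raw_sock_get() may only hand out a socket whose refcount is still non-zero; a dying socket must be skipped. A C11 userspace analogue of the kernel's refcount_inc_not_zero() used above:

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static bool refcount_try_get(atomic_int *refs)
{
	int old = atomic_load_explicit(refs, memory_order_relaxed);

	do {
		if (old == 0)
			return false;	/* object already being freed */
	} while (!atomic_compare_exchange_weak_explicit(refs, &old, old + 1,
							memory_order_acquire,
							memory_order_relaxed));
	return true;
}

int main(void)
{
	atomic_int live = 1, dying = 0;

	printf("live:  %d\n", refcount_try_get(&live));   /* 1: grabbed */
	printf("dying: %d\n", refcount_try_get(&dying));  /* 0: skipped */
	return 0;
}
```

The acquire on success is a conservative choice for this userspace sketch; the kernel's refcount API deliberately provides no such ordering and relies on RCU to keep the object's memory stable instead.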
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ff6f91cdb6c4..cd1fa9f70f1a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -84,6 +84,7 @@
#include <linux/jhash.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
+#include <net/inet_dscp.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/route.h>
@@ -112,14 +113,13 @@
#define DEFAULT_MIN_PMTU (512 + 20 + 20)
#define DEFAULT_MTU_EXPIRES (10 * 60 * HZ)
-
+#define DEFAULT_MIN_ADVMSS 256
static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly = 9;
static int ip_rt_redirect_load __read_mostly = HZ / 50;
static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly = HZ;
static int ip_rt_error_burst __read_mostly = 5 * HZ;
-static int ip_rt_min_advmss __read_mostly = 256;
static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
@@ -458,7 +458,7 @@ static u32 *ip_tstamps __read_mostly;
* if one generator is seldom used. This makes hard for an attacker
* to infer how many packets were sent between two points in time.
*/
-u32 ip_idents_reserve(u32 hash, int segs)
+static u32 ip_idents_reserve(u32 hash, int segs)
{
u32 bucket, old, now = (u32)jiffies;
atomic_t *p_id;
@@ -479,7 +479,6 @@ u32 ip_idents_reserve(u32 hash, int segs)
*/
return atomic_add_return(segs + delta, p_id) - segs;
}
-EXPORT_SYMBOL(ip_idents_reserve);
void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
{
@@ -499,24 +498,34 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
}
EXPORT_SYMBOL(__ip_select_ident);
+static void ip_rt_fix_tos(struct flowi4 *fl4)
+{
+ __u8 tos = RT_FL_TOS(fl4);
+
+ fl4->flowi4_tos = tos & IPTOS_RT_MASK;
+ if (tos & RTO_ONLINK)
+ fl4->flowi4_scope = RT_SCOPE_LINK;
+}
+
static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
- const struct sock *sk,
- const struct iphdr *iph,
- int oif, u8 tos,
- u8 prot, u32 mark, int flow_flags)
+ const struct sock *sk, const struct iphdr *iph,
+ int oif, __u8 tos, u8 prot, u32 mark,
+ int flow_flags)
{
+ __u8 scope = RT_SCOPE_UNIVERSE;
+
if (sk) {
const struct inet_sock *inet = inet_sk(sk);
oif = sk->sk_bound_dev_if;
mark = sk->sk_mark;
- tos = RT_CONN_FLAGS(sk);
+ tos = ip_sock_rt_tos(sk);
+ scope = ip_sock_rt_scope(sk);
prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
}
- flowi4_init_output(fl4, oif, mark, tos,
- RT_SCOPE_UNIVERSE, prot,
- flow_flags,
- iph->daddr, iph->saddr, 0, 0,
+
+ flowi4_init_output(fl4, oif, mark, tos & IPTOS_RT_MASK, scope,
+ prot, flow_flags, iph->daddr, iph->saddr, 0, 0,
sock_net_uid(net, sk));
}
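ip_rt_fix_tos(), added above, centralizes the tos/scope fixup that ip_route_output_key_hash() used to open-code (see the later hunk removing RT_FL_TOS there). A standalone sketch of the computation; the constant values mirror the kernel's uapi headers (IPTOS_RT_MASK 0x1c, RTO_ONLINK 0x01, RT_SCOPE_LINK 253) and should be treated as assumptions of this sketch:

```c
#include <stdio.h>

#define IPTOS_RT_MASK		0x1c	/* assumed uapi value */
#define RTO_ONLINK		0x01	/* assumed route.h value */
#define RT_SCOPE_UNIVERSE	0
#define RT_SCOPE_LINK		253

struct flow_stub { unsigned char tos, scope; };

static void rt_fix_tos(struct flow_stub *fl)
{
	unsigned char tos = fl->tos;

	fl->tos = tos & IPTOS_RT_MASK;	/* strip ECN and low bits */
	if (tos & RTO_ONLINK)		/* on-link routing hint */
		fl->scope = RT_SCOPE_LINK;
}

int main(void)
{
	struct flow_stub fl = { .tos = 0x11, .scope = RT_SCOPE_UNIVERSE };

	rt_fix_tos(&fl);
	printf("tos=%#x scope=%d\n", fl.tos, fl.scope); /* tos=0x10 scope=253 */
	return 0;
}
```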
@@ -526,9 +535,9 @@ static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
const struct net *net = dev_net(skb->dev);
const struct iphdr *iph = ip_hdr(skb);
int oif = skb->dev->ifindex;
- u8 tos = RT_TOS(iph->tos);
u8 prot = iph->protocol;
u32 mark = skb->mark;
+ __u8 tos = iph->tos;
__build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
}
@@ -544,7 +553,8 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr;
flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
- RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+ ip_sock_rt_tos(sk) & IPTOS_RT_MASK,
+ ip_sock_rt_scope(sk),
inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk),
daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
@@ -817,9 +827,9 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
const struct iphdr *iph = (const struct iphdr *) skb->data;
struct net *net = dev_net(skb->dev);
int oif = skb->dev->ifindex;
- u8 tos = RT_TOS(iph->tos);
u8 prot = iph->protocol;
u32 mark = skb->mark;
+ __u8 tos = iph->tos;
rt = (struct rtable *) dst;
@@ -936,6 +946,7 @@ static int ip_error(struct sk_buff *skb)
struct inet_peer *peer;
unsigned long now;
struct net *net;
+ SKB_DR(reason);
bool send;
int code;
@@ -955,10 +966,12 @@ static int ip_error(struct sk_buff *skb)
if (!IN_DEV_FORWARD(in_dev)) {
switch (rt->dst.error) {
case EHOSTUNREACH:
+ SKB_DR_SET(reason, IP_INADDRERRORS);
__IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
break;
case ENETUNREACH:
+ SKB_DR_SET(reason, IP_INNOROUTES);
__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
break;
}
@@ -974,6 +987,7 @@ static int ip_error(struct sk_buff *skb)
break;
case ENETUNREACH:
code = ICMP_NET_UNREACH;
+ SKB_DR_SET(reason, IP_INNOROUTES);
__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
break;
case EACCES:
@@ -1000,7 +1014,7 @@ static int ip_error(struct sk_buff *skb)
if (send)
icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
-out: kfree_skb(skb);
+out: kfree_skb_reason(skb, reason);
return 0;
}
@@ -1064,8 +1078,8 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
struct rtable *rt;
u32 mark = IP4_REPLY_MARK(net, skb->mark);
- __build_flow_key(net, &fl4, NULL, iph, oif,
- RT_TOS(iph->tos), protocol, mark, 0);
+ __build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, mark,
+ 0);
rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
__ip_rt_update_pmtu(rt, &fl4, mtu);
@@ -1153,8 +1167,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net,
struct flowi4 fl4;
struct rtable *rt;
- __build_flow_key(net, &fl4, NULL, iph, oif,
- RT_TOS(iph->tos), protocol, 0, 0);
+ __build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, 0, 0);
rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
__ip_do_redirect(rt, skb, &fl4, false);
@@ -1298,9 +1311,10 @@ static void set_class_tag(struct rtable *rt, u32 tag)
static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
+ struct net *net = dev_net(dst->dev);
unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
- ip_rt_min_advmss);
+ net->ipv4.ip_rt_min_advmss);
return min(advmss, IPV4_MAX_PMTU - header_size);
}
@@ -1384,7 +1398,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
struct fib_info *fi = res->fi;
u32 mtu = 0;
- if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
+ if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) ||
fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
mtu = fi->fib_mtu;
@@ -1485,6 +1499,7 @@ static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
struct uncached_list {
spinlock_t lock;
struct list_head head;
+ struct list_head quarantine;
};
static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
@@ -1506,7 +1521,7 @@ void rt_del_uncached_list(struct rtable *rt)
struct uncached_list *ul = rt->rt_uncached_list;
spin_lock_bh(&ul->lock);
- list_del(&rt->rt_uncached);
+ list_del_init(&rt->rt_uncached);
spin_unlock_bh(&ul->lock);
}
}
@@ -1521,20 +1536,23 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
void rt_flush_dev(struct net_device *dev)
{
- struct rtable *rt;
+ struct rtable *rt, *safe;
int cpu;
for_each_possible_cpu(cpu) {
struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
+ if (list_empty(&ul->head))
+ continue;
+
spin_lock_bh(&ul->lock);
- list_for_each_entry(rt, &ul->head, rt_uncached) {
+ list_for_each_entry_safe(rt, safe, &ul->head, rt_uncached) {
if (rt->dst.dev != dev)
continue;
rt->dst.dev = blackhole_netdev;
- dev_replace_track(dev, blackhole_netdev,
- &rt->dst.dev_tracker,
- GFP_ATOMIC);
+ netdev_ref_replace(dev, blackhole_netdev,
+ &rt->dst.dev_tracker, GFP_ATOMIC);
+ list_move(&rt->rt_uncached, &ul->quarantine);
}
spin_unlock_bh(&ul->lock);
}
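rt_flush_dev() now parks routes of a vanishing device on a per-CPU quarantine list (with list_del_init() in rt_del_uncached_list() keeping a later delete safe). A compilable userspace analogue of the move-under-lock pattern, with a pthread mutex and a hand-rolled list standing in for the kernel primitives:

```c
#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *prev, *next; };

static void list_init(struct list_head *h) { h->prev = h->next = h; }

static void list_add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev;
	n->next = h;
	h->prev->next = n;
	h->prev = n;
}

static void list_del(struct list_head *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

static void list_move_tail(struct list_head *n, struct list_head *h)
{
	list_del(n);
	list_add_tail(n, h);
}

struct rt_stub { int dev; struct list_head node; };

struct uncached_list {
	pthread_mutex_t lock;
	struct list_head head, quarantine;
};

/* Move every route bound to 'dev' onto the quarantine list, under the
 * same lock that guards normal add/del, as the patched rt_flush_dev()
 * does with list_move(). */
static void flush_dev(struct uncached_list *ul, int dev)
{
	pthread_mutex_lock(&ul->lock);
	for (struct list_head *p = ul->head.next, *n = p->next;
	     p != &ul->head; p = n, n = p->next) {
		struct rt_stub *rt = (struct rt_stub *)
			((char *)p - offsetof(struct rt_stub, node));

		if (rt->dev == dev)
			list_move_tail(p, &ul->quarantine);
	}
	pthread_mutex_unlock(&ul->lock);
}

int main(void)
{
	struct uncached_list ul = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct rt_stub a = { .dev = 1 }, b = { .dev = 2 };

	list_init(&ul.head);
	list_init(&ul.quarantine);
	list_add_tail(&a.node, &ul.head);
	list_add_tail(&b.node, &ul.head);
	flush_dev(&ul, 1);
	printf("quarantined a: %d\n", ul.quarantine.next == &a.node);
	return 0;
}
```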
@@ -1608,12 +1626,11 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
struct rtable *rt_dst_alloc(struct net_device *dev,
unsigned int flags, u16 type,
- bool nopolicy, bool noxfrm)
+ bool noxfrm)
{
struct rtable *rt;
rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
- (nopolicy ? DST_NOPOLICY : 0) |
(noxfrm ? DST_NOXFRM : 0));
if (rt) {
@@ -1718,8 +1735,11 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (our)
flags |= RTCF_LOCAL;
+ if (IN_DEV_ORCONF(in_dev, NOPOLICY))
+ IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
- IN_DEV_ORCONF(in_dev, NOPOLICY), false);
+ false);
if (!rth)
return -ENOBUFS;
@@ -1735,6 +1755,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
#endif
RT_CACHE_STAT_INC(in_slow_mc);
+ skb_dst_drop(skb);
skb_dst_set(skb, &rth->dst);
return 0;
}
@@ -1822,6 +1843,9 @@ static int __mkroute_input(struct sk_buff *skb,
}
}
+ if (IN_DEV_ORCONF(in_dev, NOPOLICY))
+ IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
fnhe = find_exception(nhc, daddr);
if (do_cache) {
if (fnhe)
@@ -1835,7 +1859,6 @@ static int __mkroute_input(struct sk_buff *skb,
}
rth = rt_dst_alloc(out_dev->dev, 0, res->type,
- IN_DEV_ORCONF(in_dev, NOPOLICY),
IN_DEV_ORCONF(out_dev, NOXFRM));
if (!rth) {
err = -ENOBUFS;
@@ -1901,7 +1924,7 @@ static u32 fib_multipath_custom_hash_outer(const struct net *net,
const struct sk_buff *skb,
bool *p_has_inner)
{
- u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
struct flow_keys keys, hash_keys;
if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
@@ -1930,7 +1953,7 @@ static u32 fib_multipath_custom_hash_inner(const struct net *net,
const struct sk_buff *skb,
bool has_inner)
{
- u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
struct flow_keys keys, hash_keys;
/* We assume the packet carries an encapsulation, but if none was
@@ -1990,7 +2013,7 @@ static u32 fib_multipath_custom_hash_skb(const struct net *net,
static u32 fib_multipath_custom_hash_fl4(const struct net *net,
const struct flowi4 *fl4)
{
- u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
struct flow_keys hash_keys;
if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
@@ -2020,7 +2043,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
struct flow_keys hash_keys;
u32 mhash = 0;
- switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
+ switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
case 0:
memset(&hash_keys, 0, sizeof(hash_keys));
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -2258,6 +2281,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
/*
* Now we are ready to route packet.
*/
+ fl4.flowi4_l3mdev = 0;
fl4.flowi4_oif = 0;
fl4.flowi4_iif = dev->ifindex;
fl4.flowi4_mark = skb->mark;
@@ -2327,6 +2351,9 @@ brd_input:
RT_CACHE_STAT_INC(in_brd);
local_input:
+ if (IN_DEV_ORCONF(in_dev, NOPOLICY))
+ IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
do_cache &= res->fi && !itag;
if (do_cache) {
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
@@ -2340,8 +2367,7 @@ local_input:
}
rth = rt_dst_alloc(ip_rt_get_dev(net, res),
- flags | RTCF_LOCAL, res->type,
- IN_DEV_ORCONF(in_dev, NOPOLICY), false);
+ flags | RTCF_LOCAL, res->type, false);
if (!rth)
goto e_nobufs;
@@ -2406,24 +2432,9 @@ martian_source:
goto out;
}
-int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev)
-{
- struct fib_result res;
- int err;
-
- tos &= IPTOS_RT_MASK;
- rcu_read_lock();
- err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
- rcu_read_unlock();
-
- return err;
-}
-EXPORT_SYMBOL(ip_route_input_noref);
-
/* called with rcu_read_lock held */
-int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev, struct fib_result *res)
+static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ u8 tos, struct net_device *dev, struct fib_result *res)
{
/* Multicast recognition logic is moved from route cache to here.
* The problem was that too many Ethernet cards have broken/missing
@@ -2472,6 +2483,21 @@ int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
}
+int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ u8 tos, struct net_device *dev)
+{
+ struct fib_result res;
+ int err;
+
+ tos &= IPTOS_RT_MASK;
+ rcu_read_lock();
+ err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
+ rcu_read_unlock();
+
+ return err;
+}
+EXPORT_SYMBOL(ip_route_input_noref);
+
/* called with rcu_read_lock() */
static struct rtable *__mkroute_output(const struct fib_result *res,
const struct flowi4 *fl4, int orig_oif,
@@ -2564,7 +2590,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
add:
rth = rt_dst_alloc(dev_out, flags, type,
- IN_DEV_ORCONF(in_dev, NOPOLICY),
IN_DEV_ORCONF(in_dev, NOXFRM));
if (!rth)
return ERR_PTR(-ENOBUFS);
@@ -2603,7 +2628,6 @@ add:
struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
const struct sk_buff *skb)
{
- __u8 tos = RT_FL_TOS(fl4);
struct fib_result res = {
.type = RTN_UNSPEC,
.fi = NULL,
@@ -2613,9 +2637,7 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
struct rtable *rth;
fl4->flowi4_iif = LOOPBACK_IFINDEX;
- fl4->flowi4_tos = tos & IPTOS_RT_MASK;
- fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
- RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
+ ip_rt_fix_tos(fl4);
rcu_read_lock();
rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
@@ -2733,8 +2755,7 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
res->fi = NULL;
res->table = NULL;
if (fl4->flowi4_oif &&
- (ipv4_is_multicast(fl4->daddr) ||
- !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
+ (ipv4_is_multicast(fl4->daddr) || !fl4->flowi4_l3mdev)) {
/* Apparently, routing tables are wrong. Assume,
* that the destination is on link.
*
@@ -2821,7 +2842,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
new->output = dst_discard_out;
new->dev = net->loopback_dev;
- dev_hold_track(new->dev, &new->dev_tracker, GFP_ATOMIC);
+ netdev_hold(new->dev, &new->dev_tracker, GFP_ATOMIC);
rt->rt_is_input = ort->rt_is_input;
rt->rt_iif = ort->rt_iif;
@@ -3379,7 +3400,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fri.tb_id = table_id;
fri.dst = res.prefix;
fri.dst_len = res.prefixlen;
- fri.tos = fl4.flowi4_tos;
+ fri.dscp = inet_dsfield_to_dscp(fl4.flowi4_tos);
fri.type = rt->rt_type;
fri.offload = 0;
fri.trap = 0;
@@ -3392,11 +3413,11 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (fa->fa_slen == slen &&
fa->tb_id == fri.tb_id &&
- fa->fa_tos == fri.tos &&
+ fa->fa_dscp == fri.dscp &&
fa->fa_info == res.fi &&
fa->fa_type == fri.type) {
- fri.offload = fa->offload;
- fri.trap = fa->trap;
+ fri.offload = READ_ONCE(fa->offload);
+ fri.trap = READ_ONCE(fa->trap);
break;
}
}
@@ -3535,13 +3556,6 @@ static struct ctl_table ipv4_route_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "min_adv_mss",
- .data = &ip_rt_min_advmss,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
{ }
};
@@ -3569,6 +3583,13 @@ static struct ctl_table ipv4_route_netns_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "min_adv_mss",
+ .data = &init_net.ipv4.ip_rt_min_advmss,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
{ },
};
@@ -3631,6 +3652,7 @@ static __net_init int netns_ip_rt_init(struct net *net)
/* Set default value for namespaceified sysctls */
net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU;
net->ipv4.ip_rt_mtu_expires = DEFAULT_MTU_EXPIRES;
+ net->ipv4.ip_rt_min_advmss = DEFAULT_MIN_ADVMSS;
return 0;
}
@@ -3642,7 +3664,7 @@ static __net_init int rt_genid_init(struct net *net)
{
atomic_set(&net->ipv4.rt_genid, 0);
atomic_set(&net->fnhe_genid, 0);
- atomic_set(&net->ipv4.dev_addr_genid, get_random_int());
+ atomic_set(&net->ipv4.dev_addr_genid, get_random_u32());
return 0;
}
@@ -3697,7 +3719,7 @@ int __init ip_rt_init(void)
ip_idents = idents_hash;
- prandom_bytes(ip_idents, (ip_idents_mask + 1) * sizeof(*ip_idents));
+ get_random_bytes(ip_idents, (ip_idents_mask + 1) * sizeof(*ip_idents));
ip_tstamps = idents_hash + (ip_idents_mask + 1) * sizeof(*ip_idents);
@@ -3705,6 +3727,7 @@ int __init ip_rt_init(void)
struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
INIT_LIST_HEAD(&ul->head);
+ INIT_LIST_HEAD(&ul->quarantine);
spin_lock_init(&ul->lock);
}
#ifdef CONFIG_IP_ROUTE_CLASSID
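In the route.c hunks above, min_adv_mss moves from a global sysctl to a per-netns one, defaulting to DEFAULT_MIN_ADVMSS. A sketch of the ipv4_default_advmss() clamp after the change; struct netns_stub and the fixed header-size arithmetic are simplifying assumptions of this sketch:

```c
#include <stdio.h>

#define IPV4_MAX_PMTU		65535
#define DEFAULT_MIN_ADVMSS	256

struct netns_stub { unsigned int ip_rt_min_advmss; };

static unsigned int default_advmss(const struct netns_stub *net,
				   unsigned int mtu)
{
	unsigned int hdr = 20 + 20;	/* iphdr + tcphdr */
	unsigned int advmss = mtu > hdr + net->ip_rt_min_advmss ?
			      mtu - hdr : net->ip_rt_min_advmss;

	/* upper clamp, as in the kernel function */
	return advmss < IPV4_MAX_PMTU - hdr ? advmss : IPV4_MAX_PMTU - hdr;
}

int main(void)
{
	struct netns_stub net = { .ip_rt_min_advmss = DEFAULT_MIN_ADVMSS };

	printf("%u\n", default_advmss(&net, 1500));	/* 1460 */
	printf("%u\n", default_advmss(&net, 68));	/* 256: floor applies */
	return 0;
}
```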
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 2cb3b852d148..942d2dfa1115 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -247,12 +247,12 @@ bool cookie_timestamp_decode(const struct net *net,
return true;
}
- if (!net->ipv4.sysctl_tcp_timestamps)
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps))
return false;
tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0;
- if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack)
+ if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack))
return false;
if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK)
@@ -261,7 +261,7 @@ bool cookie_timestamp_decode(const struct net *net,
tcp_opt->wscale_ok = 1;
tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK;
- return net->ipv4.sysctl_tcp_window_scaling != 0;
+ return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0;
}
EXPORT_SYMBOL(cookie_timestamp_decode);
@@ -273,7 +273,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
if (!ecn_ok)
return false;
- if (net->ipv4.sysctl_tcp_ecn)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_ecn))
return true;
return dst_feature(dst, RTAX_FEATURE_ECN);
@@ -281,6 +281,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
EXPORT_SYMBOL(cookie_ecn_ok);
struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
+ const struct tcp_request_sock_ops *af_ops,
struct sock *sk,
struct sk_buff *skb)
{
@@ -297,6 +298,10 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
return NULL;
treq = tcp_rsk(req);
+
+ /* treq->af_specific might be used to perform TCP_MD5 lookup */
+ treq->af_specific = af_ops;
+
treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
#if IS_ENABLED(CONFIG_MPTCP)
treq->is_mptcp = sk_is_mptcp(sk);
@@ -335,7 +340,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
struct flowi4 fl4;
u32 tsoff = 0;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
+ !th->ack || th->rst)
goto out;
if (tcp_synq_no_recent_overflow(sk))
@@ -364,7 +370,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb);
+ req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops,
+ &tcp_request_sock_ipv4_ops, sk, skb);
if (!req)
goto out;
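cookie_tcp_reqsk_alloc() grows an af_ops argument because treq->af_specific must be populated before any TCP-MD5 lookup dereferences it. A stand-in sketch of stashing the ops at allocation time (all names here are hypothetical, not kernel types):

```c
#include <stdio.h>
#include <stdlib.h>

struct af_ops_stub { const char *(*md5_lookup)(void); };

struct reqsk_stub { const struct af_ops_stub *af_specific; };

static struct reqsk_stub *cookie_reqsk_alloc(const struct af_ops_stub *af_ops)
{
	struct reqsk_stub *req = calloc(1, sizeof(*req));

	if (!req)
		return NULL;
	/* set before the request becomes visible to MD5 code */
	req->af_specific = af_ops;
	return req;
}

static const char *v4_md5_lookup(void) { return "v4 key"; }

int main(void)
{
	static const struct af_ops_stub v4_ops = { .md5_lookup = v4_md5_lookup };
	struct reqsk_stub *req = cookie_reqsk_alloc(&v4_ops);

	if (req)
		printf("%s\n", req->af_specific->md5_lookup());
	free(req);
	return 0;
}
```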
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 97eb54774924..9b8a6db7a66b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -20,10 +20,6 @@
#include <net/protocol.h>
#include <net/netevent.h>
-static int two = 2;
-static int three __maybe_unused = 3;
-static int four = 4;
-static int thousand = 1000;
static int tcp_retr1_max = 255;
static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -43,6 +39,7 @@ static u32 u32_max_div_HZ = UINT_MAX / HZ;
static int one_day_secs = 24 * 3600;
static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
+static unsigned int tcp_child_ehash_entries_max = 16 * 1024 * 1024;
/* obsolete */
static int sysctl_tcp_low_latency __read_mostly;
@@ -88,7 +85,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
* port limit.
*/
if ((range[1] < range[0]) ||
- (range[0] < net->ipv4.sysctl_ip_prot_sock))
+ (range[0] < READ_ONCE(net->ipv4.sysctl_ip_prot_sock)))
ret = -EINVAL;
else
set_local_port_range(net, range);
@@ -114,7 +111,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write,
.extra2 = &ip_privileged_port_max,
};
- pports = net->ipv4.sysctl_ip_prot_sock;
+ pports = READ_ONCE(net->ipv4.sysctl_ip_prot_sock);
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
@@ -126,7 +123,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write,
if (range[0] < pports)
ret = -EINVAL;
else
- net->ipv4.sysctl_ip_prot_sock = pports;
+ WRITE_ONCE(net->ipv4.sysctl_ip_prot_sock, pports);
}
return ret;
@@ -354,61 +351,6 @@ bad_key:
return ret;
}
-static void proc_configure_early_demux(int enabled, int protocol)
-{
- struct net_protocol *ipprot;
-#if IS_ENABLED(CONFIG_IPV6)
- struct inet6_protocol *ip6prot;
-#endif
-
- rcu_read_lock();
-
- ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot)
- ipprot->early_demux = enabled ? ipprot->early_demux_handler :
- NULL;
-
-#if IS_ENABLED(CONFIG_IPV6)
- ip6prot = rcu_dereference(inet6_protos[protocol]);
- if (ip6prot)
- ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
- NULL;
-#endif
- rcu_read_unlock();
-}
-
-static int proc_tcp_early_demux(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- int ret = 0;
-
- ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
-
- if (write && !ret) {
- int enabled = init_net.ipv4.sysctl_tcp_early_demux;
-
- proc_configure_early_demux(enabled, IPPROTO_TCP);
- }
-
- return ret;
-}
-
-static int proc_udp_early_demux(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- int ret = 0;
-
- ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
-
- if (write && !ret) {
- int enabled = init_net.ipv4.sysctl_udp_early_demux;
-
- proc_configure_early_demux(enabled, IPPROTO_UDP);
- }
-
- return ret;
-}
-
static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
int write, void *buffer,
size_t *lenp, loff_t *ppos)
@@ -441,6 +383,29 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
return ret;
}
+static int proc_tcp_ehash_entries(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct net *net = container_of(table->data, struct net,
+ ipv4.sysctl_tcp_child_ehash_entries);
+ struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo;
+ int tcp_ehash_entries;
+ struct ctl_table tbl;
+
+ tcp_ehash_entries = hinfo->ehash_mask + 1;
+
+ /* A negative number indicates that the child netns
+ * shares the global ehash.
+ */
+ if (!net_eq(net, &init_net) && !hinfo->pernet)
+ tcp_ehash_entries *= -1;
+
+ tbl.data = &tcp_ehash_entries;
+ tbl.maxlen = sizeof(int);
+
+ return proc_dointvec(&tbl, write, buffer, lenp, ppos);
+}
+
#ifdef CONFIG_IP_ROUTE_MULTIPATH
static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
void *buffer, size_t *lenp,
@@ -590,11 +555,20 @@ static struct ctl_table ipv4_table[] = {
static struct ctl_table ipv4_net_table[] = {
{
+ .procname = "tcp_max_tw_buckets",
+ .data = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "icmp_echo_ignore_all",
.data = &init_net.ipv4.sysctl_icmp_echo_ignore_all,
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
},
{
.procname = "icmp_echo_enable_probe",
@@ -611,6 +585,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
},
{
.procname = "icmp_ignore_bogus_error_responses",
@@ -618,6 +594,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
},
{
.procname = "icmp_errors_use_inbound_ifaddr",
@@ -625,6 +603,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
},
{
.procname = "icmp_ratelimit",
@@ -664,6 +644,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "tcp_ecn_fallback",
@@ -671,6 +653,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "ip_dynaddr",
@@ -691,14 +675,14 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_udp_early_demux,
.maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_udp_early_demux
+ .proc_handler = proc_dou8vec_minmax,
},
{
.procname = "tcp_early_demux",
.data = &init_net.ipv4.sysctl_tcp_early_demux,
.maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_tcp_early_demux
+ .proc_handler = proc_dou8vec_minmax,
},
{
.procname = "nexthop_compat_mode",
@@ -998,14 +982,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
- },
- {
- .procname = "tcp_max_tw_buckets",
- .data = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "tcp_max_syn_backlog",
@@ -1058,7 +1035,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_fib_multipath_hash_policy,
.extra1 = SYSCTL_ZERO,
- .extra2 = &three,
+ .extra2 = SYSCTL_THREE,
},
{
.procname = "fib_multipath_hash_fields",
@@ -1116,7 +1093,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &four,
+ .extra2 = SYSCTL_FOUR,
},
{
.procname = "tcp_recovery",
@@ -1271,6 +1248,13 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = SYSCTL_ONE,
},
{
+ .procname = "tcp_tso_rtt_log",
+ .data = &init_net.ipv4.sysctl_tcp_tso_rtt_log,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ },
+ {
.procname = "tcp_min_rtt_wlen",
.data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen,
.maxlen = sizeof(int),
@@ -1302,7 +1286,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &thousand,
+ .extra2 = SYSCTL_ONE_THOUSAND,
},
{
.procname = "tcp_pacing_ca_ratio",
@@ -1311,7 +1295,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &thousand,
+ .extra2 = SYSCTL_ONE_THOUSAND,
},
{
.procname = "tcp_wmem",
@@ -1361,6 +1345,21 @@ static struct ctl_table ipv4_net_table[] = {
.extra2 = SYSCTL_ONE,
},
{
+ .procname = "tcp_ehash_entries",
+ .data = &init_net.ipv4.sysctl_tcp_child_ehash_entries,
+ .mode = 0444,
+ .proc_handler = proc_tcp_ehash_entries,
+ },
+ {
+ .procname = "tcp_child_ehash_entries",
+ .data = &init_net.ipv4.sysctl_tcp_child_ehash_entries,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &tcp_child_ehash_entries_max,
+ },
+ {
.procname = "udp_rmem_min",
.data = &init_net.ipv4.sysctl_udp_rmem_min,
.maxlen = sizeof(init_net.ipv4.sysctl_udp_rmem_min),
@@ -1383,7 +1382,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{ }
};
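The new read-only tcp_ehash_entries handler reports a negative size when a child namespace still shares init_net's ehash. The sign convention, extracted into plain C (the booleans stand in for net_eq() and hinfo->pernet):

```c
#include <stdbool.h>
#include <stdio.h>

static int reported_ehash_entries(unsigned int ehash_mask, bool is_init_net,
				  bool pernet_hash)
{
	int entries = (int)(ehash_mask + 1);

	if (!is_init_net && !pernet_hash)
		entries = -entries;	/* "shared with init_net" marker */
	return entries;
}

int main(void)
{
	printf("%d\n", reported_ehash_entries(65535, true, false));  /*  65536 */
	printf("%d\n", reported_ehash_entries(65535, false, false)); /* -65536 */
	printf("%d\n", reported_ehash_entries(511, false, true));    /*    512 */
	return 0;
}
```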
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3b75836db19b..54836a6b81d6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -294,6 +294,8 @@ EXPORT_SYMBOL(sysctl_tcp_mem);
atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; /* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);
+DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc);
+EXPORT_PER_CPU_SYMBOL_GPL(tcp_memory_per_cpu_fw_alloc);
#if IS_ENABLED(CONFIG_SMC)
DEFINE_STATIC_KEY_FALSE(tcp_have_smc);
@@ -429,7 +431,7 @@ void tcp_init_sock(struct sock *sk)
* algorithms that we must have the following bandaid to talk
* efficiently to them. -DaveM
*/
- tp->snd_cwnd = TCP_INIT_CWND;
+ tcp_snd_cwnd_set(tp, TCP_INIT_CWND);
/* There's a bubble in the pipe until at least the first ACK. */
tp->app_limited = ~0U;
@@ -441,7 +443,7 @@ void tcp_init_sock(struct sock *sk)
tp->snd_cwnd_clamp = ~0;
tp->mss_cache = TCP_MSS_DEFAULT;
- tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
+ tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering);
tcp_assign_congestion_control(sk);
tp->tsoffset = 0;
@@ -452,9 +454,10 @@ void tcp_init_sock(struct sock *sk)
icsk->icsk_sync_mss = tcp_sync_mss;
- WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
- WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
+ WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
+ WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));
+ set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
sk_sockets_allocated_inc(sk);
}
EXPORT_SYMBOL(tcp_init_sock);
@@ -686,9 +689,10 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
int size_goal)
{
return skb->len < size_goal &&
- sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
!tcp_rtx_queue_empty(sk) &&
- refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
+ refcount_read(&sk->sk_wmem_alloc) > skb->truesize &&
+ tcp_skb_can_collapse_to(skb);
}
void tcp_push(struct sock *sk, int flags, int mss_now,
@@ -855,9 +859,6 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
{
struct sk_buff *skb;
- if (unlikely(tcp_under_memory_pressure(sk)))
- sk_mem_reclaim_partial(sk);
-
skb = alloc_skb_fclone(size + MAX_TCP_HEADER, gfp);
if (likely(skb)) {
bool mem_scheduled;
@@ -893,8 +894,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
return mss_now;
/* Note : tcp_tso_autosize() will eventually split this later */
- new_size_goal = sk->sk_gso_max_size - 1 - MAX_TCP_HEADER;
- new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal);
+ new_size_goal = tcp_bound_to_half_wnd(tp, sk->sk_gso_max_size);
/* We try hard to avoid divides here */
size_goal = tp->gso_segs * mss_now;
@@ -936,6 +936,40 @@ void tcp_remove_empty_skb(struct sock *sk)
}
}
+/* skb changing from pure zc to mixed, must charge zc */
+static int tcp_downgrade_zcopy_pure(struct sock *sk, struct sk_buff *skb)
+{
+ if (unlikely(skb_zcopy_pure(skb))) {
+ u32 extra = skb->truesize -
+ SKB_TRUESIZE(skb_end_offset(skb));
+
+ if (!sk_wmem_schedule(sk, extra))
+ return -ENOMEM;
+
+ sk_mem_charge(sk, extra);
+ skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
+ }
+ return 0;
+}
+
+
+static int tcp_wmem_schedule(struct sock *sk, int copy)
+{
+ int left;
+
+ if (likely(sk_wmem_schedule(sk, copy)))
+ return copy;
+
+ /* We could be in trouble if we have nothing queued.
+ * Use whatever is left in sk->sk_forward_alloc and tcp_wmem[0]
+ * to guarantee some progress.
+ */
+ left = sock_net(sk)->ipv4.sysctl_tcp_wmem[0] - sk->sk_wmem_queued;
+ if (left > 0)
+ sk_forced_mem_schedule(sk, min(left, copy));
+ return min(copy, sk->sk_forward_alloc);
+}
+
static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags,
struct page *page, int offset, size_t *size)
{
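tcp_wmem_schedule(), added in the hunk above, lets a sender under memory pressure still make progress by force-charging whatever headroom remains below tcp_wmem[0]. A simplified, compilable approximation of that fallback (struct sk_stub and its budget fields are stand-ins, not the real memory accounting):

```c
#include <stdio.h>

struct sk_stub {
	int wmem_queued;	/* bytes already queued */
	int forward_alloc;	/* pre-charged budget */
	int tcp_wmem0;		/* sysctl_tcp_wmem[0] */
};

static int wmem_schedule(struct sk_stub *sk, int copy)
{
	int left;

	if (sk->forward_alloc >= copy)	/* normal case: charge succeeds */
		return copy;

	left = sk->tcp_wmem0 - sk->wmem_queued;
	if (left > 0)			/* force minimal progress */
		sk->forward_alloc += left < copy ? left : copy;
	return copy < sk->forward_alloc ? copy : sk->forward_alloc;
}

int main(void)
{
	struct sk_stub sk = { .wmem_queued = 4096, .forward_alloc = 0,
			      .tcp_wmem0 = 8192 };

	printf("%d\n", wmem_schedule(&sk, 16384));	/* 4096, not 0 */
	return 0;
}
```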
@@ -967,18 +1001,22 @@ new_segment:
i = skb_shinfo(skb)->nr_frags;
can_coalesce = skb_can_coalesce(skb, i, page, offset);
- if (!can_coalesce && i >= sysctl_max_skb_frags) {
+ if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
tcp_mark_push(tp, skb);
goto new_segment;
}
- if (!sk_wmem_schedule(sk, copy))
+ if (tcp_downgrade_zcopy_pure(sk, skb))
+ return NULL;
+
+ copy = tcp_wmem_schedule(sk, copy);
+ if (!copy)
return NULL;
if (can_coalesce) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
get_page(page);
- skb_fill_page_desc(skb, i, page, offset, copy);
+ skb_fill_page_desc_noacc(skb, i, page, offset, copy);
}
if (!(flags & MSG_NO_SHARED_FRAGS))
@@ -1125,16 +1163,16 @@ void tcp_free_fastopen_req(struct tcp_sock *tp)
}
}
-static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
- int *copied, size_t size,
- struct ubuf_info *uarg)
+int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
+ size_t size, struct ubuf_info *uarg)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
struct sockaddr *uaddr = msg->msg_name;
int err, flags;
- if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+ if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) &
+ TFO_CLIENT_ENABLE) ||
(uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
uaddr->sa_family == AF_UNSPEC))
return -EOPNOTSUPP;
@@ -1186,17 +1224,23 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
flags = msg->msg_flags;
- if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) {
+ if ((flags & MSG_ZEROCOPY) && size) {
skb = tcp_write_queue_tail(sk);
- uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
- if (!uarg) {
- err = -ENOBUFS;
- goto out_err;
- }
- zc = sk->sk_route_caps & NETIF_F_SG;
- if (!zc)
- uarg->zerocopy = 0;
+ if (msg->msg_ubuf) {
+ uarg = msg->msg_ubuf;
+ net_zcopy_get(uarg);
+ zc = sk->sk_route_caps & NETIF_F_SG;
+ } else if (sock_flag(sk, SOCK_ZEROCOPY)) {
+ uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
+ if (!uarg) {
+ err = -ENOBUFS;
+ goto out_err;
+ }
+ zc = sk->sk_route_caps & NETIF_F_SG;
+ if (!zc)
+ uarg_to_msgzc(uarg)->zerocopy = 0;
+ }
}
if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
@@ -1310,7 +1354,7 @@ new_segment:
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i >= sysctl_max_skb_frags) {
+ if (i >= READ_ONCE(sysctl_max_skb_frags)) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -1319,16 +1363,14 @@ new_segment:
copy = min_t(int, copy, pfrag->size - pfrag->offset);
- /* skb changing from pure zc to mixed, must charge zc */
- if (unlikely(skb_zcopy_pure(skb))) {
- if (!sk_wmem_schedule(sk, skb->data_len))
+ if (unlikely(skb_zcopy_pure(skb) || skb_zcopy_managed(skb))) {
+ if (tcp_downgrade_zcopy_pure(sk, skb))
goto wait_for_space;
-
- sk_mem_charge(sk, skb->data_len);
- skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
+ skb_zcopy_downgrade_managed(skb);
}
- if (!sk_wmem_schedule(sk, copy))
+ copy = tcp_wmem_schedule(sk, copy);
+ if (!copy)
goto wait_for_space;
err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
@@ -1355,7 +1397,8 @@ new_segment:
skb_shinfo(skb)->flags |= SKBFL_PURE_ZEROCOPY;
if (!skb_zcopy_pure(skb)) {
- if (!sk_wmem_schedule(sk, copy))
+ copy = tcp_wmem_schedule(sk, copy);
+ if (!copy)
goto wait_for_space;
}
@@ -1524,17 +1567,11 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
* calculation of whether or not we must ACK for the sake of
* a window update.
*/
-void tcp_cleanup_rbuf(struct sock *sk, int copied)
+static void __tcp_cleanup_rbuf(struct sock *sk, int copied)
{
struct tcp_sock *tp = tcp_sk(sk);
bool time_to_ack = false;
- struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
-
- WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
- "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
- tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
-
if (inet_csk_ack_scheduled(sk)) {
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1580,19 +1617,16 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
tcp_send_ack(sk);
}
-void __sk_defer_free_flush(struct sock *sk)
+void tcp_cleanup_rbuf(struct sock *sk, int copied)
{
- struct llist_node *head;
- struct sk_buff *skb, *n;
+ struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+ struct tcp_sock *tp = tcp_sk(sk);
- head = llist_del_all(&sk->defer_list);
- llist_for_each_entry_safe(skb, n, head, ll_node) {
- prefetch(n);
- skb_mark_not_on_list(skb);
- __kfree_skb(skb);
- }
+ WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
+ "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
+ tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
+ __tcp_cleanup_rbuf(sk, copied);
}
-EXPORT_SYMBOL(__sk_defer_free_flush);
static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
{
@@ -1601,16 +1635,12 @@ static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
sock_rfree(skb);
skb->destructor = NULL;
skb->sk = NULL;
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
- !llist_empty(&sk->defer_list)) {
- llist_add(&skb->ll_node, &sk->defer_list);
- return;
- }
+ return skb_attempt_defer_free(skb);
}
__kfree_skb(skb);
}
-static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
+struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
{
struct sk_buff *skb;
u32 offset;
@@ -1633,6 +1663,7 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
}
return NULL;
}
+EXPORT_SYMBOL(tcp_recv_skb);
/*
* This routine provides an alternative to tcp_recvmsg() for routines
@@ -1675,11 +1706,13 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
if (!copied)
copied = used;
break;
- } else if (used <= len) {
- seq += used;
- copied += used;
- offset += used;
}
+ if (WARN_ON_ONCE(used > len))
+ used = len;
+ seq += used;
+ copied += used;
+ offset += used;
+
/* If recv_actor drops the lock (e.g. TCP splice
* receive) the skb pointer might be invalid when
* getting here: tcp_collapse might have deleted it
@@ -1717,6 +1750,90 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
}
EXPORT_SYMBOL(tcp_read_sock);
+int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 seq = tp->copied_seq;
+ struct sk_buff *skb;
+ int copied = 0;
+ u32 offset;
+
+ if (sk->sk_state == TCP_LISTEN)
+ return -ENOTCONN;
+
+ while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
+ u8 tcp_flags;
+ int used;
+
+ __skb_unlink(skb, &sk->sk_receive_queue);
+ WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
+ tcp_flags = TCP_SKB_CB(skb)->tcp_flags;
+ used = recv_actor(sk, skb);
+ consume_skb(skb);
+ if (used < 0) {
+ if (!copied)
+ copied = used;
+ break;
+ }
+ seq += used;
+ copied += used;
+
+ if (tcp_flags & TCPHDR_FIN) {
+ ++seq;
+ break;
+ }
+ }
+ WRITE_ONCE(tp->copied_seq, seq);
+
+ tcp_rcv_space_adjust(sk);
+
+ /* Clean up data we have read: This will do ACK frames. */
+ if (copied > 0)
+ __tcp_cleanup_rbuf(sk, copied);
+
+ return copied;
+}
+EXPORT_SYMBOL(tcp_read_skb);
+
+void tcp_read_done(struct sock *sk, size_t len)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 seq = tp->copied_seq;
+ struct sk_buff *skb;
+ size_t left;
+ u32 offset;
+
+ if (sk->sk_state == TCP_LISTEN)
+ return;
+
+ left = len;
+ while (left && (skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
+ int used;
+
+ used = min_t(size_t, skb->len - offset, left);
+ seq += used;
+ left -= used;
+
+ if (skb->len > offset + used)
+ break;
+
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
+ tcp_eat_recv_skb(sk, skb);
+ ++seq;
+ break;
+ }
+ tcp_eat_recv_skb(sk, skb);
+ }
+ WRITE_ONCE(tp->copied_seq, seq);
+
+ tcp_rcv_space_adjust(sk);
+
+ /* Clean up data we have read: This will do ACK frames. */
+ if (left != len)
+ tcp_cleanup_rbuf(sk, len - left);
+}
+EXPORT_SYMBOL(tcp_read_done);
+
int tcp_peek_len(struct socket *sock)
{
return tcp_inq(sock->sk);
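tcp_read_skb(), added above, hands each receive-queue buffer whole to a recv_actor() callback and advances copied_seq by what the actor consumed, counting a FIN as one extra sequence unit. A userspace sketch of that consumption loop with stand-in types:

```c
#include <stdbool.h>
#include <stdio.h>

struct skb_stub { int len; bool fin; struct skb_stub *next; };

typedef int (*read_actor_t)(const struct skb_stub *skb);

static int read_skb(struct skb_stub **queue, unsigned int *copied_seq,
		    read_actor_t actor)
{
	int copied = 0;

	while (*queue) {
		struct skb_stub *skb = *queue;
		int used;

		*queue = skb->next;		/* __skb_unlink() analogue */
		used = actor(skb);
		if (used < 0) {
			if (!copied)
				copied = used;	/* surface the error */
			break;
		}
		*copied_seq += used;
		copied += used;
		if (skb->fin) {
			++*copied_seq;		/* FIN consumes one seq */
			break;
		}
	}
	return copied;
}

static int print_actor(const struct skb_stub *skb)
{
	printf("got %d bytes\n", skb->len);
	return skb->len;
}

int main(void)
{
	struct skb_stub b = { 3, true, NULL }, a = { 5, false, &b };
	struct skb_stub *q = &a;
	unsigned int seq = 100;
	int copied = read_skb(&q, &seq, print_actor);

	printf("copied=%d seq=%u\n", copied, seq);	/* copied=8 seq=109 */
	return 0;
}
```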
@@ -1731,7 +1848,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
cap = sk->sk_rcvbuf >> 1;
else
- cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
+ cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
val = min(val, cap);
WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
@@ -1866,8 +1983,7 @@ static void tcp_zerocopy_set_hint_for_skb(struct sock *sk,
}
static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags,
- struct scm_timestamping_internal *tss,
+ int flags, struct scm_timestamping_internal *tss,
int *cmsg_flags);
static int receive_fallback_to_copy(struct sock *sk,
struct tcp_zerocopy_receive *zc, int inq,
@@ -1889,7 +2005,7 @@ static int receive_fallback_to_copy(struct sock *sk,
if (err)
return err;
- err = tcp_recvmsg_locked(sk, &msg, inq, /*nonblock=*/1, /*flags=*/0,
+ err = tcp_recvmsg_locked(sk, &msg, inq, MSG_DONTWAIT,
tss, &zc->msg_flags);
if (err < 0)
return err;
@@ -2305,8 +2421,7 @@ static int tcp_inq_hint(struct sock *sk)
*/
static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags,
- struct scm_timestamping_internal *tss,
+ int flags, struct scm_timestamping_internal *tss,
int *cmsg_flags)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2324,9 +2439,11 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
if (sk->sk_state == TCP_LISTEN)
goto out;
- if (tp->recvmsg_inq)
+ if (tp->recvmsg_inq) {
*cmsg_flags = TCP_CMSG_INQ;
- timeo = sock_rcvtimeo(sk, nonblock);
+ msg->msg_get_inq = 1;
+ }
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
/* Urgent data needs to be handled specially. */
if (flags & MSG_OOB)
@@ -2444,7 +2561,6 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
__sk_flush_backlog(sk);
} else {
tcp_cleanup_rbuf(sk, copied);
- sk_defer_free_flush(sk);
sk_wait_data(sk, &timeo, last);
}
@@ -2545,10 +2661,10 @@ recv_sndq:
goto out;
}
-int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
- int flags, int *addr_len)
+int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len)
{
- int cmsg_flags = 0, ret, inq;
+ int cmsg_flags = 0, ret;
struct scm_timestamping_internal tss;
if (unlikely(flags & MSG_ERRQUEUE))
@@ -2557,20 +2673,20 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
if (sk_can_busy_loop(sk) &&
skb_queue_empty_lockless(&sk->sk_receive_queue) &&
sk->sk_state == TCP_ESTABLISHED)
- sk_busy_loop(sk, nonblock);
+ sk_busy_loop(sk, flags & MSG_DONTWAIT);
lock_sock(sk);
- ret = tcp_recvmsg_locked(sk, msg, len, nonblock, flags, &tss,
- &cmsg_flags);
+ ret = tcp_recvmsg_locked(sk, msg, len, flags, &tss, &cmsg_flags);
release_sock(sk);
- sk_defer_free_flush(sk);
- if (cmsg_flags && ret >= 0) {
+ if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) {
if (cmsg_flags & TCP_CMSG_TS)
tcp_recv_timestamp(msg, sk, &tss);
- if (cmsg_flags & TCP_CMSG_INQ) {
- inq = tcp_inq_hint(sk);
- put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
+ if (msg->msg_get_inq) {
+ msg->msg_inq = tcp_inq_hint(sk);
+ if (cmsg_flags & TCP_CMSG_INQ)
+ put_cmsg(msg, SOL_TCP, TCP_CM_INQ,
+ sizeof(msg->msg_inq), &msg->msg_inq);
}
}
return ret;
@@ -2724,7 +2840,8 @@ static void tcp_orphan_update(struct timer_list *unused)
static bool tcp_too_many_orphans(int shift)
{
- return READ_ONCE(tcp_orphan_cache) << shift > sysctl_tcp_max_orphans;
+ return READ_ONCE(tcp_orphan_cache) << shift >
+ READ_ONCE(sysctl_tcp_max_orphans);
}
bool tcp_check_oom(struct sock *sk, int shift)
@@ -2771,8 +2888,6 @@ void __tcp_close(struct sock *sk, long timeout)
__kfree_skb(skb);
}
- sk_mem_reclaim(sk);
-
/* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
if (sk->sk_state == TCP_CLOSE)
goto adjudge_to_death;
@@ -2880,7 +2995,6 @@ adjudge_to_death:
}
}
if (sk->sk_state != TCP_CLOSE) {
- sk_mem_reclaim(sk);
if (tcp_check_oom(sk, 0)) {
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC);
@@ -2958,7 +3072,6 @@ void tcp_write_queue_purge(struct sock *sk)
}
tcp_rtx_queue_purge(sk);
INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
- sk_mem_reclaim(sk);
tcp_clear_all_retrans_hints(tcp_sk(sk));
tcp_sk(sk)->packets_out = 0;
inet_csk(sk)->icsk_backoff = 0;
@@ -3022,8 +3135,10 @@ int tcp_disconnect(struct sock *sk, int flags)
icsk->icsk_rto_min = TCP_RTO_MIN;
icsk->icsk_delack_max = TCP_DELACK_MAX;
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
- tp->snd_cwnd = TCP_INIT_CWND;
+ tcp_snd_cwnd_set(tp, TCP_INIT_CWND);
tp->snd_cwnd_cnt = 0;
+ tp->is_cwnd_limited = 0;
+ tp->max_packets_out = 0;
tp->window_clamp = 0;
tp->delivered = 0;
tp->delivered_ce = 0;
@@ -3088,7 +3203,6 @@ int tcp_disconnect(struct sock *sk, int flags)
sk->sk_frag.page = NULL;
sk->sk_frag.offset = 0;
}
- sk_defer_free_flush(sk);
sk_error_report(sk);
return 0;
}
@@ -3096,7 +3210,7 @@ EXPORT_SYMBOL(tcp_disconnect);
static inline bool tcp_can_repair_sock(const struct sock *sk)
{
- return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
+ return sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
(sk->sk_state != TCP_LISTEN);
}
@@ -3373,8 +3487,8 @@ int tcp_set_window_clamp(struct sock *sk, int val)
/*
* Socket option code for TCP.
*/
-static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
- sockptr_t optval, unsigned int optlen)
+int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -3396,11 +3510,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
name[val] = 0;
- lock_sock(sk);
- err = tcp_set_congestion_control(sk, name, true,
- ns_capable(sock_net(sk)->user_ns,
- CAP_NET_ADMIN));
- release_sock(sk);
+ sockopt_lock_sock(sk);
+ err = tcp_set_congestion_control(sk, name, !has_current_bpf_ctx(),
+ sockopt_ns_capable(sock_net(sk)->user_ns,
+ CAP_NET_ADMIN));
+ sockopt_release_sock(sk);
return err;
}
case TCP_ULP: {
@@ -3416,9 +3530,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
name[val] = 0;
- lock_sock(sk);
+ sockopt_lock_sock(sk);
err = tcp_set_ulp(sk, name);
- release_sock(sk);
+ sockopt_release_sock(sk);
return err;
}
case TCP_FASTOPEN_KEY: {
@@ -3451,7 +3565,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
- lock_sock(sk);
+ sockopt_lock_sock(sk);
switch (optname) {
case TCP_MAXSEG:
@@ -3533,7 +3647,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
case TCP_REPAIR_OPTIONS:
if (!tp->repair)
err = -EINVAL;
- else if (sk->sk_state == TCP_ESTABLISHED)
+ else if (sk->sk_state == TCP_ESTABLISHED && !tp->bytes_sent)
err = tcp_repair_options_est(sk, optval, optlen);
else
err = -EPERM;
@@ -3626,7 +3740,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
case TCP_FASTOPEN_CONNECT:
if (val > 1 || val < 0) {
err = -EINVAL;
- } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
+ } else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) &
+ TFO_CLIENT_ENABLE) {
if (sk->sk_state == TCP_CLOSE)
tp->fastopen_connect = val;
else
@@ -3672,7 +3787,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
break;
}
- release_sock(sk);
+ sockopt_release_sock(sk);
return err;
}
@@ -3682,8 +3797,9 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
const struct inet_connection_sock *icsk = inet_csk(sk);
if (level != SOL_TCP)
- return icsk->icsk_af_ops->setsockopt(sk, level, optname,
- optval, optlen);
+ /* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */
+ return READ_ONCE(icsk->icsk_af_ops)->setsockopt(sk, level, optname,
+ optval, optlen);
return do_tcp_setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(tcp_setsockopt);
@@ -3733,7 +3849,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_max_pacing_rate = rate64;
info->tcpi_reordering = tp->reordering;
- info->tcpi_snd_cwnd = tp->snd_cwnd;
+ info->tcpi_snd_cwnd = tcp_snd_cwnd(tp);
if (info->tcpi_state == TCP_LISTEN) {
/* listeners aliased fields :
@@ -3904,7 +4020,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
rate64 = tcp_compute_delivery_rate(tp);
nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD);
- nla_put_u32(stats, TCP_NLA_SND_CWND, tp->snd_cwnd);
+ nla_put_u32(stats, TCP_NLA_SND_CWND, tcp_snd_cwnd(tp));
nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering);
nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp));
@@ -3936,15 +4052,15 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
return stats;
}
-static int do_tcp_getsockopt(struct sock *sk, int level,
- int optname, char __user *optval, int __user *optlen)
+int do_tcp_getsockopt(struct sock *sk, int level,
+ int optname, sockptr_t optval, sockptr_t optlen)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
int val, len;
- if (get_user(len, optlen))
+ if (copy_from_sockptr(&len, optlen, sizeof(int)))
return -EFAULT;
len = min_t(unsigned int, len, sizeof(int));
@@ -3976,12 +4092,13 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = keepalive_probes(tp);
break;
case TCP_SYNCNT:
- val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
+ val = icsk->icsk_syn_retries ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
break;
case TCP_LINGER2:
val = tp->linger2;
if (val >= 0)
- val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ;
+ val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
break;
case TCP_DEFER_ACCEPT:
val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
@@ -3993,15 +4110,15 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
case TCP_INFO: {
struct tcp_info info;
- if (get_user(len, optlen))
+ if (copy_from_sockptr(&len, optlen, sizeof(int)))
return -EFAULT;
tcp_get_info(sk, &info);
len = min_t(unsigned int, len, sizeof(info));
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, &info, len))
+ if (copy_to_sockptr(optval, &info, len))
return -EFAULT;
return 0;
}
@@ -4011,7 +4128,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
size_t sz = 0;
int attr;
- if (get_user(len, optlen))
+ if (copy_from_sockptr(&len, optlen, sizeof(int)))
return -EFAULT;
ca_ops = icsk->icsk_ca_ops;
@@ -4019,9 +4136,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
sz = ca_ops->get_info(sk, ~0U, &attr, &info);
len = min_t(unsigned int, len, sz);
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, &info, len))
+ if (copy_to_sockptr(optval, &info, len))
return -EFAULT;
return 0;
}
@@ -4030,27 +4147,28 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
break;
case TCP_CONGESTION:
- if (get_user(len, optlen))
+ if (copy_from_sockptr(&len, optlen, sizeof(int)))
return -EFAULT;
len = min_t(unsigned int, len, TCP_CA_NAME_MAX);
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
+ if (copy_to_sockptr(optval, icsk->icsk_ca_ops->name, len))
return -EFAULT;
return 0;
case TCP_ULP:
- if (get_user(len, optlen))
+ if (copy_from_sockptr(&len, optlen, sizeof(int)))
return -EFAULT;
len = min_t(unsigned int, len, TCP_ULP_NAME_MAX);
if (!icsk->icsk_ulp_ops) {
- if (put_user(0, optlen))
+ len = 0;
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
return 0;
}
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, icsk->icsk_ulp_ops->name, len))
+ if (copy_to_sockptr(optval, icsk->icsk_ulp_ops->name, len))
return -EFAULT;
return 0;
@@ -4058,15 +4176,15 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
u64 key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(u64)];
unsigned int key_len;
- if (get_user(len, optlen))
+ if (copy_from_sockptr(&len, optlen, sizeof(int)))
return -EFAULT;
key_len = tcp_fastopen_get_cipher(net, icsk, key) *
TCP_FASTOPEN_KEY_LENGTH;
len = min_t(unsigned int, len, key_len);
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, key, len))
+ if (copy_to_sockptr(optval, key, len))
return -EFAULT;
return 0;
}
@@ -4092,7 +4210,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
case TCP_REPAIR_WINDOW: {
struct tcp_repair_window opt;
- if (get_user(len, optlen))
+ if (copy_from_sockptr(&len, optlen, sizeof(int)))
return -EFAULT;
if (len != sizeof(opt))
@@ -4107,7 +4225,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
opt.rcv_wnd = tp->rcv_wnd;
opt.rcv_wup = tp->rcv_wup;
- if (copy_to_user(optval, &opt, len))
+ if (copy_to_sockptr(optval, &opt, len))
return -EFAULT;
return 0;
}
@@ -4153,35 +4271,35 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = tp->save_syn;
break;
case TCP_SAVED_SYN: {
- if (get_user(len, optlen))
+ if (copy_from_sockptr(&len, optlen, sizeof(int)))
return -EFAULT;
- lock_sock(sk);
+ sockopt_lock_sock(sk);
if (tp->saved_syn) {
if (len < tcp_saved_syn_len(tp->saved_syn)) {
- if (put_user(tcp_saved_syn_len(tp->saved_syn),
- optlen)) {
- release_sock(sk);
+ len = tcp_saved_syn_len(tp->saved_syn);
+ if (copy_to_sockptr(optlen, &len, sizeof(int))) {
+ sockopt_release_sock(sk);
return -EFAULT;
}
- release_sock(sk);
+ sockopt_release_sock(sk);
return -EINVAL;
}
len = tcp_saved_syn_len(tp->saved_syn);
- if (put_user(len, optlen)) {
- release_sock(sk);
+ if (copy_to_sockptr(optlen, &len, sizeof(int))) {
+ sockopt_release_sock(sk);
return -EFAULT;
}
- if (copy_to_user(optval, tp->saved_syn->data, len)) {
- release_sock(sk);
+ if (copy_to_sockptr(optval, tp->saved_syn->data, len)) {
+ sockopt_release_sock(sk);
return -EFAULT;
}
tcp_saved_syn_free(tp);
- release_sock(sk);
+ sockopt_release_sock(sk);
} else {
- release_sock(sk);
+ sockopt_release_sock(sk);
len = 0;
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
}
return 0;
@@ -4192,32 +4310,31 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
struct tcp_zerocopy_receive zc = {};
int err;
- if (get_user(len, optlen))
+ if (copy_from_sockptr(&len, optlen, sizeof(int)))
return -EFAULT;
if (len < 0 ||
len < offsetofend(struct tcp_zerocopy_receive, length))
return -EINVAL;
if (unlikely(len > sizeof(zc))) {
- err = check_zeroed_user(optval + sizeof(zc),
- len - sizeof(zc));
+ err = check_zeroed_sockptr(optval, sizeof(zc),
+ len - sizeof(zc));
if (err < 1)
return err == 0 ? -EINVAL : err;
len = sizeof(zc);
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
}
- if (copy_from_user(&zc, optval, len))
+ if (copy_from_sockptr(&zc, optval, len))
return -EFAULT;
if (zc.reserved)
return -EINVAL;
if (zc.msg_flags & ~(TCP_VALID_ZC_MSG_FLAGS))
return -EINVAL;
- lock_sock(sk);
+ sockopt_lock_sock(sk);
err = tcp_zerocopy_receive(sk, &zc, &tss);
err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname,
&zc, &len, err);
- release_sock(sk);
- sk_defer_free_flush(sk);
+ sockopt_release_sock(sk);
if (len >= offsetofend(struct tcp_zerocopy_receive, msg_flags))
goto zerocopy_rcv_cmsg;
switch (len) {
@@ -4247,7 +4364,7 @@ zerocopy_rcv_sk_err:
zerocopy_rcv_inq:
zc.inq = tcp_inq_hint(sk);
zerocopy_rcv_out:
- if (!err && copy_to_user(optval, &zc, len))
+ if (!err && copy_to_sockptr(optval, &zc, len))
err = -EFAULT;
return err;
}
@@ -4256,9 +4373,9 @@ zerocopy_rcv_out:
return -ENOPROTOOPT;
}
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, &val, len))
+ if (copy_to_sockptr(optval, &val, len))
return -EFAULT;
return 0;
}
@@ -4281,9 +4398,11 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
struct inet_connection_sock *icsk = inet_csk(sk);
if (level != SOL_TCP)
- return icsk->icsk_af_ops->getsockopt(sk, level, optname,
- optval, optlen);
- return do_tcp_getsockopt(sk, level, optname, optval, optlen);
+ /* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */
+ return READ_ONCE(icsk->icsk_af_ops)->getsockopt(sk, level, optname,
+ optval, optlen);
+ return do_tcp_getsockopt(sk, level, optname, USER_SOCKPTR(optval),
+ USER_SOCKPTR(optlen));
}
EXPORT_SYMBOL(tcp_getsockopt);
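The conversion above from get_user()/put_user()/copy_{to,from}_user() to the sockptr helpers lets do_tcp_getsockopt() serve both user-space callers and in-kernel callers (e.g. BPF) through one code path; tcp_getsockopt() simply wraps its __user pointers with USER_SOCKPTR(). Roughly how the helpers dispatch, as a sketch of the linux/sockptr.h abstraction:

	typedef struct {
		union {
			void		*kernel;
			void __user	*user;
		};
		bool	is_kernel : 1;
	} sockptr_t;

	/* sketch: pick the right copy primitive based on pointer origin */
	static inline int copy_to_sockptr(sockptr_t dst, const void *src,
					  size_t size)
	{
		if (!dst.is_kernel)
			return copy_to_user(dst.user, src, size); /* != 0 on fault */
		memcpy(dst.kernel, src, size);
		return 0;
	}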
@@ -4329,12 +4448,16 @@ static void __tcp_alloc_md5sig_pool(void)
* to memory. See smp_rmb() in tcp_get_md5sig_pool()
*/
smp_wmb();
- tcp_md5sig_pool_populated = true;
+ /* Paired with READ_ONCE() from tcp_alloc_md5sig_pool()
+ * and tcp_get_md5sig_pool().
+ */
+ WRITE_ONCE(tcp_md5sig_pool_populated, true);
}
bool tcp_alloc_md5sig_pool(void)
{
- if (unlikely(!tcp_md5sig_pool_populated)) {
+ /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+ if (unlikely(!READ_ONCE(tcp_md5sig_pool_populated))) {
mutex_lock(&tcp_md5sig_mutex);
if (!tcp_md5sig_pool_populated) {
@@ -4345,7 +4468,8 @@ bool tcp_alloc_md5sig_pool(void)
mutex_unlock(&tcp_md5sig_mutex);
}
- return tcp_md5sig_pool_populated;
+ /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+ return READ_ONCE(tcp_md5sig_pool_populated);
}
EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
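tcp_md5sig_pool_populated is written once under tcp_md5sig_mutex but read locklessly on every segment, so the plain loads and stores were a data race. The annotations follow the standard publish/consume pattern; a generic sketch of that pattern, not the tcp.c code itself:

	/* writer, under the mutex: make the data visible, then the flag */
	setup_pool_contents();
	smp_wmb();			/* order pool stores before the flag */
	WRITE_ONCE(populated, true);

	/* lockless reader */
	if (READ_ONCE(populated)) {	/* no load tearing or re-reads */
		smp_rmb();		/* pairs with the smp_wmb() above */
		use_pool_contents();
	}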
@@ -4361,7 +4485,8 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
{
local_bh_disable();
- if (tcp_md5sig_pool_populated) {
+ /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+ if (READ_ONCE(tcp_md5sig_pool_populated)) {
/* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
smp_rmb();
return this_cpu_ptr(&tcp_md5sig_pool);
@@ -4423,6 +4548,82 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *ke
}
EXPORT_SYMBOL(tcp_md5_hash_key);
+/* Called with rcu_read_lock() */
+enum skb_drop_reason
+tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
+ const void *saddr, const void *daddr,
+ int family, int dif, int sdif)
+{
+ /*
+ * This gets called for each TCP segment that arrives
+ * so we want to be efficient.
+ * We have 3 drop cases:
+ * o No MD5 hash and one expected.
+ * o MD5 hash and we're not expecting one.
+ * o MD5 hash and it's wrong.
+ */
+ const __u8 *hash_location = NULL;
+ struct tcp_md5sig_key *hash_expected;
+ const struct tcphdr *th = tcp_hdr(skb);
+ struct tcp_sock *tp = tcp_sk(sk);
+ int genhash, l3index;
+ u8 newhash[16];
+
+ /* sdif set, means packet ingressed via a device
+ * in an L3 domain and dif is set to the l3mdev
+ */
+ l3index = sdif ? dif : 0;
+
+ hash_expected = tcp_md5_do_lookup(sk, l3index, saddr, family);
+ hash_location = tcp_parse_md5sig_option(th);
+
+ /* We've parsed the options - do we have a hash? */
+ if (!hash_expected && !hash_location)
+ return SKB_NOT_DROPPED_YET;
+
+ if (hash_expected && !hash_location) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
+ return SKB_DROP_REASON_TCP_MD5NOTFOUND;
+ }
+
+ if (!hash_expected && hash_location) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
+ return SKB_DROP_REASON_TCP_MD5UNEXPECTED;
+ }
+
+ /* Check the signature.
+ * To support dual stack listeners, we need to handle
+ * IPv4-mapped case.
+ */
+ if (family == AF_INET)
+ genhash = tcp_v4_md5_hash_skb(newhash,
+ hash_expected,
+ NULL, skb);
+ else
+ genhash = tp->af_specific->calc_md5_hash(newhash,
+ hash_expected,
+ NULL, skb);
+
+ if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
+ if (family == AF_INET) {
+ net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s L3 index %d\n",
+ saddr, ntohs(th->source),
+ daddr, ntohs(th->dest),
+ genhash ? " tcp_v4_calc_md5_hash failed"
+ : "", l3index);
+ } else {
+ net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
+ genhash ? "failed" : "mismatch",
+ saddr, ntohs(th->source),
+ daddr, ntohs(th->dest), l3index);
+ }
+ return SKB_DROP_REASON_TCP_MD5FAILURE;
+ }
+ return SKB_NOT_DROPPED_YET;
+}
+EXPORT_SYMBOL(tcp_inbound_md5_hash);
+
#endif
void tcp_done(struct sock *sk)
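tcp_inbound_md5_hash() consolidates the previously duplicated IPv4/IPv6 segment-validation logic and reports an skb drop reason instead of a bare boolean. A caller would consume it roughly like this (a sketch of how a receive path uses drop reasons; SKB_NOT_DROPPED_YET is 0):

	enum skb_drop_reason reason;

	reason = tcp_inbound_md5_hash(sk, skb, &iph->saddr, &iph->daddr,
				      AF_INET, dif, sdif);
	if (reason) {
		/* feeds dropwatch/tracing with the exact drop cause */
		kfree_skb_reason(skb, reason);
		return 0;
	}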
@@ -4454,16 +4655,24 @@ EXPORT_SYMBOL_GPL(tcp_done);
int tcp_abort(struct sock *sk, int err)
{
- if (!sk_fullsock(sk)) {
- if (sk->sk_state == TCP_NEW_SYN_RECV) {
- struct request_sock *req = inet_reqsk(sk);
+ int state = inet_sk_state_load(sk);
- local_bh_disable();
- inet_csk_reqsk_queue_drop(req->rsk_listener, req);
- local_bh_enable();
- return 0;
- }
- return -EOPNOTSUPP;
+ if (state == TCP_NEW_SYN_RECV) {
+ struct request_sock *req = inet_reqsk(sk);
+
+ local_bh_disable();
+ inet_csk_reqsk_queue_drop(req->rsk_listener, req);
+ local_bh_enable();
+ return 0;
+ }
+ if (state == TCP_TIME_WAIT) {
+ struct inet_timewait_sock *tw = inet_twsk(sk);
+
+ refcount_inc(&tw->tw_refcnt);
+ local_bh_disable();
+ inet_twsk_deschedule_put(tw);
+ local_bh_enable();
+ return 0;
}
/* Don't race with userspace socket closes such as tcp_close. */
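tcp_abort() now keys off inet_sk_state_load(), which lets it destroy TCP_TIME_WAIT sockets (not full sockets) as well as request sockets instead of failing them with -EOPNOTSUPP. The lockless state load is trivial but the annotation matters, since minisocks change state without the owner lock; assuming the usual helper:

	static inline int inet_sk_state_load(const struct sock *sk)
	{
		/* READ_ONCE() pairs with inet_sk_state_store() */
		return READ_ONCE(sk->sk_state);
	}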
@@ -4539,7 +4748,6 @@ void __init tcp_init(void)
timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE);
mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD);
- inet_hashinfo_init(&tcp_hashinfo);
inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash",
thash_entries, 21, /* one slot per 2 MB*/
0, 64 * 1024);
@@ -4549,6 +4757,12 @@ void __init tcp_init(void)
SLAB_HWCACHE_ALIGN | SLAB_PANIC |
SLAB_ACCOUNT,
NULL);
+ tcp_hashinfo.bind2_bucket_cachep =
+ kmem_cache_create("tcp_bind2_bucket",
+ sizeof(struct inet_bind2_bucket), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC |
+ SLAB_ACCOUNT,
+ NULL);
/* Size and allocate the main established and bind bucket
* hash tables.
@@ -4572,7 +4786,7 @@ void __init tcp_init(void)
panic("TCP: failed to alloc ehash_locks");
tcp_hashinfo.bhash =
alloc_large_system_hash("TCP bind",
- sizeof(struct inet_bind_hashbucket),
+ 2 * sizeof(struct inet_bind_hashbucket),
tcp_hashinfo.ehash_mask + 1,
17, /* one slot per 128 KB of memory */
0,
@@ -4581,11 +4795,15 @@ void __init tcp_init(void)
0,
64 * 1024);
tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
+ tcp_hashinfo.bhash2 = tcp_hashinfo.bhash + tcp_hashinfo.bhash_size;
for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
spin_lock_init(&tcp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
+ spin_lock_init(&tcp_hashinfo.bhash2[i].lock);
+ INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain);
}
+ tcp_hashinfo.pernet = false;
cnt = tcp_hashinfo.ehash_mask + 1;
sysctl_tcp_max_orphans = cnt / 2;
@@ -4596,11 +4814,11 @@ void __init tcp_init(void)
max_wshare = min(4UL*1024*1024, limit);
max_rshare = min(6UL*1024*1024, limit);
- init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
+ init_net.ipv4.sysctl_tcp_wmem[0] = PAGE_SIZE;
init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
- init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
+ init_net.ipv4.sysctl_tcp_rmem[0] = PAGE_SIZE;
init_net.ipv4.sysctl_tcp_rmem[1] = 131072;
init_net.ipv4.sysctl_tcp_rmem[2] = max(131072, max_rshare);
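tcp_init() now sizes the bind-bucket allocation for two tables and carves bhash2 out of the second half, so the table intended for (address, port) lookups comes from the same contiguous block as the classic port-keyed one. The layout, as a sketch of the code above:

	n      = tcp_hashinfo.bhash_size;	/* buckets per table */
	bhash  = tcp_hashinfo.bhash;		/* [0, n):  keyed by port */
	bhash2 = bhash + n;			/* [n, 2n): keyed by (addr, port) */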
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index ec5550089b4d..54eec33c6e1c 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -276,7 +276,7 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
} else { /* no RTT sample yet */
rtt_us = USEC_PER_MSEC; /* use nominal default RTT */
}
- bw = (u64)tp->snd_cwnd * BW_UNIT;
+ bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT;
do_div(bw, rtt_us);
sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
}
@@ -310,7 +310,7 @@ static u32 bbr_tso_segs_goal(struct sock *sk)
*/
bytes = min_t(unsigned long,
sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
- GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+ GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER);
segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
return min(segs, 0x7FU);
@@ -323,9 +323,9 @@ static void bbr_save_cwnd(struct sock *sk)
struct bbr *bbr = inet_csk_ca(sk);
if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT)
- bbr->prior_cwnd = tp->snd_cwnd; /* this cwnd is good enough */
+ bbr->prior_cwnd = tcp_snd_cwnd(tp); /* this cwnd is good enough */
else /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */
- bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd);
+ bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp));
}
static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
@@ -482,7 +482,7 @@ static bool bbr_set_cwnd_to_recover_or_restore(
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state;
- u32 cwnd = tp->snd_cwnd;
+ u32 cwnd = tcp_snd_cwnd(tp);
/* An ACK for P pkts should release at most 2*P packets. We do this
* in two steps. First, here we deduct the number of lost packets.
@@ -520,7 +520,7 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- u32 cwnd = tp->snd_cwnd, target_cwnd = 0;
+ u32 cwnd = tcp_snd_cwnd(tp), target_cwnd = 0;
if (!acked)
goto done; /* no packet fully ACKed; just apply caps */
@@ -544,9 +544,9 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
cwnd = max(cwnd, bbr_cwnd_min_target);
done:
- tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); /* apply global cap */
+ tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* apply global cap */
if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */
- tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), bbr_cwnd_min_target));
}
/* End cycle phase if it's time and/or we hit the phase's in-flight target. */
@@ -856,7 +856,7 @@ static void bbr_update_ack_aggregation(struct sock *sk,
bbr->ack_epoch_acked = min_t(u32, 0xFFFFF,
bbr->ack_epoch_acked + rs->acked_sacked);
extra_acked = bbr->ack_epoch_acked - expected_acked;
- extra_acked = min(extra_acked, tp->snd_cwnd);
+ extra_acked = min(extra_acked, tcp_snd_cwnd(tp));
if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx])
bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked;
}
@@ -914,7 +914,7 @@ static void bbr_check_probe_rtt_done(struct sock *sk)
return;
bbr->min_rtt_stamp = tcp_jiffies32; /* wait a while until PROBE_RTT */
- tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd);
+ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd));
bbr_reset_mode(sk);
}
@@ -1093,7 +1093,7 @@ static u32 bbr_undo_cwnd(struct sock *sk)
bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */
bbr->full_bw_cnt = 0;
bbr_reset_lt_bw_sampling(sk);
- return tcp_sk(sk)->snd_cwnd;
+ return tcp_snd_cwnd(tcp_sk(sk));
}
/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
@@ -1154,38 +1154,40 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
.set_state = bbr_set_state,
};
-BTF_SET_START(tcp_bbr_kfunc_ids)
+BTF_SET8_START(tcp_bbr_check_kfunc_ids)
#ifdef CONFIG_X86
#ifdef CONFIG_DYNAMIC_FTRACE
-BTF_ID(func, bbr_init)
-BTF_ID(func, bbr_main)
-BTF_ID(func, bbr_sndbuf_expand)
-BTF_ID(func, bbr_undo_cwnd)
-BTF_ID(func, bbr_cwnd_event)
-BTF_ID(func, bbr_ssthresh)
-BTF_ID(func, bbr_min_tso_segs)
-BTF_ID(func, bbr_set_state)
+BTF_ID_FLAGS(func, bbr_init)
+BTF_ID_FLAGS(func, bbr_main)
+BTF_ID_FLAGS(func, bbr_sndbuf_expand)
+BTF_ID_FLAGS(func, bbr_undo_cwnd)
+BTF_ID_FLAGS(func, bbr_cwnd_event)
+BTF_ID_FLAGS(func, bbr_ssthresh)
+BTF_ID_FLAGS(func, bbr_min_tso_segs)
+BTF_ID_FLAGS(func, bbr_set_state)
#endif
#endif
-BTF_SET_END(tcp_bbr_kfunc_ids)
+BTF_SET8_END(tcp_bbr_check_kfunc_ids)
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &tcp_bbr_check_kfunc_ids,
+};
static int __init bbr_register(void)
{
int ret;
BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
- ret = tcp_register_congestion_control(&tcp_bbr_cong_ops);
- if (ret)
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_bbr_kfunc_set);
+ if (ret < 0)
return ret;
- register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
- return 0;
+ return tcp_register_congestion_control(&tcp_bbr_cong_ops);
}
static void __exit bbr_unregister(void)
{
- unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
}
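Throughout this and the following congestion-control files, direct tp->snd_cwnd reads and writes move behind accessors, which centralizes sanity checking on the write side. The helpers are presumably of this shape (sketch of the include/net/tcp.h additions):

	static inline u32 tcp_snd_cwnd(const struct tcp_sock *tp)
	{
		return tp->snd_cwnd;
	}

	static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val)
	{
		WARN_ON_ONCE((int)val <= 0);	/* catch zero/negative cwnd bugs */
		tp->snd_cwnd = val;
	}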
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index f5f588b1f6e9..58358bf92e1b 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -150,7 +150,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!acked)
return;
}
- bictcp_update(ca, tp->snd_cwnd);
+ bictcp_update(ca, tcp_snd_cwnd(tp));
tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
@@ -166,16 +166,16 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
ca->epoch_start = 0; /* end of epoch */
/* Wmax and fast convergence */
- if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
- ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
+ if (tcp_snd_cwnd(tp) < ca->last_max_cwnd && fast_convergence)
+ ca->last_max_cwnd = (tcp_snd_cwnd(tp) * (BICTCP_BETA_SCALE + beta))
/ (2 * BICTCP_BETA_SCALE);
else
- ca->last_max_cwnd = tp->snd_cwnd;
+ ca->last_max_cwnd = tcp_snd_cwnd(tp);
- if (tp->snd_cwnd <= low_window)
- return max(tp->snd_cwnd >> 1U, 2U);
+ if (tcp_snd_cwnd(tp) <= low_window)
+ return max(tcp_snd_cwnd(tp) >> 1U, 2U);
else
- return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
+ return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U);
}
static void bictcp_state(struct sock *sk, u8 new_state)
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 9b9b02052fd3..cf9c3e8f7ccb 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -138,10 +138,9 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
struct sk_psock *psock = sk_psock_get(sk);
int ret;
- if (unlikely(!psock)) {
- sk_msg_free(sk, msg);
- return 0;
- }
+ if (unlikely(!psock))
+ return -EPIPE;
+
ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes, flags) :
tcp_bpf_push_locked(sk, msg, bytes, flags, false);
sk_psock_put(sk, psock);
@@ -175,7 +174,6 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
static int tcp_bpf_recvmsg_parser(struct sock *sk,
struct msghdr *msg,
size_t len,
- int nonblock,
int flags,
int *addr_len)
{
@@ -187,7 +185,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
psock = sk_psock_get(sk);
if (unlikely(!psock))
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return tcp_recvmsg(sk, msg, len, flags, addr_len);
lock_sock(sk);
msg_bytes_ready:
@@ -212,7 +210,7 @@ msg_bytes_ready:
goto out;
}
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
if (!timeo) {
copied = -EAGAIN;
goto out;
@@ -235,7 +233,7 @@ out:
}
static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct sk_psock *psock;
int copied, ret;
@@ -245,11 +243,11 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
psock = sk_psock_get(sk);
if (unlikely(!psock))
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return tcp_recvmsg(sk, msg, len, flags, addr_len);
if (!skb_queue_empty(&sk->sk_receive_queue) &&
sk_psock_queue_empty(psock)) {
sk_psock_put(sk, psock);
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return tcp_recvmsg(sk, msg, len, flags, addr_len);
}
lock_sock(sk);
msg_bytes_ready:
@@ -258,14 +256,14 @@ msg_bytes_ready:
long timeo;
int data;
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
data = tcp_msg_wait_data(sk, psock, timeo);
if (data) {
if (!sk_psock_queue_empty(psock))
goto msg_bytes_ready;
release_sock(sk);
sk_psock_put(sk, psock);
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return tcp_recvmsg(sk, msg, len, flags, addr_len);
}
copied = -EAGAIN;
}
@@ -280,7 +278,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
{
bool cork = false, enospc = sk_msg_full(msg);
struct sock *sk_redir;
- u32 tosend, delta = 0;
+ u32 tosend, origsize, sent, delta = 0;
u32 eval = __SK_NONE;
int ret;
@@ -338,7 +336,9 @@ more_data:
sk_msg_return(sk, msg, tosend);
release_sock(sk);
+ origsize = msg->sg.size;
ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
+ sent = origsize - msg->sg.size;
if (eval == __SK_REDIRECT)
sock_put(sk_redir);
@@ -375,8 +375,11 @@ more_data:
}
if (msg &&
msg->sg.data[msg->sg.start].page_link &&
- msg->sg.data[msg->sg.start].length)
+ msg->sg.data[msg->sg.start].length) {
+ if (eval == __SK_REDIRECT)
+ sk_mem_charge(sk, tosend - sent);
goto more_data;
+ }
}
return ret;
}
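The redirect path above also fixes send-side memory accounting: sk_msg_return() uncharges the full tosend before the redirect, but tcp_bpf_sendmsg_redir() may consume only part of the msg (and now returns -EPIPE instead of silently discarding data when the psock is gone), so the unsent remainder has to be re-charged to the source socket before looping. Conceptually:

	origsize = msg->sg.size;		/* before the redirect */
	ret  = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
	sent = origsize - msg->sg.size;		/* bytes actually consumed */
	if (eval == __SK_REDIRECT)
		sk_mem_charge(sk, tosend - sent); /* re-charge the leftover */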
@@ -539,6 +542,7 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
struct proto *base)
{
prot[TCP_BPF_BASE] = *base;
+ prot[TCP_BPF_BASE].destroy = sock_map_destroy;
prot[TCP_BPF_BASE].close = sock_map_close;
prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg;
prot[TCP_BPF_BASE].sock_is_readable = sk_msg_is_readable;
@@ -605,14 +609,11 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
} else {
sk->sk_write_space = psock->saved_write_space;
/* Pairs with lockless read in sk_clone_lock() */
- WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+ sock_replace_proto(sk, psock->sk_proto);
}
return 0;
}
- if (inet_csk_has_ulp(sk))
- return -EINVAL;
-
if (sk->sk_family == AF_INET6) {
if (tcp_bpf_assert_proto_ops(psock->sk_proto))
return -EINVAL;
@@ -621,7 +622,7 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
}
/* Pairs with lockless read in sk_clone_lock() */
- WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]);
+ sock_replace_proto(sk, &tcp_bpf_prots[family][config]);
return 0;
}
EXPORT_SYMBOL_GPL(tcp_bpf_update_proto);
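Dropping the separate nonblock argument from the recvmsg paths matches the core change that folds non-blocking behaviour into MSG_DONTWAIT; the timeout computation carries the whole decision, and sock_replace_proto() wraps the former open-coded WRITE_ONCE() on sk->sk_prot. A sketch of the flags-driven path:

	/* sock_rcvtimeo(sk, noblock) is sk->sk_rcvtimeo, or 0 if noblock */
	long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	if (!timeo)
		return -EAGAIN;		/* non-blocking caller, nothing queued */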
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 709d23801823..ba4d98e510e0 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -161,8 +161,8 @@ static void tcp_cdg_hystart_update(struct sock *sk)
LINUX_MIB_TCPHYSTARTTRAINDETECT);
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTTRAINCWND,
- tp->snd_cwnd);
- tp->snd_ssthresh = tp->snd_cwnd;
+ tcp_snd_cwnd(tp));
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
return;
}
}
@@ -180,8 +180,8 @@ static void tcp_cdg_hystart_update(struct sock *sk)
LINUX_MIB_TCPHYSTARTDELAYDETECT);
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTDELAYCWND,
- tp->snd_cwnd);
- tp->snd_ssthresh = tp->snd_cwnd;
+ tcp_snd_cwnd(tp));
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
}
}
}
@@ -243,7 +243,7 @@ static bool tcp_cdg_backoff(struct sock *sk, u32 grad)
struct cdg *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
- if (prandom_u32() <= nexp_u32(grad * backoff_factor))
+ if (get_random_u32() <= nexp_u32(grad * backoff_factor))
return false;
if (use_ineff) {
@@ -252,7 +252,7 @@ static bool tcp_cdg_backoff(struct sock *sk, u32 grad)
return false;
}
- ca->shadow_wnd = max(ca->shadow_wnd, tp->snd_cwnd);
+ ca->shadow_wnd = max(ca->shadow_wnd, tcp_snd_cwnd(tp));
ca->state = CDG_BACKOFF;
tcp_enter_cwr(sk);
return true;
@@ -285,14 +285,14 @@ static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked)
}
if (!tcp_is_cwnd_limited(sk)) {
- ca->shadow_wnd = min(ca->shadow_wnd, tp->snd_cwnd);
+ ca->shadow_wnd = min(ca->shadow_wnd, tcp_snd_cwnd(tp));
return;
}
- prior_snd_cwnd = tp->snd_cwnd;
+ prior_snd_cwnd = tcp_snd_cwnd(tp);
tcp_reno_cong_avoid(sk, ack, acked);
- incr = tp->snd_cwnd - prior_snd_cwnd;
+ incr = tcp_snd_cwnd(tp) - prior_snd_cwnd;
ca->shadow_wnd = max(ca->shadow_wnd, ca->shadow_wnd + incr);
}
@@ -331,15 +331,15 @@ static u32 tcp_cdg_ssthresh(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (ca->state == CDG_BACKOFF)
- return max(2U, (tp->snd_cwnd * min(1024U, backoff_beta)) >> 10);
+ return max(2U, (tcp_snd_cwnd(tp) * min(1024U, backoff_beta)) >> 10);
if (ca->state == CDG_NONFULL && use_tolerance)
- return tp->snd_cwnd;
+ return tcp_snd_cwnd(tp);
- ca->shadow_wnd = min(ca->shadow_wnd >> 1, tp->snd_cwnd);
+ ca->shadow_wnd = min(ca->shadow_wnd >> 1, tcp_snd_cwnd(tp));
if (use_shadow)
- return max3(2U, ca->shadow_wnd, tp->snd_cwnd >> 1);
- return max(2U, tp->snd_cwnd >> 1);
+ return max3(2U, ca->shadow_wnd, tcp_snd_cwnd(tp) >> 1);
+ return max(2U, tcp_snd_cwnd(tp) >> 1);
}
static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev)
@@ -357,7 +357,7 @@ static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev)
ca->gradients = gradients;
ca->rtt_seq = tp->snd_nxt;
- ca->shadow_wnd = tp->snd_cwnd;
+ ca->shadow_wnd = tcp_snd_cwnd(tp);
break;
case CA_EVENT_COMPLETE_CWR:
ca->state = CDG_UNKNOWN;
@@ -375,12 +375,13 @@ static void tcp_cdg_init(struct sock *sk)
struct cdg *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ ca->gradients = NULL;
/* We silently fall back to window = 1 if allocation fails. */
if (window > 1)
ca->gradients = kcalloc(window, sizeof(ca->gradients[0]),
GFP_NOWAIT | __GFP_NOWARN);
ca->rtt_seq = tp->snd_nxt;
- ca->shadow_wnd = tp->snd_cwnd;
+ ca->shadow_wnd = tcp_snd_cwnd(tp);
}
static void tcp_cdg_release(struct sock *sk)
@@ -388,6 +389,7 @@ static void tcp_cdg_release(struct sock *sk)
struct cdg *ca = inet_csk_ca(sk);
kfree(ca->gradients);
+ ca->gradients = NULL;
}
static struct tcp_congestion_ops tcp_cdg __read_mostly = {
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index db5831e6c136..d3cae40749e8 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -16,6 +16,7 @@
#include <linux/gfp.h>
#include <linux/jhash.h>
#include <net/tcp.h>
+#include <trace/events/tcp.h>
static DEFINE_SPINLOCK(tcp_cong_list_lock);
static LIST_HEAD(tcp_cong_list);
@@ -33,6 +34,17 @@ struct tcp_congestion_ops *tcp_ca_find(const char *name)
return NULL;
}
+void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ trace_tcp_cong_state_set(sk, ca_state);
+
+ if (icsk->icsk_ca_ops->set_state)
+ icsk->icsk_ca_ops->set_state(sk, ca_state);
+ icsk->icsk_ca_state = ca_state;
+}
+
/* Must be called with rcu lock held */
static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net,
const char *name)
@@ -135,7 +147,6 @@ u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca)
return key;
}
-EXPORT_SYMBOL_GPL(tcp_ca_get_key_by_name);
char *tcp_ca_get_name_by_key(u32 key, char *buffer)
{
@@ -151,7 +162,6 @@ char *tcp_ca_get_name_by_key(u32 key, char *buffer)
return ret;
}
-EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key);
/* Assign choice of congestion control. */
void tcp_assign_congestion_control(struct sock *sk)
@@ -395,10 +405,10 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
*/
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
{
- u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);
+ u32 cwnd = min(tcp_snd_cwnd(tp) + acked, tp->snd_ssthresh);
- acked -= cwnd - tp->snd_cwnd;
- tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
+ acked -= cwnd - tcp_snd_cwnd(tp);
+ tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp));
return acked;
}
@@ -412,7 +422,7 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
/* If credits accumulated at a higher w, apply them gently now. */
if (tp->snd_cwnd_cnt >= w) {
tp->snd_cwnd_cnt = 0;
- tp->snd_cwnd++;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
}
tp->snd_cwnd_cnt += acked;
@@ -420,9 +430,9 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
u32 delta = tp->snd_cwnd_cnt / w;
tp->snd_cwnd_cnt -= delta * w;
- tp->snd_cwnd += delta;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + delta);
}
- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_cwnd_clamp));
}
EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
@@ -447,7 +457,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
return;
}
/* In dangerous area, increase slowly. */
- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+ tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked);
}
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
@@ -456,7 +466,7 @@ u32 tcp_reno_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- return max(tp->snd_cwnd >> 1U, 2U);
+ return max(tcp_snd_cwnd(tp) >> 1U, 2U);
}
EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
@@ -464,7 +474,7 @@ u32 tcp_reno_undo_cwnd(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- return max(tp->snd_cwnd, tp->prior_cwnd);
+ return max(tcp_snd_cwnd(tp), tp->prior_cwnd);
}
EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd);
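tcp_cong_avoid_ai() keeps the classic Reno additive increase under the new accessors: credits accumulate in snd_cwnd_cnt and convert to whole-cwnd increments. A toy model with a worked example (illustration, not kernel code):

	static void ai_step(u32 *cwnd, u32 *cnt, u32 w, u32 acked)
	{
		*cnt += acked;
		if (*cnt >= w) {
			*cwnd += *cnt / w;	/* apply accumulated credit */
			*cnt  %= w;
		}
	}
	/* w = cwnd = 10, acked = 4 per ACK: cnt runs 4, 8, 12 ->
	 * cwnd becomes 11 with cnt = 2, i.e. roughly +1 segment per RTT */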
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index e07837e23b3f..768c10c1f649 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -334,7 +334,7 @@ static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!acked)
return;
}
- bictcp_update(ca, tp->snd_cwnd, acked);
+ bictcp_update(ca, tcp_snd_cwnd(tp), acked);
tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
@@ -346,13 +346,13 @@ static u32 cubictcp_recalc_ssthresh(struct sock *sk)
ca->epoch_start = 0; /* end of epoch */
/* Wmax and fast convergence */
- if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
- ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
+ if (tcp_snd_cwnd(tp) < ca->last_max_cwnd && fast_convergence)
+ ca->last_max_cwnd = (tcp_snd_cwnd(tp) * (BICTCP_BETA_SCALE + beta))
/ (2 * BICTCP_BETA_SCALE);
else
- ca->last_max_cwnd = tp->snd_cwnd;
+ ca->last_max_cwnd = tcp_snd_cwnd(tp);
- return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
+ return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U);
}
static void cubictcp_state(struct sock *sk, u8 new_state)
@@ -372,7 +372,7 @@ static void cubictcp_state(struct sock *sk, u8 new_state)
* We apply another 100% factor because @rate is doubled at this point.
* We cap the cushion to 1ms.
*/
-static u32 hystart_ack_delay(struct sock *sk)
+static u32 hystart_ack_delay(const struct sock *sk)
{
unsigned long rate;
@@ -380,7 +380,7 @@ static u32 hystart_ack_delay(struct sock *sk)
if (!rate)
return 0;
return min_t(u64, USEC_PER_MSEC,
- div64_ul((u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate));
+ div64_ul((u64)sk->sk_gso_max_size * 4 * USEC_PER_SEC, rate));
}
static void hystart_update(struct sock *sk, u32 delay)
@@ -413,13 +413,13 @@ static void hystart_update(struct sock *sk, u32 delay)
ca->found = 1;
pr_debug("hystart_ack_train (%u > %u) delay_min %u (+ ack_delay %u) cwnd %u\n",
now - ca->round_start, threshold,
- ca->delay_min, hystart_ack_delay(sk), tp->snd_cwnd);
+ ca->delay_min, hystart_ack_delay(sk), tcp_snd_cwnd(tp));
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTTRAINDETECT);
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTTRAINCWND,
- tp->snd_cwnd);
- tp->snd_ssthresh = tp->snd_cwnd;
+ tcp_snd_cwnd(tp));
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
}
}
}
@@ -438,8 +438,8 @@ static void hystart_update(struct sock *sk, u32 delay)
LINUX_MIB_TCPHYSTARTDELAYDETECT);
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTDELAYCWND,
- tp->snd_cwnd);
- tp->snd_ssthresh = tp->snd_cwnd;
+ tcp_snd_cwnd(tp));
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
}
}
}
@@ -469,7 +469,7 @@ static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample)
/* hystart triggers when cwnd is larger than some threshold */
if (!ca->found && tcp_in_slow_start(tp) && hystart &&
- tp->snd_cwnd >= hystart_low_window)
+ tcp_snd_cwnd(tp) >= hystart_low_window)
hystart_update(sk, delay);
}
@@ -485,20 +485,23 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
.name = "cubic",
};
-BTF_SET_START(tcp_cubic_kfunc_ids)
+BTF_SET8_START(tcp_cubic_check_kfunc_ids)
#ifdef CONFIG_X86
#ifdef CONFIG_DYNAMIC_FTRACE
-BTF_ID(func, cubictcp_init)
-BTF_ID(func, cubictcp_recalc_ssthresh)
-BTF_ID(func, cubictcp_cong_avoid)
-BTF_ID(func, cubictcp_state)
-BTF_ID(func, cubictcp_cwnd_event)
-BTF_ID(func, cubictcp_acked)
+BTF_ID_FLAGS(func, cubictcp_init)
+BTF_ID_FLAGS(func, cubictcp_recalc_ssthresh)
+BTF_ID_FLAGS(func, cubictcp_cong_avoid)
+BTF_ID_FLAGS(func, cubictcp_state)
+BTF_ID_FLAGS(func, cubictcp_cwnd_event)
+BTF_ID_FLAGS(func, cubictcp_acked)
#endif
#endif
-BTF_SET_END(tcp_cubic_kfunc_ids)
+BTF_SET8_END(tcp_cubic_check_kfunc_ids)
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_cubic_kfunc_ids, tcp_cubic_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &tcp_cubic_check_kfunc_ids,
+};
static int __init cubictcp_register(void)
{
@@ -534,16 +537,14 @@ static int __init cubictcp_register(void)
/* divide by bic_scale and by constant Srtt (100ms) */
do_div(cube_factor, bic_scale * 10);
- ret = tcp_register_congestion_control(&cubictcp);
- if (ret)
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_cubic_kfunc_set);
+ if (ret < 0)
return ret;
- register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
- return 0;
+ return tcp_register_congestion_control(&cubictcp);
}
static void __exit cubictcp_unregister(void)
{
- unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
tcp_unregister_congestion_control(&cubictcp);
}
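Switching hystart_ack_delay() from the compile-time GSO_MAX_SIZE to the per-socket sk_gso_max_size keeps the ACK-train cushion meaningful once sockets can carry larger (BIG TCP) or smaller GSO sizes. Plugging in illustrative numbers:

	/* delay = min(1 ms, sk_gso_max_size * 4 * USEC_PER_SEC / rate)
	 * e.g. sk_gso_max_size = 65536 B, pacing rate = 10^9 B/s:
	 *      65536 * 4 * 10^6 / 10^9 = 262 us  (the 1000 us cap is not hit) */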
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 0d7ab3cc7b61..2a6c0dd665a4 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -106,8 +106,8 @@ static u32 dctcp_ssthresh(struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
- ca->loss_cwnd = tp->snd_cwnd;
- return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
+ ca->loss_cwnd = tcp_snd_cwnd(tp);
+ return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * ca->dctcp_alpha) >> 11U), 2U);
}
static void dctcp_update_alpha(struct sock *sk, u32 flags)
@@ -148,8 +148,8 @@ static void dctcp_react_to_loss(struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
- ca->loss_cwnd = tp->snd_cwnd;
- tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
+ ca->loss_cwnd = tcp_snd_cwnd(tp);
+ tp->snd_ssthresh = max(tcp_snd_cwnd(tp) >> 1U, 2U);
}
static void dctcp_state(struct sock *sk, u8 new_state)
@@ -211,8 +211,9 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
static u32 dctcp_cwnd_undo(struct sock *sk)
{
const struct dctcp *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
- return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+ return max(tcp_snd_cwnd(tp), ca->loss_cwnd);
}
static struct tcp_congestion_ops dctcp __read_mostly = {
@@ -238,36 +239,38 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = {
.name = "dctcp-reno",
};
-BTF_SET_START(tcp_dctcp_kfunc_ids)
+BTF_SET8_START(tcp_dctcp_check_kfunc_ids)
#ifdef CONFIG_X86
#ifdef CONFIG_DYNAMIC_FTRACE
-BTF_ID(func, dctcp_init)
-BTF_ID(func, dctcp_update_alpha)
-BTF_ID(func, dctcp_cwnd_event)
-BTF_ID(func, dctcp_ssthresh)
-BTF_ID(func, dctcp_cwnd_undo)
-BTF_ID(func, dctcp_state)
+BTF_ID_FLAGS(func, dctcp_init)
+BTF_ID_FLAGS(func, dctcp_update_alpha)
+BTF_ID_FLAGS(func, dctcp_cwnd_event)
+BTF_ID_FLAGS(func, dctcp_ssthresh)
+BTF_ID_FLAGS(func, dctcp_cwnd_undo)
+BTF_ID_FLAGS(func, dctcp_state)
#endif
#endif
-BTF_SET_END(tcp_dctcp_kfunc_ids)
+BTF_SET8_END(tcp_dctcp_check_kfunc_ids)
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_dctcp_kfunc_ids, tcp_dctcp_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &tcp_dctcp_check_kfunc_ids,
+};
static int __init dctcp_register(void)
{
int ret;
BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE);
- ret = tcp_register_congestion_control(&dctcp);
- if (ret)
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_dctcp_kfunc_set);
+ if (ret < 0)
return ret;
- register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
- return 0;
+ return tcp_register_congestion_control(&dctcp);
}
static void __exit dctcp_unregister(void)
{
- unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
tcp_unregister_congestion_control(&dctcp);
}
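bbr, cubic and dctcp all move to the same registration shape: the BTF kfunc set is made known to the BPF verifier first, and tcp_register_congestion_control() runs last, so the algorithm is never selectable while its kfuncs are unregistered, and a registration failure needs no unwinding. The skeleton (foo_* names are placeholders):

	static int __init foo_register(void)
	{
		int ret;

		ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
						&foo_kfunc_set);
		if (ret < 0)
			return ret;
		return tcp_register_congestion_control(&foo_ops);
	}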
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 75a1c985f49a..01b50fa79189 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -181,13 +181,21 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
const struct inet_diag_req_v2 *r)
{
- inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r);
+ struct inet_hashinfo *hinfo;
+
+ hinfo = sock_net(cb->skb->sk)->ipv4.tcp_death_row.hashinfo;
+
+ inet_diag_dump_icsk(hinfo, skb, cb, r);
}
static int tcp_diag_dump_one(struct netlink_callback *cb,
const struct inet_diag_req_v2 *req)
{
- return inet_diag_dump_one_icsk(&tcp_hashinfo, cb, req);
+ struct inet_hashinfo *hinfo;
+
+ hinfo = sock_net(cb->skb->sk)->ipv4.tcp_death_row.hashinfo;
+
+ return inet_diag_dump_one_icsk(hinfo, cb, req);
}
#ifdef CONFIG_INET_DIAG_DESTROY
@@ -195,9 +203,13 @@ static int tcp_diag_destroy(struct sk_buff *in_skb,
const struct inet_diag_req_v2 *req)
{
struct net *net = sock_net(in_skb->sk);
- struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req);
+ struct inet_hashinfo *hinfo;
+ struct sock *sk;
int err;
+ hinfo = net->ipv4.tcp_death_row.hashinfo;
+ sk = inet_diag_find_one_icsk(net, hinfo, req);
+
if (IS_ERR(sk))
return PTR_ERR(sk);
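tcp_diag stops referencing the global tcp_hashinfo and resolves the table through the namespace, groundwork for per-netns ehash tables: once net->ipv4.tcp_death_row.hashinfo can differ per namespace, every lookup must go through it. The repeated lookup could be read as (hypothetical helper, for illustration only):

	static struct inet_hashinfo *tcp_get_hashinfo(const struct net *net)
	{
		return net->ipv4.tcp_death_row.hashinfo;
	}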
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index fdbcf2a6d08e..45cc7f1ca296 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -272,8 +272,9 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
* The request socket is not added to the ehash
* because it's been added to the accept queue directly.
*/
+ req->timeout = tcp_timeout_init(child);
inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
- TCP_TIMEOUT_INIT, TCP_RTO_MAX);
+ req->timeout, TCP_RTO_MAX);
refcount_set(&req->rsk_refcnt, 2);
@@ -332,7 +333,7 @@ static bool tcp_fastopen_no_cookie(const struct sock *sk,
const struct dst_entry *dst,
int flag)
{
- return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) ||
+ return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) ||
tcp_sk(sk)->fastopen_no_cookie ||
(dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE));
}
@@ -347,7 +348,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
const struct dst_entry *dst)
{
bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
- int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
struct tcp_fastopen_cookie valid_foc = { .len = -1 };
struct sock *child;
int ret = 0;
@@ -489,7 +490,7 @@ void tcp_fastopen_active_disable(struct sock *sk)
{
struct net *net = sock_net(sk);
- if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout))
return;
/* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */
@@ -510,7 +511,8 @@ void tcp_fastopen_active_disable(struct sock *sk)
*/
bool tcp_fastopen_active_should_disable(struct sock *sk)
{
- unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
+ unsigned int tfo_bh_timeout =
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout);
unsigned long timeout;
int tfo_da_times;
int multiplier;
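The READ_ONCE() wrappers on ipv4 sysctls here (and throughout tcp_input.c below) annotate the fact that these knobs are updated locklessly by the sysctl handlers while the data path reads them concurrently; the annotation prevents load tearing and documents the race for KCSAN. Schematically:

	/* sysctl handler (writer), conceptually: */
	WRITE_ONCE(net->ipv4.sysctl_tcp_fastopen, new_val);

	/* any packet-processing path (reader): */
	int tfo = READ_ONCE(net->ipv4.sysctl_tcp_fastopen);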
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 349069d6cd0a..c6de5ce79ad3 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -127,22 +127,22 @@ static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* snd_cwnd <=
* hstcp_aimd_vals[ca->ai].cwnd
*/
- if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) {
- while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
+ if (tcp_snd_cwnd(tp) > hstcp_aimd_vals[ca->ai].cwnd) {
+ while (tcp_snd_cwnd(tp) > hstcp_aimd_vals[ca->ai].cwnd &&
ca->ai < HSTCP_AIMD_MAX - 1)
ca->ai++;
- } else if (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd) {
- while (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd)
+ } else if (ca->ai && tcp_snd_cwnd(tp) <= hstcp_aimd_vals[ca->ai-1].cwnd) {
+ while (ca->ai && tcp_snd_cwnd(tp) <= hstcp_aimd_vals[ca->ai-1].cwnd)
ca->ai--;
}
/* Do additive increase */
- if (tp->snd_cwnd < tp->snd_cwnd_clamp) {
+ if (tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp) {
/* cwnd = cwnd + a(w) / cwnd */
tp->snd_cwnd_cnt += ca->ai + 1;
- if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
- tp->snd_cwnd_cnt -= tp->snd_cwnd;
- tp->snd_cwnd++;
+ if (tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) {
+ tp->snd_cwnd_cnt -= tcp_snd_cwnd(tp);
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
}
}
}
@@ -154,7 +154,7 @@ static u32 hstcp_ssthresh(struct sock *sk)
struct hstcp *ca = inet_csk_ca(sk);
/* Do multiplicative decrease */
- return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
+ return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
}
static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 55adcfcf96fe..52b1f2665dfa 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -124,7 +124,7 @@ static void measure_achieved_throughput(struct sock *sk,
ca->packetcount += sample->pkts_acked;
- if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1) &&
+ if (ca->packetcount >= tcp_snd_cwnd(tp) - (ca->alpha >> 7 ? : 1) &&
now - ca->lasttime >= ca->minRTT &&
ca->minRTT > 0) {
__u32 cur_Bi = ca->packetcount * HZ / (now - ca->lasttime);
@@ -225,7 +225,7 @@ static u32 htcp_recalc_ssthresh(struct sock *sk)
const struct htcp *ca = inet_csk_ca(sk);
htcp_param_update(sk);
- return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
+ return max((tcp_snd_cwnd(tp) * ca->beta) >> 7, 2U);
}
static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
@@ -242,9 +242,9 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
/* In dangerous area, increase slowly.
* In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
*/
- if ((tp->snd_cwnd_cnt * ca->alpha)>>7 >= tp->snd_cwnd) {
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
+ if ((tp->snd_cwnd_cnt * ca->alpha)>>7 >= tcp_snd_cwnd(tp)) {
+ if (tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp)
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
tp->snd_cwnd_cnt = 0;
htcp_alpha_update(ca);
} else
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index be39327e04e6..abd7d91807e5 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -54,7 +54,7 @@ static void hybla_init(struct sock *sk)
ca->rho2_7ls = 0;
ca->snd_cwnd_cents = 0;
ca->hybla_en = true;
- tp->snd_cwnd = 2;
+ tcp_snd_cwnd_set(tp, 2);
tp->snd_cwnd_clamp = 65535;
/* 1st Rho measurement based on initial srtt */
@@ -62,7 +62,7 @@ static void hybla_init(struct sock *sk)
/* set minimum rtt as this is the 1st ever seen */
ca->minrtt_us = tp->srtt_us;
- tp->snd_cwnd = ca->rho;
+ tcp_snd_cwnd_set(tp, ca->rho);
}
static void hybla_state(struct sock *sk, u8 ca_state)
@@ -137,31 +137,31 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* as long as increment is estimated as (rho<<7)/window
* it already is <<7 and we can easily count its fractions.
*/
- increment = ca->rho2_7ls / tp->snd_cwnd;
+ increment = ca->rho2_7ls / tcp_snd_cwnd(tp);
if (increment < 128)
tp->snd_cwnd_cnt++;
}
odd = increment % 128;
- tp->snd_cwnd += increment >> 7;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + (increment >> 7));
ca->snd_cwnd_cents += odd;
/* check when fractions goes >=128 and increase cwnd by 1. */
while (ca->snd_cwnd_cents >= 128) {
- tp->snd_cwnd++;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
ca->snd_cwnd_cents -= 128;
tp->snd_cwnd_cnt = 0;
}
/* check when cwnd has not been incremented for a while */
- if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tp->snd_cwnd) {
- tp->snd_cwnd++;
+ if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) {
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
tp->snd_cwnd_cnt = 0;
}
/* clamp down slowstart cwnd to ssthresh value. */
if (is_slowstart)
- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_ssthresh));
- tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_cwnd_clamp));
}
static struct tcp_congestion_ops tcp_hybla __read_mostly = {
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 00e54873213e..c0c81a2c77fa 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -224,7 +224,7 @@ static void update_params(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct illinois *ca = inet_csk_ca(sk);
- if (tp->snd_cwnd < win_thresh) {
+ if (tcp_snd_cwnd(tp) < win_thresh) {
ca->alpha = ALPHA_BASE;
ca->beta = BETA_BASE;
} else if (ca->cnt_rtt > 0) {
@@ -284,9 +284,9 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* tp->snd_cwnd += alpha/tp->snd_cwnd
*/
delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT;
- if (delta >= tp->snd_cwnd) {
- tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd,
- (u32)tp->snd_cwnd_clamp);
+ if (delta >= tcp_snd_cwnd(tp)) {
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp) + delta / tcp_snd_cwnd(tp),
+ (u32)tp->snd_cwnd_clamp));
tp->snd_cwnd_cnt = 0;
}
}
@@ -296,9 +296,11 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct illinois *ca = inet_csk_ca(sk);
+ u32 decr;
/* Multiplicative decrease */
- return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
+ decr = (tcp_snd_cwnd(tp) * ca->beta) >> BETA_SHIFT;
+ return max(tcp_snd_cwnd(tp) - decr, 2U);
}
/* Extract info for Tcp socket info provided via netlink. */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index dc49a3d551eb..0640453fce54 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -414,7 +414,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
per_mss = roundup_pow_of_two(per_mss) +
SKB_DATA_ALIGN(sizeof(struct sk_buff));
- nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
+ nr_segs = max_t(u32, TCP_INIT_CWND, tcp_snd_cwnd(tp));
nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
/* Fast Recovery (RFC 5681 3.2) :
@@ -426,7 +426,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
if (sk->sk_sndbuf < sndmem)
WRITE_ONCE(sk->sk_sndbuf,
- min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
+ min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2])));
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -461,7 +461,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
- int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
+ int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1;
while (tp->rcv_ssthresh <= window) {
if (truesize <= skb->len)
@@ -534,7 +534,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
*/
static void tcp_init_buffer_space(struct sock *sk)
{
- int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
+ int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
struct tcp_sock *tp = tcp_sk(sk);
int maxwin;
@@ -574,16 +574,17 @@ static void tcp_clamp_window(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
struct net *net = sock_net(sk);
+ int rmem2;
icsk->icsk_ack.quick = 0;
+ rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
- if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
+ if (sk->sk_rcvbuf < rmem2 &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_under_memory_pressure(sk) &&
sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
WRITE_ONCE(sk->sk_rcvbuf,
- min(atomic_read(&sk->sk_rmem_alloc),
- net->ipv4.sysctl_tcp_rmem[2]));
+ min(atomic_read(&sk->sk_rmem_alloc), rmem2));
}
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@ -724,7 +725,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
* <prev RTT . ><current RTT .. ><next RTT .... >
*/
- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
int rcvmem, rcvbuf;
u64 rcvwin, grow;
@@ -745,7 +746,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
do_div(rcvwin, tp->advmss);
rcvbuf = min_t(u64, rcvwin * rcvmem,
- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
@@ -805,7 +806,6 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
* restart window, so that we send ACKs quickly.
*/
tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
- sk_mem_reclaim(sk);
}
}
icsk->icsk_ack.lrcvtime = now;
@@ -909,12 +909,12 @@ static void tcp_update_pacing_rate(struct sock *sk)
* If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching
* end of slow start and should slow down.
*/
- if (tp->snd_cwnd < tp->snd_ssthresh / 2)
- rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
+ if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2)
+ rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio);
else
- rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
+ rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio);
- rate *= max(tp->snd_cwnd, tp->packets_out);
+ rate *= max(tcp_snd_cwnd(tp), tp->packets_out);
if (likely(tp->srtt_us))
do_div(rate, tp->srtt_us);
@@ -1051,7 +1051,7 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
tp->undo_marker ? tp->undo_retrans : 0);
#endif
tp->reordering = min_t(u32, (metric + mss - 1) / mss,
- sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
}
/* This exciting event is worth to be remembered. 8) */
@@ -1660,6 +1660,8 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
(mss != tcp_skb_seglen(skb)))
goto out;
+ if (!tcp_skb_can_collapse(prev, skb))
+ goto out;
len = skb->len;
pcount = tcp_skb_pcount(skb);
if (tcp_skb_shift(prev, skb, pcount, len))
@@ -2028,7 +2030,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
return;
tp->reordering = min_t(u32, tp->packets_out + addend,
- sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
tp->reord_seen++;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
}
@@ -2093,7 +2095,8 @@ static inline void tcp_init_undo(struct tcp_sock *tp)
static bool tcp_is_rack(const struct sock *sk)
{
- return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
+ return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_LOSS_DETECTION;
}
/* If we detect SACK reneging, forget all SACK information
@@ -2137,6 +2140,7 @@ void tcp_enter_loss(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
+ u8 reordering;
tcp_timeout_mark_lost(sk);
@@ -2145,22 +2149,24 @@ void tcp_enter_loss(struct sock *sk)
!after(tp->high_seq, tp->snd_una) ||
(icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
tp->prior_ssthresh = tcp_current_ssthresh(sk);
- tp->prior_cwnd = tp->snd_cwnd;
+ tp->prior_cwnd = tcp_snd_cwnd(tp);
tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
tcp_ca_event(sk, CA_EVENT_LOSS);
tcp_init_undo(tp);
}
- tp->snd_cwnd = tcp_packets_in_flight(tp) + 1;
+ tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + 1);
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_jiffies32;
/* Timeout in disordered state after receiving substantial DUPACKs
* suggests that the degree of reordering is over-estimated.
*/
+ reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering);
if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
- tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
+ tp->sacked_out >= reordering)
tp->reordering = min_t(unsigned int, tp->reordering,
- net->ipv4.sysctl_tcp_reordering);
+ reordering);
+
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
tcp_ecn_queue_cwr(tp);
@@ -2169,7 +2175,7 @@ void tcp_enter_loss(struct sock *sk)
* loss recovery is underway except recurring timeout(s) on
* the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
*/
- tp->frto = net->ipv4.sysctl_tcp_frto &&
+ tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
(new_recovery || icsk->icsk_retransmits) &&
!inet_csk(sk)->icsk_mtup.probe_size;
}
@@ -2186,7 +2192,8 @@ void tcp_enter_loss(struct sock *sk)
*/
static bool tcp_check_sack_reneging(struct sock *sk, int flag)
{
- if (flag & FLAG_SACK_RENEGING) {
+ if (flag & FLAG_SACK_RENEGING &&
+ flag & FLAG_SND_UNA_ADVANCED) {
struct tcp_sock *tp = tcp_sk(sk);
unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
msecs_to_jiffies(10));
@@ -2456,7 +2463,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
msg,
&inet->inet_daddr, ntohs(inet->inet_dport),
- tp->snd_cwnd, tcp_left_out(tp),
+ tcp_snd_cwnd(tp), tcp_left_out(tp),
tp->snd_ssthresh, tp->prior_ssthresh,
tp->packets_out);
}
@@ -2465,7 +2472,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
msg,
&sk->sk_v6_daddr, ntohs(inet->inet_dport),
- tp->snd_cwnd, tcp_left_out(tp),
+ tcp_snd_cwnd(tp), tcp_left_out(tp),
tp->snd_ssthresh, tp->prior_ssthresh,
tp->packets_out);
}
@@ -2490,7 +2497,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
if (tp->prior_ssthresh) {
const struct inet_connection_sock *icsk = inet_csk(sk);
- tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
+ tcp_snd_cwnd_set(tp, icsk->icsk_ca_ops->undo_cwnd(sk));
if (tp->prior_ssthresh > tp->snd_ssthresh) {
tp->snd_ssthresh = tp->prior_ssthresh;
@@ -2507,6 +2514,21 @@ static inline bool tcp_may_undo(const struct tcp_sock *tp)
return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
}
+static bool tcp_is_non_sack_preventing_reopen(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
+ /* Hold old state until something *above* high_seq
+ * is ACKed. For Reno it is MUST to prevent false
+ * fast retransmits (RFC2582). SACK TCP is safe. */
+ if (!tcp_any_retrans_done(sk))
+ tp->retrans_stamp = 0;
+ return true;
+ }
+ return false;
+}
+
/* People celebrate: "We love our President!" */
static bool tcp_try_undo_recovery(struct sock *sk)
{
@@ -2529,14 +2551,8 @@ static bool tcp_try_undo_recovery(struct sock *sk)
} else if (tp->rack.reo_wnd_persist) {
tp->rack.reo_wnd_persist--;
}
- if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
- /* Hold old state until something *above* high_seq
- * is ACKed. For Reno it is MUST to prevent false
- * fast retransmits (RFC2582). SACK TCP is safe. */
- if (!tcp_any_retrans_done(sk))
- tp->retrans_stamp = 0;
+ if (tcp_is_non_sack_preventing_reopen(sk))
return true;
- }
tcp_set_ca_state(sk, TCP_CA_Open);
tp->is_sack_reneg = 0;
return false;
@@ -2572,6 +2588,8 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPSPURIOUSRTOS);
inet_csk(sk)->icsk_retransmits = 0;
+ if (tcp_is_non_sack_preventing_reopen(sk))
+ return true;
if (frto_undo || tcp_is_sack(tp)) {
tcp_set_ca_state(sk, TCP_CA_Open);
tp->is_sack_reneg = 0;
@@ -2597,7 +2615,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk)
tp->high_seq = tp->snd_nxt;
tp->tlp_high_seq = 0;
tp->snd_cwnd_cnt = 0;
- tp->prior_cwnd = tp->snd_cwnd;
+ tp->prior_cwnd = tcp_snd_cwnd(tp);
tp->prr_delivered = 0;
tp->prr_out = 0;
tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
@@ -2618,16 +2636,16 @@ void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost,
u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
tp->prior_cwnd - 1;
sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
- } else if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost) {
- sndcnt = min_t(int, delta,
- max_t(int, tp->prr_delivered - tp->prr_out,
- newly_acked_sacked) + 1);
} else {
- sndcnt = min(delta, newly_acked_sacked);
+ sndcnt = max_t(int, tp->prr_delivered - tp->prr_out,
+ newly_acked_sacked);
+ if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost)
+ sndcnt++;
+ sndcnt = min(delta, sndcnt);
}
/* Force a fast retransmit upon entering fast recovery */
sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
- tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
+ tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + sndcnt);
}
static inline void tcp_end_cwnd_reduction(struct sock *sk)
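The rewritten else-branch merges the two non-proportional PRR cases: base the quota on delivered minus already sent, optionally add the one-packet bonus when snd_una advanced with no new losses, then clamp by delta. For the proportional branch above it, a worked example (RFC 6937):

	/* prior_cwnd = 20, ssthresh = 10, prr_delivered = 4, prr_out = 1:
	 *   sndcnt = (10 * 4 + 20 - 1) / 20 - 1 = 2 - 1 = 1
	 * roughly one new segment per two delivered, walking cwnd
	 * down smoothly toward ssthresh */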
@@ -2640,7 +2658,7 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
(inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
- tp->snd_cwnd = tp->snd_ssthresh;
+ tcp_snd_cwnd_set(tp, tp->snd_ssthresh);
tp->snd_cwnd_stamp = tcp_jiffies32;
}
tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
@@ -2704,12 +2722,15 @@ static void tcp_mtup_probe_success(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ u64 val;
- /* FIXME: breaks with very large cwnd */
tp->prior_ssthresh = tcp_current_ssthresh(sk);
- tp->snd_cwnd = tp->snd_cwnd *
- tcp_mss_to_mtu(sk, tp->mss_cache) /
- icsk->icsk_mtup.probe_size;
+
+ val = (u64)tcp_snd_cwnd(tp) * tcp_mss_to_mtu(sk, tp->mss_cache);
+ do_div(val, icsk->icsk_mtup.probe_size);
+ DEBUG_NET_WARN_ON_ONCE((u32)val != val);
+ tcp_snd_cwnd_set(tp, max_t(u32, 1U, val));
+
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_jiffies32;
tp->snd_ssthresh = tcp_current_ssthresh(sk);
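tcp_mtup_probe_success() replaces the 32-bit cwnd rescale (the old "FIXME: breaks with very large cwnd") with a 64-bit intermediate and a floor of 1. The rescale keeps roughly the same number of bytes in flight across the MSS change; with illustrative numbers:

	/* cwnd of 10 segments at mtu 1500, new probe_size 1600:
	 *   val = 10 * 1500 / 1600 = 9 segments
	 * (u64)cwnd * mtu can exceed 32 bits for large cwnd, hence do_div() */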
@@ -3032,7 +3053,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
tp->snd_una == tp->mtu_probe.probe_seq_start) {
tcp_mtup_probe_failed(sk);
/* Restores the reduction we did in tcp_mtup_probe() */
- tp->snd_cwnd++;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
tcp_simple_retransmit(sk);
return;
}
@@ -3049,7 +3070,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
{
- u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
+ u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
struct tcp_sock *tp = tcp_sk(sk);
if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
@@ -3459,7 +3480,8 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
* new SACK or ECE mark may first advance cwnd here and later reduce
* cwnd in tcp_fastretrans_alert() based on more states.
*/
- if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
+ if (tcp_sk(sk)->reordering >
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering))
return flag & FLAG_FORWARD_PROGRESS;
return flag & FLAG_DATA_ACKED;
@@ -3571,7 +3593,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
if (*last_oow_ack_time) {
s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
- if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
+ if (0 <= elapsed &&
+ elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
NET_INC_STATS(net, mib_idx);
return true; /* rate-limited: don't send yet! */
}
@@ -3603,12 +3626,9 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
/* RFC 5961 7 [ACK Throttling] */
static void tcp_send_challenge_ack(struct sock *sk)
{
- /* unprotected vars, we dont care of overwrites */
- static u32 challenge_timestamp;
- static unsigned int challenge_count;
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
- u32 count, now;
+ u32 count, now, ack_limit;
/* First check our per-socket dupack rate limit. */
if (__tcp_oow_rate_limited(net,
@@ -3616,18 +3636,22 @@ static void tcp_send_challenge_ack(struct sock *sk)
&tp->last_oow_ack_time))
return;
+ ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit);
+ if (ack_limit == INT_MAX)
+ goto send_ack;
+
/* Then check host-wide RFC 5961 rate limit. */
now = jiffies / HZ;
- if (now != challenge_timestamp) {
- u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
+ if (now != READ_ONCE(net->ipv4.tcp_challenge_timestamp)) {
u32 half = (ack_limit + 1) >> 1;
- challenge_timestamp = now;
- WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
+ WRITE_ONCE(net->ipv4.tcp_challenge_timestamp, now);
+ WRITE_ONCE(net->ipv4.tcp_challenge_count, half + prandom_u32_max(ack_limit));
}
- count = READ_ONCE(challenge_count);
+ count = READ_ONCE(net->ipv4.tcp_challenge_count);
if (count > 0) {
- WRITE_ONCE(challenge_count, count - 1);
+ WRITE_ONCE(net->ipv4.tcp_challenge_count, count - 1);
+send_ack:
NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
tcp_send_ack(sk);
}
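
The hunk above moves the RFC 5961 challenge-ACK budget from unprotected global statics into per-netns fields and adds an INT_MAX escape hatch meaning "unlimited". A rough userspace model of the once-per-second randomized refill; rand() stands in for prandom_u32_max() and all names are illustrative:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#include <limits.h>
#include <time.h>

struct challenge_limiter {
	uint32_t limit;		/* sysctl_tcp_challenge_ack_limit */
	uint32_t stamp;		/* last refill, in seconds */
	uint32_t count;		/* tokens left this second */
};

static bool challenge_ack_allowed(struct challenge_limiter *cl, uint32_t now_sec)
{
	if (cl->limit == INT_MAX)	/* sentinel: rate limit disabled */
		return true;
	if (now_sec != cl->stamp) {
		uint32_t half = (cl->limit + 1) / 2;

		cl->stamp = now_sec;
		/* Randomize the refill so the budget can't be probed. */
		cl->count = half + (uint32_t)(rand() % cl->limit);
	}
	if (cl->count > 0) {
		cl->count--;
		return true;
	}
	return false;
}

int main(void)
{
	struct challenge_limiter cl = { .limit = 1000 };
	int sent = 0;

	srand((unsigned)time(NULL));
	for (int i = 0; i < 2000; i++)
		sent += challenge_ack_allowed(&cl, 1);
	printf("sent %d of 2000 in one second\n", sent);
	return 0;
}
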
@@ -3764,7 +3788,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (before(ack, prior_snd_una - tp->max_window)) {
if (!(flag & FLAG_NO_CHALLENGE_ACK))
tcp_send_challenge_ack(sk);
- return -1;
+ return -SKB_DROP_REASON_TCP_TOO_OLD_ACK;
}
goto old_ack;
}
@@ -3773,7 +3797,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
* this segment (RFC793 Section 3.9).
*/
if (after(ack, tp->snd_nxt))
- return -1;
+ return -SKB_DROP_REASON_TCP_ACK_UNSENT_DATA;
if (after(ack, prior_snd_una)) {
flag |= FLAG_SND_UNA_ADVANCED;
@@ -3865,7 +3889,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_process_tlp_ack(sk, ack, flag);
if (tcp_ack_is_dubious(sk, flag)) {
- if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
+ if (!(flag & (FLAG_SND_UNA_ADVANCED |
+ FLAG_NOT_DUP | FLAG_DSACKING_ACK))) {
num_dupack = 1;
/* Consider if pure acks were aggregated in tcp_add_backlog() */
if (!(flag & FLAG_DATA))
@@ -3961,7 +3986,7 @@ static bool smc_parse_options(const struct tcphdr *th,
/* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped
* value on success.
*/
-static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
+u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
{
const unsigned char *ptr = (const unsigned char *)(th + 1);
int length = (th->doff * 4) - sizeof(struct tcphdr);
@@ -4000,6 +4025,7 @@ static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
}
return mss;
}
+EXPORT_SYMBOL_GPL(tcp_parse_mss_option);
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
* But, this can also be called on packets in the established flow when
@@ -4050,7 +4076,7 @@ void tcp_parse_options(const struct net *net,
break;
case TCPOPT_WINDOW:
if (opsize == TCPOLEN_WINDOW && th->syn &&
- !estab && net->ipv4.sysctl_tcp_window_scaling) {
+ !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) {
__u8 snd_wscale = *(__u8 *)ptr;
opt_rx->wscale_ok = 1;
if (snd_wscale > TCP_MAX_WSCALE) {
@@ -4066,7 +4092,7 @@ void tcp_parse_options(const struct net *net,
case TCPOPT_TIMESTAMP:
if ((opsize == TCPOLEN_TIMESTAMP) &&
((estab && opt_rx->tstamp_ok) ||
- (!estab && net->ipv4.sysctl_tcp_timestamps))) {
+ (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) {
opt_rx->saw_tstamp = 1;
opt_rx->rcv_tsval = get_unaligned_be32(ptr);
opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
@@ -4074,7 +4100,7 @@ void tcp_parse_options(const struct net *net,
break;
case TCPOPT_SACK_PERM:
if (opsize == TCPOLEN_SACK_PERM && th->syn &&
- !estab && net->ipv4.sysctl_tcp_sack) {
+ !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) {
opt_rx->sack_ok = TCP_SACK_SEEN;
tcp_sack_reset(opt_rx);
}
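
These READ_ONCE() conversions make the lockless sysctl reads tear-free against concurrent writers on the sysctl path. A loose userspace analogue using C11 relaxed atomics; the kernel macros are volatile-cast based, not C11 atomics:

#include <stdio.h>
#include <stdatomic.h>

/* Illustrative knob; the kernel fields are plain ints read with READ_ONCE(). */
static _Atomic int sysctl_tcp_sack = 1;

static int sack_enabled(void)
{
	/* Exactly one load, never torn or re-fetched by the compiler. */
	return atomic_load_explicit(&sysctl_tcp_sack, memory_order_relaxed);
}

int main(void)
{
	atomic_store_explicit(&sysctl_tcp_sack, 0, memory_order_relaxed);
	printf("sack=%d\n", sack_enabled());
	return 0;
}
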
@@ -4384,7 +4410,6 @@ void tcp_fin(struct sock *sk)
skb_rbtree_purge(&tp->out_of_order_queue);
if (tcp_is_sack(tp))
tcp_sack_reset(&tp->rx_opt);
- sk_mem_reclaim(sk);
if (!sock_flag(sk, SOCK_DEAD)) {
sk->sk_state_change(sk);
@@ -4415,7 +4440,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
int mib_idx;
if (before(seq, tp->rcv_nxt))
@@ -4462,7 +4487,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
tcp_rcv_spurious_retrans(sk, skb);
@@ -4672,7 +4697,7 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
{
bool res = tcp_try_coalesce(sk, to, from, fragstolen);
- /* In case tcp_drop() is called later, update to->gso_segs */
+ /* In case tcp_drop_reason() is called later, update to->gso_segs */
if (res) {
u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
max_t(u16, 1, skb_shinfo(from)->gso_segs);
@@ -4682,10 +4707,11 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
return res;
}
-static void tcp_drop(struct sock *sk, struct sk_buff *skb)
+static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
+ enum skb_drop_reason reason)
{
sk_drops_add(sk, skb);
- __kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
}
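
tcp_drop_reason() above is the whole mechanism: the same drop accounting as before, but kfree_skb_reason() now tells tracing why the skb died. A hypothetical userspace mock of the call pattern; the enum values here are illustrative stand-ins for the kernel's skb_drop_reason:

#include <stdio.h>

enum drop_reason {
	DROP_NOT_SPECIFIED,
	DROP_TCP_OFO_DROP,	/* ofo skb already fully covered by rcv_nxt */
	DROP_PROTO_MEM,		/* receive memory exhausted */
	DROP_TCP_OFOMERGE,	/* skb fully contained in another ofo skb */
};

struct pkt { int len; };

static void drop_pkt_reason(struct pkt *p, enum drop_reason reason)
{
	/* kfree_skb_reason() hands the reason to the skb:kfree_skb
	 * tracepoint; logging stands in for that here. */
	printf("dropped %d bytes, reason=%d\n", p->len, reason);
}

int main(void)
{
	struct pkt p = { .len = 1448 };

	drop_pkt_reason(&p, DROP_TCP_OFOMERGE);
	return 0;
}
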
/* This one checks to see if we can put data from the
@@ -4715,7 +4741,7 @@ static void tcp_ofo_queue(struct sock *sk)
rb_erase(&skb->rbnode, &tp->out_of_order_queue);
if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_DROP);
continue;
}
@@ -4771,7 +4797,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
sk->sk_data_ready(sk);
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb, SKB_DROP_REASON_PROTO_MEM);
return;
}
@@ -4834,7 +4860,8 @@ coalesce_done:
/* All the bits are present. Drop. */
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPOFOMERGE);
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb,
+ SKB_DROP_REASON_TCP_OFOMERGE);
skb = NULL;
tcp_dsack_set(sk, seq, end_seq);
goto add_sack;
@@ -4853,7 +4880,8 @@ coalesce_done:
TCP_SKB_CB(skb1)->end_seq);
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPOFOMERGE);
- tcp_drop(sk, skb1);
+ tcp_drop_reason(sk, skb1,
+ SKB_DROP_REASON_TCP_OFOMERGE);
goto merge_right;
}
} else if (tcp_ooo_try_coalesce(sk, skb1,
@@ -4881,7 +4909,7 @@ merge_right:
tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
TCP_SKB_CB(skb1)->end_seq);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
- tcp_drop(sk, skb1);
+ tcp_drop_reason(sk, skb1, SKB_DROP_REASON_TCP_OFOMERGE);
}
/* If there is no skb after us, we are the last_skb ! */
if (!skb1)
@@ -4980,6 +5008,7 @@ void tcp_data_ready(struct sock *sk)
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+ enum skb_drop_reason reason;
bool fragstolen;
int eaten;
@@ -4998,6 +5027,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
skb_dst_drop(skb);
__skb_pull(skb, tcp_hdr(skb)->doff * 4);
+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
tp->rx_opt.dsack = 0;
/* Queue data for delivery to the user.
@@ -5006,6 +5036,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
*/
if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
if (tcp_receive_window(tp) == 0) {
+ reason = SKB_DROP_REASON_TCP_ZEROWINDOW;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
goto out_of_window;
}
@@ -5015,6 +5046,7 @@ queue_and_out:
if (skb_queue_len(&sk->sk_receive_queue) == 0)
sk_forced_mem_schedule(sk, skb->truesize);
else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
+ reason = SKB_DROP_REASON_PROTO_MEM;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
sk->sk_data_ready(sk);
goto drop;
@@ -5051,6 +5083,7 @@ queue_and_out:
if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
tcp_rcv_spurious_retrans(sk, skb);
/* A retransmit, 2nd most common case. Force an immediate ack. */
+ reason = SKB_DROP_REASON_TCP_OLD_DATA;
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
@@ -5058,13 +5091,16 @@ out_of_window:
tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
inet_csk_schedule_ack(sk);
drop:
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb, reason);
return;
}
/* Out of window. F.e. zero window probe. */
- if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
+ if (!before(TCP_SKB_CB(skb)->seq,
+ tp->rcv_nxt + tcp_receive_window(tp))) {
+ reason = SKB_DROP_REASON_TCP_OVERWINDOW;
goto out_of_window;
+ }
if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
/* Partial packet, seq < rcv_next < end_seq */
@@ -5074,6 +5110,7 @@ drop:
* remembering D-SACK for its head made in previous line.
*/
if (!tcp_receive_window(tp)) {
+ reason = SKB_DROP_REASON_TCP_ZEROWINDOW;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
goto out_of_window;
}
@@ -5269,7 +5306,7 @@ new_range:
before(TCP_SKB_CB(skb)->end_seq, start)) {
/* Do not attempt collapsing tiny skbs */
if (range_truesize != head->truesize ||
- end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
+ end - start >= SKB_WITH_OVERHEAD(PAGE_SIZE)) {
tcp_collapse(sk, NULL, &tp->out_of_order_queue,
head, skb, start, end);
} else {
@@ -5315,9 +5352,9 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
prev = rb_prev(node);
rb_erase(node, &tp->out_of_order_queue);
goal -= rb_to_skb(node)->truesize;
- tcp_drop(sk, rb_to_skb(node));
+ tcp_drop_reason(sk, rb_to_skb(node),
+ SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE);
if (!prev || goal <= 0) {
- sk_mem_reclaim(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
!tcp_under_memory_pressure(sk))
break;
@@ -5364,7 +5401,6 @@ static int tcp_prune_queue(struct sock *sk)
skb_peek(&sk->sk_receive_queue),
NULL,
tp->copied_seq, tp->rcv_nxt);
- sk_mem_reclaim(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
return 0;
@@ -5417,7 +5453,7 @@ static bool tcp_should_expand_sndbuf(struct sock *sk)
return false;
/* If we filled the congestion window, do not expand. */
- if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
+ if (tcp_packets_in_flight(tp) >= tcp_snd_cwnd(tp))
return false;
return true;
@@ -5435,7 +5471,17 @@ static void tcp_new_space(struct sock *sk)
INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk);
}
-static void tcp_check_space(struct sock *sk)
+/* Caller made space either from:
+ * 1) Freeing skbs in rtx queues (after tp->snd_una has advanced)
+ * 2) Sent skbs from output queue (and thus advancing tp->snd_nxt)
+ *
+ * We might be able to generate EPOLLOUT to the application if:
+ * 1) Space consumed in output/rtx queues is below sk->sk_sndbuf/2
+ * 2) notsent amount (tp->write_seq - tp->snd_nxt) became
+ * small enough that tcp_stream_memory_free() decides it
+ * is time to generate EPOLLOUT.
+ */
+void tcp_check_space(struct sock *sk)
{
/* pairs with tcp_poll() */
smp_mb();
@@ -5485,7 +5531,7 @@ send_now:
}
if (!tcp_is_sack(tp) ||
- tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
+ tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
goto send_now;
if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
@@ -5506,11 +5552,12 @@ send_now:
if (tp->srtt_us && tp->srtt_us < rtt)
rtt = tp->srtt_us;
- delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
+ delay = min_t(unsigned long,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
rtt * (NSEC_PER_USEC >> 3)/20);
sock_hold(sk);
hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
- sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
HRTIMER_MODE_REL_PINNED_SOFT);
}
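
The compressed-ACK arming above caps the flush delay at the sysctl value or about 5% of the RTT, whichever is smaller. A sketch of just that arithmetic, assuming srtt arrives in the kernel's usec-shifted-by-3 encoding:

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_USEC 1000ULL

/* delay = min(sysctl cap, ~rtt/20); srtt_us8 is usec << 3, so
 * (NSEC_PER_USEC >> 3) converts and un-shifts in one multiply. */
static uint64_t comp_ack_delay_ns(uint64_t sysctl_delay_ns, uint64_t srtt_us8)
{
	uint64_t rtt_based = srtt_us8 * (NSEC_PER_USEC >> 3) / 20;

	return sysctl_delay_ns < rtt_based ? sysctl_delay_ns : rtt_based;
}

int main(void)
{
	/* 5 ms smoothed RTT (40000 in the <<3 encoding), 1 ms sysctl cap. */
	printf("%llu ns\n",
	       (unsigned long long)comp_ack_delay_ns(1000000, 40000));
	return 0;
}
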
@@ -5538,7 +5585,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
struct tcp_sock *tp = tcp_sk(sk);
u32 ptr = ntohs(th->urg_ptr);
- if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
+ if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg))
ptr--;
ptr += ntohl(th->seq);
@@ -5648,7 +5695,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th, int syn_inerr)
{
struct tcp_sock *tp = tcp_sk(sk);
- bool rst_seq_match = false;
+ SKB_DR(reason);
/* RFC1323: H1. Apply PAWS check first. */
if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
@@ -5660,6 +5707,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
LINUX_MIB_TCPACKSKIPPEDPAWS,
&tp->last_oow_ack_time))
tcp_send_dupack(sk, skb);
+ SKB_DR_SET(reason, TCP_RFC7323_PAWS);
goto discard;
}
/* Reset is accepted even if it did not pass PAWS. */
@@ -5681,8 +5729,9 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
&tp->last_oow_ack_time))
tcp_send_dupack(sk, skb);
} else if (tcp_reset_check(sk, skb)) {
- tcp_reset(sk, skb);
+ goto reset;
}
+ SKB_DR_SET(reason, TCP_INVALID_SEQUENCE);
goto discard;
}
@@ -5698,9 +5747,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
* Send a challenge ACK
*/
if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt ||
- tcp_reset_check(sk, skb)) {
- rst_seq_match = true;
- } else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
+ tcp_reset_check(sk, skb))
+ goto reset;
+
+ if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
struct tcp_sack_block *sp = &tp->selective_acks[0];
int max_sack = sp[0].end_seq;
int this_sack;
@@ -5713,21 +5763,18 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
}
if (TCP_SKB_CB(skb)->seq == max_sack)
- rst_seq_match = true;
+ goto reset;
}
- if (rst_seq_match)
- tcp_reset(sk, skb);
- else {
- /* Disable TFO if RST is out-of-order
- * and no data has been received
- * for current active TFO socket
- */
- if (tp->syn_fastopen && !tp->data_segs_in &&
- sk->sk_state == TCP_ESTABLISHED)
- tcp_fastopen_active_disable(sk);
- tcp_send_challenge_ack(sk);
- }
+ /* Disable TFO if RST is out-of-order
+ * and no data has been received
+ * for current active TFO socket
+ */
+ if (tp->syn_fastopen && !tp->data_segs_in &&
+ sk->sk_state == TCP_ESTABLISHED)
+ tcp_fastopen_active_disable(sk);
+ tcp_send_challenge_ack(sk);
+ SKB_DR_SET(reason, TCP_RESET);
goto discard;
}
@@ -5742,6 +5789,7 @@ syn_challenge:
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
tcp_send_challenge_ack(sk);
+ SKB_DR_SET(reason, TCP_INVALID_SYN);
goto discard;
}
@@ -5750,7 +5798,12 @@ syn_challenge:
return true;
discard:
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb, reason);
+ return false;
+
+reset:
+ tcp_reset(sk, skb);
+ __kfree_skb(skb);
return false;
}
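
tcp_validate_incoming() now threads a drop reason through every discard path via the SKB_DR()/SKB_DR_SET() helpers. A hypothetical expansion of those macros with an illustrative enum; the real definitions live in the kernel's drop-reason header and may differ in detail:

#include <stdio.h>

/* Illustrative subset; the real enum is the kernel's skb_drop_reason. */
enum drop_reason {
	DROP_NOT_SPECIFIED,
	DROP_TCP_RFC7323_PAWS,
	DROP_TCP_INVALID_SEQUENCE,
	DROP_TCP_RESET,
	DROP_TCP_INVALID_SYN,
};

/* Hypothetical expansion: declare a reason defaulting to NOT_SPECIFIED,
 * then narrow it in place as each validation step fails. */
#define SKB_DR(name)		enum drop_reason name = DROP_NOT_SPECIFIED
#define SKB_DR_SET(name, rsn)	((name) = DROP_##rsn)

int main(void)
{
	SKB_DR(reason);

	SKB_DR_SET(reason, TCP_RESET);	/* e.g. an acceptable in-window RST */
	printf("reason=%d\n", reason);
	return 0;
}
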
@@ -5779,6 +5832,7 @@ discard:
*/
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
const struct tcphdr *th = (const struct tcphdr *)skb->data;
struct tcp_sock *tp = tcp_sk(sk);
unsigned int len = skb->len;
@@ -5867,6 +5921,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
return;
} else { /* Header too small */
+ reason = SKB_DROP_REASON_PKT_TOO_SMALL;
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
goto discard;
}
@@ -5894,6 +5949,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
/* Bulk data transfer: receiver */
+ skb_dst_drop(skb);
__skb_pull(skb, tcp_header_len);
eaten = tcp_queue_rcv(sk, skb, &fragstolen);
@@ -5922,8 +5978,10 @@ slow_path:
if (len < (th->doff << 2) || tcp_checksum_complete(skb))
goto csum_error;
- if (!th->ack && !th->rst && !th->syn)
+ if (!th->ack && !th->rst && !th->syn) {
+ reason = SKB_DROP_REASON_TCP_FLAGS;
goto discard;
+ }
/*
* Standard slow path.
@@ -5933,9 +5991,11 @@ slow_path:
return;
step5:
- if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
+ reason = tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT);
+ if ((int)reason < 0) {
+ reason = -reason;
goto discard;
-
+ }
tcp_rcv_rtt_measure_ts(sk, skb);
/* Process urgent data. */
@@ -5949,12 +6009,13 @@ step5:
return;
csum_error:
+ reason = SKB_DROP_REASON_TCP_CSUM;
trace_tcp_bad_csum(skb);
TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
discard:
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb, reason);
}
EXPORT_SYMBOL(tcp_rcv_established);
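
tcp_ack() above now returns a negated drop reason on failure, so a single int carries both the success flags and the failure cause; the caller flips the sign back (reason = -reason) before dropping. A toy model of that encoding, assuming all reason values are positive and ignoring sequence-number wraparound (the kernel uses after() for that):

#include <stdio.h>

enum drop_reason {
	DROP_NOT_DROPPED_YET,		/* 0 so real reasons stay positive */
	DROP_TCP_TOO_OLD_ACK,
	DROP_TCP_ACK_UNSENT_DATA,
};

/* Toy tcp_ack(): >= 0 on success, negated reason on failure. */
static int check_ack(unsigned int ack, unsigned int snd_nxt)
{
	if (ack > snd_nxt)		/* ACKs data we never sent */
		return -DROP_TCP_ACK_UNSENT_DATA;
	return 0;
}

int main(void)
{
	int ret = check_ack(101, 100);

	if (ret < 0)
		printf("drop, reason=%d\n", -ret);	/* flip the sign back */
	return 0;
}
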
@@ -5974,9 +6035,9 @@ void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
* retransmission has occurred.
*/
if (tp->total_retrans > 1 && tp->undo_marker)
- tp->snd_cwnd = 1;
+ tcp_snd_cwnd_set(tp, 1);
else
- tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
+ tcp_snd_cwnd_set(tp, tcp_init_cwnd(tp, __sk_dst_get(sk)));
tp->snd_cwnd_stamp = tcp_jiffies32;
bpf_skops_established(sk, bpf_op, skb);
@@ -6112,6 +6173,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
struct tcp_fastopen_cookie foc = { .len = -1 };
int saved_clamp = tp->rx_opt.mss_clamp;
bool fastopen_fail;
+ SKB_DR(reason);
tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
@@ -6154,7 +6216,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
if (th->rst) {
tcp_reset(sk, skb);
- goto discard;
+consume:
+ __kfree_skb(skb);
+ return 0;
}
/* rfc793:
@@ -6164,9 +6228,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* See note below!
* --ANK(990513)
*/
- if (!th->syn)
+ if (!th->syn) {
+ SKB_DR_SET(reason, TCP_FLAGS);
goto discard_and_undo;
-
+ }
/* rfc793:
* "If the SYN bit is on ...
* are acceptable then ...
@@ -6243,13 +6308,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
TCP_DELACK_MAX, TCP_RTO_MAX);
-
-discard:
- tcp_drop(sk, skb);
- return 0;
- } else {
- tcp_send_ack(sk);
+ goto consume;
}
+ tcp_send_ack(sk);
return -1;
}
@@ -6261,15 +6322,16 @@ discard:
*
* Otherwise (no ACK) drop the segment and return."
*/
-
+ SKB_DR_SET(reason, TCP_RESET);
goto discard_and_undo;
}
/* PAWS check. */
if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
- tcp_paws_reject(&tp->rx_opt, 0))
+ tcp_paws_reject(&tp->rx_opt, 0)) {
+ SKB_DR_SET(reason, TCP_RFC7323_PAWS);
goto discard_and_undo;
-
+ }
if (th->syn) {
/* We see SYN without ACK. It is attempt of
* simultaneous connect with crossed SYNs.
@@ -6318,7 +6380,7 @@ discard:
*/
return -1;
#else
- goto discard;
+ goto consume;
#endif
}
/* "fifth, if neither of the SYN or RST bits is set then
@@ -6328,7 +6390,8 @@ discard:
discard_and_undo:
tcp_clear_options(&tp->rx_opt);
tp->rx_opt.mss_clamp = saved_clamp;
- goto discard;
+ tcp_drop_reason(sk, skb, reason);
+ return 0;
reset_and_undo:
tcp_clear_options(&tp->rx_opt);
@@ -6383,21 +6446,26 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
struct request_sock *req;
int queued = 0;
bool acceptable;
+ SKB_DR(reason);
switch (sk->sk_state) {
case TCP_CLOSE:
+ SKB_DR_SET(reason, TCP_CLOSE);
goto discard;
case TCP_LISTEN:
if (th->ack)
return 1;
- if (th->rst)
+ if (th->rst) {
+ SKB_DR_SET(reason, TCP_RESET);
goto discard;
-
+ }
if (th->syn) {
- if (th->fin)
+ if (th->fin) {
+ SKB_DR_SET(reason, TCP_FLAGS);
goto discard;
+ }
/* It is possible that we process SYN packets from backlog,
* so we need to make sure to disable BH and RCU right there.
*/
@@ -6412,6 +6480,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
consume_skb(skb);
return 0;
}
+ SKB_DR_SET(reason, TCP_FLAGS);
goto discard;
case TCP_SYN_SENT:
@@ -6438,13 +6507,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
sk->sk_state != TCP_FIN_WAIT1);
- if (!tcp_check_req(sk, skb, req, true, &req_stolen))
+ if (!tcp_check_req(sk, skb, req, true, &req_stolen)) {
+ SKB_DR_SET(reason, TCP_FASTOPEN);
goto discard;
+ }
}
- if (!th->ack && !th->rst && !th->syn)
+ if (!th->ack && !th->rst && !th->syn) {
+ SKB_DR_SET(reason, TCP_FLAGS);
goto discard;
-
+ }
if (!tcp_validate_incoming(sk, skb, th, 0))
return 0;
@@ -6457,6 +6529,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (sk->sk_state == TCP_SYN_RECV)
return 1; /* send one RST */
tcp_send_challenge_ack(sk);
+ SKB_DR_SET(reason, TCP_OLD_ACK);
goto discard;
}
switch (sk->sk_state) {
@@ -6550,7 +6623,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
inet_csk_reset_keepalive_timer(sk, tmo);
} else {
tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
- goto discard;
+ goto consume;
}
break;
}
@@ -6558,7 +6631,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_CLOSING:
if (tp->snd_una == tp->write_seq) {
tcp_time_wait(sk, TCP_TIME_WAIT, 0);
- goto discard;
+ goto consume;
}
break;
@@ -6566,7 +6639,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (tp->snd_una == tp->write_seq) {
tcp_update_metrics(sk);
tcp_done(sk);
- goto discard;
+ goto consume;
}
break;
}
@@ -6617,9 +6690,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (!queued) {
discard:
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb, reason);
}
return 0;
+
+consume:
+ __kfree_skb(skb);
+ return 0;
}
EXPORT_SYMBOL(tcp_rcv_state_process);
@@ -6670,7 +6747,7 @@ static void tcp_ecn_create_request(struct request_sock *req,
ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
- ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
+ ecn_ok = READ_ONCE(net->ipv4.sysctl_tcp_ecn) || ecn_ok_dst;
if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
(ecn_ok_dst & DST_FEATURE_ECN_CA) ||
@@ -6701,7 +6778,8 @@ static void tcp_openreq_init(struct request_sock *req,
ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
ireq->ir_mark = inet_request_mark(sk, skb);
#if IS_ENABLED(CONFIG_SMC)
- ireq->smc_ok = rx_opt->smc_ok;
+ ireq->smc_ok = rx_opt->smc_ok && !(tcp_sk(sk)->smc_hs_congested &&
+ tcp_sk(sk)->smc_hs_congested(sk));
#endif
}
@@ -6723,6 +6801,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
ireq->ireq_state = TCP_NEW_SYN_RECV;
write_pnet(&ireq->ireq_net, sock_net(sk_listener));
ireq->ireq_family = sk_listener->sk_family;
+ req->timeout = TCP_TIMEOUT_INIT;
}
return req;
@@ -6736,11 +6815,14 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
{
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
const char *msg = "Dropping request";
- bool want_cookie = false;
struct net *net = sock_net(sk);
+ bool want_cookie = false;
+ u8 syncookies;
+
+ syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
#ifdef CONFIG_SYN_COOKIES
- if (net->ipv4.sysctl_tcp_syncookies) {
+ if (syncookies) {
msg = "Sending cookies";
want_cookie = true;
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
@@ -6748,8 +6830,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
#endif
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
- if (!queue->synflood_warned &&
- net->ipv4.sysctl_tcp_syncookies != 2 &&
+ if (!queue->synflood_warned && syncookies != 2 &&
xchg(&queue->synflood_warned, 1) == 0)
net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
proto, sk->sk_num, msg);
@@ -6798,7 +6879,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
struct tcp_sock *tp = tcp_sk(sk);
u16 mss;
- if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) != 2 &&
!inet_csk_reqsk_queue_is_full(sk))
return 0;
@@ -6832,13 +6913,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
bool want_cookie = false;
struct dst_entry *dst;
struct flowi fl;
+ u8 syncookies;
+
+ syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
/* TW buckets are converted to open requests without
* limitations, they conserve resources and peer is
* evidently real one.
*/
- if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
- inet_csk_reqsk_queue_is_full(sk)) && !isn) {
+ if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
if (!want_cookie)
goto drop;
@@ -6887,10 +6970,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
if (!want_cookie && !isn) {
+ int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);
+
/* Kill the following clause, if you dislike this way. */
- if (!net->ipv4.sysctl_tcp_syncookies &&
- (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
- (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
+ if (!syncookies &&
+ (max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+ (max_syn_backlog >> 2)) &&
!tcp_peer_is_proven(req, dst)) {
/* Without syncookies last quarter of
* backlog is filled with destinations,
@@ -6939,9 +7024,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
sock_put(fastopen_sk);
} else {
tcp_rsk(req)->tfo_listener = false;
- if (!want_cookie)
- inet_csk_reqsk_queue_hash_add(sk, req,
- tcp_timeout_init((struct sock *)req));
+ if (!want_cookie) {
+ req->timeout = tcp_timeout_init((struct sock *)req);
+ inet_csk_reqsk_queue_hash_add(sk, req, req->timeout);
+ }
af_ops->send_synack(sk, dst, &fl, req, &foc,
!want_cookie ? TCP_SYNACK_NORMAL :
TCP_SYNACK_COOKIE,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b3f34e366b27..87d440f47a70 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -91,6 +91,8 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
+static DEFINE_PER_CPU(struct sock *, ipv4_tcp_sk);
+
static u32 tcp_v4_init_seq(const struct sk_buff *skb)
{
return secure_tcp_seq(ip_hdr(skb)->daddr,
@@ -106,10 +108,10 @@ static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
+ int reuse = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse);
const struct inet_timewait_sock *tw = inet_twsk(sktw);
const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
struct tcp_sock *tp = tcp_sk(sk);
- int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;
if (reuse == 2) {
/* Still does not detect *everything* that goes through
@@ -197,16 +199,18 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
+ struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
+ struct inet_timewait_death_row *tcp_death_row;
+ __be32 daddr, nexthop, prev_sk_rcv_saddr;
struct inet_sock *inet = inet_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct ip_options_rcu *inet_opt;
+ struct net *net = sock_net(sk);
__be16 orig_sport, orig_dport;
- __be32 daddr, nexthop;
struct flowi4 *fl4;
struct rtable *rt;
int err;
- struct ip_options_rcu *inet_opt;
- struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
@@ -227,13 +231,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
orig_dport = usin->sin_port;
fl4 = &inet->cork.fl.u.ip4;
rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
- RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
- IPPROTO_TCP,
- orig_sport, orig_dport, sk);
+ sk->sk_bound_dev_if, IPPROTO_TCP, orig_sport,
+ orig_dport, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
if (err == -ENETUNREACH)
- IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
return err;
}
@@ -245,10 +248,29 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (!inet_opt || !inet_opt->opt.srr)
daddr = fl4->daddr;
- if (!inet->inet_saddr)
+ tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
+
+ if (!inet->inet_saddr) {
+ if (inet_csk(sk)->icsk_bind2_hash) {
+ prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo,
+ sk, net, inet->inet_num);
+ prev_sk_rcv_saddr = sk->sk_rcv_saddr;
+ }
inet->inet_saddr = fl4->saddr;
+ }
+
sk_rcv_saddr_set(sk, inet->inet_saddr);
+ if (prev_addr_hashbucket) {
+ err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
+ if (err) {
+ inet->inet_saddr = 0;
+ sk_rcv_saddr_set(sk, prev_sk_rcv_saddr);
+ ip_rt_put(rt);
+ return err;
+ }
+ }
+
if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
/* Reset inherited state */
tp->rx_opt.ts_recent = 0;
@@ -297,12 +319,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_daddr,
inet->inet_sport,
usin->sin_port));
- tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
- inet->inet_saddr,
+ tp->tsoffset = secure_tcp_ts_off(net, inet->inet_saddr,
inet->inet_daddr);
}
- inet->inet_id = prandom_u32();
+ inet->inet_id = get_random_u16();
if (tcp_fastopen_defer_connect(sk, &err))
return err;
@@ -474,9 +495,9 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
int err;
struct net *net = dev_net(skb->dev);
- sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
- th->dest, iph->saddr, ntohs(th->source),
- inet_iif(skb), 0);
+ sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+ iph->daddr, th->dest, iph->saddr,
+ ntohs(th->source), inet_iif(skb), 0);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return -ENOENT;
@@ -739,8 +760,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
* Incoming packet is checked with md5 hash with finding key,
* no RST generated if md5 hash doesn't match.
*/
- sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
- ip_hdr(skb)->saddr,
+ sk1 = __inet_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
+ NULL, 0, ip_hdr(skb)->saddr,
th->source, ip_hdr(skb)->daddr,
ntohs(th->source), dif, sdif);
/* don't send rst if it can't find key */
@@ -810,13 +831,15 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
arg.tos = ip_hdr(skb)->tos;
arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
local_bh_disable();
- ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+ ctl_sk = this_cpu_read(ipv4_tcp_sk);
+ sock_net_set(ctl_sk, net);
if (sk) {
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_mark : sk->sk_mark;
ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_priority : sk->sk_priority;
transmit_time = tcp_transmit_time(sk);
+ xfrm_sk_clone_policy(ctl_sk, sk);
}
ip_send_unicast_reply(ctl_sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -825,6 +848,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
transmit_time);
ctl_sk->sk_mark = 0;
+ xfrm_sk_free_policy(ctl_sk);
+ sock_net_set(ctl_sk, &init_net);
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
local_bh_enable();
@@ -908,7 +933,8 @@ static void tcp_v4_send_ack(const struct sock *sk,
arg.tos = tos;
arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
local_bh_disable();
- ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
+ ctl_sk = this_cpu_read(ipv4_tcp_sk);
+ sock_net_set(ctl_sk, net);
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_mark : sk->sk_mark;
ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
@@ -921,6 +947,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
transmit_time);
ctl_sk->sk_mark = 0;
+ sock_net_set(ctl_sk, &init_net);
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
local_bh_enable();
}
@@ -1001,7 +1028,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
if (skb) {
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
- tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
+ tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
(inet_sk(sk)->tos & INET_ECN_MASK) :
inet_sk(sk)->tos;
@@ -1202,8 +1229,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
key->l3index = l3index;
key->flags = flags;
memcpy(&key->addr, addr,
- (family == AF_INET6) ? sizeof(struct in6_addr) :
- sizeof(struct in_addr));
+ (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6) ? sizeof(struct in6_addr) :
+ sizeof(struct in_addr));
hlist_add_head_rcu(&key->node, &md5sig->head);
return 0;
}
@@ -1403,72 +1430,6 @@ EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
#endif
-/* Called with rcu_read_lock() */
-static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb,
- int dif, int sdif)
-{
-#ifdef CONFIG_TCP_MD5SIG
- /*
- * This gets called for each TCP segment that arrives
- * so we want to be efficient.
- * We have 3 drop cases:
- * o No MD5 hash and one expected.
- * o MD5 hash and we're not expecting one.
- * o MD5 hash and its wrong.
- */
- const __u8 *hash_location = NULL;
- struct tcp_md5sig_key *hash_expected;
- const struct iphdr *iph = ip_hdr(skb);
- const struct tcphdr *th = tcp_hdr(skb);
- const union tcp_md5_addr *addr;
- unsigned char newhash[16];
- int genhash, l3index;
-
- /* sdif set, means packet ingressed via a device
- * in an L3 domain and dif is set to the l3mdev
- */
- l3index = sdif ? dif : 0;
-
- addr = (union tcp_md5_addr *)&iph->saddr;
- hash_expected = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
- hash_location = tcp_parse_md5sig_option(th);
-
- /* We've parsed the options - do we have a hash? */
- if (!hash_expected && !hash_location)
- return false;
-
- if (hash_expected && !hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
- return true;
- }
-
- if (!hash_expected && hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
- return true;
- }
-
- /* Okay, so this is hash_expected and hash_location -
- * so we need to calculate the checksum.
- */
- genhash = tcp_v4_md5_hash_skb(newhash,
- hash_expected,
- NULL, skb);
-
- if (genhash || memcmp(hash_location, newhash, 16) != 0) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
- net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s L3 index %d\n",
- &iph->saddr, ntohs(th->source),
- &iph->daddr, ntohs(th->dest),
- genhash ? " tcp_v4_calc_md5_hash failed"
- : "", l3index);
- return true;
- }
- return false;
-#endif
- return false;
-}
-
static void tcp_v4_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
@@ -1582,12 +1543,12 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (inet_opt)
inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
- newinet->inet_id = prandom_u32();
+ newinet->inet_id = get_random_u16();
/* Set ToS of the new socket based upon the value of incoming SYN.
* ECT bits are set later in tcp_init_transfer().
*/
- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
if (!dst) {
@@ -1698,6 +1659,7 @@ INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
*/
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
+ enum skb_drop_reason reason;
struct sock *rsk;
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
@@ -1720,6 +1682,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (tcp_checksum_complete(skb))
goto csum_err;
@@ -1747,7 +1710,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
reset:
tcp_v4_send_reset(rsk, skb);
discard:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
/* Be careful here. If this function gets more complicated and
* gcc suffers from register pressure on the x86, sk (in %ebx)
* might be destroyed here. This current version compiles correctly,
@@ -1756,6 +1719,7 @@ discard:
return 0;
csum_err:
+ reason = SKB_DROP_REASON_TCP_CSUM;
trace_tcp_bad_csum(skb);
TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
@@ -1765,6 +1729,7 @@ EXPORT_SYMBOL(tcp_v4_do_rcv);
int tcp_v4_early_demux(struct sk_buff *skb)
{
+ struct net *net = dev_net(skb->dev);
const struct iphdr *iph;
const struct tcphdr *th;
struct sock *sk;
@@ -1781,7 +1746,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
if (th->doff < sizeof(struct tcphdr) / 4)
return 0;
- sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
+ sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
iph->saddr, th->source,
iph->daddr, ntohs(th->dest),
skb->skb_iif, inet_sdif(skb));
@@ -1801,7 +1766,8 @@ int tcp_v4_early_demux(struct sk_buff *skb)
return 0;
}
-bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
+bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
+ enum skb_drop_reason *reason)
{
u32 limit, tail_gso_size, tail_gso_segs;
struct skb_shared_info *shinfo;
@@ -1827,6 +1793,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
if (unlikely(tcp_checksum_complete(skb))) {
bh_unlock_sock(sk);
trace_tcp_bad_csum(skb);
+ *reason = SKB_DROP_REASON_TCP_CSUM;
__TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
return true;
@@ -1907,14 +1874,17 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
__skb_push(skb, hdrlen);
no_coalesce:
+ limit = (u32)READ_ONCE(sk->sk_rcvbuf) + (u32)(READ_ONCE(sk->sk_sndbuf) >> 1);
+
/* Only socket owner can try to collapse/prune rx queues
* to reduce memory overhead, so add a little headroom here.
* Few sockets backlog are possibly concurrently non empty.
*/
- limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf) + 64*1024;
+ limit += 64 * 1024;
if (unlikely(sk_add_backlog(sk, skb, limit))) {
bh_unlock_sock(sk);
+ *reason = SKB_DROP_REASON_SOCKET_BACKLOG;
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
return true;
}
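
The backlog change above computes the cap in u32 as rcvbuf + sndbuf/2 before adding the 64KB owner headroom, avoiding overflow with very large buffers, and reports SOCKET_BACKLOG as the drop reason. A sketch of the limit math only; the admission test is simplified relative to sk_add_backlog():

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Simplified admission test: the kernel compares current backlog use
 * against the limit inside sk_add_backlog(). */
static bool backlog_would_overflow(uint32_t backlog_len, uint32_t truesize,
				   uint32_t rcvbuf, uint32_t sndbuf)
{
	uint32_t limit = rcvbuf + (sndbuf >> 1);	/* u32 math throughout */

	limit += 64 * 1024;				/* owner headroom */
	return backlog_len + truesize > limit;
}

int main(void)
{
	printf("overflow=%d\n",
	       backlog_would_overflow(210000, 2000, 131072, 16384));
	return 0;
}
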
@@ -1965,13 +1935,13 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
int tcp_v4_rcv(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
+ enum skb_drop_reason drop_reason;
int sdif = inet_sdif(skb);
int dif = inet_iif(skb);
const struct iphdr *iph;
const struct tcphdr *th;
bool refcounted;
struct sock *sk;
- int drop_reason;
int ret;
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
@@ -2004,7 +1974,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
lookup:
- sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
+ sk = __inet_lookup_skb(net->ipv4.tcp_death_row.hashinfo,
+ skb, __tcp_hdrlen(th), th->source,
th->dest, sdif, &refcounted);
if (!sk)
goto no_tcp_socket;
@@ -2019,7 +1990,13 @@ process:
struct sock *nsk;
sk = req->rsk_listener;
- if (unlikely(tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))) {
+ if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
+ else
+ drop_reason = tcp_inbound_md5_hash(sk, skb,
+ &iph->saddr, &iph->daddr,
+ AF_INET, dif, sdif);
+ if (unlikely(drop_reason)) {
sk_drops_add(sk, skb);
reqsk_put(req);
goto discard_it;
@@ -2051,6 +2028,8 @@ process:
iph = ip_hdr(skb);
tcp_v4_fill_cb(skb, iph, th);
nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
+ } else {
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
}
if (!nsk) {
reqsk_put(req);
@@ -2066,6 +2045,7 @@ process:
}
goto discard_and_relse;
}
+ nf_reset_ct(skb);
if (nsk == sk) {
reqsk_put(req);
tcp_v4_restore_cb(skb);
@@ -2086,16 +2066,20 @@ process:
}
}
- if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
+ if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
goto discard_and_relse;
+ }
- if (tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))
+ drop_reason = tcp_inbound_md5_hash(sk, skb, &iph->saddr,
+ &iph->daddr, AF_INET, dif, sdif);
+ if (drop_reason)
goto discard_and_relse;
nf_reset_ct(skb);
if (tcp_filter(sk, skb)) {
- drop_reason = SKB_DROP_REASON_TCP_FILTER;
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
goto discard_and_relse;
}
th = (const struct tcphdr *)skb->data;
@@ -2111,14 +2095,13 @@ process:
sk_incoming_cpu_update(sk);
- sk_defer_free_flush(sk);
bh_lock_sock_nested(sk);
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
if (!sock_owned_by_user(sk)) {
ret = tcp_v4_do_rcv(sk, skb);
} else {
- if (tcp_add_backlog(sk, skb))
+ if (tcp_add_backlog(sk, skb, &drop_reason))
goto discard_and_relse;
}
bh_unlock_sock(sk);
@@ -2148,6 +2131,7 @@ bad_packet:
}
discard_it:
+ SKB_DR_OR(drop_reason, NOT_SPECIFIED);
/* Discard frame. */
kfree_skb_reason(skb, drop_reason);
return 0;
@@ -2160,6 +2144,7 @@ discard_and_relse:
do_time_wait:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
inet_twsk_put(inet_twsk(sk));
goto discard_it;
}
@@ -2172,9 +2157,9 @@ do_time_wait:
}
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN: {
- struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
- &tcp_hashinfo, skb,
- __tcp_hdrlen(th),
+ struct sock *sk2 = inet_lookup_listener(net,
+ net->ipv4.tcp_death_row.hashinfo,
+ skb, __tcp_hdrlen(th),
iph->saddr, th->source,
iph->daddr, th->dest,
inet_iif(skb),
@@ -2324,21 +2309,21 @@ static bool seq_sk_match(struct seq_file *seq, const struct sock *sk)
*/
static void *listening_get_first(struct seq_file *seq)
{
+ struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
struct tcp_iter_state *st = seq->private;
st->offset = 0;
- for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) {
+ for (; st->bucket <= hinfo->lhash2_mask; st->bucket++) {
struct inet_listen_hashbucket *ilb2;
- struct inet_connection_sock *icsk;
+ struct hlist_nulls_node *node;
struct sock *sk;
- ilb2 = &tcp_hashinfo.lhash2[st->bucket];
- if (hlist_empty(&ilb2->head))
+ ilb2 = &hinfo->lhash2[st->bucket];
+ if (hlist_nulls_empty(&ilb2->nulls_head))
continue;
spin_lock(&ilb2->lock);
- inet_lhash2_for_each_icsk(icsk, &ilb2->head) {
- sk = (struct sock *)icsk;
+ sk_nulls_for_each(sk, node, &ilb2->nulls_head) {
if (seq_sk_match(seq, sk))
return sk;
}
@@ -2357,20 +2342,21 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
{
struct tcp_iter_state *st = seq->private;
struct inet_listen_hashbucket *ilb2;
- struct inet_connection_sock *icsk;
+ struct hlist_nulls_node *node;
+ struct inet_hashinfo *hinfo;
struct sock *sk = cur;
++st->num;
++st->offset;
- icsk = inet_csk(sk);
- inet_lhash2_for_each_icsk_continue(icsk) {
- sk = (struct sock *)icsk;
+ sk = sk_nulls_next(sk);
+ sk_nulls_for_each_from(sk, node) {
if (seq_sk_match(seq, sk))
return sk;
}
- ilb2 = &tcp_hashinfo.lhash2[st->bucket];
+ hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
+ ilb2 = &hinfo->lhash2[st->bucket];
spin_unlock(&ilb2->lock);
++st->bucket;
return listening_get_first(seq);
@@ -2392,9 +2378,10 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
return rc;
}
-static inline bool empty_bucket(const struct tcp_iter_state *st)
+static inline bool empty_bucket(struct inet_hashinfo *hinfo,
+ const struct tcp_iter_state *st)
{
- return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
+ return hlist_nulls_empty(&hinfo->ehash[st->bucket].chain);
}
/*
@@ -2403,20 +2390,21 @@ static inline bool empty_bucket(const struct tcp_iter_state *st)
*/
static void *established_get_first(struct seq_file *seq)
{
+ struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
struct tcp_iter_state *st = seq->private;
st->offset = 0;
- for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
+ for (; st->bucket <= hinfo->ehash_mask; ++st->bucket) {
struct sock *sk;
struct hlist_nulls_node *node;
- spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
+ spinlock_t *lock = inet_ehash_lockp(hinfo, st->bucket);
/* Lockless fast path for the common case of empty buckets */
- if (empty_bucket(st))
+ if (empty_bucket(hinfo, st))
continue;
spin_lock_bh(lock);
- sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
+ sk_nulls_for_each(sk, node, &hinfo->ehash[st->bucket].chain) {
if (seq_sk_match(seq, sk))
return sk;
}
@@ -2428,9 +2416,10 @@ static void *established_get_first(struct seq_file *seq)
static void *established_get_next(struct seq_file *seq, void *cur)
{
- struct sock *sk = cur;
- struct hlist_nulls_node *node;
+ struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
struct tcp_iter_state *st = seq->private;
+ struct hlist_nulls_node *node;
+ struct sock *sk = cur;
++st->num;
++st->offset;
@@ -2442,7 +2431,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
return sk;
}
- spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+ spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket));
++st->bucket;
return established_get_first(seq);
}
@@ -2480,6 +2469,7 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
static void *tcp_seek_last_pos(struct seq_file *seq)
{
+ struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
struct tcp_iter_state *st = seq->private;
int bucket = st->bucket;
int offset = st->offset;
@@ -2488,7 +2478,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
- if (st->bucket > tcp_hashinfo.lhash2_mask)
+ if (st->bucket > hinfo->lhash2_mask)
break;
st->state = TCP_SEQ_STATE_LISTENING;
rc = listening_get_first(seq);
@@ -2500,7 +2490,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
st->state = TCP_SEQ_STATE_ESTABLISHED;
fallthrough;
case TCP_SEQ_STATE_ESTABLISHED:
- if (st->bucket > tcp_hashinfo.ehash_mask)
+ if (st->bucket > hinfo->ehash_mask)
break;
rc = established_get_first(seq);
while (offset-- && rc && bucket == st->bucket)
@@ -2568,16 +2558,17 @@ EXPORT_SYMBOL(tcp_seq_next);
void tcp_seq_stop(struct seq_file *seq, void *v)
{
+ struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
struct tcp_iter_state *st = seq->private;
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
if (v != SEQ_START_TOKEN)
- spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
+ spin_unlock(&hinfo->lhash2[st->bucket].lock);
break;
case TCP_SEQ_STATE_ESTABLISHED:
if (v)
- spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+ spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket));
break;
}
}
@@ -2665,7 +2656,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
jiffies_to_clock_t(icsk->icsk_rto),
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk),
- tp->snd_cwnd,
+ tcp_snd_cwnd(tp),
state == TCP_LISTEN ?
fastopenq->max_qlen :
(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
@@ -2772,18 +2763,18 @@ static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter,
static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
struct sock *start_sk)
{
+ struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
struct bpf_tcp_iter_state *iter = seq->private;
struct tcp_iter_state *st = &iter->state;
- struct inet_connection_sock *icsk;
+ struct hlist_nulls_node *node;
unsigned int expected = 1;
struct sock *sk;
sock_hold(start_sk);
iter->batch[iter->end_sk++] = start_sk;
- icsk = inet_csk(start_sk);
- inet_lhash2_for_each_icsk_continue(icsk) {
- sk = (struct sock *)icsk;
+ sk = sk_nulls_next(start_sk);
+ sk_nulls_for_each_from(sk, node) {
if (seq_sk_match(seq, sk)) {
if (iter->end_sk < iter->max_sk) {
sock_hold(sk);
@@ -2792,7 +2783,7 @@ static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
expected++;
}
}
- spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
+ spin_unlock(&hinfo->lhash2[st->bucket].lock);
return expected;
}
@@ -2800,6 +2791,7 @@ static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
static unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq,
struct sock *start_sk)
{
+ struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
struct bpf_tcp_iter_state *iter = seq->private;
struct tcp_iter_state *st = &iter->state;
struct hlist_nulls_node *node;
@@ -2819,13 +2811,14 @@ static unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq,
expected++;
}
}
- spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+ spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket));
return expected;
}
static struct sock *bpf_iter_tcp_batch(struct seq_file *seq)
{
+ struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
struct bpf_tcp_iter_state *iter = seq->private;
struct tcp_iter_state *st = &iter->state;
unsigned int expected;
@@ -2841,7 +2834,7 @@ static struct sock *bpf_iter_tcp_batch(struct seq_file *seq)
st->offset = 0;
st->bucket++;
if (st->state == TCP_SEQ_STATE_LISTENING &&
- st->bucket > tcp_hashinfo.lhash2_mask) {
+ st->bucket > hinfo->lhash2_mask) {
st->state = TCP_SEQ_STATE_ESTABLISHED;
st->bucket = 0;
}
@@ -3002,7 +2995,7 @@ static unsigned short seq_file_family(const struct seq_file *seq)
#endif
/* Iterated from proc fs */
- afinfo = PDE_DATA(file_inode(seq->file));
+ afinfo = pde_data(file_inode(seq->file));
return afinfo->family;
}
@@ -3093,7 +3086,10 @@ struct proto tcp_prot = {
.stream_memory_free = tcp_stream_memory_free,
.sockets_allocated = &tcp_sockets_allocated,
.orphan_count = &tcp_orphan_count,
+
.memory_allocated = &tcp_memory_allocated,
+ .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc,
+
.memory_pressure = &tcp_memory_pressure,
.sysctl_mem = sysctl_tcp_mem,
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
@@ -3103,7 +3099,7 @@ struct proto tcp_prot = {
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.twsk_prot = &tcp_timewait_sock_ops,
.rsk_prot = &tcp_request_sock_ops,
- .h.hashinfo = &tcp_hashinfo,
+ .h.hashinfo = NULL,
.no_autobind = true,
.diag_destroy = tcp_abort,
};
@@ -3111,42 +3107,43 @@ EXPORT_SYMBOL(tcp_prot);
static void __net_exit tcp_sk_exit(struct net *net)
{
- int cpu;
-
if (net->ipv4.tcp_congestion_control)
bpf_module_put(net->ipv4.tcp_congestion_control,
net->ipv4.tcp_congestion_control->owner);
-
- for_each_possible_cpu(cpu)
- inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
- free_percpu(net->ipv4.tcp_sk);
}
-static int __net_init tcp_sk_init(struct net *net)
+static void __net_init tcp_set_hashinfo(struct net *net)
{
- int res, cpu, cnt;
-
- net->ipv4.tcp_sk = alloc_percpu(struct sock *);
- if (!net->ipv4.tcp_sk)
- return -ENOMEM;
+ struct inet_hashinfo *hinfo;
+ unsigned int ehash_entries;
+ struct net *old_net;
- for_each_possible_cpu(cpu) {
- struct sock *sk;
+ if (net_eq(net, &init_net))
+ goto fallback;
- res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
- IPPROTO_TCP, net);
- if (res)
- goto fail;
- sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+ old_net = current->nsproxy->net_ns;
+ ehash_entries = READ_ONCE(old_net->ipv4.sysctl_tcp_child_ehash_entries);
+ if (!ehash_entries)
+ goto fallback;
- /* Please enforce IP_DF and IPID==0 for RST and
- * ACK sent in SYN-RECV and TIME-WAIT state.
- */
- inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
-
- *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
+ ehash_entries = roundup_pow_of_two(ehash_entries);
+ hinfo = inet_pernet_hashinfo_alloc(&tcp_hashinfo, ehash_entries);
+ if (!hinfo) {
+ pr_warn("Failed to allocate TCP ehash (entries: %u) "
+ "for a netns, fallback to the global one\n",
+ ehash_entries);
+fallback:
+ hinfo = &tcp_hashinfo;
+ ehash_entries = tcp_hashinfo.ehash_mask + 1;
}
+ net->ipv4.tcp_death_row.hashinfo = hinfo;
+ net->ipv4.tcp_death_row.sysctl_max_tw_buckets = ehash_entries / 2;
+ net->ipv4.sysctl_max_syn_backlog = max(128U, ehash_entries / 128);
+}
+
+static int __net_init tcp_sk_init(struct net *net)
+{
net->ipv4.sysctl_tcp_ecn = 2;
net->ipv4.sysctl_tcp_ecn_fallback = 1;
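
tcp_set_hashinfo() above gives a child netns its own ehash sized from sysctl_tcp_child_ehash_entries, rounded up to a power of two, and derives the tw-bucket and SYN-backlog limits from whichever table was chosen. A userspace model of that sizing policy; roundup_pow2() approximates the kernel's roundup_pow_of_two():

#include <stdio.h>
#include <stdint.h>

static uint32_t roundup_pow2(uint32_t v)
{
	uint32_t p = 1;

	while (p < v)
		p <<= 1;
	return p;
}

struct tcp_netns_sizing {
	uint32_t ehash_entries, max_tw_buckets, max_syn_backlog;
};

static struct tcp_netns_sizing size_child_netns(uint32_t sysctl_child_ehash,
						uint32_t global_ehash)
{
	/* 0 means "share the global ehash", as does allocation failure. */
	uint32_t n = sysctl_child_ehash ? roundup_pow2(sysctl_child_ehash)
					: global_ehash;
	struct tcp_netns_sizing s = {
		.ehash_entries   = n,
		.max_tw_buckets  = n / 2,
		.max_syn_backlog = n / 128 > 128 ? n / 128 : 128,
	};

	return s;
}

int main(void)
{
	struct tcp_netns_sizing s = size_child_netns(1000, 65536);

	printf("ehash=%u tw=%u syn=%u\n",
	       s.ehash_entries, s.max_tw_buckets, s.max_syn_backlog);
	return 0;
}
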
@@ -3172,11 +3169,9 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_tw_reuse = 2;
net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1;
- cnt = tcp_hashinfo.ehash_mask + 1;
- net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
- net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
+ refcount_set(&net->ipv4.tcp_death_row.tw_refcount, 1);
+ tcp_set_hashinfo(net);
- net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128);
net->ipv4.sysctl_tcp_sack = 1;
net->ipv4.sysctl_tcp_window_scaling = 1;
net->ipv4.sysctl_tcp_timestamps = 1;
@@ -3197,9 +3192,12 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_tso_win_divisor = 3;
/* Default TSQ limit of 16 TSO segments */
net->ipv4.sysctl_tcp_limit_output_bytes = 16 * 65536;
- /* rfc5961 challenge ack rate limiting */
- net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
+
+ /* rfc5961 challenge ack rate limiting, per net-ns, disabled by default. */
+ net->ipv4.sysctl_tcp_challenge_ack_limit = INT_MAX;
+
net->ipv4.sysctl_tcp_min_tso_segs = 2;
+ net->ipv4.sysctl_tcp_tso_rtt_log = 9; /* 2^9 = 512 usec */
net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
net->ipv4.sysctl_tcp_autocorking = 1;
net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
@@ -3229,20 +3227,19 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.tcp_congestion_control = &tcp_reno;
return 0;
-fail:
- tcp_sk_exit(net);
-
- return res;
}
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
struct net *net;
- inet_twsk_purge(&tcp_hashinfo, AF_INET);
+ tcp_twsk_purge(net_exit_list, AF_INET);
- list_for_each_entry(net, net_exit_list, exit_list)
+ list_for_each_entry(net, net_exit_list, exit_list) {
+ inet_pernet_hashinfo_free(net->ipv4.tcp_death_row.hashinfo);
+ WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount));
tcp_fastopen_ctx_destroy(net);
+ }
}
static struct pernet_operations __net_initdata tcp_sk_ops = {
@@ -3326,6 +3323,24 @@ static void __init bpf_iter_register(void)
void __init tcp_v4_init(void)
{
+ int cpu, res;
+
+ for_each_possible_cpu(cpu) {
+ struct sock *sk;
+
+ res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
+ IPPROTO_TCP, &init_net);
+ if (res)
+ panic("Failed to create the TCP control socket.\n");
+ sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+
+ /* Please enforce IP_DF and IPID==0 for RST and
+ * ACK sent in SYN-RECV and TIME-WAIT state.
+ */
+ inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
+
+ per_cpu(ipv4_tcp_sk, cpu) = sk;
+ }
if (register_pernet_subsys(&tcp_sk_ops))
panic("Failed to create the TCP control socket.\n");
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 82b36ec3f2f8..ae36780977d2 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -297,7 +297,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
lp->flag &= ~LP_WITHIN_THR;
pr_debug("TCP-LP: %05o|%5u|%5u|%15u|%15u|%15u\n", lp->flag,
- tp->snd_cwnd, lp->remote_hz, lp->owd_min, lp->owd_max,
+ tcp_snd_cwnd(tp), lp->remote_hz, lp->owd_min, lp->owd_max,
lp->sowd >> 3);
if (lp->flag & LP_WITHIN_THR)
@@ -313,12 +313,12 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
/* happened within inference
* drop snd_cwnd into 1 */
if (lp->flag & LP_WITHIN_INF)
- tp->snd_cwnd = 1U;
+ tcp_snd_cwnd_set(tp, 1U);
/* happened after inference
* cut snd_cwnd into half */
else
- tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U);
+ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp) >> 1U, 1U));
/* record this drop time */
lp->last_drop = now;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 0588b004ddac..82f4575f9cd9 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -329,7 +329,7 @@ void tcp_update_metrics(struct sock *sk)
int m;
sk_dst_confirm(sk);
- if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst)
return;
rcu_read_lock();
@@ -385,29 +385,29 @@ void tcp_update_metrics(struct sock *sk)
if (tcp_in_initial_slowstart(tp)) {
/* Slow start still did not finish. */
- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
- if (val && (tp->snd_cwnd >> 1) > val)
+ if (val && (tcp_snd_cwnd(tp) >> 1) > val)
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
- tp->snd_cwnd >> 1);
+ tcp_snd_cwnd(tp) >> 1);
}
if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
val = tcp_metric_get(tm, TCP_METRIC_CWND);
- if (tp->snd_cwnd > val)
+ if (tcp_snd_cwnd(tp) > val)
tcp_metric_set(tm, TCP_METRIC_CWND,
- tp->snd_cwnd);
+ tcp_snd_cwnd(tp));
}
} else if (!tcp_in_slow_start(tp) &&
icsk->icsk_ca_state == TCP_CA_Open) {
/* Cong. avoidance phase, cwnd is reliable. */
- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
- max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
+ max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh));
if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
val = tcp_metric_get(tm, TCP_METRIC_CWND);
- tcp_metric_set(tm, TCP_METRIC_CWND, (val + tp->snd_cwnd) >> 1);
+ tcp_metric_set(tm, TCP_METRIC_CWND, (val + tcp_snd_cwnd(tp)) >> 1);
}
} else {
/* Else slow start did not finish, cwnd is non-sense,
@@ -418,7 +418,7 @@ void tcp_update_metrics(struct sock *sk)
tcp_metric_set(tm, TCP_METRIC_CWND,
(val + tp->snd_ssthresh) >> 1);
}
- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val && tp->snd_ssthresh > val)
@@ -428,7 +428,8 @@ void tcp_update_metrics(struct sock *sk)
if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) {
val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
if (val < tp->reordering &&
- tp->reordering != net->ipv4.sysctl_tcp_reordering)
+ tp->reordering !=
+ READ_ONCE(net->ipv4.sysctl_tcp_reordering))
tcp_metric_set(tm, TCP_METRIC_REORDERING,
tp->reordering);
}
@@ -462,7 +463,7 @@ void tcp_init_metrics(struct sock *sk)
if (tcp_metric_locked(tm, TCP_METRIC_CWND))
tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);
- val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ?
+ val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ?
0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val) {
tp->snd_ssthresh = val;
@@ -968,6 +969,7 @@ static struct genl_family tcp_metrics_nl_family __ro_after_init = {
.module = THIS_MODULE,
.small_ops = tcp_metrics_nl_ops,
.n_small_ops = ARRAY_SIZE(tcp_metrics_nl_ops),
+ .resv_start_op = TCP_METRICS_CMD_DEL + 1,
};
static unsigned int tcpmhash_entries;
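The many READ_ONCE() conversions in tcp_metrics.c, and throughout the rest of this series, all follow one pattern: per-netns sysctl integers are stored by the sysctl handler while the data path loads them locklessly, so both sides need ONCE annotations to prevent load/store tearing and to document the data race. Schematically, assuming kernel context:

    /* writer side (sysctl proc handler) */
    WRITE_ONCE(net->ipv4.sysctl_tcp_nometrics_save, new_val);

    /* reader side (data path, no lock held) */
    if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst)
            return;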
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7c2d3ac2363a..c375f603a16c 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -173,7 +173,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
* Oh well... nobody has a sufficient solution to this
* protocol bug yet.
*/
- if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
+ if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) {
kill:
inet_twsk_deschedule_put(tw);
return TCP_TW_SUCCESS;
@@ -247,10 +247,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
struct inet_timewait_sock *tw;
- struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
- tw = inet_twsk_alloc(sk, tcp_death_row, state);
+ tw = inet_twsk_alloc(sk, &net->ipv4.tcp_death_row, state);
if (tw) {
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
@@ -319,14 +319,14 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
/* Linkage updates.
* Note that access to tw after this point is illegal.
*/
- inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
+ inet_twsk_hashdance(tw, sk, net->ipv4.tcp_death_row.hashinfo);
local_bh_enable();
} else {
/* Sorry, if we're out of memory, just CLOSE this
* socket up. We've got bigger problems than
* non-graceful socket closings.
*/
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW);
+ NET_INC_STATS(net, LINUX_MIB_TCPTIMEWAITOVERFLOW);
}
tcp_update_metrics(sk);
@@ -347,6 +347,27 @@ void tcp_twsk_destructor(struct sock *sk)
}
EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
+void tcp_twsk_purge(struct list_head *net_exit_list, int family)
+{
+ bool purged_once = false;
+ struct net *net;
+
+ list_for_each_entry(net, net_exit_list, exit_list) {
+ if (net->ipv4.tcp_death_row.hashinfo->pernet) {
+ /* Even if tw_refcount == 1, we must clean up kernel reqsk */
+ inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo, family);
+ } else if (!purged_once) {
+ /* The last refcount is decremented in tcp_sk_exit_batch() */
+ if (refcount_read(&net->ipv4.tcp_death_row.tw_refcount) == 1)
+ continue;
+
+ inet_twsk_purge(&tcp_hashinfo, family);
+ purged_once = true;
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(tcp_twsk_purge);
+
/* Warning : This function is called without sk_listener being locked.
* Be sure to read socket fields once, as their value could change under us.
*/
@@ -531,7 +552,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->tsoffset = treq->ts_off;
#ifdef CONFIG_TCP_MD5SIG
newtp->md5sig_info = NULL; /*XXX*/
- if (newtp->af_specific->md5_lookup(sk, newsk))
+ if (treq->af_specific->req_md5_lookup(sk, req_to_sk(req)))
newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
@@ -541,6 +562,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->fastopen_req = NULL;
RCU_INIT_POINTER(newtp->fastopen_rsk, NULL);
+ newtp->bpf_chg_cc_inprogress = 0;
tcp_bpf_clone(sk, newsk);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
@@ -583,7 +605,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
* it can be estimated (approximately)
* from another data.
*/
- tmp_opt.ts_recent_stamp = ktime_get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
+ tmp_opt.ts_recent_stamp = ktime_get_seconds() - reqsk_timeout(req, TCP_RTO_MAX) / HZ;
paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
}
}
@@ -622,8 +644,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
!inet_rtx_syn_ack(sk, req)) {
unsigned long expires = jiffies;
- expires += min(TCP_TIMEOUT_INIT << req->num_timeout,
- TCP_RTO_MAX);
+ expires += reqsk_timeout(req, TCP_RTO_MAX);
if (!fastopen)
mod_timer_pending(&req->rsk_timer, expires);
else
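Both tcp_minisocks.c call sites above now use the reqsk_timeout() helper rather than open-coding the shift-and-clamp; this is also why the ts_recent_stamp estimate earlier in the hunk divides by HZ. A stand-alone model of what the helper computes (assumed; the real helper lives in a header and takes the request sock, whose req->timeout is set when the SYN arrives):

    /* exponential SYN-ACK retransmit backoff, capped at rto_max */
    static unsigned long model_reqsk_timeout(unsigned long timeout,
                                             int num_timeout,
                                             unsigned long rto_max)
    {
            unsigned long t = timeout << num_timeout;

            return t < rto_max ? t : rto_max;
    }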
@@ -782,7 +803,7 @@ listen_overflow:
if (sk != req->rsk_listener)
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
- if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow)) {
inet_rsk(req)->acked = 1;
return NULL;
}
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index ab552356bdba..a60662f4bdf9 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -197,10 +197,10 @@ static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked)
}
if (ca->cwnd_growth_factor < 0) {
- cnt = tp->snd_cwnd << -ca->cwnd_growth_factor;
+ cnt = tcp_snd_cwnd(tp) << -ca->cwnd_growth_factor;
tcp_cong_avoid_ai(tp, cnt, acked);
} else {
- cnt = max(4U, tp->snd_cwnd >> ca->cwnd_growth_factor);
+ cnt = max(4U, tcp_snd_cwnd(tp) >> ca->cwnd_growth_factor);
tcp_cong_avoid_ai(tp, cnt, acked);
}
}
@@ -209,7 +209,7 @@ static u32 tcpnv_recalc_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- return max((tp->snd_cwnd * nv_loss_dec_factor) >> 10, 2U);
+ return max((tcp_snd_cwnd(tp) * nv_loss_dec_factor) >> 10, 2U);
}
static void tcpnv_state(struct sock *sk, u8 new_state)
@@ -257,7 +257,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
return;
/* Stop cwnd growth if we were in catch up mode */
- if (ca->nv_catchup && tp->snd_cwnd >= nv_min_cwnd) {
+ if (ca->nv_catchup && tcp_snd_cwnd(tp) >= nv_min_cwnd) {
ca->nv_catchup = 0;
ca->nv_allow_cwnd_growth = 0;
}
@@ -371,7 +371,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
* if cwnd < max_win, grow cwnd
* else leave the same
*/
- if (tp->snd_cwnd > max_win) {
+ if (tcp_snd_cwnd(tp) > max_win) {
/* there is congestion, check that it is ok
* to make a CA decision
* 1. We should have at least nv_dec_eval_min_calls
@@ -398,20 +398,20 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
ca->nv_allow_cwnd_growth = 0;
tp->snd_ssthresh =
(nv_ssthresh_factor * max_win) >> 3;
- if (tp->snd_cwnd - max_win > 2) {
+ if (tcp_snd_cwnd(tp) - max_win > 2) {
/* gap > 2, we do exponential cwnd decrease */
int dec;
- dec = max(2U, ((tp->snd_cwnd - max_win) *
+ dec = max(2U, ((tcp_snd_cwnd(tp) - max_win) *
nv_cong_dec_mult) >> 7);
- tp->snd_cwnd -= dec;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - dec);
} else if (nv_cong_dec_mult > 0) {
- tp->snd_cwnd = max_win;
+ tcp_snd_cwnd_set(tp, max_win);
}
if (ca->cwnd_growth_factor > 0)
ca->cwnd_growth_factor = 0;
ca->nv_no_cong_cnt = 0;
- } else if (tp->snd_cwnd <= max_win - nv_pad_buffer) {
+ } else if (tcp_snd_cwnd(tp) <= max_win - nv_pad_buffer) {
/* There is no congestion, grow cwnd if allowed*/
if (ca->nv_eval_call_cnt < nv_inc_eval_min_calls)
return;
@@ -444,8 +444,8 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
* (it wasn't before, if it is now is because nv
* decreased it).
*/
- if (tp->snd_cwnd < nv_min_cwnd)
- tp->snd_cwnd = nv_min_cwnd;
+ if (tcp_snd_cwnd(tp) < nv_min_cwnd)
+ tcp_snd_cwnd_set(tp, nv_min_cwnd);
}
}
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 30abde86db45..45dda7889387 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -195,12 +195,9 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
off = skb_gro_offset(skb);
hlen = off + sizeof(*th);
- th = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, hlen)) {
- th = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!th))
- goto out;
- }
+ th = skb_gro_header(skb, hlen, off);
+ if (unlikely(!th))
+ goto out;
thlen = th->doff * 4;
if (thlen < sizeof(*th))
@@ -258,7 +255,15 @@ found:
mss = skb_shinfo(p)->gso_size;
- flush |= (len - 1) >= mss;
+ /* If skb is a GRO packet, make sure its gso_size matches prior packet mss.
+ * If it is a single frame, do not aggregate it if its length
+ * is bigger than our mss.
+ */
+ if (unlikely(skb_is_gso(skb)))
+ flush |= (mss != skb_shinfo(skb)->gso_size);
+ else
+ flush |= (len - 1) >= mss;
+
flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
#ifdef CONFIG_TLS_DEVICE
flush |= p->decrypted ^ skb->decrypted;
@@ -272,7 +277,12 @@ found:
tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
out_check_final:
- flush = len < mss;
+ /* Force a flush if last segment is smaller than mss. */
+ if (unlikely(skb_is_gso(skb)))
+ flush = len != NAPI_GRO_CB(skb)->count * skb_shinfo(skb)->gso_size;
+ else
+ flush = len < mss;
+
flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
TCP_FLAG_RST | TCP_FLAG_SYN |
TCP_FLAG_FIN));
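The two tcp_gro_receive() hunks above tighten when GRO may merge now that GRO-built packets can be fed back in: mid-stream, a GSO input must carry the same gso_size as the aggregate under construction; at the end, a GSO input must be an exact multiple of its gso_size or it forces a flush. A compilable model of both decisions (stand-alone sketch, not the kernel API):

    #include <stdbool.h>
    #include <stdint.h>

    /* mid-stream: nonzero means the candidate must not be merged */
    static int model_flush_mid(bool is_gso, uint32_t len, uint32_t mss,
                               uint32_t gso_size)
    {
            if (is_gso)
                    return gso_size != mss; /* GRO-on-GRO: sizes must match */
            return (len - 1) >= mss;        /* lone frame must fit in mss */
    }

    /* final check: a GSO input must be exactly count * gso_size */
    static int model_flush_final(bool is_gso, uint32_t len, uint32_t mss,
                                 uint32_t gso_size, uint32_t count)
    {
            if (is_gso)
                    return len != count * gso_size;
            return len < mss;
    }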
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5079832af5c1..c69f4d966024 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -82,6 +82,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
tcp_skb_pcount(skb));
+ tcp_check_space(sk);
}
/* SND.NXT, if window was not shrunk or the amount of shrunk was less than one
@@ -142,7 +143,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
- u32 cwnd = tp->snd_cwnd;
+ u32 cwnd = tcp_snd_cwnd(tp);
tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
@@ -151,7 +152,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta)
while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
cwnd >>= 1;
- tp->snd_cwnd = max(cwnd, restart_cwnd);
+ tcp_snd_cwnd_set(tp, max(cwnd, restart_cwnd));
tp->snd_cwnd_stamp = tcp_jiffies32;
tp->snd_cwnd_used = 0;
}
@@ -166,16 +167,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(sk, CA_EVENT_TX_START);
- /* If this is the first data packet sent in response to the
- * previous received data,
- * and it is a reply for ato after last received packet,
- * increase pingpong count.
- */
- if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
- (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
- inet_csk_inc_pingpong_cnt(sk);
-
tp->lsndtime = now;
+
+ /* If it is a reply for ato after last received
+ * packet, enter pingpong mode.
+ */
+ if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+ inet_csk_enter_pingpong_mode(sk);
}
/* Account for an ACK we sent. */
@@ -229,7 +227,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
* which we interpret as a sign the remote TCP is not
* misinterpreting the window field as a signed quantity.
*/
- if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows))
(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
else
(*rcv_wnd) = min_t(u32, space, U16_MAX);
@@ -240,8 +238,8 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
*rcv_wscale = 0;
if (wscale_ok) {
/* Set window scaling on max possible window */
- space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
- space = max_t(u32, space, sysctl_rmem_max);
+ space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
+ space = max_t(u32, space, READ_ONCE(sysctl_rmem_max));
space = min_t(u32, space, *window_clamp);
*rcv_wscale = clamp_t(int, ilog2(space) - 15,
0, TCP_MAX_WSCALE);
@@ -284,7 +282,7 @@ static u16 tcp_select_window(struct sock *sk)
* scaled window.
*/
if (!tp->rx_opt.rcv_wscale &&
- sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows))
new_win = min(new_win, MAX_TCP_WINDOW);
else
new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
@@ -323,7 +321,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
- bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
+ bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
if (!use_ecn) {
@@ -345,7 +343,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
{
- if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback))
/* tp->ecn_flags are cleared at a later point in time when
* SYN ACK is ultimately being received.
*/
@@ -444,12 +442,13 @@ struct tcp_out_options {
struct mptcp_out_options mptcp;
};
-static void mptcp_options_write(__be32 *ptr, const struct tcp_sock *tp,
+static void mptcp_options_write(struct tcphdr *th, __be32 *ptr,
+ struct tcp_sock *tp,
struct tcp_out_options *opts)
{
#if IS_ENABLED(CONFIG_MPTCP)
if (unlikely(OPTION_MPTCP & opts->options))
- mptcp_write_options(ptr, tp, &opts->mptcp);
+ mptcp_write_options(th, ptr, tp, &opts->mptcp);
#endif
}
@@ -605,9 +604,10 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
* At least SACK_PERM as the first option is known to lead to a disaster
* (but it may well be that other scenarios fail similarly).
*/
-static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
+static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp,
struct tcp_out_options *opts)
{
+ __be32 *ptr = (__be32 *)(th + 1);
u16 options = opts->options; /* mungable copy */
if (unlikely(OPTION_MD5 & options)) {
@@ -701,7 +701,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
smc_options_write(ptr, &options);
- mptcp_options_write(ptr, tp, opts);
+ mptcp_options_write(th, ptr, tp, opts);
}
static void smc_set_option(const struct tcp_sock *tp,
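Passing the tcphdr instead of a raw option cursor lets tcp_options_write() derive the option area itself, and, presumably, lets mptcp_options_write() reach header fields such as the announced window (the MPTCP side is outside this diff). The derivation is one line, since options start immediately after the fixed 20-byte header:

    __be32 *ptr = (__be32 *)(th + 1);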
@@ -788,18 +788,18 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
opts->mss = tcp_advertise_mss(sk);
remaining -= TCPOLEN_MSS_ALIGNED;
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) {
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) {
opts->options |= OPTION_TS;
opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
opts->tsecr = tp->rx_opt.ts_recent;
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling))) {
opts->ws = tp->rx_opt.rcv_wscale;
opts->options |= OPTION_WSCALE;
remaining -= TCPOLEN_WSCALE_ALIGNED;
}
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) {
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_sack))) {
opts->options |= OPTION_SACK_ADVERTISE;
if (unlikely(!(OPTION_TS & opts->options)))
remaining -= TCPOLEN_SACKPERM_ALIGNED;
@@ -1013,7 +1013,7 @@ static void tcp_tsq_write(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (tp->lost_out > tp->retrans_out &&
- tp->snd_cwnd > tcp_packets_in_flight(tp)) {
+ tcp_snd_cwnd(tp) > tcp_packets_in_flight(tp)) {
tcp_mstamp_refresh(tp);
tcp_xmit_retransmit_queue(sk);
}
@@ -1253,7 +1253,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
tp = tcp_sk(sk);
prior_wstamp = tp->tcp_wstamp_ns;
tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
- skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
+ skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
if (clone_it) {
oskb = skb;
@@ -1354,7 +1354,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
th->window = htons(min(tp->rcv_wnd, 65535U));
}
- tcp_options_write((__be32 *)(th + 1), tp, &opts);
+ tcp_options_write(th, tp, &opts);
#ifdef CONFIG_TCP_MD5SIG
/* Calculate the MD5 hash, as we have all we need now */
@@ -1550,7 +1550,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
* SO_SNDBUF values.
* Also allow first and last skb in retransmit queue to be split.
*/
- limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);
+ limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_LEGACY_MAX_SIZE);
if (unlikely((sk->sk_wmem_queued >> 1) > limit &&
tcp_queue != TCP_FRAG_IN_WRITE_QUEUE &&
skb != tcp_rtx_queue_head(sk) &&
@@ -1589,7 +1589,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
skb_split(skb, buff, len);
- buff->tstamp = skb->tstamp;
+ skb_set_delivery_time(buff, skb->tstamp, true);
tcp_fragment_tstamp(skb, buff);
old_factor = tcp_skb_pcount(skb);
@@ -1716,7 +1716,8 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
mss_now -= icsk->icsk_ext_hdr_len;
/* Then reserve room for full set of TCP options and 8 bytes of data */
- mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
+ mss_now = max(mss_now,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss));
return mss_now;
}
@@ -1759,10 +1760,10 @@ void tcp_mtup_init(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct net *net = sock_net(sk);
- icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
+ icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1;
icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
icsk->icsk_af_ops->net_header_len;
- icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
+ icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, READ_ONCE(net->ipv4.sysctl_tcp_base_mss));
icsk->icsk_mtup.probe_size = 0;
if (icsk->icsk_mtup.enabled)
icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
@@ -1860,9 +1861,9 @@ static void tcp_cwnd_application_limited(struct sock *sk)
/* Limited by application or receiver window. */
u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
u32 win_used = max(tp->snd_cwnd_used, init_win);
- if (win_used < tp->snd_cwnd) {
+ if (win_used < tcp_snd_cwnd(tp)) {
tp->snd_ssthresh = tcp_current_ssthresh(sk);
- tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
+ tcp_snd_cwnd_set(tp, (tcp_snd_cwnd(tp) + win_used) >> 1);
}
tp->snd_cwnd_used = 0;
}
@@ -1874,15 +1875,20 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
struct tcp_sock *tp = tcp_sk(sk);
- /* Track the maximum number of outstanding packets in each
- * window, and remember whether we were cwnd-limited then.
+ /* Track the strongest available signal of the degree to which the cwnd
+ * is fully utilized. If cwnd-limited then remember that fact for the
+ * current window. If not cwnd-limited then track the maximum number of
+ * outstanding packets in the current window. (If cwnd-limited then we
+ * chose to not update tp->max_packets_out to avoid an extra else
+ * clause with no functional impact.)
*/
- if (!before(tp->snd_una, tp->max_packets_seq) ||
- tp->packets_out > tp->max_packets_out ||
- is_cwnd_limited) {
- tp->max_packets_out = tp->packets_out;
- tp->max_packets_seq = tp->snd_nxt;
+ if (!before(tp->snd_una, tp->cwnd_usage_seq) ||
+ is_cwnd_limited ||
+ (!tp->is_cwnd_limited &&
+ tp->packets_out > tp->max_packets_out)) {
tp->is_cwnd_limited = is_cwnd_limited;
+ tp->max_packets_out = tp->packets_out;
+ tp->cwnd_usage_seq = tp->snd_nxt;
}
if (tcp_is_cwnd_limited(sk)) {
@@ -1894,7 +1900,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
if (tp->packets_out > tp->snd_cwnd_used)
tp->snd_cwnd_used = tp->packets_out;
- if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) &&
(s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
!ca_ops->cong_control)
tcp_cwnd_application_limited(sk);
@@ -1951,25 +1957,34 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
}
/* Return how many segs we'd like on a TSO packet,
- * to send one TSO packet per ms
+ * depending on current pacing rate and how close the peer is.
+ *
+ * Rationale is:
+ * - For close peers, we rather send bigger packets to reduce
+ * cpu costs, because occasional losses will be repaired fast.
+ * - For long distance/rtt flows, we would like to get ACK clocking
+ * with 1 ACK per ms.
+ *
+ * Use min_rtt to help adapt TSO burst size, with smaller min_rtt resulting
+ * in bigger TSO bursts. We cut the RTT-based allowance in half
+ * for every 2^9 usec (aka 512 us) of RTT, so that the RTT-based allowance
+ * is below 1500 bytes after 6 * ~500 usec = 3ms.
*/
static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
int min_tso_segs)
{
- u32 bytes, segs;
+ unsigned long bytes;
+ u32 r;
- bytes = min_t(unsigned long,
- sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
- sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
+ bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift);
- /* Goal is to send at least one packet per ms,
- * not one big TSO packet every 100 ms.
- * This preserves ACK clocking and is consistent
- * with tcp_tso_should_defer() heuristic.
- */
- segs = max_t(u32, bytes / mss_now, min_tso_segs);
+ r = tcp_min_rtt(tcp_sk(sk)) >> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log);
+ if (r < BITS_PER_TYPE(sk->sk_gso_max_size))
+ bytes += sk->sk_gso_max_size >> r;
+
+ bytes = min_t(unsigned long, bytes, sk->sk_gso_max_size);
- return segs;
+ return max_t(u32, bytes / mss_now, min_tso_segs);
}
/* Return the number of segments we want in the skb we are transmitting.
@@ -1982,7 +1997,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
min_tso = ca_ops->min_tso_segs ?
ca_ops->min_tso_segs(sk) :
- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
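A stand-alone model of the reworked autosizing above, useful for experimenting with the new tcp_tso_rtt_log knob; the names mirror the kernel's, but the values in main() are illustrative assumptions:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t model_tso_autosize(uint64_t pacing_rate, int pacing_shift,
                                       uint32_t gso_max_size, uint32_t min_rtt_us,
                                       int rtt_log, uint32_t mss, uint32_t min_segs)
    {
            uint64_t bytes = pacing_rate >> pacing_shift;
            uint32_t r = min_rtt_us >> rtt_log; /* halve per 2^rtt_log usec */

            if (r < 32)                         /* BITS_PER_TYPE(u32) guard */
                    bytes += gso_max_size >> r;
            if (bytes > gso_max_size)
                    bytes = gso_max_size;
            return bytes / mss > min_segs ? bytes / mss : min_segs;
    }

    int main(void)
    {
            /* 100 Mbit/s pacing, 10 us min_rtt (close peer), default knobs:
             * the RTT bonus dominates and we get large TSO bursts. */
            printf("%u segs\n",
                   model_tso_autosize(100000000 / 8, 10, 65536, 10, 9, 1448, 2));
            return 0;
    }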
@@ -2034,7 +2049,7 @@ static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
return 1;
in_flight = tcp_packets_in_flight(tp);
- cwnd = tp->snd_cwnd;
+ cwnd = tcp_snd_cwnd(tp);
if (in_flight >= cwnd)
return 0;
@@ -2187,12 +2202,12 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
in_flight = tcp_packets_in_flight(tp);
BUG_ON(tcp_skb_pcount(skb) <= 1);
- BUG_ON(tp->snd_cwnd <= in_flight);
+ BUG_ON(tcp_snd_cwnd(tp) <= in_flight);
send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
/* From in_flight test above, we know that cwnd > in_flight. */
- cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
+ cong_win = (tcp_snd_cwnd(tp) - in_flight) * tp->mss_cache;
limit = min(send_win, cong_win);
@@ -2206,7 +2221,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
if (win_divisor) {
- u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
+ u32 chunk = min(tp->snd_wnd, tcp_snd_cwnd(tp) * tp->mss_cache);
/* If at least some fraction of a window is available,
* just use it.
@@ -2270,7 +2285,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
u32 interval;
s32 delta;
- interval = net->ipv4.sysctl_tcp_probe_interval;
+ interval = READ_ONCE(net->ipv4.sysctl_tcp_probe_interval);
delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp;
if (unlikely(delta >= interval * HZ)) {
int mss = tcp_current_mss(sk);
@@ -2336,7 +2351,7 @@ static int tcp_mtu_probe(struct sock *sk)
if (likely(!icsk->icsk_mtup.enabled ||
icsk->icsk_mtup.probe_size ||
inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
- tp->snd_cwnd < 11 ||
+ tcp_snd_cwnd(tp) < 11 ||
tp->rx_opt.num_sacks || tp->rx_opt.dsack))
return -1;
@@ -2354,7 +2369,7 @@ static int tcp_mtu_probe(struct sock *sk)
* probing process by not resetting search range to its original.
*/
if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
- interval < net->ipv4.sysctl_tcp_probe_threshold) {
+ interval < READ_ONCE(net->ipv4.sysctl_tcp_probe_threshold)) {
/* Check whether enough time has elapsed for
* another round of probing.
*/
@@ -2372,7 +2387,7 @@ static int tcp_mtu_probe(struct sock *sk)
return 0;
/* Do we need to wait to drain cwnd? With none in flight, don't stall */
- if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
+ if (tcp_packets_in_flight(tp) + 2 > tcp_snd_cwnd(tp)) {
if (!tcp_packets_in_flight(tp))
return -1;
else
@@ -2441,7 +2456,7 @@ static int tcp_mtu_probe(struct sock *sk)
if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
/* Decrement cwnd here because we are sending
* effectively two packets. */
- tp->snd_cwnd--;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1);
tcp_event_new_data_sent(sk, nskb);
icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
@@ -2494,7 +2509,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
if (sk->sk_pacing_status == SK_PACING_NONE)
limit = min_t(unsigned long, limit,
- sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes));
limit <<= factor;
if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
@@ -2616,7 +2631,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
/* "skb_mstamp_ns" is used as a start point for the retransmit timer */
- skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache;
+ tp->tcp_wstamp_ns = tp->tcp_clock_cache;
+ skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
tcp_init_tso_segs(skb, mss_now);
goto repair; /* Skip network transmission */
@@ -2698,7 +2714,7 @@ repair:
else
tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED);
- is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
+ is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tcp_snd_cwnd(tp));
if (likely(sent_pkts || is_cwnd_limited))
tcp_cwnd_validate(sk, is_cwnd_limited);
@@ -2727,7 +2743,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
if (rcu_access_pointer(tp->fastopen_rsk))
return false;
- early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
+ early_retrans = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_early_retrans);
/* Schedule a loss probe in 2*RTT for SACK capable connections
* not in loss recovery, that are either limited by cwnd or application.
*/
@@ -2808,7 +2824,7 @@ void tcp_send_loss_probe(struct sock *sk)
if (unlikely(!skb)) {
WARN_ONCE(tp->packets_out,
"invalid inflight: %u state %u cwnd %u mss %d\n",
- tp->packets_out, sk->sk_state, tp->snd_cwnd, mss);
+ tp->packets_out, sk->sk_state, tcp_snd_cwnd(tp), mss);
inet_csk(sk)->icsk_pending = 0;
return;
}
@@ -3091,7 +3107,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
struct sk_buff *skb = to, *tmp;
bool first = true;
- if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse))
return;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
return;
@@ -3131,7 +3147,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
struct tcp_sock *tp = tcp_sk(sk);
unsigned int cur_mss;
int diff, len, err;
-
+ int avail_wnd;
/* Inconclusive MTU probe */
if (icsk->icsk_mtup.probe_size)
@@ -3153,17 +3169,25 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
return -EHOSTUNREACH; /* Routing failure or similar. */
cur_mss = tcp_current_mss(sk);
+ avail_wnd = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
/* If receiver has shrunk his window, and skb is out of
* new window, do not retransmit it. The exception is the
* case when the window is shrunk to zero. In this case
- * our retransmit serves as a zero window probe.
+ * our retransmit of one segment serves as a zero window probe.
*/
- if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
- TCP_SKB_CB(skb)->seq != tp->snd_una)
- return -EAGAIN;
+ if (avail_wnd <= 0) {
+ if (TCP_SKB_CB(skb)->seq != tp->snd_una)
+ return -EAGAIN;
+ avail_wnd = cur_mss;
+ }
len = cur_mss * segs;
+ if (len > avail_wnd) {
+ len = rounddown(avail_wnd, cur_mss);
+ if (!len)
+ len = avail_wnd;
+ }
if (skb->len > len) {
if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
cur_mss, GFP_ATOMIC))
@@ -3177,8 +3201,9 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
diff -= tcp_skb_pcount(skb);
if (diff)
tcp_adjust_pcount(sk, skb, diff);
- if (skb->len < cur_mss)
- tcp_retrans_try_collapse(sk, skb, cur_mss);
+ avail_wnd = min_t(int, avail_wnd, cur_mss);
+ if (skb->len < avail_wnd)
+ tcp_retrans_try_collapse(sk, skb, avail_wnd);
}
/* RFC3168, section 6.1.1.1. ECN fallback */
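The __tcp_retransmit_skb() change above stops a retransmit from overshooting a receive window that shrank after the original send. A stand-alone model of the length selection, with rounddown() replaced by its arithmetic and the seq != snd_una early-return (-EAGAIN) case omitted:

    static int model_retrans_len(int avail_wnd, int cur_mss, int segs)
    {
            int len;

            if (avail_wnd <= 0)
                    avail_wnd = cur_mss; /* zero-window probe: one segment */

            len = cur_mss * segs;
            if (len > avail_wnd) {
                    len = avail_wnd - (avail_wnd % cur_mss); /* rounddown() */
                    if (!len)
                            len = avail_wnd; /* window below one MSS */
            }
            return len;
    }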
@@ -3292,7 +3317,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if (!hole)
tp->retransmit_skb_hint = skb;
- segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
+ segs = tcp_snd_cwnd(tp) - tcp_packets_in_flight(tp);
if (segs <= 0)
break;
sacked = TCP_SKB_CB(skb)->sacked;
@@ -3349,12 +3374,13 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
*/
void sk_forced_mem_schedule(struct sock *sk, int size)
{
- int amt;
+ int delta, amt;
- if (size <= sk->sk_forward_alloc)
+ delta = size - sk->sk_forward_alloc;
+ if (delta <= 0)
return;
- amt = sk_mem_pages(size);
- sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
+ amt = sk_mem_pages(delta);
+ sk->sk_forward_alloc += amt << PAGE_SHIFT;
sk_memory_allocated_add(sk, amt);
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
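sk_forced_mem_schedule() now charges only the shortfall rather than the full size, and accounts in pages since SK_MEM_QUANTUM is removed elsewhere in this series. A model of the new math, assuming 4 KiB pages:

    #define MODEL_PAGE_SHIFT 12
    #define MODEL_PAGE_SIZE  (1 << MODEL_PAGE_SHIFT)

    /* round a byte count up to whole pages, like sk_mem_pages() */
    static int model_mem_pages(int bytes)
    {
            return (bytes + MODEL_PAGE_SIZE - 1) >> MODEL_PAGE_SHIFT;
    }

    static void model_forced_mem_schedule(int *forward_alloc, int size)
    {
            int delta = size - *forward_alloc;

            if (delta <= 0)
                    return; /* enough forward allocation already */
            *forward_alloc += model_mem_pages(delta) << MODEL_PAGE_SHIFT;
    }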
@@ -3541,11 +3567,12 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
now = tcp_clock_ns();
#ifdef CONFIG_SYN_COOKIES
if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok))
- skb->skb_mstamp_ns = cookie_init_timestamp(req, now);
+ skb_set_delivery_time(skb, cookie_init_timestamp(req, now),
+ true);
else
#endif
{
- skb->skb_mstamp_ns = now;
+ skb_set_delivery_time(skb, now, true);
if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */
tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb);
}
@@ -3579,7 +3606,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
th->window = htons(min(req->rsk_rcv_wnd, 65535U));
- tcp_options_write((__be32 *)(th + 1), NULL, &opts);
+ tcp_options_write(th, NULL, &opts);
th->doff = (tcp_header_size >> 2);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
@@ -3594,7 +3621,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb,
synack_type, &opts);
- skb->skb_mstamp_ns = now;
+ skb_set_delivery_time(skb, now, true);
tcp_add_tx_delay(skb, tp);
return skb;
@@ -3632,7 +3659,7 @@ static void tcp_connect_init(struct sock *sk)
* See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
*/
tp->tcp_header_len = sizeof(struct tcphdr);
- if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps))
tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
@@ -3668,7 +3695,7 @@ static void tcp_connect_init(struct sock *sk)
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd,
&tp->window_clamp,
- sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling),
&rcv_wscale,
rcv_wnd);
@@ -3719,6 +3746,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
*/
static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
{
+ struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_request *fo = tp->fastopen_req;
int space, err = 0;
@@ -3733,8 +3761,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
* private TCP options. The cost is reduced data space in SYN :(
*/
tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp);
+ /* Sync mss_cache after updating the mss_clamp */
+ tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
- space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
+ space = __tcp_mtu_to_mss(sk, icsk->icsk_pmtu_cookie) -
MAX_TCP_OPTION_SPACE;
space = min_t(size_t, space, fo->size);
@@ -3771,7 +3801,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
- syn->skb_mstamp_ns = syn_data->skb_mstamp_ns;
+ skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, true);
/* Now full SYN+DATA was cloned and sent (or not),
* remove the SYN from the original skb (syn_data)
@@ -4072,7 +4102,7 @@ void tcp_send_probe0(struct sock *sk)
icsk->icsk_probes_out++;
if (err <= 0) {
- if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
+ if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2))
icsk->icsk_backoff++;
timeout = tcp_probe0_when(sk, TCP_RTO_MAX);
} else {
@@ -4092,12 +4122,14 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
struct flowi fl;
int res;
- tcp_rsk(req)->txhash = net_tx_rndhash();
+ /* Paired with WRITE_ONCE() in sock_setsockopt() */
+ if (READ_ONCE(sk->sk_txrehash) == SOCK_TXREHASH_ENABLED)
+ tcp_rsk(req)->txhash = net_tx_rndhash();
res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL,
NULL);
if (!res) {
- __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
if (unlikely(tcp_passive_fastopen(sk)))
tcp_sk(sk)->total_retrans++;
trace_tcp_retransmit_synack(sk, req);
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index fbab921670cc..a8f6d9d06f2e 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -74,27 +74,32 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
*
* If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is
* called multiple times. We favor the information from the most recently
- * sent skb, i.e., the skb with the highest prior_delivered count.
+ * sent skb, i.e., the skb with the most recently sent time and the highest
+ * sequence.
*/
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+ u64 tx_tstamp;
if (!scb->tx.delivered_mstamp)
return;
+ tx_tstamp = tcp_skb_timestamp_us(skb);
if (!rs->prior_delivered ||
- after(scb->tx.delivered, rs->prior_delivered)) {
+ tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
+ scb->end_seq, rs->last_end_seq)) {
rs->prior_delivered_ce = scb->tx.delivered_ce;
rs->prior_delivered = scb->tx.delivered;
rs->prior_mstamp = scb->tx.delivered_mstamp;
rs->is_app_limited = scb->tx.is_app_limited;
rs->is_retrans = scb->sacked & TCPCB_RETRANS;
+ rs->last_end_seq = scb->end_seq;
/* Record send time of most recently ACKed packet: */
- tp->first_tx_mstamp = tcp_skb_timestamp_us(skb);
+ tp->first_tx_mstamp = tx_tstamp;
/* Find the duration of the "send phase" of this window: */
rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp,
scb->tx.first_tx_mstamp);
@@ -195,7 +200,7 @@ void tcp_rate_check_app_limited(struct sock *sk)
/* Nothing in sending host's qdisc queues or NIC tx queue. */
sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1) &&
/* We are not limited by CWND. */
- tcp_packets_in_flight(tp) < tp->snd_cwnd &&
+ tcp_packets_in_flight(tp) < tcp_snd_cwnd(tp) &&
/* All lost packets have been retransmitted. */
tp->lost_out <= tp->retrans_out)
tp->app_limited =
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index fd113f6226ef..50abaa941387 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -2,11 +2,6 @@
#include <linux/tcp.h>
#include <net/tcp.h>
-static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
-{
- return t1 > t2 || (t1 == t2 && after(seq1, seq2));
-}
-
static u32 tcp_rack_reo_wnd(const struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -19,7 +14,8 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk)
return 0;
if (tp->sacked_out >= tp->reordering &&
- !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH))
+ !(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_NO_DUPTHRESH))
return 0;
}
@@ -77,9 +73,9 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
!(scb->sacked & TCPCB_SACKED_RETRANS))
continue;
- if (!tcp_rack_sent_after(tp->rack.mstamp,
- tcp_skb_timestamp_us(skb),
- tp->rack.end_seq, scb->end_seq))
+ if (!tcp_skb_sent_after(tp->rack.mstamp,
+ tcp_skb_timestamp_us(skb),
+ tp->rack.end_seq, scb->end_seq))
break;
/* A packet is lost if it has not been s/acked beyond
@@ -140,8 +136,8 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
}
tp->rack.advanced = 1;
tp->rack.rtt_us = rtt_us;
- if (tcp_rack_sent_after(xmit_time, tp->rack.mstamp,
- end_seq, tp->rack.end_seq)) {
+ if (tcp_skb_sent_after(xmit_time, tp->rack.mstamp,
+ end_seq, tp->rack.end_seq)) {
tp->rack.mstamp = xmit_time;
tp->rack.end_seq = end_seq;
}
@@ -192,7 +188,8 @@ void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND ||
+ if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_STATIC_REO_WND) ||
!rs->prior_delivered)
return;
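With tcp_recovery.c's private tcp_rack_sent_after() removed, both RACK (here) and the rate sampler (tcp_rate.c above) share a tcp_skb_sent_after() helper with the same semantics: the later timestamp wins, and equal timestamps fall back to sequence order. A compilable model:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool model_sent_after(uint64_t t1, uint64_t t2,
                                 uint32_t seq1, uint32_t seq2)
    {
            /* after(seq1, seq2) in the kernel is a wrap-safe signed compare */
            return t1 > t2 || (t1 == t2 && (int32_t)(seq1 - seq2) > 0);
    }

    int main(void)
    {
            printf("%d\n", model_sent_after(100, 99, 1, 2));  /* 1: later tstamp */
            printf("%d\n", model_sent_after(100, 100, 5, 4)); /* 1: seq tiebreak */
            printf("%d\n", model_sent_after(100, 100, 4, 5)); /* 0 */
            return 0;
    }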
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 5842081bc8a2..862b96248a92 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -27,7 +27,7 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!acked)
return;
}
- tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT),
+ tcp_cong_avoid_ai(tp, min(tcp_snd_cwnd(tp), TCP_SCALABLE_AI_CNT),
acked);
}
@@ -35,7 +35,7 @@ static u32 tcp_scalable_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U);
+ return max(tcp_snd_cwnd(tp) - (tcp_snd_cwnd(tp)>>TCP_SCALABLE_MD_SCALE), 2U);
}
static struct tcp_congestion_ops tcp_scalable __read_mostly = {
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 20cf4a98c69d..cb79127f45c3 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -143,7 +143,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
*/
static int tcp_orphan_retries(struct sock *sk, bool alive)
{
- int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */
+ int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */
/* We know from an ICMP that something is wrong. */
if (sk->sk_err_soft && !alive)
@@ -163,7 +163,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
int mss;
/* Black hole detection */
- if (!net->ipv4.sysctl_tcp_mtu_probing)
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing))
return;
if (!icsk->icsk_mtup.enabled) {
@@ -171,9 +171,9 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
} else {
mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
- mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
- mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);
- mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
+ mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss);
+ mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor));
+ mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss));
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
}
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
@@ -239,17 +239,18 @@ static int tcp_write_timeout(struct sock *sk)
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits)
__dst_negative_advice(sk);
- retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
+ retry_until = icsk->icsk_syn_retries ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
expired = icsk->icsk_retransmits >= retry_until;
} else {
- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) {
+ if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) {
/* Black hole detection */
tcp_mtu_probing(icsk, sk);
__dst_negative_advice(sk);
}
- retry_until = net->ipv4.sysctl_tcp_retries2;
+ retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
@@ -290,15 +291,13 @@ void tcp_delack_timer_handler(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- sk_mem_reclaim_partial(sk);
-
if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
!(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
- goto out;
+ return;
if (time_after(icsk->icsk_ack.timeout, jiffies)) {
sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
- goto out;
+ return;
}
icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
@@ -317,10 +316,6 @@ void tcp_delack_timer_handler(struct sock *sk)
tcp_send_ack(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
}
-
-out:
- if (tcp_under_memory_pressure(sk))
- sk_mem_reclaim(sk);
}
@@ -380,7 +375,7 @@ static void tcp_probe_timer(struct sock *sk)
msecs_to_jiffies(icsk->icsk_user_timeout))
goto abort;
- max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
+ max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
@@ -406,12 +401,15 @@ abort: tcp_write_err(sk);
static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- int max_retries = icsk->icsk_syn_retries ? :
- sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
struct tcp_sock *tp = tcp_sk(sk);
+ int max_retries;
req->rsk_ops->syn_ack_timeout(req);
+ /* add one more retry for fastopen */
+ max_retries = icsk->icsk_syn_retries ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1;
+
if (req->num_timeout >= max_retries) {
tcp_write_err(sk);
return;
@@ -430,7 +428,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
if (!tp->retrans_stamp)
tp->retrans_stamp = tcp_time_stamp(tp);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
+ req->timeout << req->num_timeout, TCP_RTO_MAX);
}
@@ -574,7 +572,7 @@ out_reset_timer:
* linear-timeout retransmissions into a black hole
*/
if (sk->sk_state == TCP_ESTABLISHED &&
- (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
+ (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) &&
tcp_stream_is_thin(tp) &&
icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
icsk->icsk_backoff = 0;
@@ -585,7 +583,7 @@ out_reset_timer:
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX);
- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0))
+ if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0))
__sk_dst_reset(sk);
out:;
@@ -600,11 +598,11 @@ void tcp_write_timer_handler(struct sock *sk)
if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
!icsk->icsk_pending)
- goto out;
+ return;
if (time_after(icsk->icsk_timeout, jiffies)) {
sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
- goto out;
+ return;
}
tcp_mstamp_refresh(tcp_sk(sk));
@@ -626,9 +624,6 @@ void tcp_write_timer_handler(struct sock *sk)
tcp_probe_timer(sk);
break;
}
-
-out:
- sk_mem_reclaim(sk);
}
static void tcp_write_timer(struct timer_list *t)
@@ -743,8 +738,6 @@ static void tcp_keepalive_timer (struct timer_list *t)
elapsed = keepalive_time_when(tp) - elapsed;
}
- sk_mem_reclaim(sk);
-
resched:
inet_csk_reset_keepalive_timer (sk, elapsed);
goto out;
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 7c27aa629af1..9ae50b1bd844 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -136,6 +136,9 @@ static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops)
if (icsk->icsk_ulp_ops)
goto out_err;
+ if (sk->sk_socket)
+ clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
+
err = ulp_ops->init(sk);
if (err)
goto out_err;
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index c8003c8aad2c..786848ad37ea 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -159,7 +159,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
{
- return min(tp->snd_ssthresh, tp->snd_cwnd);
+ return min(tp->snd_ssthresh, tcp_snd_cwnd(tp));
}
static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
@@ -217,14 +217,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* This is:
* (actual rate in segments) * baseRTT
*/
- target_cwnd = (u64)tp->snd_cwnd * vegas->baseRTT;
+ target_cwnd = (u64)tcp_snd_cwnd(tp) * vegas->baseRTT;
do_div(target_cwnd, rtt);
/* Calculate the difference between the window we had,
* and the window we would like to have. This quantity
* is the "Diff" from the Arizona Vegas papers.
*/
- diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT;
+ diff = tcp_snd_cwnd(tp) * (rtt-vegas->baseRTT) / vegas->baseRTT;
if (diff > gamma && tcp_in_slow_start(tp)) {
/* Going too fast. Time to slow down
@@ -238,7 +238,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* truncation robs us of full link
* utilization.
*/
- tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp),
+ (u32)target_cwnd + 1));
tp->snd_ssthresh = tcp_vegas_ssthresh(tp);
} else if (tcp_in_slow_start(tp)) {
@@ -254,14 +255,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
/* The old window was too fast, so
* we slow down.
*/
- tp->snd_cwnd--;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1);
tp->snd_ssthresh
= tcp_vegas_ssthresh(tp);
} else if (diff < alpha) {
/* We don't have enough extra packets
* in the network, so speed up.
*/
- tp->snd_cwnd++;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
} else {
/* Sending just as fast as we
* should be.
@@ -269,10 +270,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
}
}
- if (tp->snd_cwnd < 2)
- tp->snd_cwnd = 2;
- else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
- tp->snd_cwnd = tp->snd_cwnd_clamp;
+ if (tcp_snd_cwnd(tp) < 2)
+ tcp_snd_cwnd_set(tp, 2);
+ else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp)
+ tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp);
tp->snd_ssthresh = tcp_current_ssthresh(sk);
}
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index cd50a61c9976..366ff6f214b2 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -146,11 +146,11 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
rtt = veno->minrtt;
- target_cwnd = (u64)tp->snd_cwnd * veno->basertt;
+ target_cwnd = (u64)tcp_snd_cwnd(tp) * veno->basertt;
target_cwnd <<= V_PARAM_SHIFT;
do_div(target_cwnd, rtt);
- veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd;
+ veno->diff = (tcp_snd_cwnd(tp) << V_PARAM_SHIFT) - target_cwnd;
if (tcp_in_slow_start(tp)) {
/* Slow start. */
@@ -164,15 +164,15 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
/* In the "non-congestive state", increase cwnd
* every rtt.
*/
- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+ tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked);
} else {
/* In the "congestive state", increase cwnd
* every other rtt.
*/
- if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+ if (tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) {
if (veno->inc &&
- tp->snd_cwnd < tp->snd_cwnd_clamp) {
- tp->snd_cwnd++;
+ tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp) {
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
veno->inc = 0;
} else
veno->inc = 1;
@@ -181,10 +181,10 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
tp->snd_cwnd_cnt += acked;
}
done:
- if (tp->snd_cwnd < 2)
- tp->snd_cwnd = 2;
- else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
- tp->snd_cwnd = tp->snd_cwnd_clamp;
+ if (tcp_snd_cwnd(tp) < 2)
+ tcp_snd_cwnd_set(tp, 2);
+ else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp)
+ tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp);
}
/* Wipe the slate clean for the next rtt. */
/* veno->cntrtt = 0; */
@@ -199,10 +199,10 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
if (veno->diff < beta)
/* in "non-congestive state", cut cwnd by 1/5 */
- return max(tp->snd_cwnd * 4 / 5, 2U);
+ return max(tcp_snd_cwnd(tp) * 4 / 5, 2U);
else
/* in "congestive state", cut cwnd by 1/2 */
- return max(tp->snd_cwnd >> 1U, 2U);
+ return max(tcp_snd_cwnd(tp) >> 1U, 2U);
}
static struct tcp_congestion_ops tcp_veno __read_mostly = {
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index b2e05c4cea00..c6e97141eef2 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -244,7 +244,8 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
switch (event) {
case CA_EVENT_COMPLETE_CWR:
- tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
+ tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
+ tcp_snd_cwnd_set(tp, tp->snd_ssthresh);
break;
case CA_EVENT_LOSS:
tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 07c4c93b9fdb..18b07ff5d20e 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -71,11 +71,11 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!yeah->doing_reno_now) {
/* Scalable */
- tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT),
+ tcp_cong_avoid_ai(tp, min(tcp_snd_cwnd(tp), TCP_SCALABLE_AI_CNT),
acked);
} else {
/* Reno */
- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+ tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked);
}
/* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt.
@@ -130,7 +130,7 @@ do_vegas:
/* Compute excess number of packets above bandwidth
* Avoid doing full 64 bit divide.
*/
- bw = tp->snd_cwnd;
+ bw = tcp_snd_cwnd(tp);
bw *= rtt - yeah->vegas.baseRTT;
do_div(bw, rtt);
queue = bw;
@@ -138,20 +138,20 @@ do_vegas:
if (queue > TCP_YEAH_ALPHA ||
rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) {
if (queue > TCP_YEAH_ALPHA &&
- tp->snd_cwnd > yeah->reno_count) {
+ tcp_snd_cwnd(tp) > yeah->reno_count) {
u32 reduction = min(queue / TCP_YEAH_GAMMA ,
- tp->snd_cwnd >> TCP_YEAH_EPSILON);
+ tcp_snd_cwnd(tp) >> TCP_YEAH_EPSILON);
- tp->snd_cwnd -= reduction;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - reduction);
- tp->snd_cwnd = max(tp->snd_cwnd,
- yeah->reno_count);
+ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp),
+ yeah->reno_count));
- tp->snd_ssthresh = tp->snd_cwnd;
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
}
if (yeah->reno_count <= 2)
- yeah->reno_count = max(tp->snd_cwnd>>1, 2U);
+ yeah->reno_count = max(tcp_snd_cwnd(tp)>>1, 2U);
else
yeah->reno_count++;
@@ -176,7 +176,7 @@ do_vegas:
*/
yeah->vegas.beg_snd_una = yeah->vegas.beg_snd_nxt;
yeah->vegas.beg_snd_nxt = tp->snd_nxt;
- yeah->vegas.beg_snd_cwnd = tp->snd_cwnd;
+ yeah->vegas.beg_snd_cwnd = tcp_snd_cwnd(tp);
/* Wipe the slate clean for the next RTT. */
yeah->vegas.cntRTT = 0;
@@ -193,16 +193,16 @@ static u32 tcp_yeah_ssthresh(struct sock *sk)
if (yeah->doing_reno_now < TCP_YEAH_RHO) {
reduction = yeah->lastQ;
- reduction = min(reduction, max(tp->snd_cwnd>>1, 2U));
+ reduction = min(reduction, max(tcp_snd_cwnd(tp)>>1, 2U));
- reduction = max(reduction, tp->snd_cwnd >> TCP_YEAH_DELTA);
+ reduction = max(reduction, tcp_snd_cwnd(tp) >> TCP_YEAH_DELTA);
} else
- reduction = max(tp->snd_cwnd>>1, 2U);
+ reduction = max(tcp_snd_cwnd(tp)>>1, 2U);
yeah->fast_count = 0;
yeah->reno_count = max(yeah->reno_count>>1, 2U);
- return max_t(int, tp->snd_cwnd - reduction, 2);
+ return max_t(int, tcp_snd_cwnd(tp) - reduction, 2);
}
static struct tcp_congestion_ops tcp_yeah __read_mostly = {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 464590ea922e..6a320a614e54 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -125,6 +125,8 @@ EXPORT_SYMBOL(sysctl_udp_mem);
atomic_long_t udp_memory_allocated ____cacheline_aligned_in_smp;
EXPORT_SYMBOL(udp_memory_allocated);
+DEFINE_PER_CPU(int, udp_memory_per_cpu_fw_alloc);
+EXPORT_PER_CPU_SYMBOL_GPL(udp_memory_per_cpu_fw_alloc);
#define MAX_UDP_PORTS 65536
#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
@@ -244,7 +246,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
inet_get_local_port_range(net, &low, &high);
remaining = (high - low) + 1;
- rand = prandom_u32();
+ rand = get_random_u32();
first = reciprocal_scale(rand, remaining) + low;
/*
* force rand to be an odd multiple of UDP_HTABLE_SIZE
@@ -446,7 +448,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
result = lookup_reuseport(net, sk, skb,
saddr, sport, daddr, hnum);
/* Fall back to scoring if group has connections */
- if (result && !reuseport_has_conns(sk, false))
+ if (result && !reuseport_has_conns(sk))
return result;
result = result ? : sk;
@@ -781,6 +783,8 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
*/
if (tunnel) {
/* ...not for tunnels though: we don't have a sending socket */
+ if (udp_sk(sk)->encap_err_rcv)
+ udp_sk(sk)->encap_err_rcv(sk, skb, iph->ihl << 2);
goto out;
}
if (!inet->recverr) {
@@ -1461,11 +1465,11 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
sk->sk_forward_alloc += size;
- amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1);
+ amt = (sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1);
sk->sk_forward_alloc -= amt;
if (amt)
- __sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT);
+ __sk_mem_reduce_allocated(sk, amt >> PAGE_SHIFT);
atomic_sub(size, &sk->sk_rmem_alloc);
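udp_rmem_release() now reclaims in page units instead of SK_MEM_QUANTUM; the mask trick below only works because PAGE_SIZE is a power of two. Schematically:

    /* bytes returnable to the global pool right now; whatever does not
     * fill a whole page (plus `partial') stays in sk_forward_alloc */
    static int model_reclaimable(int forward_alloc, int partial, int page_size)
    {
            return (forward_alloc - partial) & ~(page_size - 1);
    }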
@@ -1558,7 +1562,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
spin_lock(&list->lock);
if (size >= sk->sk_forward_alloc) {
amt = sk_mem_pages(size);
- delta = amt << SK_MEM_QUANTUM_SHIFT;
+ delta = amt << PAGE_SHIFT;
if (!__sk_mem_raise_allocated(sk, delta, amt, SK_MEM_RECV)) {
err = -ENOBUFS;
spin_unlock(&list->lock);
@@ -1594,7 +1598,7 @@ drop:
}
EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
-void udp_destruct_sock(struct sock *sk)
+void udp_destruct_common(struct sock *sk)
{
/* reclaim completely the forward allocated memory */
struct udp_sock *up = udp_sk(sk);
@@ -1607,18 +1611,22 @@ void udp_destruct_sock(struct sock *sk)
kfree_skb(skb);
}
udp_rmem_release(sk, total, 0, true);
+}
+EXPORT_SYMBOL_GPL(udp_destruct_common);
+static void udp_destruct_sock(struct sock *sk)
+{
+ udp_destruct_common(sk);
inet_sock_destruct(sk);
}
-EXPORT_SYMBOL_GPL(udp_destruct_sock);
int udp_init_sock(struct sock *sk)
{
skb_queue_head_init(&udp_sk(sk)->reader_queue);
sk->sk_destruct = udp_destruct_sock;
+ set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
return 0;
}
-EXPORT_SYMBOL_GPL(udp_init_sock);
void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
{
@@ -1726,7 +1734,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
EXPORT_SYMBOL(udp_ioctl);
struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
- int noblock, int *off, int *err)
+ int *off, int *err)
{
struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
struct sk_buff_head *queue;
@@ -1735,7 +1743,6 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
int error;
queue = &udp_sk(sk)->reader_queue;
- flags |= noblock ? MSG_DONTWAIT : 0;
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
do {
struct sk_buff *skb;
@@ -1796,55 +1803,42 @@ busy_check:
}
EXPORT_SYMBOL(__skb_recv_udp);
-int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
- sk_read_actor_t recv_actor)
+int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
- int copied = 0;
-
- while (1) {
- struct sk_buff *skb;
- int err, used;
-
- skb = skb_recv_udp(sk, 0, 1, &err);
- if (!skb)
- return err;
+ struct sk_buff *skb;
+ int err, copied;
- if (udp_lib_checksum_complete(skb)) {
- __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
- IS_UDPLITE(sk));
- __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
- IS_UDPLITE(sk));
- atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
- continue;
- }
+try_again:
+ skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);
+ if (!skb)
+ return err;
- used = recv_actor(desc, skb, 0, skb->len);
- if (used <= 0) {
- if (!copied)
- copied = used;
- kfree_skb(skb);
- break;
- } else if (used <= skb->len) {
- copied += used;
- }
+ if (udp_lib_checksum_complete(skb)) {
+ int is_udplite = IS_UDPLITE(sk);
+ struct net *net = sock_net(sk);
+ __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, is_udplite);
+ __UDP_INC_STATS(net, UDP_MIB_INERRORS, is_udplite);
+ atomic_inc(&sk->sk_drops);
kfree_skb(skb);
- if (!desc->count)
- break;
+ goto try_again;
}
+ WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
+ copied = recv_actor(sk, skb);
+ kfree_skb(skb);
+
return copied;
}
-EXPORT_SYMBOL(udp_read_sock);
+EXPORT_SYMBOL(udp_read_skb);
/*
* This should be easy, if there is something there we
* return it, otherwise we block.
*/
-int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len)
+int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
@@ -1859,7 +1853,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
try_again:
off = sk_peek_offset(sk, flags);
- skb = __skb_recv_udp(sk, flags, noblock, &off, &err);
+ skb = __skb_recv_udp(sk, flags, &off, &err);
if (!skb)
return err;
@@ -1910,7 +1904,7 @@ try_again:
UDP_INC_STATS(sock_net(sk),
UDP_MIB_INDATAGRAMS, is_udplite);
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
/* Copy the address. */
if (sin) {
@@ -2093,16 +2087,20 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
rc = __udp_enqueue_schedule_skb(sk, skb);
if (rc < 0) {
int is_udplite = IS_UDPLITE(sk);
+ int drop_reason;
/* Note that an ENOMEM error is charged twice */
- if (rc == -ENOMEM)
+ if (rc == -ENOMEM) {
UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
is_udplite);
- else
+ drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
+ } else {
UDP_INC_STATS(sock_net(sk), UDP_MIB_MEMERRORS,
is_udplite);
+ drop_reason = SKB_DROP_REASON_PROTO_MEM;
+ }
UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
trace_udp_fail_queue_rcv_skb(rc, sk);
return -1;
}
@@ -2120,14 +2118,17 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
*/
static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
{
+ int drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct udp_sock *up = udp_sk(sk);
int is_udplite = IS_UDPLITE(sk);
/*
* Charge it to the socket, dropping if the queue is full.
*/
- if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
+ if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop;
+ }
nf_reset_ct(skb);
if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
@@ -2204,8 +2205,10 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
udp_lib_checksum_complete(skb))
goto csum_error;
- if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr)))
+ if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) {
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
goto drop;
+ }
udp_csum_pull_header(skb);
@@ -2213,11 +2216,12 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
return __udp_queue_rcv_skb(sk, skb);
csum_error:
+ drop_reason = SKB_DROP_REASON_UDP_CSUM;
__UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
drop:
__UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
return -1;
}
@@ -2554,8 +2558,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
struct sock *sk;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
- if (INET_MATCH(sk, net, acookie, rmt_addr,
- loc_addr, ports, dif, sdif))
+ if (inet_match(net, sk, acookie, ports, dif, sdif))
return sk;
/* Only check first socket in chain */
break;
@@ -2938,6 +2941,8 @@ struct proto udp_prot = {
.psock_update_sk_prot = udp_bpf_update_proto,
#endif
.memory_allocated = &udp_memory_allocated,
+ .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc,
+
.sysctl_mem = sysctl_udp_mem,
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
@@ -2960,7 +2965,7 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
if (state->bpf_seq_afinfo)
afinfo = state->bpf_seq_afinfo;
else
- afinfo = PDE_DATA(file_inode(seq->file));
+ afinfo = pde_data(file_inode(seq->file));
for (state->bucket = start; state->bucket <= afinfo->udp_table->mask;
++state->bucket) {
@@ -2993,7 +2998,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
if (state->bpf_seq_afinfo)
afinfo = state->bpf_seq_afinfo;
else
- afinfo = PDE_DATA(file_inode(seq->file));
+ afinfo = pde_data(file_inode(seq->file));
do {
sk = sk_next(sk);
@@ -3050,7 +3055,7 @@ void udp_seq_stop(struct seq_file *seq, void *v)
if (state->bpf_seq_afinfo)
afinfo = state->bpf_seq_afinfo;
else
- afinfo = PDE_DATA(file_inode(seq->file));
+ afinfo = pde_data(file_inode(seq->file));
if (state->bucket <= afinfo->udp_table->mask)
spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
@@ -3253,19 +3258,15 @@ u32 udp_flow_hashrnd(void)
}
EXPORT_SYMBOL(udp_flow_hashrnd);
-static void __udp_sysctl_init(struct net *net)
+static int __net_init udp_sysctl_init(struct net *net)
{
- net->ipv4.sysctl_udp_rmem_min = SK_MEM_QUANTUM;
- net->ipv4.sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+ net->ipv4.sysctl_udp_rmem_min = PAGE_SIZE;
+ net->ipv4.sysctl_udp_wmem_min = PAGE_SIZE;
#ifdef CONFIG_NET_L3_MASTER_DEV
net->ipv4.sysctl_udp_l3mdev_accept = 0;
#endif
-}
-static int __net_init udp_sysctl_init(struct net *net)
-{
- __udp_sysctl_init(net);
return 0;
}
@@ -3341,8 +3342,6 @@ void __init udp_init(void)
sysctl_udp_mem[1] = limit;
sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
- __udp_sysctl_init(&init_net);
-
/* 16 spinlocks per cpu */
udp_busylocks_log = ilog2(nr_cpu_ids) + 4;
udp_busylocks = kmalloc(sizeof(spinlock_t) << udp_busylocks_log,
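
[Annotation, not part of the diff] Taken together, the udp.c hunks above do three things: UDP memory accounting moves from SK_MEM_QUANTUM units to PAGE_SIZE units and gains a per-CPU forward-alloc counter (udp_memory_per_cpu_fw_alloc), the separate noblock argument is folded into the flags word (callers now pass MSG_DONTWAIT), and the read_descriptor_t-based udp_read_sock() is replaced by the one-skb-at-a-time udp_read_skb(). A minimal sketch of the new reader contract follows; the actor name and body are hypothetical:

/* Illustrative sketch: a recv_actor for the udp_read_skb() API shown
 * above. udp_read_skb() dequeues one datagram, passes it here, and
 * frees the skb itself afterwards, so the actor only consumes the
 * payload and reports bytes used (or a negative error).
 */
static int example_recv_actor(struct sock *sk, struct sk_buff *skb)
{
	pr_debug("got %u byte udp payload\n", skb->len);
	return skb->len;	/* everything consumed */
}

/* caller side, e.g. from a psock data path:
 *	copied = udp_read_skb(sk, example_recv_actor);
 */
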
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index bbe6569c9ad3..e5dc91d0e079 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -11,14 +11,13 @@
static struct proto *udpv6_prot_saved __read_mostly;
static int sk_udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6)
- return udpv6_prot_saved->recvmsg(sk, msg, len, noblock, flags,
- addr_len);
+ return udpv6_prot_saved->recvmsg(sk, msg, len, flags, addr_len);
#endif
- return udp_prot.recvmsg(sk, msg, len, noblock, flags, addr_len);
+ return udp_prot.recvmsg(sk, msg, len, flags, addr_len);
}
static bool udp_sk_has_data(struct sock *sk)
@@ -61,7 +60,7 @@ static int udp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
}
static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct sk_psock *psock;
int copied, ret;
@@ -71,10 +70,10 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
psock = sk_psock_get(sk);
if (unlikely(!psock))
- return sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return sk_udp_recvmsg(sk, msg, len, flags, addr_len);
if (!psock_has_data(psock)) {
- ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ ret = sk_udp_recvmsg(sk, msg, len, flags, addr_len);
goto out;
}
@@ -84,12 +83,12 @@ msg_bytes_ready:
long timeo;
int data;
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
data = udp_msg_wait_data(sk, psock, timeo);
if (data) {
if (psock_has_data(psock))
goto msg_bytes_ready;
- ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ ret = sk_udp_recvmsg(sk, msg, len, flags, addr_len);
goto out;
}
copied = -EAGAIN;
@@ -142,14 +141,14 @@ int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
if (restore) {
sk->sk_write_space = psock->saved_write_space;
- WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+ sock_replace_proto(sk, psock->sk_proto);
return 0;
}
if (sk->sk_family == AF_INET6)
udp_bpf_check_v6_needs_rebuild(psock->sk_proto);
- WRITE_ONCE(sk->sk_prot, &udp_bpf_prots[family]);
+ sock_replace_proto(sk, &udp_bpf_prots[family]);
return 0;
}
EXPORT_SYMBOL_GPL(udp_bpf_update_proto);
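
[Annotation, not part of the diff] Two patterns in the udp_bpf.c hunk are worth calling out: with the nonblock parameter gone, the receive timeout is derived directly from the flags word, and proto swaps now go through sock_replace_proto() rather than a bare WRITE_ONCE() on sk->sk_prot. A compressed, simplified sketch of the timeout idiom:

/* Illustrative sketch: sock_rcvtimeo() returns 0 when MSG_DONTWAIT is
 * set, so the old 'nonblock' argument is fully encoded in 'flags'. */
static int example_wait_hint(struct sock *sk, int flags)
{
	long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	return timeo ? 0 : -EAGAIN;	/* hypothetical use of the hint */
}
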
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index 2878d8285caf..4ba7a88a1b1d 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -17,8 +17,8 @@ int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
int udp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
-int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len);
+int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len);
int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
int flags);
void udp_destroy_sock(struct sock *sk);
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index 8efaf8c3fe2a..8242c8947340 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -72,6 +72,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
udp_sk(sk)->encap_type = cfg->encap_type;
udp_sk(sk)->encap_rcv = cfg->encap_rcv;
+ udp_sk(sk)->encap_err_rcv = cfg->encap_err_rcv;
udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup;
udp_sk(sk)->encap_destroy = cfg->encap_destroy;
udp_sk(sk)->gro_receive = cfg->gro_receive;
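
[Annotation, not part of the diff] Paired with the __udp4_lib_err() hunk above, this lets a tunnel driver see ICMP errors arriving on its encap socket. A hedged sketch of the hookup; the handler name and body are hypothetical, while the cfg field is the one the diff adds:

/* Illustrative sketch of wiring up the new callback. */
static void my_encap_err_rcv(struct sock *sk, struct sk_buff *skb,
			     unsigned int udp_offset)
{
	/* hypothetical: parse the inner headers at udp_offset and
	 * propagate PMTU/unreachable events into tunnel state */
}

static void my_tunnel_setup(struct net *net, struct socket *sock)
{
	struct udp_tunnel_sock_cfg cfg = {
		.encap_type    = 1,
		.encap_err_rcv = my_encap_err_rcv,	/* new field */
	};

	setup_udp_tunnel_sock(net, sock, &cfg);
}
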
diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index b91003538d87..bc3a043a5d5c 100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -846,7 +846,7 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
list_for_each_entry(node, &info->shared->devices, list)
if (node->dev == dev)
break;
- if (node->dev != dev)
+ if (list_entry_is_head(node, &info->shared->devices, list))
return;
list_del(&node->list);
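
[Annotation, not part of the diff] The hunk above fixes a classic list_for_each_entry() pitfall: if the loop completes without hitting break, the cursor points at a pseudo-entry computed from the list head, so dereferencing node->dev reads out of bounds. The safe post-loop idiom, as the patched code now does:

/* Illustrative sketch of the corrected lookup. */
list_for_each_entry(node, &info->shared->devices, list)
	if (node->dev == dev)
		break;
/* after a full traversal 'node' is not a real entry; test for the
 * head instead of touching node->dev */
if (list_entry_is_head(node, &info->shared->devices, list))
	return;		/* dev not found */
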
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index cd1cd68adeec..e0c9cc39b81e 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -17,6 +17,14 @@
struct udp_table udplite_table __read_mostly;
EXPORT_SYMBOL(udplite_table);
+/* Designate sk as UDP-Lite socket */
+static int udplite_sk_init(struct sock *sk)
+{
+ udp_init_sock(sk);
+ udp_sk(sk)->pcflag = UDPLITE_BIT;
+ return 0;
+}
+
static int udplite_rcv(struct sk_buff *skb)
{
return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE);
@@ -51,7 +59,10 @@ struct proto udplite_prot = {
.unhash = udp_lib_unhash,
.rehash = udp_v4_rehash,
.get_port = udp_v4_get_port,
+
.memory_allocated = &udp_memory_allocated,
+ .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc,
+
.sysctl_mem = sysctl_udp_mem,
.obj_size = sizeof(struct udp_sock),
.h.udp_table = &udplite_table,
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 9e83bcb6bc99..3d0dfa6cf9f9 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -28,13 +28,11 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
memset(fl4, 0, sizeof(*fl4));
fl4->daddr = daddr->a4;
fl4->flowi4_tos = tos;
- fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif);
+ fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif);
fl4->flowi4_mark = mark;
if (saddr)
fl4->saddr = saddr->a4;
- fl4->flowi4_flags = FLOWI_FLAG_SKIP_NH_OIF;
-
rt = __ip_route_output_key(net, fl4);
if (!IS_ERR(rt))
return &rt->dst;
@@ -77,7 +75,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt.rt_iif = fl4->flowi4_iif;
xdst->u.dst.dev = dev;
- dev_hold_track(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC);
+ netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC);
/* Sheit... I remember I did this right. Apparently,
* it was magically lost, so this code needs audit */
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index 2fe5860c21d6..b146ce88c5d0 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -304,4 +304,3 @@ void __init xfrm4_protocol_init(void)
{
xfrm_input_register_afinfo(&xfrm4_input_afinfo);
}
-EXPORT_SYMBOL(xfrm4_protocol_init);
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 9d4f418f1bf8..8489fa106583 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -22,13 +22,17 @@ static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb)
return ip_hdr(skb)->protocol;
}
-static int ipip_init_state(struct xfrm_state *x)
+static int ipip_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack)
{
- if (x->props.mode != XFRM_MODE_TUNNEL)
+ if (x->props.mode != XFRM_MODE_TUNNEL) {
+ NL_SET_ERR_MSG(extack, "IPv4 tunnel can only be used with tunnel mode");
return -EINVAL;
+ }
- if (x->encap)
+ if (x->encap) {
+ NL_SET_ERR_MSG(extack, "IPv4 tunnel is not compatible with encapsulation");
return -EINVAL;
+ }
x->props.header_len = sizeof(struct iphdr);
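
[Annotation, not part of the diff] This hunk and the later ah6.c/esp6.c hunks apply the same conversion: the xfrm ->init_state() hook gains a struct netlink_ext_ack *extack argument, so validation failures are reported as extended-ack strings to the netlink caller instead of only being logged. The shape of the pattern, with the message taken from the diff and surrounding context trimmed:

/* Illustrative sketch of the extack-reporting init_state pattern. */
static int example_init_state(struct xfrm_state *x,
			      struct netlink_ext_ack *extack)
{
	if (x->props.mode != XFRM_MODE_TUNNEL) {
		/* surfaces as the error string in the netlink ACK */
		NL_SET_ERR_MSG(extack, "IPv4 tunnel can only be used with tunnel mode");
		return -EINVAL;
	}
	return 0;
}
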
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index bf2e5e5fe142..658bfed1df8b 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -7,6 +7,7 @@
menuconfig IPV6
tristate "The IPv6 protocol"
default y
+ select CRYPTO_LIB_SHA1
help
Support for IP version 6 (IPv6).
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3eee17790a82..9c3f5202a97b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -104,7 +104,7 @@ static inline u32 cstamp_delta(unsigned long cstamp)
static inline s32 rfc3315_s14_backoff_init(s32 irt)
{
/* multiply 'initial retransmission time' by 0.9 .. 1.1 */
- u64 tmp = (900000 + prandom_u32() % 200001) * (u64)irt;
+ u64 tmp = (900000 + prandom_u32_max(200001)) * (u64)irt;
do_div(tmp, 1000000);
return (s32)tmp;
}
@@ -112,11 +112,11 @@ static inline s32 rfc3315_s14_backoff_init(s32 irt)
static inline s32 rfc3315_s14_backoff_update(s32 rt, s32 mrt)
{
/* multiply 'retransmission timeout' by 1.9 .. 2.1 */
- u64 tmp = (1900000 + prandom_u32() % 200001) * (u64)rt;
+ u64 tmp = (1900000 + prandom_u32_max(200001)) * (u64)rt;
do_div(tmp, 1000000);
if ((s32)tmp > mrt) {
/* multiply 'maximum retransmission time' by 0.9 .. 1.1 */
- tmp = (900000 + prandom_u32() % 200001) * (u64)mrt;
+ tmp = (900000 + prandom_u32_max(200001)) * (u64)mrt;
do_div(tmp, 1000000);
}
return (s32)tmp;
@@ -146,18 +146,11 @@ static int ipv6_generate_stable_address(struct in6_addr *addr,
#define IN6_ADDR_HSIZE_SHIFT 8
#define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT)
-/*
- * Configured unicast address hash table
- */
-static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
-static DEFINE_SPINLOCK(addrconf_hash_lock);
-static void addrconf_verify(void);
-static void addrconf_verify_rtnl(void);
-static void addrconf_verify_work(struct work_struct *);
+static void addrconf_verify(struct net *net);
+static void addrconf_verify_rtnl(struct net *net);
static struct workqueue_struct *addrconf_wq;
-static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work);
static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
@@ -342,7 +335,7 @@ static int snmp6_alloc_dev(struct inet6_dev *idev)
{
int i;
- idev->stats.ipv6 = alloc_percpu(struct ipstats_mib);
+ idev->stats.ipv6 = alloc_percpu_gfp(struct ipstats_mib, GFP_KERNEL_ACCOUNT);
if (!idev->stats.ipv6)
goto err_ip;
@@ -358,7 +351,7 @@ static int snmp6_alloc_dev(struct inet6_dev *idev)
if (!idev->stats.icmpv6dev)
goto err_icmp;
idev->stats.icmpv6msgdev = kzalloc(sizeof(struct icmpv6msg_mib_device),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!idev->stats.icmpv6msgdev)
goto err_icmpmsg;
@@ -379,10 +372,10 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ASSERT_RTNL();
- if (dev->mtu < IPV6_MIN_MTU)
+ if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev)
return ERR_PTR(-EINVAL);
- ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL);
+ ndev = kzalloc(sizeof(*ndev), GFP_KERNEL_ACCOUNT);
if (!ndev)
return ERR_PTR(err);
@@ -405,23 +398,24 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
if (ndev->cnf.forwarding)
dev_disable_lro(dev);
/* We refer to the device */
- dev_hold_track(dev, &ndev->dev_tracker, GFP_KERNEL);
+ netdev_hold(dev, &ndev->dev_tracker, GFP_KERNEL);
if (snmp6_alloc_dev(ndev) < 0) {
netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
__func__);
neigh_parms_release(&nd_tbl, ndev->nd_parms);
- dev_put_track(dev, &ndev->dev_tracker);
+ netdev_put(dev, &ndev->dev_tracker);
kfree(ndev);
return ERR_PTR(err);
}
- if (snmp6_register_dev(ndev) < 0) {
- netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n",
- __func__, dev->name);
- goto err_release;
+ if (dev != blackhole_netdev) {
+ if (snmp6_register_dev(ndev) < 0) {
+ netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n",
+ __func__, dev->name);
+ goto err_release;
+ }
}
-
/* One reference from device. */
refcount_set(&ndev->refcnt, 1);
@@ -452,25 +446,28 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ipv6_mc_init_dev(ndev);
ndev->tstamp = jiffies;
- err = addrconf_sysctl_register(ndev);
- if (err) {
- ipv6_mc_destroy_dev(ndev);
- snmp6_unregister_dev(ndev);
- goto err_release;
+ if (dev != blackhole_netdev) {
+ err = addrconf_sysctl_register(ndev);
+ if (err) {
+ ipv6_mc_destroy_dev(ndev);
+ snmp6_unregister_dev(ndev);
+ goto err_release;
+ }
}
/* protected by rtnl_lock */
rcu_assign_pointer(dev->ip6_ptr, ndev);
- /* Join interface-local all-node multicast group */
- ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes);
-
- /* Join all-node multicast group */
- ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
+ if (dev != blackhole_netdev) {
+ /* Join interface-local all-node multicast group */
+ ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes);
- /* Join all-router multicast group if forwarding is set */
- if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST))
- ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
+ /* Join all-node multicast group */
+ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
+ /* Join all-router multicast group if forwarding is set */
+ if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST))
+ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
+ }
return ndev;
err_release:
@@ -554,7 +551,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
#ifdef CONFIG_IPV6_MROUTE
if ((all || type == NETCONFA_MC_FORWARDING) &&
nla_put_s32(skb, NETCONFA_MC_FORWARDING,
- devconf->mc_forwarding) < 0)
+ atomic_read(&devconf->mc_forwarding)) < 0)
goto nla_put_failure;
#endif
if ((all || type == NETCONFA_PROXY_NEIGH) &&
@@ -800,6 +797,7 @@ static void dev_forward_change(struct inet6_dev *idev)
{
struct net_device *dev;
struct inet6_ifaddr *ifa;
+ LIST_HEAD(tmp_addr_list);
if (!idev)
return;
@@ -818,14 +816,24 @@ static void dev_forward_change(struct inet6_dev *idev)
}
}
+ read_lock_bh(&idev->lock);
list_for_each_entry(ifa, &idev->addr_list, if_list) {
if (ifa->flags&IFA_F_TENTATIVE)
continue;
+ list_add_tail(&ifa->if_list_aux, &tmp_addr_list);
+ }
+ read_unlock_bh(&idev->lock);
+
+ while (!list_empty(&tmp_addr_list)) {
+ ifa = list_first_entry(&tmp_addr_list,
+ struct inet6_ifaddr, if_list_aux);
+ list_del(&ifa->if_list_aux);
if (idev->cnf.forwarding)
addrconf_join_anycast(ifa);
else
addrconf_leave_anycast(ifa);
}
+
inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_FORWARDING,
dev->ifindex, &idev->cnf);
@@ -1011,9 +1019,7 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
{
struct inet6_ifaddr *ifp;
- hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) {
- if (!net_eq(dev_net(ifp->idev->dev), net))
- continue;
+ hlist_for_each_entry(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
if (ipv6_addr_equal(&ifp->addr, addr)) {
if (!dev || ifp->idev->dev == dev)
return true;
@@ -1024,20 +1030,21 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
{
- unsigned int hash = inet6_addr_hash(dev_net(dev), &ifa->addr);
+ struct net *net = dev_net(dev);
+ unsigned int hash = inet6_addr_hash(net, &ifa->addr);
int err = 0;
- spin_lock(&addrconf_hash_lock);
+ spin_lock(&net->ipv6.addrconf_hash_lock);
/* Ignore adding duplicate addresses on an interface */
- if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) {
+ if (ipv6_chk_same_addr(net, &ifa->addr, dev, hash)) {
netdev_dbg(dev, "ipv6_add_addr: already assigned\n");
err = -EEXIST;
} else {
- hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
+ hlist_add_head_rcu(&ifa->addr_lst, &net->ipv6.inet6_addr_lst[hash]);
}
- spin_unlock(&addrconf_hash_lock);
+ spin_unlock(&net->ipv6.addrconf_hash_lock);
return err;
}
@@ -1102,10 +1109,6 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
goto out;
}
- if (net->ipv6.devconf_all->disable_policy ||
- idev->cnf.disable_policy)
- f6i->dst_nopolicy = true;
-
neigh_parms_data_state_setall(idev->nd_parms);
ifa->addr = *cfg->pfx;
@@ -1119,6 +1122,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
ifa->prefix_len = cfg->plen;
ifa->rt_priority = cfg->rt_priority;
ifa->flags = cfg->ifa_flags;
+ ifa->ifa_proto = cfg->ifa_proto;
/* No need to add the TENTATIVE flag for addresses with NODAD */
if (!(cfg->ifa_flags & IFA_F_NODAD))
ifa->flags |= IFA_F_TENTATIVE;
@@ -1261,9 +1265,10 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
static void ipv6_del_addr(struct inet6_ifaddr *ifp)
{
- int state;
enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP;
+ struct net *net = dev_net(ifp->idev->dev);
unsigned long expires;
+ int state;
ASSERT_RTNL();
@@ -1275,9 +1280,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
if (state == INET6_IFADDR_STATE_DEAD)
goto out;
- spin_lock_bh(&addrconf_hash_lock);
+ spin_lock_bh(&net->ipv6.addrconf_hash_lock);
hlist_del_init_rcu(&ifp->addr_lst);
- spin_unlock_bh(&addrconf_hash_lock);
+ spin_unlock_bh(&net->ipv6.addrconf_hash_lock);
write_lock_bh(&ifp->idev->lock);
@@ -1839,8 +1844,8 @@ out:
}
EXPORT_SYMBOL(ipv6_dev_get_saddr);
-int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
- u32 banned_flags)
+static int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
+ u32 banned_flags)
{
struct inet6_ifaddr *ifp;
int err = -EADDRNOTAVAIL;
@@ -1920,10 +1925,8 @@ __ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
if (skip_dev_check)
dev = NULL;
- hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
+ hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
ndev = ifp->idev->dev;
- if (!net_eq(dev_net(ndev), net))
- continue;
if (l3mdev_master_dev_rcu(ndev) != l3mdev)
continue;
@@ -2027,9 +2030,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
struct inet6_ifaddr *ifp, *result = NULL;
rcu_read_lock();
- hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
- if (!net_eq(dev_net(ifp->idev->dev), net))
- continue;
+ hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
if (ipv6_addr_equal(&ifp->addr, addr)) {
if (!dev || ifp->idev->dev == dev ||
!(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
@@ -2096,7 +2097,7 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp)
void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
- struct net *net = dev_net(ifp->idev->dev);
+ struct net *net = dev_net(idev->dev);
if (addrconf_dad_end(ifp)) {
in6_ifa_put(ifp);
@@ -2589,7 +2590,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
__u32 valid_lft, u32 prefered_lft)
{
struct inet6_ifaddr *ifp = ipv6_get_ifaddr(net, addr, dev, 1);
- int create = 0;
+ int create = 0, update_lft = 0;
if (!ifp && valid_lft) {
int max_addresses = in6_dev->cnf.max_addresses;
@@ -2600,6 +2601,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
.valid_lft = valid_lft,
.preferred_lft = prefered_lft,
.scope = addr_type & IPV6_ADDR_SCOPE_MASK,
+ .ifa_proto = IFAPROT_KERNEL_RA
};
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
@@ -2633,19 +2635,32 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
unsigned long now;
u32 stored_lft;
- /* Update lifetime (RFC4862 5.5.3 e)
- * We deviate from RFC4862 by honoring all Valid Lifetimes to
- * improve the reaction of SLAAC to renumbering events
- * (draft-gont-6man-slaac-renum-06, Section 4.2)
- */
+ /* update lifetime (RFC2462 5.5.3 e) */
spin_lock_bh(&ifp->lock);
now = jiffies;
if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
else
stored_lft = 0;
-
if (!create && stored_lft) {
+ const u32 minimum_lft = min_t(u32,
+ stored_lft, MIN_VALID_LIFETIME);
+ valid_lft = max(valid_lft, minimum_lft);
+
+ /* RFC4862 Section 5.5.3e:
+ * "Note that the preferred lifetime of the
+ * corresponding address is always reset to
+ * the Preferred Lifetime in the received
+ * Prefix Information option, regardless of
+ * whether the valid lifetime is also reset or
+ * ignored."
+ *
+ * So we should always update prefered_lft here.
+ */
+ update_lft = 1;
+ }
+
+ if (update_lft) {
ifp->valid_lft = valid_lft;
ifp->prefered_lft = prefered_lft;
ifp->tstamp = now;
@@ -2662,7 +2677,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
create, now);
in6_ifa_put(ifp);
- addrconf_verify();
+ addrconf_verify(net);
}
return 0;
@@ -2974,7 +2989,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
manage_tempaddrs(idev, ifp, cfg->valid_lft,
cfg->preferred_lft, true, jiffies);
in6_ifa_put(ifp);
- addrconf_verify_rtnl();
+ addrconf_verify_rtnl(net);
return 0;
} else if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
ipv6_mc_config(net->ipv6.mc_autojoin_sk, false,
@@ -3014,7 +3029,7 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
manage_tempaddrs(idev, ifp, 0, 0, false,
jiffies);
ipv6_del_addr(ifp);
- addrconf_verify_rtnl();
+ addrconf_verify_rtnl(net);
if (ipv6_addr_is_multicast(pfx)) {
ipv6_mc_config(net->ipv6.mc_autojoin_sk,
false, pfx, dev->ifindex);
@@ -3071,7 +3086,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
}
static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
- int plen, int scope)
+ int plen, int scope, u8 proto)
{
struct inet6_ifaddr *ifp;
struct ifa6_config cfg = {
@@ -3080,7 +3095,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
.ifa_flags = IFA_F_PERMANENT,
.valid_lft = INFINITY_LIFE_TIME,
.preferred_lft = INFINITY_LIFE_TIME,
- .scope = scope
+ .scope = scope,
+ .ifa_proto = proto
};
ifp = ipv6_add_addr(idev, &cfg, true, NULL);
@@ -3125,7 +3141,7 @@ static void add_v4_addrs(struct inet6_dev *idev)
}
if (addr.s6_addr32[3]) {
- add_addr(idev, &addr, plen, scope);
+ add_addr(idev, &addr, plen, scope, IFAPROT_UNSPEC);
addrconf_prefix_route(&addr, plen, 0, idev->dev, 0, pflags,
GFP_KERNEL);
return;
@@ -3148,7 +3164,8 @@ static void add_v4_addrs(struct inet6_dev *idev)
flag |= IFA_HOST;
}
- add_addr(idev, &addr, plen, flag);
+ add_addr(idev, &addr, plen, flag,
+ IFAPROT_UNSPEC);
addrconf_prefix_route(&addr, plen, 0, idev->dev,
0, pflags, GFP_KERNEL);
}
@@ -3171,7 +3188,7 @@ static void init_loopback(struct net_device *dev)
return;
}
- add_addr(idev, &in6addr_loopback, 128, IFA_HOST);
+ add_addr(idev, &in6addr_loopback, 128, IFA_HOST, IFAPROT_KERNEL_LO);
}
void addrconf_add_linklocal(struct inet6_dev *idev,
@@ -3183,7 +3200,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev,
.ifa_flags = flags | IFA_F_PERMANENT,
.valid_lft = INFINITY_LIFE_TIME,
.preferred_lft = INFINITY_LIFE_TIME,
- .scope = IFA_LINK
+ .scope = IFA_LINK,
+ .ifa_proto = IFAPROT_KERNEL_LL
};
struct inet6_ifaddr *ifp;
@@ -3539,11 +3557,15 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
fallthrough;
case NETDEV_UP:
case NETDEV_CHANGE:
- if (dev->flags & IFF_SLAVE)
+ if (idev && idev->cnf.disable_ipv6)
break;
- if (idev && idev->cnf.disable_ipv6)
+ if (dev->flags & IFF_SLAVE) {
+ if (event == NETDEV_UP && !IS_ERR_OR_NULL(idev) &&
+ dev->flags & IFF_UP && dev->flags & IFF_MULTICAST)
+ ipv6_mc_up(idev);
break;
+ }
if (event == NETDEV_UP) {
/* restore routes for permanent addresses */
@@ -3717,8 +3739,10 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
unsigned long event = unregister ? NETDEV_UNREGISTER : NETDEV_DOWN;
struct net *net = dev_net(dev);
struct inet6_dev *idev;
- struct inet6_ifaddr *ifa, *tmp;
+ struct inet6_ifaddr *ifa;
+ LIST_HEAD(tmp_addr_list);
bool keep_addr = false;
+ bool was_ready;
int state, i;
ASSERT_RTNL();
@@ -3759,9 +3783,9 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
/* Step 2: clear hash table */
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
- struct hlist_head *h = &inet6_addr_lst[i];
+ struct hlist_head *h = &net->ipv6.inet6_addr_lst[i];
- spin_lock_bh(&addrconf_hash_lock);
+ spin_lock_bh(&net->ipv6.addrconf_hash_lock);
restart:
hlist_for_each_entry_rcu(ifa, h, addr_lst) {
if (ifa->idev == idev) {
@@ -3777,14 +3801,17 @@ restart:
}
}
}
- spin_unlock_bh(&addrconf_hash_lock);
+ spin_unlock_bh(&net->ipv6.addrconf_hash_lock);
}
write_lock_bh(&idev->lock);
addrconf_del_rs_timer(idev);
- /* Step 2: clear flags for stateless addrconf */
+ /* Step 2: clear flags for stateless addrconf, repeated down
+ * detection
+ */
+ was_ready = idev->if_flags & IF_READY;
if (!unregister)
idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
@@ -3805,16 +3832,23 @@ restart:
write_lock_bh(&idev->lock);
}
- list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
+ list_for_each_entry(ifa, &idev->addr_list, if_list)
+ list_add_tail(&ifa->if_list_aux, &tmp_addr_list);
+ write_unlock_bh(&idev->lock);
+
+ while (!list_empty(&tmp_addr_list)) {
struct fib6_info *rt = NULL;
bool keep;
+ ifa = list_first_entry(&tmp_addr_list,
+ struct inet6_ifaddr, if_list_aux);
+ list_del(&ifa->if_list_aux);
+
addrconf_del_dad_work(ifa);
keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
!addr_is_local(&ifa->addr);
- write_unlock_bh(&idev->lock);
spin_lock_bh(&ifa->lock);
if (keep) {
@@ -3845,20 +3879,19 @@ restart:
addrconf_leave_solict(ifa->idev, &ifa->addr);
}
- write_lock_bh(&idev->lock);
if (!keep) {
+ write_lock_bh(&idev->lock);
list_del_rcu(&ifa->if_list);
+ write_unlock_bh(&idev->lock);
in6_ifa_put(ifa);
}
}
- write_unlock_bh(&idev->lock);
-
/* Step 5: Discard anycast and multicast list */
if (unregister) {
ipv6_ac_destroy_dev(idev);
ipv6_mc_destroy_dev(idev);
- } else {
+ } else if (was_ready) {
ipv6_mc_down(idev);
}
@@ -3934,7 +3967,7 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
if (ifp->flags & IFA_F_OPTIMISTIC)
rand_num = 0;
else
- rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1);
+ rand_num = prandom_u32_max(idev->cnf.rtr_solicit_delay ?: 1);
nonce = 0;
if (idev->cnf.enhanced_dad ||
@@ -3957,8 +3990,6 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
addrconf_join_solict(dev, &ifp->addr);
- prandom_seed((__force u32) ifp->addr.s6_addr32[3]);
-
read_lock_bh(&idev->lock);
spin_lock(&ifp->lock);
if (ifp->state == INET6_IFADDR_STATE_DEAD)
@@ -4186,7 +4217,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
send_rs = send_mld &&
ipv6_accept_ra(ifp->idev) &&
ifp->idev->cnf.rtr_solicits != 0 &&
- (dev->flags&IFF_LOOPBACK) == 0;
+ (dev->flags & IFF_LOOPBACK) == 0 &&
+ (dev->type != ARPHRD_TUNNEL);
read_unlock_bh(&ifp->idev->lock);
/* While dad is in progress mld report's source address is in6_addrany.
@@ -4233,7 +4265,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
* before this temporary address becomes deprecated.
*/
if (ifp->flags & IFA_F_TEMPORARY)
- addrconf_verify_rtnl();
+ addrconf_verify_rtnl(dev_net(dev));
}
static void addrconf_dad_run(struct inet6_dev *idev, bool restart)
@@ -4275,10 +4307,8 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
}
for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
- hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket],
+ hlist_for_each_entry_rcu(ifa, &net->ipv6.inet6_addr_lst[state->bucket],
addr_lst) {
- if (!net_eq(dev_net(ifa->idev->dev), net))
- continue;
/* sync with offset */
if (p < state->offset) {
p++;
@@ -4301,8 +4331,6 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
struct net *net = seq_file_net(seq);
hlist_for_each_entry_continue_rcu(ifa, addr_lst) {
- if (!net_eq(dev_net(ifa->idev->dev), net))
- continue;
state->offset++;
return ifa;
}
@@ -4310,9 +4338,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
state->offset = 0;
while (++state->bucket < IN6_ADDR_HSIZE) {
hlist_for_each_entry_rcu(ifa,
- &inet6_addr_lst[state->bucket], addr_lst) {
- if (!net_eq(dev_net(ifa->idev->dev), net))
- continue;
+ &net->ipv6.inet6_addr_lst[state->bucket], addr_lst) {
return ifa;
}
}
@@ -4400,9 +4426,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
int ret = 0;
rcu_read_lock();
- hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
- if (!net_eq(dev_net(ifp->idev->dev), net))
- continue;
+ hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
if (ipv6_addr_equal(&ifp->addr, addr) &&
(ifp->flags & IFA_F_HOMEADDRESS)) {
ret = 1;
@@ -4440,9 +4464,7 @@ int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs,
hash = inet6_addr_hash(net, addr);
hash_found = false;
- hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
- if (!net_eq(dev_net(ifp->idev->dev), net))
- continue;
+ hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) {
if (ipv6_addr_equal(&ifp->addr, addr)) {
hash_found = true;
@@ -4471,7 +4493,7 @@ int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs,
* Periodic address status verification
*/
-static void addrconf_verify_rtnl(void)
+static void addrconf_verify_rtnl(struct net *net)
{
unsigned long now, next, next_sec, next_sched;
struct inet6_ifaddr *ifp;
@@ -4483,11 +4505,11 @@ static void addrconf_verify_rtnl(void)
now = jiffies;
next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
- cancel_delayed_work(&addr_chk_work);
+ cancel_delayed_work(&net->ipv6.addr_chk_work);
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
restart:
- hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) {
+ hlist_for_each_entry_rcu_bh(ifp, &net->ipv6.inet6_addr_lst[i], addr_lst) {
unsigned long age;
/* When setting preferred_lft to a value not zero or
@@ -4502,6 +4524,39 @@ restart:
/* We try to batch several events at once. */
age = (now - ifp->tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
+ if ((ifp->flags&IFA_F_TEMPORARY) &&
+ !(ifp->flags&IFA_F_TENTATIVE) &&
+ ifp->prefered_lft != INFINITY_LIFE_TIME &&
+ !ifp->regen_count && ifp->ifpub) {
+ /* This is a non-regenerated temporary addr. */
+
+ unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *
+ ifp->idev->cnf.dad_transmits *
+ max(NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME), HZ/100) / HZ;
+
+ if (age + regen_advance >= ifp->prefered_lft) {
+ struct inet6_ifaddr *ifpub = ifp->ifpub;
+ if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
+ next = ifp->tstamp + ifp->prefered_lft * HZ;
+
+ ifp->regen_count++;
+ in6_ifa_hold(ifp);
+ in6_ifa_hold(ifpub);
+ spin_unlock(&ifp->lock);
+
+ spin_lock(&ifpub->lock);
+ ifpub->regen_count = 0;
+ spin_unlock(&ifpub->lock);
+ rcu_read_unlock_bh();
+ ipv6_create_tempaddr(ifpub, true);
+ in6_ifa_put(ifpub);
+ in6_ifa_put(ifp);
+ rcu_read_lock_bh();
+ goto restart;
+ } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
+ next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ;
+ }
+
if (ifp->valid_lft != INFINITY_LIFE_TIME &&
age >= ifp->valid_lft) {
spin_unlock(&ifp->lock);
@@ -4535,35 +4590,6 @@ restart:
in6_ifa_put(ifp);
goto restart;
}
- } else if ((ifp->flags&IFA_F_TEMPORARY) &&
- !(ifp->flags&IFA_F_TENTATIVE)) {
- unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *
- ifp->idev->cnf.dad_transmits *
- max(NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME), HZ/100) / HZ;
-
- if (age >= ifp->prefered_lft - regen_advance) {
- struct inet6_ifaddr *ifpub = ifp->ifpub;
- if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
- next = ifp->tstamp + ifp->prefered_lft * HZ;
- if (!ifp->regen_count && ifpub) {
- ifp->regen_count++;
- in6_ifa_hold(ifp);
- in6_ifa_hold(ifpub);
- spin_unlock(&ifp->lock);
-
- spin_lock(&ifpub->lock);
- ifpub->regen_count = 0;
- spin_unlock(&ifpub->lock);
- rcu_read_unlock_bh();
- ipv6_create_tempaddr(ifpub, true);
- in6_ifa_put(ifpub);
- in6_ifa_put(ifp);
- rcu_read_lock_bh();
- goto restart;
- }
- } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
- next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ;
- spin_unlock(&ifp->lock);
} else {
/* ifp->prefered_lft <= ifp->valid_lft */
if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
@@ -4586,20 +4612,23 @@ restart:
pr_debug("now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
now, next, next_sec, next_sched);
- mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
+ mod_delayed_work(addrconf_wq, &net->ipv6.addr_chk_work, next_sched - now);
rcu_read_unlock_bh();
}
static void addrconf_verify_work(struct work_struct *w)
{
+ struct net *net = container_of(to_delayed_work(w), struct net,
+ ipv6.addr_chk_work);
+
rtnl_lock();
- addrconf_verify_rtnl();
+ addrconf_verify_rtnl(net);
rtnl_unlock();
}
-static void addrconf_verify(void)
+static void addrconf_verify(struct net *net)
{
- mod_delayed_work(addrconf_wq, &addr_chk_work, 0);
+ mod_delayed_work(addrconf_wq, &net->ipv6.addr_chk_work, 0);
}
static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local,
@@ -4628,6 +4657,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
[IFA_FLAGS] = { .len = sizeof(u32) },
[IFA_RT_PRIORITY] = { .len = sizeof(u32) },
[IFA_TARGET_NETNSID] = { .type = NLA_S32 },
+ [IFA_PROTO] = { .type = NLA_U8 },
};
static int
@@ -4695,7 +4725,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
return 0;
}
-static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
+static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
+ struct ifa6_config *cfg)
{
u32 flags;
clock_t expires;
@@ -4752,6 +4783,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
ifp->tstamp = jiffies;
ifp->valid_lft = cfg->valid_lft;
ifp->prefered_lft = cfg->preferred_lft;
+ ifp->ifa_proto = cfg->ifa_proto;
if (cfg->rt_priority && cfg->rt_priority != ifp->rt_priority)
ifp->rt_priority = cfg->rt_priority;
@@ -4809,7 +4841,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
jiffies);
}
- addrconf_verify_rtnl();
+ addrconf_verify_rtnl(net);
return 0;
}
@@ -4845,6 +4877,9 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[IFA_RT_PRIORITY])
cfg.rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
+ if (tb[IFA_PROTO])
+ cfg.ifa_proto = nla_get_u8(tb[IFA_PROTO]);
+
cfg.valid_lft = INFINITY_LIFE_TIME;
cfg.preferred_lft = INFINITY_LIFE_TIME;
@@ -4896,7 +4931,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
!(nlh->nlmsg_flags & NLM_F_REPLACE))
err = -EEXIST;
else
- err = inet6_addr_modify(ifa, &cfg);
+ err = inet6_addr_modify(net, ifa, &cfg);
in6_ifa_put(ifa);
@@ -4948,6 +4983,7 @@ static inline int inet6_ifaddr_msgsize(void)
+ nla_total_size(16) /* IFA_ADDRESS */
+ nla_total_size(sizeof(struct ifa_cacheinfo))
+ nla_total_size(4) /* IFA_FLAGS */
+ + nla_total_size(1) /* IFA_PROTO */
+ nla_total_size(4) /* IFA_RT_PRIORITY */;
}
@@ -4985,6 +5021,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
goto error;
+ spin_lock_bh(&ifa->lock);
if (!((ifa->flags&IFA_F_PERMANENT) &&
(ifa->prefered_lft == INFINITY_LIFE_TIME))) {
preferred = ifa->prefered_lft;
@@ -5006,6 +5043,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
preferred = INFINITY_LIFE_TIME;
valid = INFINITY_LIFE_TIME;
}
+ spin_unlock_bh(&ifa->lock);
if (!ipv6_addr_any(&ifa->peer_addr)) {
if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 ||
@@ -5025,6 +5063,10 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0)
goto error;
+ if (ifa->ifa_proto &&
+ nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto))
+ goto error;
+
nlmsg_end(skb, nlh);
return 0;
@@ -5134,9 +5176,9 @@ next:
fillargs->event = RTM_GETMULTICAST;
/* multicast address */
- for (ifmca = rcu_dereference(idev->mc_list);
+ for (ifmca = rtnl_dereference(idev->mc_list);
ifmca;
- ifmca = rcu_dereference(ifmca->next), ip_idx++) {
+ ifmca = rtnl_dereference(ifmca->next), ip_idx++) {
if (ip_idx < s_ip_idx)
continue;
err = inet6_fill_ifmcaddr(skb, ifmca, fillargs);
@@ -5520,7 +5562,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic;
#endif
#ifdef CONFIG_IPV6_MROUTE
- array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding;
+ array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding);
#endif
array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
@@ -5548,6 +5590,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
+ array[DEVCONF_ACCEPT_UNTRACKED_NA] = cnf->accept_untracked_na;
}
static inline size_t inet6_ifla6_size(void)
@@ -5781,7 +5824,7 @@ update_lft:
write_unlock_bh(&idev->lock);
inet6_ifinfo_notify(RTM_NEWLINK, idev);
- addrconf_verify_rtnl();
+ addrconf_verify_rtnl(dev_net(dev));
return 0;
}
@@ -6999,6 +7042,15 @@ static const struct ctl_table addrconf_sysctl[] = {
.extra2 = (void *)SYSCTL_ONE,
},
{
+ .procname = "accept_untracked_na",
+ .data = &ipv6_devconf.accept_untracked_na,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+ {
/* sentinel */
}
};
@@ -7010,7 +7062,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
struct ctl_table *table;
char path[sizeof("net/ipv6/conf/") + IFNAMSIZ];
- table = kmemdup(addrconf_sysctl, sizeof(addrconf_sysctl), GFP_KERNEL);
+ table = kmemdup(addrconf_sysctl, sizeof(addrconf_sysctl), GFP_KERNEL_ACCOUNT);
if (!table)
goto out;
@@ -7098,6 +7150,14 @@ static int __net_init addrconf_init_net(struct net *net)
int err = -ENOMEM;
struct ipv6_devconf *all, *dflt;
+ spin_lock_init(&net->ipv6.addrconf_hash_lock);
+ INIT_DEFERRABLE_WORK(&net->ipv6.addr_chk_work, addrconf_verify_work);
+ net->ipv6.inet6_addr_lst = kcalloc(IN6_ADDR_HSIZE,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!net->ipv6.inet6_addr_lst)
+ goto err_alloc_addr;
+
all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL);
if (!all)
goto err_alloc_all;
@@ -7106,9 +7166,8 @@ static int __net_init addrconf_init_net(struct net *net)
if (!dflt)
goto err_alloc_dflt;
- if (IS_ENABLED(CONFIG_SYSCTL) &&
- !net_eq(net, &init_net)) {
- switch (sysctl_devconf_inherit_init_net) {
+ if (!net_eq(net, &init_net)) {
+ switch (net_inherit_devconf()) {
case 1: /* copy from init_net */
memcpy(all, init_net.ipv6.devconf_all,
sizeof(ipv6_devconf));
@@ -7155,15 +7214,21 @@ err_reg_dflt:
__addrconf_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
kfree(dflt);
+ net->ipv6.devconf_dflt = NULL;
#endif
err_alloc_dflt:
kfree(all);
+ net->ipv6.devconf_all = NULL;
err_alloc_all:
+ kfree(net->ipv6.inet6_addr_lst);
+err_alloc_addr:
return err;
}
static void __net_exit addrconf_exit_net(struct net *net)
{
+ int i;
+
#ifdef CONFIG_SYSCTL
__addrconf_sysctl_unregister(net, net->ipv6.devconf_dflt,
NETCONFA_IFINDEX_DEFAULT);
@@ -7171,7 +7236,19 @@ static void __net_exit addrconf_exit_net(struct net *net)
NETCONFA_IFINDEX_ALL);
#endif
kfree(net->ipv6.devconf_dflt);
+ net->ipv6.devconf_dflt = NULL;
kfree(net->ipv6.devconf_all);
+ net->ipv6.devconf_all = NULL;
+
+ cancel_delayed_work_sync(&net->ipv6.addr_chk_work);
+ /*
+ * Check hash table, then free it.
+ */
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ WARN_ON_ONCE(!hlist_empty(&net->ipv6.inet6_addr_lst[i]));
+
+ kfree(net->ipv6.inet6_addr_lst);
+ net->ipv6.inet6_addr_lst = NULL;
}
static struct pernet_operations addrconf_ops = {
@@ -7194,7 +7271,7 @@ static struct rtnl_af_ops inet6_ops __read_mostly = {
int __init addrconf_init(void)
{
struct inet6_dev *idev;
- int i, err;
+ int err;
err = ipv6_addr_label_init();
if (err < 0) {
@@ -7213,26 +7290,8 @@ int __init addrconf_init(void)
goto out_nowq;
}
- /* The addrconf netdev notifier requires that loopback_dev
- * has it's ipv6 private information allocated and setup
- * before it can bring up and give link-local addresses
- * to other devices which are up.
- *
- * Unfortunately, loopback_dev is not necessarily the first
- * entry in the global dev_base list of net devices. In fact,
- * it is likely to be the very last entry on that list.
- * So this causes the notifier registry below to try and
- * give link-local addresses to all devices besides loopback_dev
- * first, then loopback_dev, which cases all the non-loopback_dev
- * devices to fail to get a link-local address.
- *
- * So, as a temporary fix, allocate the ipv6 structure for
- * loopback_dev first by hand.
- * Longer term, all of the dependencies ipv6 has upon the loopback
- * device and it being up should be removed.
- */
rtnl_lock();
- idev = ipv6_add_dev(init_net.loopback_dev);
+ idev = ipv6_add_dev(blackhole_netdev);
rtnl_unlock();
if (IS_ERR(idev)) {
err = PTR_ERR(idev);
@@ -7241,12 +7300,9 @@ int __init addrconf_init(void)
ip6_route_init_special_entries();
- for (i = 0; i < IN6_ADDR_HSIZE; i++)
- INIT_HLIST_HEAD(&inet6_addr_lst[i]);
-
register_netdevice_notifier(&ipv6_dev_notf);
- addrconf_verify();
+ addrconf_verify(&init_net);
rtnl_af_register(&inet6_ops);
@@ -7304,7 +7360,6 @@ out:
void addrconf_cleanup(void)
{
struct net_device *dev;
- int i;
unregister_netdevice_notifier(&ipv6_dev_notf);
unregister_pernet_subsys(&addrconf_ops);
@@ -7322,14 +7377,6 @@ void addrconf_cleanup(void)
}
addrconf_ifdown(init_net.loopback_dev, true);
- /*
- * Check hash table.
- */
- spin_lock_bh(&addrconf_hash_lock);
- for (i = 0; i < IN6_ADDR_HSIZE; i++)
- WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
- spin_unlock_bh(&addrconf_hash_lock);
- cancel_delayed_work(&addr_chk_work);
rtnl_unlock();
destroy_workqueue(addrconf_wq);
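
[Annotation, not part of the diff] The bulk of the addrconf.c changes move the global inet6_addr_lst[] hash, its spinlock, and the periodic addr_chk_work into struct net, so each namespace verifies only its own addresses and the per-entry net_eq() filters in the lookup loops can be dropped. A compressed sketch of the per-netns lifecycle the diff establishes (error handling elided, names as in the diff):

/* Illustrative sketch of the pernet setup/teardown. */
static int __net_init example_addrconf_init_net(struct net *net)
{
	spin_lock_init(&net->ipv6.addrconf_hash_lock);
	INIT_DEFERRABLE_WORK(&net->ipv6.addr_chk_work, addrconf_verify_work);
	net->ipv6.inet6_addr_lst = kcalloc(IN6_ADDR_HSIZE,
					   sizeof(struct hlist_head),
					   GFP_KERNEL);
	return net->ipv6.inet6_addr_lst ? 0 : -ENOMEM;
}

static void __net_exit example_addrconf_exit_net(struct net *net)
{
	/* the delayed work may still be queued; flush before freeing */
	cancel_delayed_work_sync(&net->ipv6.addr_chk_work);
	kfree(net->ipv6.inet6_addr_lst);
}
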
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 881d1477d24a..507a8353a6bd 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -263,7 +263,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
#ifdef NET_REFCNT_DEBUG
pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL");
#endif
- dev_put_track(dev, &idev->dev_tracker);
+ netdev_put(dev, &idev->dev_tracker);
if (!idev->dead) {
pr_warn("Freeing alive inet6 device %p\n", idev);
return;
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 8a22486cf270..17ac45aa7194 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -437,6 +437,7 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
{
struct ifaddrlblmsg *ifal = nlmsg_data(nlh);
ifal->ifal_family = AF_INET6;
+ ifal->__ifal_reserved = 0;
ifal->ifal_prefixlen = prefixlen;
ifal->ifal_flags = 0;
ifal->ifal_index = ifindex;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 8fe7900f1949..024191004982 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -63,6 +63,7 @@
#include <net/compat.h>
#include <net/xfrm.h>
#include <net/ioam6.h>
+#include <net/rawv6.h>
#include <linux/uaccess.h>
#include <linux/mroute6.h>
@@ -108,6 +109,12 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
+void inet6_sock_destruct(struct sock *sk)
+{
+ inet6_cleanup_sock(sk);
+ inet_sock_destruct(sk);
+}
+
static int inet6_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
@@ -200,7 +207,7 @@ lookup_protocol:
inet->hdrincl = 1;
}
- sk->sk_destruct = inet_sock_destruct;
+ sk->sk_destruct = inet6_sock_destruct;
sk->sk_family = PF_INET6;
sk->sk_protocol = protocol;
@@ -226,7 +233,7 @@ lookup_protocol:
RCU_INIT_POINTER(inet->mc_list, NULL);
inet->rcv_tos = 0;
- if (net->ipv4.sysctl_ip_no_pmtu_disc)
+ if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
@@ -318,7 +325,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
/* Binding to v4-mapped address on a v6-only socket
* makes no sense
*/
- if (sk->sk_ipv6only) {
+ if (ipv6_only_sock(sk)) {
err = -EINVAL;
goto out;
}
@@ -441,11 +448,14 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sock *sk = sock->sk;
u32 flags = BIND_WITH_LOCK;
+ const struct proto *prot;
int err = 0;
+ /* IPV6_ADDRFORM can change sk->sk_prot under us. */
+ prot = READ_ONCE(sk->sk_prot);
/* If the socket has its own bind function then use it. */
- if (sk->sk_prot->bind)
- return sk->sk_prot->bind(sk, uaddr, addr_len);
+ if (prot->bind)
+ return prot->bind(sk, uaddr, addr_len);
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
@@ -506,6 +516,12 @@ void inet6_destroy_sock(struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet6_destroy_sock);
+void inet6_cleanup_sock(struct sock *sk)
+{
+ inet6_destroy_sock(sk);
+}
+EXPORT_SYMBOL_GPL(inet6_cleanup_sock);
+
/*
* This does both peername and sockname.
*/
@@ -555,6 +571,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
void __user *argp = (void __user *)arg;
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
+ const struct proto *prot;
switch (cmd) {
case SIOCADDRT:
@@ -572,9 +589,11 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCSIFDSTADDR:
return addrconf_set_dstaddr(net, argp);
default:
- if (!sk->sk_prot->ioctl)
+ /* IPV6_ADDRFORM can change sk->sk_prot under us. */
+ prot = READ_ONCE(sk->sk_prot);
+ if (!prot->ioctl)
return -ENOIOCTLCMD;
- return sk->sk_prot->ioctl(sk, cmd, arg);
+ return prot->ioctl(sk, cmd, arg);
}
/*NOTREACHED*/
return 0;
@@ -636,29 +655,34 @@ INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *,
int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
+ const struct proto *prot;
if (unlikely(inet_send_prepare(sk)))
return -EAGAIN;
- return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udpv6_sendmsg,
+ /* IPV6_ADDRFORM can change sk->sk_prot under us. */
+ prot = READ_ONCE(sk->sk_prot);
+ return INDIRECT_CALL_2(prot->sendmsg, tcp_sendmsg, udpv6_sendmsg,
sk, msg, size);
}
INDIRECT_CALLABLE_DECLARE(int udpv6_recvmsg(struct sock *, struct msghdr *,
- size_t, int, int, int *));
+ size_t, int, int *));
int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
struct sock *sk = sock->sk;
+ const struct proto *prot;
int addr_len = 0;
int err;
if (likely(!(flags & MSG_ERRQUEUE)))
sock_rps_record_flow(sk);
- err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udpv6_recvmsg,
- sk, msg, size, flags & MSG_DONTWAIT,
- flags & ~MSG_DONTWAIT, &addr_len);
+ /* IPV6_ADDRFORM can change sk->sk_prot under us. */
+ prot = READ_ONCE(sk->sk_prot);
+ err = INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg,
+ sk, msg, size, flags, &addr_len);
if (err >= 0)
msg->msg_namelen = addr_len;
return err;
@@ -690,6 +714,7 @@ const struct proto_ops inet6_stream_ops = {
.sendpage_locked = tcp_sendpage_locked,
.splice_read = tcp_splice_read,
.read_sock = tcp_read_sock,
+ .read_skb = tcp_read_skb,
.peek_len = tcp_peek_len,
#ifdef CONFIG_COMPAT
.compat_ioctl = inet6_compat_ioctl,
@@ -715,7 +740,7 @@ const struct proto_ops inet6_dgram_ops = {
.getsockopt = sock_common_getsockopt, /* ok */
.sendmsg = inet6_sendmsg, /* retpoline's sake */
.recvmsg = inet6_recvmsg, /* retpoline's sake */
- .read_sock = udp_read_sock,
+ .read_skb = udp_read_skb,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
.set_peek_off = sk_set_peek_off,
@@ -1044,6 +1069,8 @@ static const struct ipv6_stub ipv6_stub_impl = {
static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
.inet6_bind = __inet6_bind,
.udp6_lib_lookup = __udp6_lib_lookup,
+ .ipv6_setsockopt = do_ipv6_setsockopt,
+ .ipv6_getsockopt = do_ipv6_getsockopt,
};
static int __init inet6_init(void)
@@ -1057,6 +1084,8 @@ static int __init inet6_init(void)
for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
INIT_LIST_HEAD(r);
+ raw_hashinfo_init(&raw_v6_hashinfo);
+
if (disable_ipv6_mod) {
pr_info("Loaded, but administratively disabled, reboot required to enable\n");
goto out;
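
[Annotation, not part of the diff] A recurring pattern in the af_inet6.c hunks: because setsockopt(IPV6_ADDRFORM) can swap sk->sk_prot to the IPv4 proto while other threads sit in bind/ioctl/sendmsg/recvmsg, each converted call site snapshots the pointer once with READ_ONCE() and uses only the snapshot. Minimal sketch of the idiom:

/* Illustrative sketch: load sk->sk_prot once, never re-dereference. */
const struct proto *prot = READ_ONCE(sk->sk_prot);

if (!prot->ioctl)
	return -ENOIOCTLCMD;
return prot->ioctl(sk, cmd, arg);
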
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index b5995c1f4d7a..5228d2716289 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -666,30 +666,38 @@ static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return 0;
}
-static int ah6_init_state(struct xfrm_state *x)
+static int ah6_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack)
{
struct ah_data *ahp = NULL;
struct xfrm_algo_desc *aalg_desc;
struct crypto_ahash *ahash;
- if (!x->aalg)
+ if (!x->aalg) {
+ NL_SET_ERR_MSG(extack, "AH requires a state with an AUTH algorithm");
goto error;
+ }
- if (x->encap)
+ if (x->encap) {
+ NL_SET_ERR_MSG(extack, "AH is not compatible with encapsulation");
goto error;
+ }
ahp = kzalloc(sizeof(*ahp), GFP_KERNEL);
if (!ahp)
return -ENOMEM;
ahash = crypto_alloc_ahash(x->aalg->alg_name, 0, 0);
- if (IS_ERR(ahash))
+ if (IS_ERR(ahash)) {
+ NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations");
goto error;
+ }
ahp->ahash = ahash;
if (crypto_ahash_setkey(ahash, x->aalg->alg_key,
- (x->aalg->alg_key_len + 7) / 8))
+ (x->aalg->alg_key_len + 7) / 8)) {
+ NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations");
goto error;
+ }
/*
* Lookup the algorithm description maintained by xfrm_algo,
@@ -702,9 +710,7 @@ static int ah6_init_state(struct xfrm_state *x)
if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
crypto_ahash_digestsize(ahash)) {
- pr_info("AH: %s digestsize %u != %u\n",
- x->aalg->alg_name, crypto_ahash_digestsize(ahash),
- aalg_desc->uinfo.auth.icv_fullbits/8);
+ NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations");
goto error;
}
@@ -721,6 +727,7 @@ static int ah6_init_state(struct xfrm_state *x)
x->props.header_len += sizeof(struct ipv6hdr);
break;
default:
+ NL_SET_ERR_MSG(extack, "Invalid mode requested for AH, must be one of TRANSPORT, TUNNEL, BEET");
goto error;
}
x->data = ahp;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 206f66310a88..5ecb56522f9d 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -145,7 +145,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
int err;
if (usin->sin6_family == AF_INET) {
- if (__ipv6_only_sock(sk))
+ if (ipv6_only_sock(sk))
return -EAFNOSUPPORT;
err = __ip4_datagram_connect(sk, uaddr, addr_len);
goto ipv4_connected;
@@ -178,7 +178,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
if (addr_type & IPV6_ADDR_MAPPED) {
struct sockaddr_in sin;
- if (__ipv6_only_sock(sk)) {
+ if (ipv6_only_sock(sk)) {
err = -ENETUNREACH;
goto out;
}
@@ -218,11 +218,11 @@ ipv4_connected:
err = -EINVAL;
goto out;
}
- sk->sk_bound_dev_if = usin->sin6_scope_id;
+ WRITE_ONCE(sk->sk_bound_dev_if, usin->sin6_scope_id);
}
if (!sk->sk_bound_dev_if && (addr_type & IPV6_ADDR_MULTICAST))
- sk->sk_bound_dev_if = np->mcast_oif;
+ WRITE_ONCE(sk->sk_bound_dev_if, np->mcast_oif);
/* Connect to link-local address requires an interface */
if (!sk->sk_bound_dev_if) {
@@ -256,7 +256,7 @@ ipv4_connected:
goto out;
}
- reuseport_has_conns(sk, true);
+ reuseport_has_conns_set(sk);
sk->sk_state = TCP_ESTABLISHED;
sk_set_txhash(sk);
out:
@@ -798,7 +798,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
if (src_idx) {
if (fl6->flowi6_oif &&
src_idx != fl6->flowi6_oif &&
- (sk->sk_bound_dev_if != fl6->flowi6_oif ||
+ (READ_ONCE(sk->sk_bound_dev_if) != fl6->flowi6_oif ||
!sk_dev_equal_l3scope(sk, src_idx)))
return -EINVAL;
fl6->flowi6_oif = src_idx;
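
Note on the WRITE_ONCE()/READ_ONCE() annotations above: sk->sk_bound_dev_if is now read without the socket lock on some paths, so the store and the lockless loads are marked to make the data race intentional (and visible to KCSAN) and to force a single load per use. The idiom in isolation, as a sketch:

	/* writer side, typically under lock_sock() */
	WRITE_ONCE(sk->sk_bound_dev_if, usin->sin6_scope_id);

	/* lockless reader: snapshot once, then only use the snapshot */
	int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);

	if (bound_dev_if && bound_dev_if != fl6->flowi6_oif)
		return -EINVAL;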
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 8bb2c407b46b..14ed868680c6 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -151,6 +151,7 @@ static void esp_free_tcp_sk(struct rcu_head *head)
static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
{
struct xfrm_encap_tmpl *encap = x->encap;
+ struct net *net = xs_net(x);
struct esp_tcp_sk *esk;
__be16 sport, dport;
struct sock *nsk;
@@ -177,7 +178,7 @@ static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
}
spin_unlock_bh(&x->lock);
- sk = __inet6_lookup_established(xs_net(x), &tcp_hashinfo, &x->id.daddr.in6,
+ sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &x->id.daddr.in6,
dport, &x->props.saddr.in6, ntohs(sport), 0, 0);
if (!sk)
return ERR_PTR(-ENOENT);
@@ -343,7 +344,7 @@ static struct ip_esp_hdr *esp_output_set_esn(struct sk_buff *skb,
struct esp_output_extra *extra)
{
/* For ESN we move the header forward by 4 bytes to
- * accomodate the high bits. We will move it back after
+ * accommodate the high bits. We will move it back after
* encryption.
*/
if ((x->props.flags & XFRM_STATE_ESN)) {
@@ -490,6 +491,10 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
return err;
}
+ if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
+ ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE)
+ goto cow;
+
if (!skb_cloned(skb)) {
if (tailen <= skb_tailroom(skb)) {
nfrags = 1;
@@ -707,7 +712,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
u32 padto;
- padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached));
+ padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
if (skb->len < padto)
esp.tfclen = padto - skb->len;
}
@@ -737,7 +742,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
static inline int esp_remove_trailer(struct sk_buff *skb)
{
struct xfrm_state *x = xfrm_input_state(skb);
- struct xfrm_offload *xo = xfrm_offload(skb);
struct crypto_aead *aead = x->data;
int alen, hlen, elen;
int padlen, trimlen;
@@ -749,11 +753,6 @@ static inline int esp_remove_trailer(struct sk_buff *skb)
hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
elen = skb->len - hlen;
- if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) {
- ret = xo->proto;
- goto out;
- }
-
ret = skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2);
BUG_ON(ret);
@@ -807,8 +806,7 @@ int esp6_input_done2(struct sk_buff *skb, int err)
struct tcphdr *th;
offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off);
-
- if (offset < 0) {
+ if (offset == -1) {
err = -EINVAL;
goto out;
}
@@ -899,7 +897,7 @@ static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
struct xfrm_state *x = xfrm_input_state(skb);
/* For ESN we move the header forward by 4 bytes to
- * accomodate the high bits. We will move it back after
+ * accommodate the high bits. We will move it back after
* decryption.
*/
if ((x->props.flags & XFRM_STATE_ESN)) {
@@ -1053,16 +1051,17 @@ static void esp6_destroy(struct xfrm_state *x)
crypto_free_aead(aead);
}
-static int esp_init_aead(struct xfrm_state *x)
+static int esp_init_aead(struct xfrm_state *x, struct netlink_ext_ack *extack)
{
char aead_name[CRYPTO_MAX_ALG_NAME];
struct crypto_aead *aead;
int err;
- err = -ENAMETOOLONG;
if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
- x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
- goto error;
+ x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME) {
+ NL_SET_ERR_MSG(extack, "Algorithm name is too long");
+ return -ENAMETOOLONG;
+ }
aead = crypto_alloc_aead(aead_name, 0, 0);
err = PTR_ERR(aead);
@@ -1080,11 +1079,15 @@ static int esp_init_aead(struct xfrm_state *x)
if (err)
goto error;
+ return 0;
+
error:
+ NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations");
return err;
}
-static int esp_init_authenc(struct xfrm_state *x)
+static int esp_init_authenc(struct xfrm_state *x,
+ struct netlink_ext_ack *extack)
{
struct crypto_aead *aead;
struct crypto_authenc_key_param *param;
@@ -1095,10 +1098,6 @@ static int esp_init_authenc(struct xfrm_state *x)
unsigned int keylen;
int err;
- err = -EINVAL;
- if (!x->ealg)
- goto error;
-
err = -ENAMETOOLONG;
if ((x->props.flags & XFRM_STATE_ESN)) {
@@ -1107,22 +1106,28 @@ static int esp_init_authenc(struct xfrm_state *x)
x->geniv ?: "", x->geniv ? "(" : "",
x->aalg ? x->aalg->alg_name : "digest_null",
x->ealg->alg_name,
- x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME)
+ x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME) {
+ NL_SET_ERR_MSG(extack, "Algorithm name is too long");
goto error;
+ }
} else {
if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
"%s%sauthenc(%s,%s)%s",
x->geniv ?: "", x->geniv ? "(" : "",
x->aalg ? x->aalg->alg_name : "digest_null",
x->ealg->alg_name,
- x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME)
+ x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME) {
+ NL_SET_ERR_MSG(extack, "Algorithm name is too long");
goto error;
+ }
}
aead = crypto_alloc_aead(authenc_name, 0, 0);
err = PTR_ERR(aead);
- if (IS_ERR(aead))
+ if (IS_ERR(aead)) {
+ NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations");
goto error;
+ }
x->data = aead;
@@ -1152,17 +1157,16 @@ static int esp_init_authenc(struct xfrm_state *x)
err = -EINVAL;
if (aalg_desc->uinfo.auth.icv_fullbits / 8 !=
crypto_aead_authsize(aead)) {
- pr_info("ESP: %s digestsize %u != %u\n",
- x->aalg->alg_name,
- crypto_aead_authsize(aead),
- aalg_desc->uinfo.auth.icv_fullbits / 8);
+ NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations");
goto free_key;
}
err = crypto_aead_setauthsize(
aead, x->aalg->alg_trunc_len / 8);
- if (err)
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations");
goto free_key;
+ }
}
param->enckeylen = cpu_to_be32((x->ealg->alg_key_len + 7) / 8);
@@ -1177,7 +1181,7 @@ error:
return err;
}
-static int esp6_init_state(struct xfrm_state *x)
+static int esp6_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack)
{
struct crypto_aead *aead;
u32 align;
@@ -1185,10 +1189,14 @@ static int esp6_init_state(struct xfrm_state *x)
x->data = NULL;
- if (x->aead)
- err = esp_init_aead(x);
- else
- err = esp_init_authenc(x);
+ if (x->aead) {
+ err = esp_init_aead(x, extack);
+ } else if (x->ealg) {
+ err = esp_init_authenc(x, extack);
+ } else {
+ NL_SET_ERR_MSG(extack, "ESP: AEAD or CRYPT must be provided");
+ err = -EINVAL;
+ }
if (err)
goto error;
@@ -1216,6 +1224,7 @@ static int esp6_init_state(struct xfrm_state *x)
switch (encap->encap_type) {
default:
+ NL_SET_ERR_MSG(extack, "Unsupported encapsulation type for ESP");
err = -EINVAL;
goto error;
case UDP_ENCAP_ESPINUDP:
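
Note on the new guard in esp6_output_head(): the fast path fills the ESP trailer from page fragments, and that refill is only good for a single page, so any tail that would not fit in one page after cache-line alignment is routed to the copy-on-write fallback up front. The bound check in isolation (helper name is hypothetical):

	static bool esp_tail_needs_cow(unsigned int tailen,
				       unsigned int data_len)
	{
		/* anything larger than a page after alignment cannot use
		 * the single-page fast path and must expand the skb
		 */
		return ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
		       ALIGN(data_len, L1_CACHE_BYTES) > PAGE_SIZE;
	}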
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index ba5e81cd569c..79d43548279c 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -145,8 +145,10 @@ static struct sk_buff *xfrm6_tunnel_gso_segment(struct xfrm_state *x,
struct sk_buff *skb,
netdev_features_t features)
{
- __skb_push(skb, skb->mac_len);
- return skb_mac_gso_segment(skb, features);
+ __be16 type = x->inner_mode.family == AF_INET ? htons(ETH_P_IP)
+ : htons(ETH_P_IPV6);
+
+ return skb_eth_gso_segment(skb, features, type);
}
static struct sk_buff *xfrm6_transport_gso_segment(struct xfrm_state *x,
@@ -199,6 +201,9 @@ static struct sk_buff *xfrm6_beet_gso_segment(struct xfrm_state *x,
ipv6_skip_exthdr(skb, 0, &proto, &frag);
}
+ if (proto == IPPROTO_IPIP)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP6;
+
__skb_pull(skb, skb_transport_offset(skb));
ops = rcu_dereference(inet6_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment))
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 77e34aec7e82..a8d961d3a477 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -90,12 +90,13 @@ static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff,
break;
fallthrough;
case 2: /* send ICMP PARM PROB regardless and drop packet */
- icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
+ icmpv6_param_prob_reason(skb, ICMPV6_UNK_OPTION, optoff,
+ SKB_DROP_REASON_UNHANDLED_PROTO);
return false;
}
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO);
return false;
}
@@ -218,7 +219,7 @@ static bool ip6_parse_tlv(bool hopbyhop,
if (len == 0)
return true;
bad:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
return false;
}
@@ -232,6 +233,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
struct ipv6_destopt_hao *hao;
struct inet6_skb_parm *opt = IP6CB(skb);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ SKB_DR(reason);
int ret;
if (opt->dsthao) {
@@ -246,19 +248,23 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
if (hao->length != 16) {
net_dbg_ratelimited("hao invalid option length = %d\n",
hao->length);
+ SKB_DR_SET(reason, IP_INHDR);
goto discard;
}
if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) {
net_dbg_ratelimited("hao is not an unicast addr: %pI6\n",
&hao->addr);
+ SKB_DR_SET(reason, INVALID_PROTO);
goto discard;
}
ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr,
(xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS);
- if (unlikely(ret < 0))
+ if (unlikely(ret < 0)) {
+ SKB_DR_SET(reason, XFRM_POLICY);
goto discard;
+ }
if (skb_cloned(skb)) {
if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
@@ -281,7 +287,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
return true;
discard:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return false;
}
#endif
@@ -487,7 +493,6 @@ static int ipv6_rpl_srh_rcv(struct sk_buff *skb)
struct net *net = dev_net(skb->dev);
struct inet6_dev *idev;
struct ipv6hdr *oldhdr;
- struct in6_addr addr;
unsigned char *buf;
int accept_rpl_seg;
int i, err;
@@ -616,9 +621,7 @@ looped_back:
return -1;
}
- addr = ipv6_hdr(skb)->daddr;
- ipv6_hdr(skb)->daddr = ohdr->rpl_segaddr[i];
- ohdr->rpl_segaddr[i] = addr;
+ swap(ipv6_hdr(skb)->daddr, ohdr->rpl_segaddr[i]);
ipv6_rpl_srh_compress(chdr, ohdr, &ipv6_hdr(skb)->daddr, n);
@@ -934,7 +937,7 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
}
net_dbg_ratelimited("ipv6_hop_ra: wrong RA length %d\n",
nh[optoff + 1]);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
return false;
}
@@ -988,7 +991,7 @@ ignore:
return true;
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
return false;
}
@@ -997,31 +1000,30 @@ drop:
static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
{
const unsigned char *nh = skb_network_header(skb);
- struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
- struct net *net = ipv6_skb_net(skb);
+ SKB_DR(reason);
u32 pkt_len;
if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
nh[optoff+1]);
- __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
+ SKB_DR_SET(reason, IP_INHDR);
goto drop;
}
pkt_len = ntohl(*(__be32 *)(nh + optoff + 2));
if (pkt_len <= IPV6_MAXPLEN) {
- __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
+ icmpv6_param_prob_reason(skb, ICMPV6_HDR_FIELD, optoff + 2,
+ SKB_DROP_REASON_IP_INHDR);
return false;
}
if (ipv6_hdr(skb)->payload_len) {
- __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
+ icmpv6_param_prob_reason(skb, ICMPV6_HDR_FIELD, optoff,
+ SKB_DROP_REASON_IP_INHDR);
return false;
}
if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
- __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTRUNCATEDPKTS);
+ SKB_DR_SET(reason, PKT_TOO_SMALL);
goto drop;
}
@@ -1032,7 +1034,7 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
return true;
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return false;
}
@@ -1054,7 +1056,7 @@ static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff)
return true;
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
return false;
}
@@ -1344,14 +1346,14 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
return opt2;
}
-struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
- struct ipv6_txoptions *opt)
+struct ipv6_txoptions *__ipv6_fixup_options(struct ipv6_txoptions *opt_space,
+ struct ipv6_txoptions *opt)
{
/*
* ignore the dest before srcrt unless srcrt is being included.
* --yoshfuji
*/
- if (opt && opt->dst0opt && !opt->srcrt) {
+ if (opt->dst0opt && !opt->srcrt) {
if (opt_space != opt) {
memcpy(opt_space, opt, sizeof(*opt_space));
opt = opt_space;
@@ -1362,7 +1364,7 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
return opt;
}
-EXPORT_SYMBOL_GPL(ipv6_fixup_options);
+EXPORT_SYMBOL_GPL(__ipv6_fixup_options);
/**
* fl6_update_dst - update flowi destination address with info given
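
Note on the exthdrs.c conversion above: it follows the standard drop-reason idiom -- declare a default with SKB_DR(), refine it with SKB_DR_SET() (or SKB_DR_OR() to avoid clobbering an earlier reason) on each failure branch, and pass it to kfree_skb_reason() at a single discard label. Condensed sketch; bad_length() and policy_ok() are hypothetical stand-ins:

	static bool example_opt_handler(struct sk_buff *skb)
	{
		SKB_DR(reason);	/* starts as SKB_DROP_REASON_NOT_SPECIFIED */

		if (bad_length(skb)) {
			SKB_DR_SET(reason, IP_INHDR);
			goto discard;
		}
		if (!policy_ok(skb)) {
			SKB_DR_SET(reason, XFRM_POLICY);
			goto discard;
		}
		return true;
	discard:
		kfree_skb_reason(skb, reason);
		return false;
	}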
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ec029c86ae06..7c2003833010 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -16,6 +16,7 @@
#include <linux/indirect_call_wrapper.h>
#include <net/fib_rules.h>
+#include <net/inet_dscp.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
@@ -25,14 +26,14 @@ struct fib6_rule {
struct fib_rule common;
struct rt6key src;
struct rt6key dst;
- u8 tclass;
+ dscp_t dscp;
};
static bool fib6_rule_matchall(const struct fib_rule *rule)
{
struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
- if (r->dst.plen || r->src.plen || r->tclass)
+ if (r->dst.plen || r->src.plen || r->dscp)
return false;
return fib_rule_matchall(rule);
}
@@ -323,7 +324,7 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
return 0;
}
- if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
+ if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel))
return 0;
if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
@@ -349,6 +350,13 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+ if (!inet_validate_dscp(frh->tos)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid dsfield (tos): ECN bits must be 0");
+ goto errout;
+ }
+ rule6->dscp = inet_dsfield_to_dscp(frh->tos);
+
if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
if (rule->table == RT6_TABLE_UNSPEC) {
NL_SET_ERR_MSG(extack, "Invalid table");
@@ -369,7 +377,6 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule6->src.plen = frh->src_len;
rule6->dst.plen = frh->dst_len;
- rule6->tclass = frh->tos;
if (fib_rule_requires_fldissect(rule))
net->ipv6.fib6_rules_require_fldissect++;
@@ -402,7 +409,7 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
return 0;
- if (frh->tos && (rule6->tclass != frh->tos))
+ if (frh->tos && inet_dscp_to_dsfield(rule6->dscp) != frh->tos)
return 0;
if (frh->src_len &&
@@ -423,7 +430,7 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->dst_len = rule6->dst.plen;
frh->src_len = rule6->src.plen;
- frh->tos = rule6->tclass;
+ frh->tos = inet_dscp_to_dsfield(rule6->dscp);
if ((rule6->dst.plen &&
nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) ||
@@ -486,16 +493,21 @@ out_fib6_rules_ops:
goto out;
}
-static void __net_exit fib6_rules_net_exit(struct net *net)
+static void __net_exit fib6_rules_net_exit_batch(struct list_head *net_list)
{
+ struct net *net;
+
rtnl_lock();
- fib_rules_unregister(net->ipv6.fib6_rules_ops);
+ list_for_each_entry(net, net_list, exit_list) {
+ fib_rules_unregister(net->ipv6.fib6_rules_ops);
+ cond_resched();
+ }
rtnl_unlock();
}
static struct pernet_operations fib6_rules_net_ops = {
.init = fib6_rules_net_init,
- .exit = fib6_rules_net_exit,
+ .exit_batch = fib6_rules_net_exit_batch,
};
int __init fib6_rules_init(void)
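
Note on the dscp_t conversion above: storing a type-safe dscp_t instead of a raw tclass byte forces the ECN bits to be validated once at configure time, after which comparisons against ip6_dscp() are exact. Round-trip sketch, assuming the <net/inet_dscp.h> helpers:

	/* configure: reject tos values with ECN bits set, then convert */
	if (!inet_validate_dscp(frh->tos))
		return -EINVAL;		/* low two (ECN) bits must be 0 */
	rule6->dscp = inet_dsfield_to_dscp(frh->tos);

	/* dump: convert back to the uapi dsfield byte */
	frh->tos = inet_dscp_to_dsfield(rule6->dscp);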
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 96c5cc0f30ce..9d92d51c4757 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -69,17 +69,7 @@
#include <linux/uaccess.h>
-/*
- * The ICMP socket(s). This is the most convenient way to flow control
- * our ICMP output as well as maintain a clean interface throughout
- * all layers. All Socketless IP sends will soon be gone.
- *
- * On SMP we have one ICMP socket per-cpu.
- */
-static struct sock *icmpv6_sk(struct net *net)
-{
- return this_cpu_read(*net->ipv6.icmp_sk);
-}
+static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
@@ -110,11 +100,11 @@ static const struct inet6_protocol icmpv6_protocol = {
};
/* Called with BH disabled */
-static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
+static struct sock *icmpv6_xmit_lock(struct net *net)
{
struct sock *sk;
- sk = icmpv6_sk(net);
+ sk = this_cpu_read(ipv6_icmp_sk);
if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
/* This can happen if the output path (f.e. SIT or
* ip6ip6 tunnel) signals dst_link_failure() for an
@@ -122,11 +112,13 @@ static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
*/
return NULL;
}
+ sock_net_set(sk, net);
return sk;
}
-static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
+static void icmpv6_xmit_unlock(struct sock *sk)
{
+ sock_net_set(sk, &init_net);
spin_unlock(&sk->sk_lock.slock);
}
@@ -637,12 +629,13 @@ out_bh_enable:
}
EXPORT_SYMBOL(icmp6_send);
-/* Slightly more convenient version of icmp6_send.
+/* Slightly more convenient version of icmp6_send with drop reasons.
*/
-void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
+void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
+ enum skb_drop_reason reason)
{
icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
}
/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
@@ -872,21 +865,23 @@ out:
static int icmpv6_rcv(struct sk_buff *skb)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct net *net = dev_net(skb->dev);
struct net_device *dev = icmp6_dev(skb);
struct inet6_dev *idev = __in6_dev_get(dev);
const struct in6_addr *saddr, *daddr;
struct icmp6hdr *hdr;
u8 type;
- bool success = false;
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
struct sec_path *sp = skb_sec_path(skb);
int nh;
if (!(sp && sp->xvec[sp->len - 1]->props.flags &
- XFRM_STATE_ICMP))
+ XFRM_STATE_ICMP)) {
+ reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop_no_count;
+ }
if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
goto drop_no_count;
@@ -894,8 +889,11 @@ static int icmpv6_rcv(struct sk_buff *skb)
nh = skb_network_offset(skb);
skb_set_network_header(skb, sizeof(*hdr));
- if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
+ if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
+ skb)) {
+ reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop_no_count;
+ }
skb_set_network_header(skb, nh);
}
@@ -927,16 +925,16 @@ static int icmpv6_rcv(struct sk_buff *skb)
break;
case ICMPV6_EXT_ECHO_REQUEST:
if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
- net->ipv4.sysctl_icmp_echo_enable_probe)
+ READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
icmpv6_echo_reply(skb);
break;
case ICMPV6_ECHO_REPLY:
- success = ping_rcv(skb);
+ reason = ping_rcv(skb);
break;
case ICMPV6_EXT_ECHO_REPLY:
- success = ping_rcv(skb);
+ reason = ping_rcv(skb);
break;
case ICMPV6_PKT_TOOBIG:
@@ -1002,19 +1000,20 @@ static int icmpv6_rcv(struct sk_buff *skb)
/* until the v6 path can be better sorted assume failure and
* preserve the status quo behaviour for the rest of the paths to here
*/
- if (success)
- consume_skb(skb);
+ if (reason)
+ kfree_skb_reason(skb, reason);
else
- kfree_skb(skb);
+ consume_skb(skb);
return 0;
csum_error:
+ reason = SKB_DROP_REASON_ICMP_CSUM;
__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return 0;
}
@@ -1034,59 +1033,27 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
}
-static void __net_exit icmpv6_sk_exit(struct net *net)
-{
- int i;
-
- for_each_possible_cpu(i)
- inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
- free_percpu(net->ipv6.icmp_sk);
-}
-
-static int __net_init icmpv6_sk_init(struct net *net)
+int __init icmpv6_init(void)
{
struct sock *sk;
int err, i;
- net->ipv6.icmp_sk = alloc_percpu(struct sock *);
- if (!net->ipv6.icmp_sk)
- return -ENOMEM;
-
for_each_possible_cpu(i) {
err = inet_ctl_sock_create(&sk, PF_INET6,
- SOCK_RAW, IPPROTO_ICMPV6, net);
+ SOCK_RAW, IPPROTO_ICMPV6, &init_net);
if (err < 0) {
pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
err);
- goto fail;
+ return err;
}
- *per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
+ per_cpu(ipv6_icmp_sk, i) = sk;
/* Enough space for 2 64K ICMP packets, including
* sk_buff struct overhead.
*/
sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
}
- return 0;
-
- fail:
- icmpv6_sk_exit(net);
- return err;
-}
-
-static struct pernet_operations icmpv6_sk_ops = {
- .init = icmpv6_sk_init,
- .exit = icmpv6_sk_exit,
-};
-
-int __init icmpv6_init(void)
-{
- int err;
-
- err = register_pernet_subsys(&icmpv6_sk_ops);
- if (err < 0)
- return err;
err = -EAGAIN;
if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
@@ -1101,14 +1068,12 @@ sender_reg_err:
inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
pr_err("Failed to register ICMP6 protocol\n");
- unregister_pernet_subsys(&icmpv6_sk_ops);
return err;
}
void icmpv6_cleanup(void)
{
inet6_unregister_icmp_sender(icmp6_send);
- unregister_pernet_subsys(&icmpv6_sk_ops);
inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
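
Note on the icmp.c rework above: the per-netns percpu socket array becomes one global per-CPU control socket; each sender grabs the local CPU's socket under its slock and temporarily re-parents it to the right netns for the duration of the transmit. Sketch of the borrow/return pair (names hypothetical):

	static DEFINE_PER_CPU(struct sock *, example_icmp_sk);

	/* caller must have BH disabled; returns NULL if the socket is
	 * busy, e.g. when the output path re-enters us via
	 * dst_link_failure()
	 */
	static struct sock *example_xmit_lock(struct net *net)
	{
		struct sock *sk = this_cpu_read(example_icmp_sk);

		if (unlikely(!spin_trylock(&sk->sk_lock.slock)))
			return NULL;
		sock_net_set(sk, net);	/* borrow for this netns */
		return sk;
	}

	static void example_xmit_unlock(struct sock *sk)
	{
		sock_net_set(sk, &init_net);	/* hand it back */
		spin_unlock(&sk->sk_lock.slock);
	}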
diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c
index 36c58aa257e8..3faf62530d6a 100644
--- a/net/ipv6/ila/ila_main.c
+++ b/net/ipv6/ila/ila_main.c
@@ -55,6 +55,7 @@ struct genl_family ila_nl_family __ro_after_init = {
.module = THIS_MODULE,
.ops = ila_nl_ops,
.n_ops = ARRAY_SIZE(ila_nl_ops),
+ .resv_start_op = ILA_CMD_FLUSH + 1,
};
static __net_init int ila_init_net(struct net *net)
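
Note on .resv_start_op (set here for ila and again for ioam6 below): as far as the genetlink API is concerned, it marks the first command number from which the core strictly rejects nonzero reserved fields in requests; the convention is highest existing command + 1, so current commands keep their legacy behaviour. Sketch of a family declaration (all names hypothetical):

	static struct genl_family example_nl_family __ro_after_init = {
		.name		= "example",
		.version	= 1,
		.ops		= example_nl_ops,
		.n_ops		= ARRAY_SIZE(example_nl_ops),
		.resv_start_op	= EXAMPLE_CMD_MAX + 1,
	};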
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 4514444e96c8..b64b49012655 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -21,8 +21,6 @@
#include <net/ip.h>
#include <net/sock_reuseport.h>
-extern struct inet_hashinfo tcp_hashinfo;
-
u32 inet6_ehashfn(const struct net *net,
const struct in6_addr *laddr, const u16 lport,
const struct in6_addr *faddr, const __be16 fport)
@@ -71,12 +69,12 @@ begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (sk->sk_hash != hash)
continue;
- if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))
+ if (!inet6_match(net, sk, saddr, daddr, ports, dif, sdif))
continue;
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
goto out;
- if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))) {
+ if (unlikely(!inet6_match(net, sk, saddr, daddr, ports, dif, sdif))) {
sock_gen_put(sk);
goto begin;
}
@@ -138,12 +136,11 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
const __be16 sport, const struct in6_addr *daddr,
const unsigned short hnum, const int dif, const int sdif)
{
- struct inet_connection_sock *icsk;
struct sock *sk, *result = NULL;
+ struct hlist_nulls_node *node;
int score, hiscore = 0;
- inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
- sk = (struct sock *)icsk;
+ sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) {
score = compute_score(sk, net, hnum, daddr, dif, sdif);
if (score > hiscore) {
result = lookup_reuseport(net, sk, skb, doff,
@@ -170,7 +167,7 @@ static inline struct sock *inet6_lookup_run_bpf(struct net *net,
struct sock *sk, *reuse_sk;
bool no_reuseport;
- if (hashinfo != &tcp_hashinfo)
+ if (hashinfo != net->ipv4.tcp_death_row.hashinfo)
return NULL; /* only TCP is supported */
no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, saddr, sport,
@@ -269,7 +266,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
if (sk2->sk_hash != hash)
continue;
- if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports,
+ if (likely(inet6_match(net, sk2, saddr, daddr, ports,
dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
@@ -308,7 +305,7 @@ not_unique:
return -EADDRNOTAVAIL;
}
-static u32 inet6_sk_port_offset(const struct sock *sk)
+static u64 inet6_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
@@ -320,7 +317,7 @@ static u32 inet6_sk_port_offset(const struct sock *sk)
int inet6_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk)
{
- u32 port_offset = 0;
+ u64 port_offset = 0;
if (!inet_sk(sk)->inet_num)
port_offset = inet6_sk_port_offset(sk);
@@ -333,11 +330,8 @@ int inet6_hash(struct sock *sk)
{
int err = 0;
- if (sk->sk_state != TCP_CLOSE) {
- local_bh_disable();
+ if (sk->sk_state != TCP_CLOSE)
err = __inet_hash(sk, NULL);
- local_bh_enable();
- }
return err;
}
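
Note on the double inet6_match() above: established sockets live in SLAB_TYPESAFE_BY_RCU slabs, so between matching a socket and pinning it with refcount_inc_not_zero() the slot can be freed and reused for a different connection; the second match catches that and restarts the scan. The three-step pattern, condensed with comments:

	sk_nulls_for_each_rcu(sk, node, &head->chain) {
		if (sk->sk_hash != hash ||
		    !inet6_match(net, sk, saddr, daddr, ports, dif, sdif))
			continue;
		if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
			goto out;		/* socket is being freed */
		if (unlikely(!inet6_match(net, sk, saddr, daddr,
					  ports, dif, sdif))) {
			sock_gen_put(sk);	/* slot was recycled */
			goto begin;		/* rescan the chain */
		}
		goto found;
	}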
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
index e159eb4328a8..571f0e4d9cf3 100644
--- a/net/ipv6/ioam6.c
+++ b/net/ipv6/ioam6.c
@@ -619,6 +619,7 @@ static struct genl_family ioam6_genl_family __ro_after_init = {
.parallel_ops = true,
.ops = ioam6_genl_ops,
.n_ops = ARRAY_SIZE(ioam6_genl_ops),
+ .resv_start_op = IOAM6_CMD_NS_SET_SCHEMA + 1,
.module = THIS_MODULE,
};
@@ -635,7 +636,8 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
struct ioam6_schema *sc,
u8 sclen, bool is_input)
{
- struct __kernel_sock_timeval ts;
+ struct timespec64 ts;
+ ktime_t tstamp;
u64 raw64;
u32 raw32;
u16 raw16;
@@ -680,10 +682,9 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
if (!skb->dev) {
*(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
} else {
- if (!skb->tstamp)
- __net_timestamp(skb);
+ tstamp = skb_tstamp_cond(skb, true);
+ ts = ktime_to_timespec64(tstamp);
- skb_get_new_timestamp(skb, &ts);
*(__be32 *)data = cpu_to_be32((u32)ts.tv_sec);
}
data += sizeof(__be32);
@@ -694,13 +695,12 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
if (!skb->dev) {
*(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
} else {
- if (!skb->tstamp)
- __net_timestamp(skb);
+ if (!trace->type.bit2) {
+ tstamp = skb_tstamp_cond(skb, true);
+ ts = ktime_to_timespec64(tstamp);
+ }
- if (!trace->type.bit2)
- skb_get_new_timestamp(skb, &ts);
-
- *(__be32 *)data = cpu_to_be32((u32)ts.tv_usec);
+ *(__be32 *)data = cpu_to_be32((u32)(ts.tv_nsec / NSEC_PER_USEC));
}
data += sizeof(__be32);
}
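
Note on the timestamp change above: skb_tstamp_cond(skb, true) stamps the skb only if it has no timestamp yet and returns the ktime_t, so a single conversion to timespec64 now feeds both the seconds and the fraction fields of the IOAM trace. In isolation:

	ktime_t tstamp = skb_tstamp_cond(skb, true);	/* stamp if unset */
	struct timespec64 ts = ktime_to_timespec64(tstamp);

	__be32 secs  = cpu_to_be32((u32)ts.tv_sec);
	__be32 usecs = cpu_to_be32((u32)(ts.tv_nsec / NSEC_PER_USEC));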
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index f90a87389fcc..f6f5b83dd954 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -32,13 +32,25 @@ struct ioam6_lwt_encap {
struct ioam6_trace_hdr traceh;
} __packed;
+struct ioam6_lwt_freq {
+ u32 k;
+ u32 n;
+};
+
struct ioam6_lwt {
struct dst_cache cache;
+ struct ioam6_lwt_freq freq;
+ atomic_t pkt_cnt;
u8 mode;
struct in6_addr tundst;
struct ioam6_lwt_encap tuninfo;
};
+static struct netlink_range_validation freq_range = {
+ .min = IOAM6_IPTUNNEL_FREQ_MIN,
+ .max = IOAM6_IPTUNNEL_FREQ_MAX,
+};
+
static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt)
{
return (struct ioam6_lwt *)lwt->data;
@@ -55,6 +67,8 @@ static struct ioam6_trace_hdr *ioam6_lwt_trace(struct lwtunnel_state *lwt)
}
static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
+ [IOAM6_IPTUNNEL_FREQ_K] = NLA_POLICY_FULL_RANGE(NLA_U32, &freq_range),
+ [IOAM6_IPTUNNEL_FREQ_N] = NLA_POLICY_FULL_RANGE(NLA_U32, &freq_range),
[IOAM6_IPTUNNEL_MODE] = NLA_POLICY_RANGE(NLA_U8,
IOAM6_IPTUNNEL_MODE_MIN,
IOAM6_IPTUNNEL_MODE_MAX),
@@ -96,6 +110,7 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
struct lwtunnel_state *lwt;
struct ioam6_lwt *ilwt;
int len_aligned, err;
+ u32 freq_k, freq_n;
u8 mode;
if (family != AF_INET6)
@@ -106,6 +121,23 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
if (err < 0)
return err;
+ if ((!tb[IOAM6_IPTUNNEL_FREQ_K] && tb[IOAM6_IPTUNNEL_FREQ_N]) ||
+ (tb[IOAM6_IPTUNNEL_FREQ_K] && !tb[IOAM6_IPTUNNEL_FREQ_N])) {
+ NL_SET_ERR_MSG(extack, "freq: missing parameter");
+ return -EINVAL;
+ } else if (!tb[IOAM6_IPTUNNEL_FREQ_K] && !tb[IOAM6_IPTUNNEL_FREQ_N]) {
+ freq_k = IOAM6_IPTUNNEL_FREQ_MIN;
+ freq_n = IOAM6_IPTUNNEL_FREQ_MIN;
+ } else {
+ freq_k = nla_get_u32(tb[IOAM6_IPTUNNEL_FREQ_K]);
+ freq_n = nla_get_u32(tb[IOAM6_IPTUNNEL_FREQ_N]);
+
+ if (freq_k > freq_n) {
+ NL_SET_ERR_MSG(extack, "freq: k > n is forbidden");
+ return -EINVAL;
+ }
+ }
+
if (!tb[IOAM6_IPTUNNEL_MODE])
mode = IOAM6_IPTUNNEL_MODE_INLINE;
else
@@ -140,6 +172,10 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
return err;
}
+ atomic_set(&ilwt->pkt_cnt, 0);
+ ilwt->freq.k = freq_k;
+ ilwt->freq.n = freq_n;
+
ilwt->mode = mode;
if (tb[IOAM6_IPTUNNEL_DST])
ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]);
@@ -263,11 +299,18 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
struct in6_addr orig_daddr;
struct ioam6_lwt *ilwt;
int err = -EINVAL;
+ u32 pkt_cnt;
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
ilwt = ioam6_lwt_state(dst->lwtstate);
+
+ /* Check for insertion frequency (i.e., "k over n" insertions) */
+ pkt_cnt = atomic_fetch_inc(&ilwt->pkt_cnt);
+ if (pkt_cnt % ilwt->freq.n >= ilwt->freq.k)
+ goto out;
+
orig_daddr = ipv6_hdr(skb)->daddr;
switch (ilwt->mode) {
@@ -358,6 +401,14 @@ static int ioam6_fill_encap_info(struct sk_buff *skb,
struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
int err;
+ err = nla_put_u32(skb, IOAM6_IPTUNNEL_FREQ_K, ilwt->freq.k);
+ if (err)
+ goto ret;
+
+ err = nla_put_u32(skb, IOAM6_IPTUNNEL_FREQ_N, ilwt->freq.n);
+ if (err)
+ goto ret;
+
err = nla_put_u8(skb, IOAM6_IPTUNNEL_MODE, ilwt->mode);
if (err)
goto ret;
@@ -379,7 +430,9 @@ static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
int nlsize;
- nlsize = nla_total_size(sizeof(ilwt->mode)) +
+ nlsize = nla_total_size(sizeof(ilwt->freq.k)) +
+ nla_total_size(sizeof(ilwt->freq.n)) +
+ nla_total_size(sizeof(ilwt->mode)) +
nla_total_size(sizeof(ilwt->tuninfo.traceh));
if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE)
@@ -395,7 +448,9 @@ static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
struct ioam6_lwt *ilwt_a = ioam6_lwt_state(a);
struct ioam6_lwt *ilwt_b = ioam6_lwt_state(b);
- return (ilwt_a->mode != ilwt_b->mode ||
+ return (ilwt_a->freq.k != ilwt_b->freq.k ||
+ ilwt_a->freq.n != ilwt_b->freq.n ||
+ ilwt_a->mode != ilwt_b->mode ||
(ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE &&
!ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) ||
trace_a->namespace_id != trace_b->namespace_id);
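
Note on the insertion frequency above: "k over n" means IOAM data is inserted into the first k packets of every window of n packets, decided by one shared atomic counter -- e.g. k=1, n=100 traces one packet per hundred, and the default k=n=1 traces everything. The check in isolation:

	u32 pkt_cnt = atomic_fetch_inc(&ilwt->pkt_cnt);

	if (pkt_cnt % ilwt->freq.n >= ilwt->freq.k)
		goto out;	/* not sampled: forward unchanged */
	/* sampled: fall through and insert the IOAM encap */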
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 463c37dea449..413f66781e50 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -112,7 +112,7 @@ void fib6_update_sernum(struct net *net, struct fib6_info *f6i)
fn = rcu_dereference_protected(f6i->fib6_node,
lockdep_is_held(&f6i->fib6_table->tb6_lock));
if (fn)
- fn->fn_sernum = fib6_new_sernum(net);
+ WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net));
}
/*
@@ -590,12 +590,13 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
spin_unlock_bh(&table->tb6_lock);
if (res > 0) {
cb->args[4] = 1;
- cb->args[5] = w->root->fn_sernum;
+ cb->args[5] = READ_ONCE(w->root->fn_sernum);
}
} else {
- if (cb->args[5] != w->root->fn_sernum) {
+ int sernum = READ_ONCE(w->root->fn_sernum);
+ if (cb->args[5] != sernum) {
/* Begin at the root if the tree changed */
- cb->args[5] = w->root->fn_sernum;
+ cb->args[5] = sernum;
w->state = FWS_INIT;
w->node = w->root;
w->skip = w->count;
@@ -1345,7 +1346,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
/* paired with smp_rmb() in fib6_get_cookie_safe() */
smp_wmb();
while (fn) {
- fn->fn_sernum = sernum;
+ WRITE_ONCE(fn->fn_sernum, sernum);
fn = rcu_dereference_protected(fn->parent,
lockdep_is_held(&rt->fib6_table->tb6_lock));
}
@@ -2174,8 +2175,8 @@ static int fib6_clean_node(struct fib6_walker *w)
};
if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
- w->node->fn_sernum != c->sernum)
- w->node->fn_sernum = c->sernum;
+ READ_ONCE(w->node->fn_sernum) != c->sernum)
+ WRITE_ONCE(w->node->fn_sernum, c->sernum);
if (!c->func) {
WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE);
@@ -2543,7 +2544,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter,
iter->w.state = FWS_INIT;
iter->w.node = iter->w.root;
iter->w.args = iter;
- iter->sernum = iter->w.root->fn_sernum;
+ iter->sernum = READ_ONCE(iter->w.root->fn_sernum);
INIT_LIST_HEAD(&iter->w.lh);
fib6_walker_link(net, &iter->w);
}
@@ -2571,8 +2572,10 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
{
- if (iter->sernum != iter->w.root->fn_sernum) {
- iter->sernum = iter->w.root->fn_sernum;
+ int sernum = READ_ONCE(iter->w.root->fn_sernum);
+
+ if (iter->sernum != sernum) {
+ iter->sernum = sernum;
iter->w.state = FWS_INIT;
iter->w.node = iter->w.root;
WARN_ON(iter->w.skip);
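
Note on the fn_sernum annotations above: walkers read the serial number outside tb6_lock while writers bump it under the lock, so the accesses are marked and, more importantly, read once into a local -- otherwise the comparison and the assignment could observe two different values. The pattern:

	int sernum = READ_ONCE(iter->w.root->fn_sernum);

	if (iter->sernum != sernum) {
		iter->sernum = sernum;	/* tree changed: restart walk */
		iter->w.state = FWS_INIT;
		iter->w.node = iter->w.root;
	}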
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index aa673a6a7e43..18481eb76a0a 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -220,7 +220,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
spin_lock_bh(&ip6_fl_lock);
if (label == 0) {
for (;;) {
- fl->label = htonl(prandom_u32())&IPV6_FLOWLABEL_MASK;
+ fl->label = htonl(get_random_u32())&IPV6_FLOWLABEL_MASK;
if (fl->label) {
lfl = __fl_lookup(net, fl->label);
if (!lfl)
@@ -450,8 +450,10 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
err = -EINVAL;
goto done;
}
- if (fl_shared_exclusive(fl) || fl->opt)
+ if (fl_shared_exclusive(fl) || fl->opt) {
+ WRITE_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl, 1);
static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
+ }
return fl;
done:
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 8753e9cec326..c035a96fba3a 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -360,7 +360,7 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
if (parms->name[0]) {
if (!dev_valid_name(parms->name))
return NULL;
- strlcpy(name, parms->name, IFNAMSIZ);
+ strscpy(name, parms->name, IFNAMSIZ);
} else {
strcpy(name, "ip6gre%d");
}
@@ -382,11 +382,6 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
goto failed_free;
ip6gre_tnl_link_config(nt, 1);
-
- /* Can use a lockless transmit, unless we generate output sequences */
- if (!(nt->parms.o_flags & TUNNEL_SEQ))
- dev->features |= NETIF_F_LLTX;
-
ip6gre_tunnel_link(ign, nt);
return nt;
@@ -403,7 +398,7 @@ static void ip6erspan_tunnel_uninit(struct net_device *dev)
ip6erspan_tunnel_unlink_md(ign, t);
ip6gre_tunnel_unlink(ign, t);
dst_cache_reset(&t->dst_cache);
- dev_put_track(dev, &t->dev_tracker);
+ netdev_put(dev, &t->dev_tracker);
}
static void ip6gre_tunnel_uninit(struct net_device *dev)
@@ -416,7 +411,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
if (ign->fb_tunnel_dev == dev)
WRITE_ONCE(ign->fb_tunnel_dev, NULL);
dst_cache_reset(&t->dst_cache);
- dev_put_track(dev, &t->dev_tracker);
+ netdev_put(dev, &t->dev_tracker);
}
@@ -706,6 +701,33 @@ static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb,
return 0;
}
+static int prepare_ip6gre_xmit_other(struct sk_buff *skb,
+ struct net_device *dev,
+ struct flowi6 *fl6, __u8 *dsfield,
+ int *encap_limit)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ *encap_limit = t->parms.encap_limit;
+
+ memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6));
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ *dsfield = 0;
+ else
+ *dsfield = ip6_tclass(t->parms.flowinfo);
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6->flowi6_mark = skb->mark;
+ else
+ fl6->flowi6_mark = t->parms.fwmark;
+
+ fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
+ return 0;
+}
+
static struct ip_tunnel_info *skb_tunnel_info_txcheck(struct sk_buff *skb)
{
struct ip_tunnel_info *tun_info;
@@ -724,6 +746,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
{
struct ip6_tnl *tunnel = netdev_priv(dev);
__be16 protocol;
+ __be16 flags;
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -733,16 +756,13 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
else
fl6->daddr = tunnel->parms.raddr;
- if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
- return -ENOMEM;
-
/* Push GRE header. */
protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
if (tunnel->parms.collect_md) {
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
- __be16 flags;
+ int tun_hlen;
tun_info = skb_tunnel_info_txcheck(skb);
if (IS_ERR(tun_info) ||
@@ -760,21 +780,27 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
dsfield = key->tos;
flags = key->tun_flags &
(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
- tunnel->tun_hlen = gre_calc_hlen(flags);
+ tun_hlen = gre_calc_hlen(flags);
- gre_build_header(skb, tunnel->tun_hlen,
+ if (skb_cow_head(skb, dev->needed_headroom ?: tun_hlen + tunnel->encap_hlen))
+ return -ENOMEM;
+
+ gre_build_header(skb, tun_hlen,
flags, protocol,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
: 0);
} else {
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
+ if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
+ return -ENOMEM;
+
+ flags = tunnel->parms.o_flags;
- gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
+ gre_build_header(skb, tunnel->tun_hlen, flags,
protocol, tunnel->parms.o_key,
- htonl(tunnel->o_seqno));
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
+ : 0);
}
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
@@ -869,20 +895,18 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
struct ip6_tnl *t = netdev_priv(dev);
int encap_limit = -1;
struct flowi6 fl6;
+ __u8 dsfield = 0;
__u32 mtu;
int err;
- if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- encap_limit = t->parms.encap_limit;
-
- if (!t->parms.collect_md)
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ if (!t->parms.collect_md &&
+ prepare_ip6gre_xmit_other(skb, dev, &fl6, &dsfield, &encap_limit))
+ return -1;
err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
if (err)
return err;
-
- err = __gre6_xmit(skb, dev, 0, &fl6, encap_limit, &mtu, skb->protocol);
+ err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, skb->protocol);
return err;
}
@@ -892,6 +916,7 @@ static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
{
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
+ __be16 payload_protocol;
int ret;
if (!pskb_inet_may_pull(skb))
@@ -900,7 +925,8 @@ static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
goto tx_err;
- switch (skb->protocol) {
+ payload_protocol = skb_protocol(skb, true);
+ switch (payload_protocol) {
case htons(ETH_P_IP):
ret = ip6gre_xmit_ipv4(skb, dev);
break;
@@ -940,7 +966,6 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
__be16 proto;
__u32 mtu;
int nhoff;
- int thoff;
if (!pskb_inet_may_pull(skb))
goto tx_err;
@@ -961,10 +986,16 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
(ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
truncate = true;
- thoff = skb_transport_header(skb) - skb_mac_header(skb);
- if (skb->protocol == htons(ETH_P_IPV6) &&
- (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
- truncate = true;
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ int thoff;
+
+ if (skb_transport_header_was_set(skb))
+ thoff = skb_transport_header(skb) - skb_mac_header(skb);
+ else
+ thoff = nhoff + sizeof(struct ipv6hdr);
+ if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
+ truncate = true;
+ }
if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen))
goto tx_err;
@@ -1052,7 +1083,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
/* Push GRE header. */
proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN)
: htons(ETH_P_ERSPAN2);
- gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++));
+ gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno)));
/* TooBig packet may have updated dst->dev's mtu */
if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
@@ -1144,14 +1175,16 @@ static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu,
dev->needed_headroom = dst_len;
if (set_mtu) {
- dev->mtu = rt->dst.dev->mtu - t_hlen;
+ int mtu = rt->dst.dev->mtu - t_hlen;
+
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- dev->mtu -= 8;
+ mtu -= 8;
if (dev->type == ARPHRD_ETHER)
- dev->mtu -= ETH_HLEN;
+ mtu -= ETH_HLEN;
- if (dev->mtu < IPV6_MIN_MTU)
- dev->mtu = IPV6_MIN_MTU;
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ WRITE_ONCE(dev->mtu, mtu);
}
}
ip6_rt_put(rt);
@@ -1441,26 +1474,23 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
static void ip6gre_tnl_init_features(struct net_device *dev)
{
struct ip6_tnl *nt = netdev_priv(dev);
+ __be16 flags;
- dev->features |= GRE6_FEATURES;
+ dev->features |= GRE6_FEATURES | NETIF_F_LLTX;
dev->hw_features |= GRE6_FEATURES;
- if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
- /* TCP offload with GRE SEQ is not supported, nor
- * can we support 2 levels of outer headers requiring
- * an update.
- */
- if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
- nt->encap.type == TUNNEL_ENCAP_NONE) {
- dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- }
+ flags = nt->parms.o_flags;
- /* Can use a lockless transmit, unless we generate
- * output sequences
- */
- dev->features |= NETIF_F_LLTX;
- }
+ /* TCP offload with GRE SEQ is not supported, nor can we support 2
+ * levels of outer headers requiring an update.
+ */
+ if (flags & TUNNEL_SEQ)
+ return;
+ if (flags & TUNNEL_CSUM && nt->encap.type != TUNNEL_ENCAP_NONE)
+ return;
+
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
}
static int ip6gre_tunnel_init_common(struct net_device *dev)
@@ -1499,7 +1529,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
}
ip6gre_tnl_init_features(dev);
- dev_hold_track(dev, &tunnel->dev_tracker, GFP_KERNEL);
+ netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL);
return 0;
cleanup_dst_cache_init:
@@ -1891,7 +1921,7 @@ static int ip6erspan_tap_init(struct net_device *dev)
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
ip6erspan_tnl_link_config(tunnel, 1);
- dev_hold_track(dev, &tunnel->dev_tracker, GFP_KERNEL);
+ netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL);
return 0;
cleanup_dst_cache_init:
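
Note on the o_seqno change above: NETIF_F_LLTX is now set unconditionally, so two CPUs may run the tunnel's xmit path at the same time; a plain tunnel->o_seqno++ would race, hence the move to atomic_t with atomic_fetch_inc(). The header build reduces to (sketch; key stands for the tunnel or tun_info key):

	__be32 seq = (flags & TUNNEL_SEQ) ?
		     htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0;

	gre_build_header(skb, tun_hlen, flags, protocol, key, seq);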
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 80256717868e..e1ebf5e42ebe 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -45,20 +45,23 @@
#include <net/inet_ecn.h>
#include <net/dst_metadata.h>
-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *));
static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
- void (*edemux)(struct sk_buff *skb);
-
- if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
- const struct inet6_protocol *ipprot;
-
- ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
- if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
- INDIRECT_CALL_2(edemux, tcp_v6_early_demux,
- udp_v6_early_demux, skb);
+ if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
+ !skb_dst(skb) && !skb->sk) {
+ switch (ipv6_hdr(skb)->nexthdr) {
+ case IPPROTO_TCP:
+ if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux))
+ tcp_v6_early_demux(skb);
+ break;
+ case IPPROTO_UDP:
+ if (READ_ONCE(net->ipv4.sysctl_udp_early_demux))
+ udp_v6_early_demux(skb);
+ break;
+ }
}
+
if (!skb_valid_dst(skb))
ip6_route_input(skb);
}
@@ -145,12 +148,14 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
struct net *net)
{
+ enum skb_drop_reason reason;
const struct ipv6hdr *hdr;
u32 pkt_len;
struct inet6_dev *idev;
if (skb->pkt_type == PACKET_OTHERHOST) {
- kfree_skb(skb);
+ dev_core_stats_rx_otherhost_dropped_inc(skb->dev);
+ kfree_skb_reason(skb, SKB_DROP_REASON_OTHERHOST);
return NULL;
}
@@ -160,9 +165,12 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
__IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_IN, skb->len);
+ SKB_DR_SET(reason, NOT_SPECIFIED);
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
!idev || unlikely(idev->cnf.disable_ipv6)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
+ if (idev && unlikely(idev->cnf.disable_ipv6))
+ SKB_DR_SET(reason, IPV6DISABLED);
goto drop;
}
@@ -186,8 +194,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
hdr = ipv6_hdr(skb);
- if (hdr->version != 6)
+ if (hdr->version != 6) {
+ SKB_DR_SET(reason, UNHANDLED_PROTO);
goto err;
+ }
__IP6_ADD_STATS(net, idev,
IPSTATS_MIB_NOECTPKTS +
@@ -225,8 +235,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
if (!ipv6_addr_is_multicast(&hdr->daddr) &&
(skb->pkt_type == PACKET_BROADCAST ||
skb->pkt_type == PACKET_MULTICAST) &&
- idev->cnf.drop_unicast_in_l2_multicast)
+ idev->cnf.drop_unicast_in_l2_multicast) {
+ SKB_DR_SET(reason, UNICAST_IN_L2_MULTICAST);
goto err;
+ }
/* RFC4291 2.7
* Nodes must not originate a packet to a multicast address whose scope
@@ -255,12 +267,11 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
if (pkt_len + sizeof(struct ipv6hdr) > skb->len) {
__IP6_INC_STATS(net,
idev, IPSTATS_MIB_INTRUNCATEDPKTS);
+ SKB_DR_SET(reason, PKT_TOO_SMALL);
goto drop;
}
- if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
- __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
- goto drop;
- }
+ if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
+ goto err;
hdr = ipv6_hdr(skb);
}
@@ -281,9 +292,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
return skb;
err:
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
+ SKB_DR_OR(reason, IP_INHDR);
drop:
rcu_read_unlock();
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return NULL;
}
@@ -353,6 +365,7 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr,
const struct inet6_protocol *ipprot;
struct inet6_dev *idev;
unsigned int nhoff;
+ SKB_DR(reason);
bool raw;
/*
@@ -412,12 +425,16 @@ resubmit_final:
if (ipv6_addr_is_multicast(&hdr->daddr) &&
!ipv6_chk_mcast_addr(dev, &hdr->daddr,
&hdr->saddr) &&
- !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb)))
+ !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb))) {
+ SKB_DR_SET(reason, IP_INADDRERRORS);
goto discard;
+ }
}
if (!(ipprot->flags & INET6_PROTO_NOPOLICY) &&
- !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+ !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ SKB_DR_SET(reason, XFRM_POLICY);
goto discard;
+ }
ret = INDIRECT_CALL_2(ipprot->handler, tcp_v6_rcv, udpv6_rcv,
skb);
@@ -443,8 +460,11 @@ resubmit_final:
IPSTATS_MIB_INUNKNOWNPROTOS);
icmpv6_send(skb, ICMPV6_PARAMPROB,
ICMPV6_UNK_NEXTHDR, nhoff);
+ SKB_DR_SET(reason, IP_NOPROTO);
+ } else {
+ SKB_DR_SET(reason, XFRM_POLICY);
}
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
} else {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS);
consume_skb(skb);
@@ -454,11 +474,12 @@ resubmit_final:
discard:
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
}
static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ skb_clear_delivery_time(skb);
rcu_read_lock();
ip6_protocol_deliver_rcu(net, skb, 0, false);
rcu_read_unlock();
@@ -508,7 +529,7 @@ int ip6_mc_input(struct sk_buff *skb)
/*
* IPv6 multicast router mode is now supported ;)
*/
- if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding &&
+ if (atomic_read(&dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding) &&
!(ipv6_addr_type(&hdr->daddr) &
(IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) &&
likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) {
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index b29e9ba5e113..3ee345672849 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -77,7 +77,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct ipv6hdr *ipv6h;
const struct net_offload *ops;
- int proto;
+ int proto, nexthdr;
struct frag_hdr *fptr;
unsigned int payload_len;
u8 *prevhdr;
@@ -87,6 +87,28 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
bool gso_partial;
skb_reset_network_header(skb);
+ nexthdr = ipv6_has_hopopt_jumbo(skb);
+ if (nexthdr) {
+ const int hophdr_len = sizeof(struct hop_jumbo_hdr);
+ int err;
+
+ err = skb_cow_head(skb, 0);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ /* remove the HBH header.
+ * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
+ */
+ memmove(skb_mac_header(skb) + hophdr_len,
+ skb_mac_header(skb),
+ ETH_HLEN + sizeof(struct ipv6hdr));
+ skb->data += hophdr_len;
+ skb->len -= hophdr_len;
+ skb->network_header += hophdr_len;
+ skb->mac_header += hophdr_len;
+ ipv6h = (struct ipv6hdr *)skb->data;
+ ipv6h->nexthdr = nexthdr;
+ }
nhoff = skb_network_header(skb) - skb_mac_header(skb);
if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
goto out;
@@ -114,6 +136,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
if (likely(ops && ops->callbacks.gso_segment)) {
skb_reset_transport_header(skb);
segs = ops->callbacks.gso_segment(skb, features);
+ if (!segs)
+ skb->network_header = skb_mac_header(skb) + nhoff - skb->head;
}
if (IS_ERR_OR_NULL(segs))
@@ -195,12 +219,9 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
off = skb_gro_offset(skb);
hlen = off + sizeof(*iph);
- iph = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, hlen)) {
- iph = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!iph))
- goto out;
- }
+ iph = skb_gro_header(skb, hlen, off);
+ if (unlikely(!iph))
+ goto out;
skb_set_network_header(skb, off);
skb_gro_pull(skb, sizeof(*iph));
@@ -211,7 +232,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
proto = iph->nexthdr;
ops = rcu_dereference(inet6_offloads[proto]);
if (!ops || !ops->callbacks.gro_receive) {
- __pskb_pull(skb, skb_gro_offset(skb));
+ pskb_pull(skb, skb_gro_offset(skb));
skb_gro_frag0_invalidate(skb);
proto = ipv6_gso_pull_exthdrs(skb, proto);
skb_gro_pull(skb, -skb_transport_offset(skb));
@@ -249,7 +270,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
if ((first_word & htonl(0xF00FFFFF)) ||
!ipv6_addr_equal(&iph->saddr, &iph2->saddr) ||
!ipv6_addr_equal(&iph->daddr, &iph2->daddr) ||
- *(u16 *)&iph->nexthdr != *(u16 *)&iph2->nexthdr) {
+ iph->nexthdr != iph2->nexthdr) {
not_same_flow:
NAPI_GRO_CB(p)->same_flow = 0;
continue;
@@ -260,7 +281,8 @@ not_same_flow:
goto not_same_flow;
}
/* flush if Traffic Class fields are different */
- NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
+ NAPI_GRO_CB(p)->flush |= !!((first_word & htonl(0x0FF00000)) |
+ (__force __be32)(iph->hop_limit ^ iph2->hop_limit));
NAPI_GRO_CB(p)->flush |= flush;
/* If the previous IP ID value was based on an atomic
@@ -317,15 +339,43 @@ static struct sk_buff *ip4ip6_gro_receive(struct list_head *head,
INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct net_offload *ops;
- struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
+ struct ipv6hdr *iph;
int err = -ENOSYS;
+ u32 payload_len;
if (skb->encapsulation) {
skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6));
skb_set_inner_network_header(skb, nhoff);
}
- iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
+ payload_len = skb->len - nhoff - sizeof(*iph);
+ if (unlikely(payload_len > IPV6_MAXPLEN)) {
+ struct hop_jumbo_hdr *hop_jumbo;
+ int hoplen = sizeof(*hop_jumbo);
+
+ /* Move network header left */
+ memmove(skb_mac_header(skb) - hoplen, skb_mac_header(skb),
+ skb->transport_header - skb->mac_header);
+ skb->data -= hoplen;
+ skb->len += hoplen;
+ skb->mac_header -= hoplen;
+ skb->network_header -= hoplen;
+ iph = (struct ipv6hdr *)(skb->data + nhoff);
+ hop_jumbo = (struct hop_jumbo_hdr *)(iph + 1);
+
+ /* Build hop-by-hop options */
+ hop_jumbo->nexthdr = iph->nexthdr;
+ hop_jumbo->hdrlen = 0;
+ hop_jumbo->tlv_type = IPV6_TLV_JUMBO;
+ hop_jumbo->tlv_len = 4;
+ hop_jumbo->jumbo_payload_len = htonl(payload_len + hoplen);
+
+ iph->nexthdr = NEXTHDR_HOP;
+ iph->payload_len = 0;
+ } else {
+ iph = (struct ipv6hdr *)(skb->data + nhoff);
+ iph->payload_len = htons(payload_len);
+ }
nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops);
if (WARN_ON(!ops || !ops->callbacks.gro_complete))
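
Note on the jumbogram handling above (BIG TCP): when GRO aggregates more than IPV6_MAXPLEN (65535) bytes of payload, ipv6_gro_complete() sets the fixed payload_len to 0 and inserts an RFC 2675 Jumbo Payload hop-by-hop option carrying the real length; ipv6_gso_segment() strips it again before segmenting. Layout matching the field accesses above (the comments are mine):

	struct hop_jumbo_hdr {		/* 8 bytes in total */
		u8	nexthdr;	/* protocol following the HBH header */
		u8	hdrlen;		/* 0: (0 + 1) * 8 == 8 bytes */
		u8	tlv_type;	/* IPV6_TLV_JUMBO */
		u8	tlv_len;	/* 4: size of the length field */
		__be32	jumbo_payload_len;
	} __packed;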
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2995f8d89e7e..e19507614f64 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -119,19 +119,21 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
rcu_read_lock_bh();
nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
- if (unlikely(!neigh))
- neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
- if (!IS_ERR(neigh)) {
- sock_confirm_neigh(skb, neigh);
- ret = neigh_output(neigh, skb, false);
- rcu_read_unlock_bh();
- return ret;
+
+ if (unlikely(IS_ERR_OR_NULL(neigh))) {
+ if (unlikely(!neigh))
+ neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
+ if (IS_ERR(neigh)) {
+ rcu_read_unlock_bh();
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
+ return -EINVAL;
+ }
}
+ sock_confirm_neigh(skb, neigh);
+ ret = neigh_output(neigh, skb, false);
rcu_read_unlock_bh();
-
- IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
- kfree_skb(skb);
- return -EINVAL;
+ return ret;
}
static int
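
Note on the ip6_finish_output2() rework above: the common case (neighbour already cached) now falls straight through to neigh_output(), while creation and the drop are confined to the unlikely IS_ERR_OR_NULL() branch. Control flow, condensed (stats and unlock elided):

	neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
	if (unlikely(IS_ERR_OR_NULL(neigh))) {
		if (!neigh)
			neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
		if (IS_ERR(neigh)) {
			kfree_skb_reason(skb,
					 SKB_DROP_REASON_NEIGH_CREATEFAIL);
			return -EINVAL;
		}
	}
	sock_confirm_neigh(skb, neigh);
	return neigh_output(neigh, skb, false);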
@@ -180,7 +182,9 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff
#endif
mtu = ip6_skb_dst_mtu(skb);
- if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
+ if (skb_is_gso(skb) &&
+ !(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) &&
+ !skb_gso_validate_network_len(skb, mtu))
return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
if ((skb->len > mtu && !skb_is_gso(skb)) ||
@@ -198,11 +202,10 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
switch (ret) {
case NET_XMIT_SUCCESS:
- return __ip6_finish_output(net, sk, skb);
case NET_XMIT_CN:
return __ip6_finish_output(net, sk, skb) ? : ret;
default:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
return ret;
}
}
@@ -217,7 +220,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (unlikely(idev->cnf.disable_ipv6)) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED);
return 0;
}
@@ -251,6 +254,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
struct inet6_dev *idev = ip6_dst_idev(dst);
+ struct hop_jumbo_hdr *hop_jumbo;
+ int hoplen = sizeof(*hop_jumbo);
unsigned int head_room;
struct ipv6hdr *hdr;
u8 proto = fl6->flowi6_proto;
@@ -258,7 +263,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
int hlimit = -1;
u32 mtu;
- head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev);
+ head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev);
if (opt)
head_room += opt->opt_nflen + opt->opt_flen;
@@ -281,6 +286,20 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
&fl6->saddr);
}
+ if (unlikely(seg_len > IPV6_MAXPLEN)) {
+ hop_jumbo = skb_push(skb, hoplen);
+
+ hop_jumbo->nexthdr = proto;
+ hop_jumbo->hdrlen = 0;
+ hop_jumbo->tlv_type = IPV6_TLV_JUMBO;
+ hop_jumbo->tlv_len = 4;
+ hop_jumbo->jumbo_payload_len = htonl(seg_len + hoplen);
+
+ proto = IPPROTO_HOPOPTS;
+ seg_len = 0;
+ IP6CB(skb)->flags |= IP6SKB_FAKEJUMBO;
+ }
+
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
hdr = ipv6_hdr(skb);
@@ -440,7 +459,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
}
#endif
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
return dst_output(net, sk, skb);
}
@@ -469,6 +488,7 @@ int ip6_forward(struct sk_buff *skb)
struct inet6_skb_parm *opt = IP6CB(skb);
struct net *net = dev_net(dst->dev);
struct inet6_dev *idev;
+ SKB_DR(reason);
u32 mtu;
idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
@@ -485,7 +505,7 @@ int ip6_forward(struct sk_buff *skb)
goto drop;
if (!net->ipv6.devconf_all->disable_policy &&
- !idev->cnf.disable_policy &&
+ (!idev || !idev->cnf.disable_policy) &&
!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
@@ -518,7 +538,7 @@ int ip6_forward(struct sk_buff *skb)
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
return -ETIMEDOUT;
}
@@ -537,6 +557,7 @@ int ip6_forward(struct sk_buff *skb)
if (!xfrm6_route_forward(skb)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
+ SKB_DR_SET(reason, XFRM_POLICY);
goto drop;
}
dst = skb_dst(skb);
@@ -596,7 +617,7 @@ int ip6_forward(struct sk_buff *skb)
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
__IP6_INC_STATS(net, ip6_dst_idev(dst),
IPSTATS_MIB_FRAGFAILS);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
return -EMSGSIZE;
}
@@ -618,8 +639,9 @@ int ip6_forward(struct sk_buff *skb)
error:
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
+ SKB_DR_SET(reason, IP_INADDRERRORS);
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return -EINVAL;
}
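/* The SKB_DR()/SKB_DR_SET() helpers used above, in miniature
 * (paraphrased from the drop-reason macros of this period): the
 * default of NOT_SPECIFIED keeps the shared "drop:" label valid for
 * paths that never set an explicit reason.
 */
#define SKB_DR(name) \
	enum skb_drop_reason name = SKB_DROP_REASON_NOT_SPECIFIED
#define SKB_DR_SET(name, reason) \
	((name) = SKB_DROP_REASON_##reason)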
@@ -813,6 +835,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
inet6_sk(skb->sk) : NULL;
+ bool mono_delivery_time = skb->mono_delivery_time;
struct ip6_frag_state state;
unsigned int mtu, hlen, nexthdr_offset;
ktime_t tstamp = skb->tstamp;
@@ -903,7 +926,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (iter.frag)
ip6_fraglist_prepare(skb, &iter);
- skb->tstamp = tstamp;
+ skb_set_delivery_time(skb, tstamp, mono_delivery_time);
err = output(net, sk, skb);
if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
@@ -962,7 +985,7 @@ slow_path:
/*
* Put this fragment into the sending queue.
*/
- frag->tstamp = tstamp;
+ skb_set_delivery_time(frag, tstamp, mono_delivery_time);
err = output(net, sk, frag);
if (err)
goto fail;
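/* Why both values are restored per fragment: skb->tstamp alone cannot
 * say whether it holds a future EDT delivery time (monotonic clock)
 * or a wallclock receive stamp, so each fragment needs the flag too.
 * The helper keeps the two in sync (paraphrased from
 * include/linux/skbuff.h of this period):
 */
static inline void skb_set_delivery_time(struct sk_buff *skb,
					 ktime_t kt, bool mono)
{
	skb->tstamp = kt;
	skb->mono_delivery_time = kt && mono;
}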
@@ -1034,8 +1057,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
#ifdef CONFIG_IPV6_SUBTREES
ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
- (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
- (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
+ (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
dst_release(dst);
dst = NULL;
}
@@ -1289,8 +1311,7 @@ struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
fl6.daddr = info->key.u.ipv6.dst;
fl6.saddr = info->key.u.ipv6.src;
prio = info->key.tos;
- fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
- info->key.label);
+ fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label);
dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
NULL);
@@ -1350,11 +1371,16 @@ static void ip6_append_data_mtu(unsigned int *mtu,
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
- struct rt6_info *rt, struct flowi6 *fl6)
+ struct rt6_info *rt)
{
struct ipv6_pinfo *np = inet6_sk(sk);
unsigned int mtu;
- struct ipv6_txoptions *opt = ipc6->opt;
+ struct ipv6_txoptions *nopt, *opt = ipc6->opt;
+
+ /* callers pass dst together with a reference, set it first so
+ * ip6_cork_release() can put it down even in case of an error.
+ */
+ cork->base.dst = &rt->dst;
/*
* setup for corking
@@ -1363,39 +1389,32 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
if (WARN_ON(v6_cork->opt))
return -EINVAL;
- v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
- if (unlikely(!v6_cork->opt))
+ nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
+ if (unlikely(!nopt))
return -ENOBUFS;
- v6_cork->opt->tot_len = sizeof(*opt);
- v6_cork->opt->opt_flen = opt->opt_flen;
- v6_cork->opt->opt_nflen = opt->opt_nflen;
+ nopt->tot_len = sizeof(*opt);
+ nopt->opt_flen = opt->opt_flen;
+ nopt->opt_nflen = opt->opt_nflen;
- v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
- sk->sk_allocation);
- if (opt->dst0opt && !v6_cork->opt->dst0opt)
+ nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation);
+ if (opt->dst0opt && !nopt->dst0opt)
return -ENOBUFS;
- v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
- sk->sk_allocation);
- if (opt->dst1opt && !v6_cork->opt->dst1opt)
+ nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation);
+ if (opt->dst1opt && !nopt->dst1opt)
return -ENOBUFS;
- v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
- sk->sk_allocation);
- if (opt->hopopt && !v6_cork->opt->hopopt)
+ nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation);
+ if (opt->hopopt && !nopt->hopopt)
return -ENOBUFS;
- v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
- sk->sk_allocation);
- if (opt->srcrt && !v6_cork->opt->srcrt)
+ nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation);
+ if (opt->srcrt && !nopt->srcrt)
return -ENOBUFS;
/* need source address above miyazawa */
}
- dst_hold(&rt->dst);
- cork->base.dst = &rt->dst;
- cork->fl.u.ip6 = *fl6;
v6_cork->hop_limit = ipc6->hlimit;
v6_cork->tclass = ipc6->tclass;
if (rt->dst.flags & DST_XFRM_TUNNEL)
@@ -1408,8 +1427,6 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
if (np->frag_size)
mtu = np->frag_size;
}
- if (mtu < IPV6_MIN_MTU)
- return -EINVAL;
cork->base.fragsize = mtu;
cork->base.gso_size = ipc6->gso_size;
cork->base.tx_flags = 0;
@@ -1426,17 +1443,18 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
}
static int __ip6_append_data(struct sock *sk,
- struct flowi6 *fl6,
struct sk_buff_head *queue,
- struct inet_cork *cork,
+ struct inet_cork_full *cork_full,
struct inet6_cork *v6_cork,
struct page_frag *pfrag,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
- void *from, int length, int transhdrlen,
+ void *from, size_t length, int transhdrlen,
unsigned int flags, struct ipcm6_cookie *ipc6)
{
struct sk_buff *skb, *skb_prev = NULL;
+ struct inet_cork *cork = &cork_full->base;
+ struct flowi6 *fl6 = &cork_full->fl.u.ip6;
unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
struct ubuf_info *uarg = NULL;
int exthdrlen = 0;
@@ -1445,6 +1463,7 @@ static int __ip6_append_data(struct sock *sk,
int copy;
int err;
int offset = 0;
+ bool zc = false;
u32 tskey = 0;
struct rt6_info *rt = (struct rt6_info *)cork->dst;
struct ipv6_txoptions *opt = v6_cork->opt;
@@ -1465,14 +1484,12 @@ static int __ip6_append_data(struct sock *sk,
if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
- tskey = sk->sk_tskey++;
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
(opt ? opt->opt_nflen : 0);
- maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
- sizeof(struct frag_hdr);
headersize = sizeof(struct ipv6hdr) +
(opt ? opt->opt_flen + opt->opt_nflen : 0) +
@@ -1480,6 +1497,13 @@ static int __ip6_append_data(struct sock *sk,
sizeof(struct frag_hdr) : 0) +
rt->rt6i_nfheader_len;
+ if (mtu <= fragheaderlen ||
+ ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
+ goto emsgsize;
+
+ maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
+ sizeof(struct frag_hdr);
+
/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
* the first fragment
*/
@@ -1488,6 +1512,7 @@ static int __ip6_append_data(struct sock *sk,
if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
(sk->sk_protocol == IPPROTO_UDP ||
+ sk->sk_protocol == IPPROTO_ICMPV6 ||
sk->sk_protocol == IPPROTO_RAW)) {
ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
sizeof(struct ipv6hdr));
@@ -1516,17 +1541,35 @@ emsgsize:
rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
csummode = CHECKSUM_PARTIAL;
- if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
- uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
- if (!uarg)
- return -ENOBUFS;
- extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
- if (rt->dst.dev->features & NETIF_F_SG &&
- csummode == CHECKSUM_PARTIAL) {
- paged = true;
- } else {
- uarg->zerocopy = 0;
- skb_zcopy_set(skb, uarg, &extra_uref);
+ if ((flags & MSG_ZEROCOPY) && length) {
+ struct msghdr *msg = from;
+
+ if (getfrag == ip_generic_getfrag && msg->msg_ubuf) {
+ if (skb_zcopy(skb) && msg->msg_ubuf != skb_zcopy(skb))
+ return -EINVAL;
+
+ /* Leave uarg NULL if we can't zerocopy; callers should
+ * be able to handle it.
+ */
+ if ((rt->dst.dev->features & NETIF_F_SG) &&
+ csummode == CHECKSUM_PARTIAL) {
+ paged = true;
+ zc = true;
+ uarg = msg->msg_ubuf;
+ }
+ } else if (sock_flag(sk, SOCK_ZEROCOPY)) {
+ uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
+ if (!uarg)
+ return -ENOBUFS;
+ extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
+ if (rt->dst.dev->features & NETIF_F_SG &&
+ csummode == CHECKSUM_PARTIAL) {
+ paged = true;
+ zc = true;
+ } else {
+ uarg_to_msgzc(uarg)->zerocopy = 0;
+ skb_zcopy_set(skb, uarg, &extra_uref);
+ }
}
}
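/* The two sources of a ubuf above, seen from the caller's side:
 * in-kernel senders such as io_uring pass msg->msg_ubuf directly,
 * while classic MSG_ZEROCOPY users opt in per socket first. Minimal
 * userspace sketch of the latter path (assumes a connected socket fd):
 */
int one = 1;

setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one));
send(fd, buf, len, MSG_ZEROCOPY);
/* completions arrive later on the error queue, e.g.
 * recvmsg(fd, &msg, MSG_ERRQUEUE) with SO_EE_ORIGIN_ZEROCOPY notices
 */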
@@ -1606,8 +1649,8 @@ alloc_new_skb:
!(rt->dst.dev->features & NETIF_F_SG)))
alloclen = fraglen;
else {
- alloclen = min_t(int, fraglen, MAX_HEADER);
- pagedlen = fraglen - alloclen;
+ alloclen = fragheaderlen + transhdrlen;
+ pagedlen = datalen - transhdrlen;
}
alloclen += alloc_extra;
@@ -1717,13 +1760,14 @@ alloc_new_skb:
err = -EFAULT;
goto error;
}
- } else if (!uarg || !uarg->zerocopy) {
+ } else if (!zc) {
int i = skb_shinfo(skb)->nr_frags;
err = -ENOMEM;
if (!sk_page_frag_refill(sk, pfrag))
goto error;
+ skb_zcopy_downgrade_managed(skb);
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
err = -EMSGSIZE;
@@ -1773,7 +1817,7 @@ error:
int ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb),
- void *from, int length, int transhdrlen,
+ void *from, size_t length, int transhdrlen,
struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
struct rt6_info *rt, unsigned int flags)
{
@@ -1788,34 +1832,46 @@ int ip6_append_data(struct sock *sk,
/*
* setup for corking
*/
+ dst_hold(&rt->dst);
err = ip6_setup_cork(sk, &inet->cork, &np->cork,
- ipc6, rt, fl6);
+ ipc6, rt);
if (err)
return err;
+ inet->cork.fl.u.ip6 = *fl6;
exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
length += exthdrlen;
transhdrlen += exthdrlen;
} else {
- fl6 = &inet->cork.fl.u.ip6;
transhdrlen = 0;
}
- return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
+ return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
&np->cork, sk_page_frag(sk), getfrag,
from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
+static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork)
+{
+ struct dst_entry *dst = cork->base.dst;
+
+ cork->base.dst = NULL;
+ cork->base.flags &= ~IPCORK_ALLFRAG;
+ skb_dst_set(skb, dst);
+}
+
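/* Reference flow with the new helper, as a sketch: the cork now owns
 * the single dst reference taken up front by ip6_append_data(), and
 * __ip6_make_skb() moves it to the final skb instead of cloning:
 */
dst_hold(&rt->dst);		/* ip6_append_data(): once per cork     */
cork->base.dst = &rt->dst;	/* ip6_setup_cork(): cork owns the ref  */
ip6_cork_steal_dst(skb, cork);	/* __ip6_make_skb(): move, no atomics   */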
static void ip6_cork_release(struct inet_cork_full *cork,
struct inet6_cork *v6_cork)
{
if (v6_cork->opt) {
- kfree(v6_cork->opt->dst0opt);
- kfree(v6_cork->opt->dst1opt);
- kfree(v6_cork->opt->hopopt);
- kfree(v6_cork->opt->srcrt);
- kfree(v6_cork->opt);
+ struct ipv6_txoptions *opt = v6_cork->opt;
+
+ kfree(opt->dst0opt);
+ kfree(opt->dst1opt);
+ kfree(opt->hopopt);
+ kfree(opt->srcrt);
+ kfree(opt);
v6_cork->opt = NULL;
}
@@ -1824,7 +1880,6 @@ static void ip6_cork_release(struct inet_cork_full *cork,
cork->base.dst = NULL;
cork->base.flags &= ~IPCORK_ALLFRAG;
}
- memset(&cork->fl, 0, sizeof(cork->fl));
}
struct sk_buff *__ip6_make_skb(struct sock *sk,
@@ -1834,7 +1889,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
{
struct sk_buff *skb, *tmp_skb;
struct sk_buff **tail_skb;
- struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
+ struct in6_addr *final_dst;
struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk);
struct ipv6hdr *hdr;
@@ -1864,9 +1919,9 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
/* Allow local fragmentation. */
skb->ignore_df = ip6_sk_ignore_df(sk);
-
- *final_dst = fl6->daddr;
__skb_pull(skb, skb_network_header_len(skb));
+
+ final_dst = &fl6->daddr;
if (opt && opt->opt_flen)
ipv6_push_frag_opts(skb, opt, &proto);
if (opt && opt->opt_nflen)
@@ -1886,10 +1941,9 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
skb->priority = sk->sk_priority;
skb->mark = cork->base.mark;
-
skb->tstamp = cork->base.transmit_time;
- skb_dst_set(skb, dst_clone(&rt->dst));
+ ip6_cork_steal_dst(skb, cork);
IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
if (proto == IPPROTO_ICMPV6) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
@@ -1960,27 +2014,27 @@ EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
struct sk_buff *ip6_make_skb(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
- void *from, int length, int transhdrlen,
- struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
- struct rt6_info *rt, unsigned int flags,
- struct inet_cork_full *cork)
+ void *from, size_t length, int transhdrlen,
+ struct ipcm6_cookie *ipc6, struct rt6_info *rt,
+ unsigned int flags, struct inet_cork_full *cork)
{
struct inet6_cork v6_cork;
struct sk_buff_head queue;
int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
int err;
- if (flags & MSG_PROBE)
+ if (flags & MSG_PROBE) {
+ dst_release(&rt->dst);
return NULL;
+ }
__skb_queue_head_init(&queue);
cork->base.flags = 0;
cork->base.addr = 0;
cork->base.opt = NULL;
- cork->base.dst = NULL;
v6_cork.opt = NULL;
- err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
+ err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt);
if (err) {
ip6_cork_release(cork, &v6_cork);
return ERR_PTR(err);
@@ -1988,7 +2042,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
if (ipc6->dontfrag < 0)
ipc6->dontfrag = inet6_sk(sk)->dontfrag;
- err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
+ err = __ip6_append_data(sk, &queue, cork, &v6_cork,
&current->task_frag, getfrag, from,
length + exthdrlen, transhdrlen + exthdrlen,
flags, ipc6);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index fe786df4f849..2fb4c6ad7243 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -257,8 +257,6 @@ static int ip6_tnl_create2(struct net_device *dev)
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
int err;
- t = netdev_priv(dev);
-
dev->rtnl_link_ops = &ip6_link_ops;
err = register_netdevice(dev);
if (err < 0)
@@ -295,7 +293,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
if (p->name[0]) {
if (!dev_valid_name(p->name))
goto failed;
- strlcpy(name, p->name, IFNAMSIZ);
+ strscpy(name, p->name, IFNAMSIZ);
} else {
sprintf(name, "ip6tnl%%d");
}
@@ -383,7 +381,7 @@ ip6_tnl_dev_uninit(struct net_device *dev)
else
ip6_tnl_unlink(ip6n, t);
dst_cache_reset(&t->dst_cache);
- dev_put_track(dev, &t->dev_tracker);
+ netdev_put(dev, &t->dev_tracker);
}
/**
@@ -798,7 +796,6 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
struct sk_buff *skb),
bool log_ecn_err)
{
- struct pcpu_sw_netstats *tstats;
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
int err;
@@ -858,11 +855,7 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
}
}
- tstats = this_cpu_ptr(tunnel->dev->tstats);
- u64_stats_update_begin(&tstats->syncp);
- tstats->rx_packets++;
- tstats->rx_bytes += skb->len;
- u64_stats_update_end(&tstats->syncp);
+ dev_sw_netstats_rx_add(tunnel->dev, skb->len);
skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
@@ -1036,14 +1029,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
0, IFA_F_TENTATIVE)))
- pr_warn("%s xmit: Local address not yet configured!\n",
- p->name);
+ pr_warn_ratelimited("%s xmit: Local address not yet configured!\n",
+ p->name);
else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
!ipv6_addr_is_multicast(raddr) &&
unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
true, 0, IFA_F_TENTATIVE)))
- pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
- p->name);
+ pr_warn_ratelimited("%s xmit: Routing loop! Remote address found on this node!\n",
+ p->name);
else
ret = 1;
rcu_read_unlock();
@@ -1087,10 +1080,13 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? ETH_HLEN : 0;
unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
unsigned int max_headroom = psh_hlen;
+ __be16 payload_protocol;
bool use_cache = false;
u8 hop_limit;
int err = -1;
+ payload_protocol = skb_protocol(skb, true);
+
if (t->parms.collect_md) {
hop_limit = skb_tunnel_info(skb)->key.ttl;
goto route_lookup;
@@ -1100,7 +1096,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
/* NBMA tunnel */
if (ipv6_addr_any(&t->parms.raddr)) {
- if (skb->protocol == htons(ETH_P_IPV6)) {
+ if (payload_protocol == htons(ETH_P_IPV6)) {
struct in6_addr *addr6;
struct neighbour *neigh;
int addr_type;
@@ -1121,6 +1117,14 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
neigh_release(neigh);
+ } else if (payload_protocol == htons(ETH_P_IP)) {
+ const struct rtable *rt = skb_rtable(skb);
+
+ if (!rt)
+ goto tx_err_link_failure;
+
+ if (rt->rt_gw_family == AF_INET6)
+ memcpy(&fl6->daddr, &rt->rt_gw6, sizeof(fl6->daddr));
}
} else if (t->parms.proto != 0 && !(t->parms.flags &
(IP6_TNL_F_USE_ORIG_TCLASS |
@@ -1224,9 +1228,9 @@ route_lookup:
skb_dst_set(skb, dst);
if (hop_limit == 0) {
- if (skb->protocol == htons(ETH_P_IP))
+ if (payload_protocol == htons(ETH_P_IP))
hop_limit = ip_hdr(skb)->ttl;
- else if (skb->protocol == htons(ETH_P_IPV6))
+ else if (payload_protocol == htons(ETH_P_IPV6))
hop_limit = ipv6_hdr(skb)->hop_limit;
else
hop_limit = ip6_dst_hoplimit(dst);
@@ -1446,8 +1450,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
struct net_device *tdev = NULL;
struct __ip6_tnl_parm *p = &t->parms;
struct flowi6 *fl6 = &t->fl.u.ip6;
- unsigned int mtu;
int t_hlen;
+ int mtu;
__dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
@@ -1494,12 +1498,13 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
dev->hard_header_len = tdev->hard_header_len + t_hlen;
mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU);
- dev->mtu = mtu - t_hlen;
+ mtu = mtu - t_hlen;
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- dev->mtu -= 8;
+ mtu -= 8;
- if (dev->mtu < IPV6_MIN_MTU)
- dev->mtu = IPV6_MIN_MTU;
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ WRITE_ONCE(dev->mtu, mtu);
}
}
}
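/* Plausible reason mtu switched from unsigned int to int: with a
 * small tdev->mtu, the subtractions could wrap an unsigned value past
 * zero and slip under the IPV6_MIN_MTU clamp. Signed arithmetic keeps
 * the clamp effective (condensed from the hunk above):
 */
int mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU);

mtu -= t_hlen;			/* may legitimately go negative now  */
if (mtu < IPV6_MIN_MTU)
	mtu = IPV6_MIN_MTU;	/* ... and is caught here either way */
WRITE_ONCE(dev->mtu, mtu);	/* publish once, for lockless readers */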
@@ -1513,7 +1518,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
* ip6_tnl_change() updates the tunnel parameters
**/
-static int
+static void
ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
{
t->parms.laddr = p->laddr;
@@ -1527,29 +1532,25 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
t->parms.fwmark = p->fwmark;
dst_cache_reset(&t->dst_cache);
ip6_tnl_link_config(t);
- return 0;
}
-static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
+static void ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
{
struct net *net = t->net;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
- int err;
ip6_tnl_unlink(ip6n, t);
synchronize_net();
- err = ip6_tnl_change(t, p);
+ ip6_tnl_change(t, p);
ip6_tnl_link(ip6n, t);
netdev_state_change(t->dev);
- return err;
}
-static int ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
+static void ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
{
/* for default tnl0 device allow to change only the proto */
t->parms.proto = p->proto;
netdev_state_change(t->dev);
- return 0;
}
static void
@@ -1663,9 +1664,9 @@ ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
} else
t = netdev_priv(dev);
if (dev == ip6n->fb_tnl_dev)
- err = ip6_tnl0_update(t, &p1);
+ ip6_tnl0_update(t, &p1);
else
- err = ip6_tnl_update(t, &p1);
+ ip6_tnl_update(t, &p1);
}
if (!IS_ERR(t)) {
err = 0;
@@ -1883,7 +1884,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
dev->min_mtu = ETH_MIN_MTU;
dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len;
- dev_hold_track(dev, &t->dev_tracker, GFP_KERNEL);
+ netdev_hold(dev, &t->dev_tracker, GFP_KERNEL);
return 0;
destroy_dst:
@@ -1988,39 +1989,6 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[],
parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
}
-static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[],
- struct ip_tunnel_encap *ipencap)
-{
- bool ret = false;
-
- memset(ipencap, 0, sizeof(*ipencap));
-
- if (!data)
- return ret;
-
- if (data[IFLA_IPTUN_ENCAP_TYPE]) {
- ret = true;
- ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
- }
-
- if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
- ret = true;
- ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
- }
-
- if (data[IFLA_IPTUN_ENCAP_SPORT]) {
- ret = true;
- ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
- }
-
- if (data[IFLA_IPTUN_ENCAP_DPORT]) {
- ret = true;
- ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
- }
-
- return ret;
-}
-
static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
@@ -2033,7 +2001,7 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
nt = netdev_priv(dev);
- if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
+ if (ip_tunnel_netlink_encap_parms(data, &ipencap)) {
err = ip6_tnl_encap_setup(nt, &ipencap);
if (err < 0)
return err;
@@ -2070,7 +2038,7 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
if (dev == ip6n->fb_tnl_dev)
return -EINVAL;
- if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
+ if (ip_tunnel_netlink_encap_parms(data, &ipencap)) {
int err = ip6_tnl_encap_setup(t, &ipencap);
if (err < 0)
@@ -2087,7 +2055,8 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
} else
t = netdev_priv(dev);
- return ip6_tnl_update(t, &p);
+ ip6_tnl_update(t, &p);
+ return 0;
}
static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head)
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 3a434d75925c..151337d7f67b 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -154,7 +154,7 @@ vti6_tnl_link(struct vti6_net *ip6n, struct ip6_tnl *t)
{
struct ip6_tnl __rcu **tp = vti6_tnl_bucket(ip6n, &t->parms);
- rcu_assign_pointer(t->next , rtnl_dereference(*tp));
+ rcu_assign_pointer(t->next, rtnl_dereference(*tp));
rcu_assign_pointer(*tp, t);
}
@@ -211,7 +211,7 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p
if (p->name[0]) {
if (!dev_valid_name(p->name))
goto failed;
- strlcpy(name, p->name, IFNAMSIZ);
+ strscpy(name, p->name, IFNAMSIZ);
} else {
sprintf(name, "ip6_vti%%d");
}
@@ -293,7 +293,7 @@ static void vti6_dev_uninit(struct net_device *dev)
RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
else
vti6_tnl_unlink(ip6n, t);
- dev_put_track(dev, &t->dev_tracker);
+ netdev_put(dev, &t->dev_tracker);
}
static int vti6_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi,
@@ -936,7 +936,7 @@ static inline int vti6_dev_init_gen(struct net_device *dev)
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
- dev_hold_track(dev, &t->dev_tracker, GFP_KERNEL);
+ netdev_hold(dev, &t->dev_tracker, GFP_KERNEL);
return 0;
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7cf73e60e619..facdc78a43e5 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -62,7 +62,12 @@ struct ip6mr_result {
Note that the changes are semaphored via rtnl_lock.
*/
-static DEFINE_RWLOCK(mrt_lock);
+static DEFINE_SPINLOCK(mrt_lock);
+
+static struct net_device *vif_dev_read(const struct vif_device *vif)
+{
+ return rcu_dereference(vif->dev);
+}
/* Multicast router control variables */
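/* Typical reader under the new scheme (sketch, hypothetical caller):
 * vif->dev is now RCU-protected, so lookups need no lock at all.
 */
rcu_read_lock();
vif_dev = vif_dev_read(vif);	/* rcu_dereference(vif->dev) */
if (vif_dev)
	name = vif_dev->name;	/* valid until rcu_read_unlock() */
rcu_read_unlock();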
@@ -85,11 +90,13 @@ static void ip6mr_free_table(struct mr_table *mrt);
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
struct net_device *dev, struct sk_buff *skb,
struct mfc6_cache *cache);
-static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert);
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd);
-static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
+static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
+static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
static void mroute_clean_tables(struct mr_table *mrt, int flags);
@@ -243,7 +250,9 @@ static int __net_init ip6mr_rules_init(struct net *net)
return 0;
err2:
+ rtnl_lock();
ip6mr_free_table(mrt);
+ rtnl_unlock();
err1:
fib_rules_unregister(ops);
return err;
@@ -253,13 +262,12 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
{
struct mr_table *mrt, *next;
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
list_del(&mrt->list);
ip6mr_free_table(mrt);
}
fib_rules_unregister(net->ipv6.mr6_rules_ops);
- rtnl_unlock();
}
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
@@ -316,10 +324,9 @@ static int __net_init ip6mr_rules_init(struct net *net)
static void __net_exit ip6mr_rules_exit(struct net *net)
{
- rtnl_lock();
+ ASSERT_RTNL();
ip6mr_free_table(net->ipv6.mrt6);
net->ipv6.mrt6 = NULL;
- rtnl_unlock();
}
static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
@@ -398,7 +405,7 @@ static void ip6mr_free_table(struct mr_table *mrt)
*/
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(mrt_lock)
+ __acquires(RCU)
{
struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
@@ -410,14 +417,14 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
iter->mrt = mrt;
- read_lock(&mrt_lock);
+ rcu_read_lock();
return mr_vif_seq_start(seq, pos);
}
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
- __releases(mrt_lock)
+ __releases(RCU)
{
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
}
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
@@ -430,7 +437,11 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
"Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
} else {
const struct vif_device *vif = v;
- const char *name = vif->dev ? vif->dev->name : "none";
+ const struct net_device *vif_dev;
+ const char *name;
+
+ vif_dev = vif_dev_read(vif);
+ name = vif_dev ? vif_dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
@@ -549,13 +560,11 @@ static int pim6_rcv(struct sk_buff *skb)
if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
goto drop;
- reg_vif_num = mrt->mroute_reg_vif_num;
- read_lock(&mrt_lock);
+ /* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */
+ reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
if (reg_vif_num >= 0)
- reg_dev = mrt->vif_table[reg_vif_num].dev;
- dev_hold(reg_dev);
- read_unlock(&mrt_lock);
+ reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]);
if (!reg_dev)
goto drop;
@@ -570,7 +579,6 @@ static int pim6_rcv(struct sk_buff *skb)
netif_rx(skb);
- dev_put(reg_dev);
return 0;
drop:
kfree_skb(skb);
@@ -600,11 +608,12 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
goto tx_err;
- read_lock(&mrt_lock);
dev->stats.tx_bytes += skb->len;
dev->stats.tx_packets++;
- ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
- read_unlock(&mrt_lock);
+ rcu_read_lock();
+ ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
+ MRT6MSG_WHOLEPKT);
+ rcu_read_unlock();
kfree_skb(skb);
return NETDEV_TX_OK;
@@ -670,10 +679,11 @@ failure:
static int call_ip6mr_vif_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct vif_device *vif,
+ struct net_device *vif_dev,
mifi_t vif_index, u32 tb_id)
{
return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
- vif, vif_index, tb_id,
+ vif, vif_dev, vif_index, tb_id,
&net->ipv6.ipmr_seq);
}
@@ -698,23 +708,21 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
v = &mrt->vif_table[vifi];
- if (VIF_EXISTS(mrt, vifi))
- call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
- FIB_EVENT_VIF_DEL, v, vifi,
- mrt->id);
-
- write_lock_bh(&mrt_lock);
- dev = v->dev;
- v->dev = NULL;
-
- if (!dev) {
- write_unlock_bh(&mrt_lock);
+ dev = rtnl_dereference(v->dev);
+ if (!dev)
return -EADDRNOTAVAIL;
- }
+
+ call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
+ FIB_EVENT_VIF_DEL, v, dev,
+ vifi, mrt->id);
+ spin_lock(&mrt_lock);
+ RCU_INIT_POINTER(v->dev, NULL);
#ifdef CONFIG_IPV6_PIMSM_V2
- if (vifi == mrt->mroute_reg_vif_num)
- mrt->mroute_reg_vif_num = -1;
+ if (vifi == mrt->mroute_reg_vif_num) {
+ /* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */
+ WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
+ }
#endif
if (vifi + 1 == mrt->maxvif) {
@@ -723,16 +731,16 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
if (VIF_EXISTS(mrt, tmp))
break;
}
- mrt->maxvif = tmp + 1;
+ WRITE_ONCE(mrt->maxvif, tmp + 1);
}
- write_unlock_bh(&mrt_lock);
+ spin_unlock(&mrt_lock);
dev_set_allmulti(dev, -1);
in6_dev = __in6_dev_get(dev);
if (in6_dev) {
- in6_dev->cnf.mc_forwarding--;
+ atomic_dec(&in6_dev->cnf.mc_forwarding);
inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
dev->ifindex, &in6_dev->cnf);
@@ -741,7 +749,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
if ((v->flags & MIFF_REGISTER) && !notify)
unregister_netdevice_queue(dev, head);
- dev_put_track(dev, &v->dev_tracker);
+ netdev_put(dev, &v->dev_tracker);
return 0;
}
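/* The "Pairs with" comments in this hunk describe a plain
 * publish/consume split (sketch): writers stay serialized by
 * mrt_lock, while readers run under RCU with no lock.
 */

/* writer side, mif6_add()/mif6_delete(), under spin_lock(&mrt_lock): */
WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);

/* lockless reader side, pim6_rcv()/reg_vif_xmit(): */
reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
if (reg_vif_num >= 0)
	reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]);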
@@ -826,7 +834,7 @@ static void ipmr_expire_process(struct timer_list *t)
spin_unlock(&mfc_unres_lock);
}
-/* Fill oifs list. It is called under write locked mrt_lock. */
+/* Fill oifs list. It is called with mrt_lock held. */
static void ip6mr_update_thresholds(struct mr_table *mrt,
struct mr_mfc *cache,
@@ -900,7 +908,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt,
in6_dev = __in6_dev_get(dev);
if (in6_dev) {
- in6_dev->cnf.mc_forwarding++;
+ atomic_inc(&in6_dev->cnf.mc_forwarding);
inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
dev->ifindex, &in6_dev->cnf);
@@ -912,18 +920,18 @@ static int mif6_add(struct net *net, struct mr_table *mrt,
MIFF_REGISTER);
/* And finish update writing critical data */
- write_lock_bh(&mrt_lock);
- v->dev = dev;
+ spin_lock(&mrt_lock);
+ rcu_assign_pointer(v->dev, dev);
netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
#ifdef CONFIG_IPV6_PIMSM_V2
if (v->flags & MIFF_REGISTER)
- mrt->mroute_reg_vif_num = vifi;
+ WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
#endif
if (vifi + 1 > mrt->maxvif)
- mrt->maxvif = vifi + 1;
- write_unlock_bh(&mrt_lock);
+ WRITE_ONCE(mrt->maxvif, vifi + 1);
+ spin_unlock(&mrt_lock);
call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
- v, vifi, mrt->id);
+ v, dev, vifi, mrt->id);
return 0;
}
@@ -1020,18 +1028,21 @@ static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
}
rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
- } else
+ } else {
+ rcu_read_lock();
ip6_mr_forward(net, mrt, skb->dev, skb, c);
+ rcu_read_unlock();
+ }
}
}
/*
* Bounce a cache query up to pim6sd and netlink.
*
- * Called under mrt_lock.
+ * Called under rcu_read_lock()
*/
-static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert)
{
struct sock *mroute6_sk;
@@ -1040,7 +1051,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
int ret;
#ifdef CONFIG_IPV6_PIMSM_V2
- if (assert == MRT6MSG_WHOLEPKT)
+ if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE)
skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
+sizeof(*msg));
else
@@ -1056,7 +1067,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
skb->ip_summed = CHECKSUM_UNNECESSARY;
#ifdef CONFIG_IPV6_PIMSM_V2
- if (assert == MRT6MSG_WHOLEPKT) {
+ if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) {
/* Ugly, but we have no choice with this interface.
Duplicate old header, fix length etc.
And all this only to mangle msg->im6_msgtype and
@@ -1068,8 +1079,11 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
skb_reset_transport_header(skb);
msg = (struct mrt6msg *)skb_transport_header(skb);
msg->im6_mbz = 0;
- msg->im6_msgtype = MRT6MSG_WHOLEPKT;
- msg->im6_mif = mrt->mroute_reg_vif_num;
+ msg->im6_msgtype = assert;
+ if (assert == MRT6MSG_WRMIFWHOLE)
+ msg->im6_mif = mifi;
+ else
+ msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num);
msg->im6_pad = 0;
msg->im6_src = ipv6_hdr(pkt)->saddr;
msg->im6_dst = ipv6_hdr(pkt)->daddr;
@@ -1104,10 +1118,8 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- rcu_read_lock();
mroute6_sk = rcu_dereference(mrt->mroute_sk);
if (!mroute6_sk) {
- rcu_read_unlock();
kfree_skb(skb);
return -EINVAL;
}
@@ -1116,7 +1128,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
/* Deliver to user space multicast routing algorithms */
ret = sock_queue_rcv_skb(mroute6_sk, skb);
- rcu_read_unlock();
+
if (ret < 0) {
net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
kfree_skb(skb);
@@ -1240,7 +1252,7 @@ static int ip6mr_device_event(struct notifier_block *this,
ip6mr_for_each_table(mrt, net) {
v = &mrt->vif_table[0];
for (ct = 0; ct < mrt->maxvif; ct++, v++) {
- if (v->dev == dev)
+ if (rcu_access_pointer(v->dev) == dev)
mif6_delete(mrt, ct, 1, NULL);
}
}
@@ -1259,7 +1271,7 @@ static int ip6mr_dump(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack)
{
return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
- ip6mr_mr_table_iter, &mrt_lock, extack);
+ ip6mr_mr_table_iter, extack);
}
static struct notifier_block ip6_mr_notifier = {
@@ -1323,7 +1335,9 @@ static int __net_init ip6mr_net_init(struct net *net)
proc_cache_fail:
remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
+ rtnl_lock();
ip6mr_rules_exit(net);
+ rtnl_unlock();
#endif
ip6mr_rules_fail:
ip6mr_notifier_exit(net);
@@ -1336,13 +1350,23 @@ static void __net_exit ip6mr_net_exit(struct net *net)
remove_proc_entry("ip6_mr_cache", net->proc_net);
remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
- ip6mr_rules_exit(net);
ip6mr_notifier_exit(net);
}
+static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list)
+{
+ struct net *net;
+
+ rtnl_lock();
+ list_for_each_entry(net, net_list, exit_list)
+ ip6mr_rules_exit(net);
+ rtnl_unlock();
+}
+
static struct pernet_operations ip6mr_net_ops = {
.init = ip6mr_net_init,
.exit = ip6mr_net_exit,
+ .exit_batch = ip6mr_net_exit_batch,
};
int __init ip6_mr_init(void)
@@ -1371,7 +1395,7 @@ int __init ip6_mr_init(void)
}
#endif
err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
- NULL, ip6mr_rtm_dumproute, 0);
+ ip6mr_rtm_getroute, ip6mr_rtm_dumproute, 0);
if (err == 0)
return 0;
@@ -1422,12 +1446,12 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
&mfc->mf6cc_mcastgrp.sin6_addr, parent);
rcu_read_unlock();
if (c) {
- write_lock_bh(&mrt_lock);
+ spin_lock(&mrt_lock);
c->_c.mfc_parent = mfc->mf6cc_parent;
ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
c->_c.mfc_flags |= MFC_STATIC;
- write_unlock_bh(&mrt_lock);
+ spin_unlock(&mrt_lock);
call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
c, mrt->id);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
@@ -1545,15 +1569,15 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
struct net *net = sock_net(sk);
rtnl_lock();
- write_lock_bh(&mrt_lock);
+ spin_lock(&mrt_lock);
if (rtnl_dereference(mrt->mroute_sk)) {
err = -EADDRINUSE;
} else {
rcu_assign_pointer(mrt->mroute_sk, sk);
sock_set_flag(sk, SOCK_RCU_FREE);
- net->ipv6.devconf_all->mc_forwarding++;
+ atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
}
- write_unlock_bh(&mrt_lock);
+ spin_unlock(&mrt_lock);
if (!err)
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -1567,25 +1591,30 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
int ip6mr_sk_done(struct sock *sk)
{
- int err = -EACCES;
struct net *net = sock_net(sk);
+ struct ipv6_devconf *devconf;
struct mr_table *mrt;
+ int err = -EACCES;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
return err;
+ devconf = net->ipv6.devconf_all;
+ if (!devconf || !atomic_read(&devconf->mc_forwarding))
+ return err;
+
rtnl_lock();
ip6mr_for_each_table(mrt, net) {
if (sk == rtnl_dereference(mrt->mroute_sk)) {
- write_lock_bh(&mrt_lock);
+ spin_lock(&mrt_lock);
RCU_INIT_POINTER(mrt->mroute_sk, NULL);
/* Note that mroute_sk had SOCK_RCU_FREE set,
* so the RCU grace period before sk freeing
* is guaranteed by sk_destruct()
*/
- net->ipv6.devconf_all->mc_forwarding--;
- write_unlock_bh(&mrt_lock);
+ atomic_dec(&devconf->mc_forwarding);
+ spin_unlock(&mrt_lock);
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
NETCONFA_IFINDEX_ALL,
@@ -1740,18 +1769,22 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
#ifdef CONFIG_IPV6_PIMSM_V2
case MRT6_PIM:
{
+ bool do_wrmifwhole;
int v;
if (optlen != sizeof(v))
return -EINVAL;
if (copy_from_sockptr(&v, optval, sizeof(v)))
return -EFAULT;
+
+ do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE);
v = !!v;
rtnl_lock();
ret = 0;
if (v != mrt->mroute_do_pim) {
mrt->mroute_do_pim = v;
mrt->mroute_do_assert = v;
+ mrt->mroute_do_wrvifwhole = do_wrmifwhole;
}
rtnl_unlock();
return ret;
@@ -1797,8 +1830,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
* Getsock opt support for the multicast routing system.
*/
-int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
- int __user *optlen)
+int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
+ sockptr_t optlen)
{
int olr;
int val;
@@ -1829,16 +1862,16 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
return -ENOPROTOOPT;
}
- if (get_user(olr, optlen))
+ if (copy_from_sockptr(&olr, optlen, sizeof(int)))
return -EFAULT;
olr = min_t(int, olr, sizeof(int));
if (olr < 0)
return -EINVAL;
- if (put_user(olr, optlen))
+ if (copy_to_sockptr(optlen, &olr, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, &val, olr))
+ if (copy_to_sockptr(optval, &val, olr))
return -EFAULT;
return 0;
}
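/* What makes this conversion possible: sockptr_t is a tagged pointer
 * that lets one copy helper serve both user and kernel buffers
 * (paraphrased from include/linux/sockptr.h):
 */
typedef struct {
	union {
		void		*kernel;
		void __user	*user;
	};
	bool		is_kernel : 1;
} sockptr_t;
/* copy_from_sockptr()/copy_to_sockptr() pick copy_*_user() or
 * memcpy() based on is_kernel, so kernel-buffer callers such as BPF
 * can share the [gs]etsockopt code paths converted above.
 */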
@@ -1867,20 +1900,20 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
- read_lock(&mrt_lock);
+ rcu_read_lock();
vif = &mrt->vif_table[vr.mifi];
if (VIF_EXISTS(mrt, vr.mifi)) {
- vr.icount = vif->pkt_in;
- vr.ocount = vif->pkt_out;
- vr.ibytes = vif->bytes_in;
- vr.obytes = vif->bytes_out;
- read_unlock(&mrt_lock);
+ vr.icount = READ_ONCE(vif->pkt_in);
+ vr.ocount = READ_ONCE(vif->pkt_out);
+ vr.ibytes = READ_ONCE(vif->bytes_in);
+ vr.obytes = READ_ONCE(vif->bytes_out);
+ rcu_read_unlock();
if (copy_to_user(arg, &vr, sizeof(vr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
case SIOCGETSGCNT_IN6:
if (copy_from_user(&sr, arg, sizeof(sr)))
@@ -1942,20 +1975,20 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
- read_lock(&mrt_lock);
+ rcu_read_lock();
vif = &mrt->vif_table[vr.mifi];
if (VIF_EXISTS(mrt, vr.mifi)) {
- vr.icount = vif->pkt_in;
- vr.ocount = vif->pkt_out;
- vr.ibytes = vif->bytes_in;
- vr.obytes = vif->bytes_out;
- read_unlock(&mrt_lock);
+ vr.icount = READ_ONCE(vif->pkt_in);
+ vr.ocount = READ_ONCE(vif->pkt_out);
+ vr.ibytes = READ_ONCE(vif->bytes_in);
+ vr.obytes = READ_ONCE(vif->bytes_out);
+ rcu_read_unlock();
if (copy_to_user(arg, &vr, sizeof(vr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
case SIOCGETSGCNT_IN6:
if (copy_from_user(&sr, arg, sizeof(sr)))
@@ -1997,21 +2030,22 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, int vifi)
{
- struct ipv6hdr *ipv6h;
struct vif_device *vif = &mrt->vif_table[vifi];
- struct net_device *dev;
+ struct net_device *vif_dev;
+ struct ipv6hdr *ipv6h;
struct dst_entry *dst;
struct flowi6 fl6;
- if (!vif->dev)
+ vif_dev = vif_dev_read(vif);
+ if (!vif_dev)
goto out_free;
#ifdef CONFIG_IPV6_PIMSM_V2
if (vif->flags & MIFF_REGISTER) {
- vif->pkt_out++;
- vif->bytes_out += skb->len;
- vif->dev->stats.tx_bytes += skb->len;
- vif->dev->stats.tx_packets++;
+ WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
+ WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
+ vif_dev->stats.tx_bytes += skb->len;
+ vif_dev->stats.tx_packets++;
ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
goto out_free;
}
@@ -2044,14 +2078,13 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
* not mrouter) cannot join to more than one interface - it will
* result in receiving multiple packets.
*/
- dev = vif->dev;
- skb->dev = dev;
- vif->pkt_out++;
- vif->bytes_out += skb->len;
+ skb->dev = vif_dev;
+ WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
+ WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
/* We are about to write */
/* XXX: extension headers? */
- if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
+ if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev)))
goto out_free;
ipv6h = ipv6_hdr(skb);
@@ -2060,7 +2093,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
IP6CB(skb)->flags |= IP6SKB_FORWARDED;
return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
- net, NULL, skb, skb->dev, dev,
+ net, NULL, skb, skb->dev, vif_dev,
ip6mr_forward2_finish);
out_free:
@@ -2068,17 +2101,20 @@ out_free:
return 0;
}
+/* Called with rcu_read_lock() */
static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
int ct;
- for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
- if (mrt->vif_table[ct].dev == dev)
+ /* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */
+ for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
+ if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
break;
}
return ct;
}
+/* Called under rcu_read_lock() */
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
struct net_device *dev, struct sk_buff *skb,
struct mfc6_cache *c)
@@ -2098,20 +2134,16 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
/* For an (*,G) entry, we only check that the incoming
* interface is part of the static tree.
*/
- rcu_read_lock();
cache_proxy = mr_mfc_find_any_parent(mrt, vif);
if (cache_proxy &&
- cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
- rcu_read_unlock();
+ cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
goto forward;
- }
- rcu_read_unlock();
}
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
- if (mrt->vif_table[vif].dev != dev) {
+ if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
@@ -2127,13 +2159,18 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
MFC_ASSERT_THRESH)) {
c->_c.mfc_un.res.last_assert = jiffies;
ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
+ if (mrt->mroute_do_wrvifwhole)
+ ip6mr_cache_report(mrt, skb, true_vifi,
+ MRT6MSG_WRMIFWHOLE);
}
goto dont_forward;
}
forward:
- mrt->vif_table[vif].pkt_in++;
- mrt->vif_table[vif].bytes_in += skb->len;
+ WRITE_ONCE(mrt->vif_table[vif].pkt_in,
+ mrt->vif_table[vif].pkt_in + 1);
+ WRITE_ONCE(mrt->vif_table[vif].bytes_in,
+ mrt->vif_table[vif].bytes_in + skb->len);
/*
* Forward the frame
@@ -2211,7 +2248,6 @@ int ip6_mr_input(struct sk_buff *skb)
return err;
}
- read_lock(&mrt_lock);
cache = ip6mr_cache_find(mrt,
&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
if (!cache) {
@@ -2232,19 +2268,15 @@ int ip6_mr_input(struct sk_buff *skb)
vif = ip6mr_find_vif(mrt, dev);
if (vif >= 0) {
int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
- read_unlock(&mrt_lock);
return err;
}
- read_unlock(&mrt_lock);
kfree_skb(skb);
return -ENODEV;
}
ip6_mr_forward(net, mrt, dev, skb, cache);
- read_unlock(&mrt_lock);
-
return 0;
}
@@ -2260,7 +2292,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
if (!mrt)
return -ENOENT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
if (!cache && skb->dev) {
int vif = ip6mr_find_vif(mrt, skb->dev);
@@ -2278,14 +2310,14 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
dev = skb->dev;
if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -ENODEV;
}
/* really correct? */
skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
if (!skb2) {
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -ENOMEM;
}
@@ -2308,13 +2340,13 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
iph->daddr = rt->rt6i_dst.addr;
err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return err;
}
err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return err;
}
@@ -2433,7 +2465,7 @@ static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
return len;
}
-static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
+static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
{
struct net *net = read_pnet(&mrt->net);
struct nlmsghdr *nlh;
@@ -2481,6 +2513,95 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
}
+static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = {
+ [RTA_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
+ [RTA_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
+ [RTA_TABLE] = { .type = NLA_U32 },
+};
+
+static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ struct rtmsg *rtm;
+ int err;
+
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy,
+ extack);
+ if (err)
+ return err;
+
+ rtm = nlmsg_data(nlh);
+ if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
+ (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
+ rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
+ rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Invalid values in header for multicast route get request");
+ return -EINVAL;
+ }
+
+ if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
+ (tb[RTA_DST] && !rtm->rtm_dst_len)) {
+ NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(in_skb->sk);
+ struct in6_addr src = {}, grp = {};
+ struct nlattr *tb[RTA_MAX + 1];
+ struct mfc6_cache *cache;
+ struct mr_table *mrt;
+ struct sk_buff *skb;
+ u32 tableid;
+ int err;
+
+ err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
+ if (err < 0)
+ return err;
+
+ if (tb[RTA_SRC])
+ src = nla_get_in6_addr(tb[RTA_SRC]);
+ if (tb[RTA_DST])
+ grp = nla_get_in6_addr(tb[RTA_DST]);
+ tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
+
+ mrt = ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
+ if (!mrt) {
+ NL_SET_ERR_MSG_MOD(extack, "MR table does not exist");
+ return -ENOENT;
+ }
+
+ /* entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ cache = ip6mr_cache_find(mrt, &src, &grp);
+ rcu_read_unlock();
+ if (!cache) {
+ NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found");
+ return -ENOENT;
+ }
+
+ skb = nlmsg_new(mr6_msgsize(false, mrt->maxvif), GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0);
+ if (err < 0) {
+ kfree_skb(skb);
+ return err;
+ }
+
+ return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+}
+
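/* A request this new handler accepts, from userspace (sketch; a raw
 * AF_NETLINK/NETLINK_ROUTE socket is assumed, and nlmsg_len must be
 * bumped as attributes are appended). rtm_family selects the IP6MR
 * doit, and per the validation above both prefix lengths must be
 * either 0 or 128:
 */
struct {
	struct nlmsghdr	nlh;
	struct rtmsg	rtm;
	char		attrs[2 * NLA_HDRLEN + 2 * sizeof(struct in6_addr)];
} req = {
	.nlh.nlmsg_len	 = NLMSG_LENGTH(sizeof(struct rtmsg)),
	.nlh.nlmsg_type	 = RTM_GETROUTE,
	.nlh.nlmsg_flags = NLM_F_REQUEST,
	.rtm.rtm_family	 = RTNL_FAMILY_IP6MR,
	.rtm.rtm_src_len = 128,		/* then append RTA_SRC */
	.rtm.rtm_dst_len = 128,		/* then append RTA_DST */
};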
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 15f984be3570..72d4858dec18 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -136,7 +136,8 @@ out:
return err;
}
-static int ipcomp6_init_state(struct xfrm_state *x)
+static int ipcomp6_init_state(struct xfrm_state *x,
+ struct netlink_ext_ack *extack)
{
int err = -EINVAL;
@@ -148,17 +149,20 @@ static int ipcomp6_init_state(struct xfrm_state *x)
x->props.header_len += sizeof(struct ipv6hdr);
break;
default:
+ NL_SET_ERR_MSG(extack, "Unsupported XFRM mode for IPcomp");
goto out;
}
- err = ipcomp_init_state(x);
+ err = ipcomp_init_state(x, extack);
if (err)
goto out;
if (x->props.mode == XFRM_MODE_TUNNEL) {
err = ipcomp6_tunnel_attach(x);
- if (err)
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Kernel error: failed to initialize the associated state");
goto out;
+ }
}
err = 0;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a733803a710c..532f4478c884 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -210,7 +210,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < GROUP_FILTER_SIZE(0))
return -EINVAL;
- if (optlen > sysctl_optmem_max)
+ if (optlen > READ_ONCE(sysctl_optmem_max))
return -ENOBUFS;
gsf = memdup_sockptr(optval, optlen);
@@ -244,7 +244,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < size0)
return -EINVAL;
- if (optlen > sysctl_optmem_max - 4)
+ if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
return -ENOBUFS;
p = kmalloc(optlen + 4, GFP_KERNEL);
@@ -327,7 +327,7 @@ static int ipv6_set_opt_hdr(struct sock *sk, int optname, sockptr_t optval,
int err;
/* hop-by-hop / destination options are privileged option */
- if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
+ if (optname != IPV6_RTHDR && !sockopt_ns_capable(net->user_ns, CAP_NET_RAW))
return -EPERM;
/* remove any sticky options header with a zero option
@@ -391,8 +391,8 @@ sticky_done:
return err;
}
-static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
- sockptr_t optval, unsigned int optlen)
+int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk);
@@ -417,7 +417,13 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (needs_rtnl)
rtnl_lock();
- lock_sock(sk);
+ sockopt_lock_sock(sk);
+
+ /* Another thread has converted the socket into IPv4 with
+ * IPV6_ADDRFORM concurrently.
+ */
+ if (unlikely(sk->sk_family != AF_INET6))
+ goto unlock;
switch (optname) {
@@ -425,9 +431,6 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optlen < sizeof(int))
goto e_inval;
if (val == PF_INET) {
- struct ipv6_txoptions *opt;
- struct sk_buff *pktopt;
-
if (sk->sk_type == SOCK_RAW)
break;
@@ -458,7 +461,6 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
break;
}
- fl6_free_socklist(sk);
__ipv6_sock_mc_close(sk);
__ipv6_sock_ac_close(sk);
@@ -475,8 +477,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
sock_prot_inuse_add(net, sk->sk_prot, -1);
sock_prot_inuse_add(net, &tcp_prot, 1);
- sk->sk_prot = &tcp_prot;
- icsk->icsk_af_ops = &ipv4_specific;
+ /* Paired with READ_ONCE(sk->sk_prot) in inet6_stream_ops */
+ WRITE_ONCE(sk->sk_prot, &tcp_prot);
+ /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
+ WRITE_ONCE(icsk->icsk_af_ops, &ipv4_specific);
sk->sk_socket->ops = &inet_stream_ops;
sk->sk_family = PF_INET;
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
@@ -489,18 +493,19 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
sock_prot_inuse_add(net, sk->sk_prot, -1);
sock_prot_inuse_add(net, prot, 1);
- sk->sk_prot = prot;
+ /* Paired with READ_ONCE(sk->sk_prot) in inet6_dgram_ops */
+ WRITE_ONCE(sk->sk_prot, prot);
sk->sk_socket->ops = &inet_dgram_ops;
sk->sk_family = PF_INET;
}
- opt = xchg((__force struct ipv6_txoptions **)&np->opt,
- NULL);
- if (opt) {
- atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
- txopt_put(opt);
- }
- pktopt = xchg(&np->pktoptions, NULL);
- kfree_skb(pktopt);
+
+ /* Disable all options so that no more memory is allocated,
+ * but there is still a race. See the lockless path
+ * in udpv6_sendmsg() and ipv6_local_rxpmtu().
+ */
+ np->rxopt.all = 0;
+
+ inet6_cleanup_sock(sk);
/*
* ... and add it to the refcnt debug socks count
@@ -632,8 +637,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_TRANSPARENT:
- if (valbool && !ns_capable(net->user_ns, CAP_NET_RAW) &&
- !ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+ if (valbool && !sockopt_ns_capable(net->user_ns, CAP_NET_RAW) &&
+ !sockopt_ns_capable(net->user_ns, CAP_NET_ADMIN)) {
retv = -EPERM;
break;
}
@@ -944,7 +949,7 @@ done:
case IPV6_IPSEC_POLICY:
case IPV6_XFRM_POLICY:
retv = -EPERM;
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ if (!sockopt_ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
retv = xfrm_user_policy(sk, optname, optval, optlen);
break;
@@ -992,14 +997,15 @@ done:
break;
}
- release_sock(sk);
+unlock:
+ sockopt_release_sock(sk);
if (needs_rtnl)
rtnl_unlock();
return retv;
e_inval:
- release_sock(sk);
+ sockopt_release_sock(sk);
if (needs_rtnl)
rtnl_unlock();
return -EINVAL;
@@ -1028,7 +1034,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
EXPORT_SYMBOL(ipv6_setsockopt);
static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
- int optname, char __user *optval, int len)
+ int optname, sockptr_t optval, int len)
{
struct ipv6_opt_hdr *hdr;
@@ -1056,56 +1062,53 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
return 0;
len = min_t(unsigned int, len, ipv6_optlen(hdr));
- if (copy_to_user(optval, hdr, len))
+ if (copy_to_sockptr(optval, hdr, len))
return -EFAULT;
return len;
}
-static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
- int __user *optlen, int len)
+static int ipv6_get_msfilter(struct sock *sk, sockptr_t optval,
+ sockptr_t optlen, int len)
{
const int size0 = offsetof(struct group_filter, gf_slist_flex);
- struct group_filter __user *p = optval;
struct group_filter gsf;
int num;
int err;
if (len < size0)
return -EINVAL;
- if (copy_from_user(&gsf, p, size0))
+ if (copy_from_sockptr(&gsf, optval, size0))
return -EFAULT;
if (gsf.gf_group.ss_family != AF_INET6)
return -EADDRNOTAVAIL;
num = gsf.gf_numsrc;
- lock_sock(sk);
- err = ip6_mc_msfget(sk, &gsf, p->gf_slist_flex);
+ sockopt_lock_sock(sk);
+ err = ip6_mc_msfget(sk, &gsf, optval, size0);
if (!err) {
if (num > gsf.gf_numsrc)
num = gsf.gf_numsrc;
- if (put_user(GROUP_FILTER_SIZE(num), optlen) ||
- copy_to_user(p, &gsf, size0))
+ len = GROUP_FILTER_SIZE(num);
+ if (copy_to_sockptr(optlen, &len, sizeof(int)) ||
+ copy_to_sockptr(optval, &gsf, size0))
err = -EFAULT;
}
- release_sock(sk);
+ sockopt_release_sock(sk);
return err;
}
-static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval,
- int __user *optlen)
+static int compat_ipv6_get_msfilter(struct sock *sk, sockptr_t optval,
+ sockptr_t optlen, int len)
{
const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
- struct compat_group_filter __user *p = optval;
struct compat_group_filter gf32;
struct group_filter gf;
- int len, err;
+ int err;
int num;
- if (get_user(len, optlen))
- return -EFAULT;
if (len < size0)
return -EINVAL;
- if (copy_from_user(&gf32, p, size0))
+ if (copy_from_sockptr(&gf32, optval, size0))
return -EFAULT;
gf.gf_interface = gf32.gf_interface;
gf.gf_fmode = gf32.gf_fmode;
@@ -1115,23 +1118,25 @@ static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval,
if (gf.gf_group.ss_family != AF_INET6)
return -EADDRNOTAVAIL;
- lock_sock(sk);
- err = ip6_mc_msfget(sk, &gf, p->gf_slist_flex);
- release_sock(sk);
+ sockopt_lock_sock(sk);
+ err = ip6_mc_msfget(sk, &gf, optval, size0);
+ sockopt_release_sock(sk);
if (err)
return err;
if (num > gf.gf_numsrc)
num = gf.gf_numsrc;
len = GROUP_FILTER_SIZE(num) - (sizeof(gf)-sizeof(gf32));
- if (put_user(len, optlen) ||
- put_user(gf.gf_fmode, &p->gf_fmode) ||
- put_user(gf.gf_numsrc, &p->gf_numsrc))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)) ||
+ copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_fmode),
+ &gf.gf_fmode, sizeof(gf32.gf_fmode)) ||
+ copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_numsrc),
+ &gf.gf_numsrc, sizeof(gf32.gf_numsrc)))
return -EFAULT;
return 0;
}
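
[Editor's note] The two msfilter helpers above show the point of the sockptr_t conversion: one code path now serves both user pointers (plain getsockopt()) and kernel pointers (e.g. BPF getsockopt hooks), with the copy helpers dispatching on the pointer's address space. Abridged sketch of the machinery from include/linux/sockptr.h (trimmed here for illustration):

	typedef struct {
		union {
			void		*kernel;
			void __user	*user;
		};
		bool		is_kernel : 1;
	} sockptr_t;

	/* Used by compat_ipv6_get_msfilter() above to write one struct
	 * field at a time into either address space. */
	static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset,
						 const void *src, size_t size)
	{
		if (!dst.is_kernel)
			return copy_to_user(dst.user + offset, src, size) ?
			       -EFAULT : 0;
		memcpy(dst.kernel + offset, src, size);
		return 0;
	}
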
-static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen, unsigned int flags)
+int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, sockptr_t optlen)
{
struct ipv6_pinfo *np = inet6_sk(sk);
int len;
@@ -1140,7 +1145,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
if (ip6_mroute_opt(optname))
return ip6_mroute_getsockopt(sk, optname, optval, optlen);
- if (get_user(len, optlen))
+ if (copy_from_sockptr(&len, optlen, sizeof(int)))
return -EFAULT;
switch (optname) {
case IPV6_ADDRFORM:
@@ -1154,7 +1159,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case MCAST_MSFILTER:
if (in_compat_syscall())
- return compat_ipv6_get_msfilter(sk, optval, optlen);
+ return compat_ipv6_get_msfilter(sk, optval, optlen, len);
return ipv6_get_msfilter(sk, optval, optlen, len);
case IPV6_2292PKTOPTIONS:
{
@@ -1164,16 +1169,21 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
if (sk->sk_type != SOCK_STREAM)
return -ENOPROTOOPT;
- msg.msg_control_user = optval;
+ if (optval.is_kernel) {
+ msg.msg_control_is_user = false;
+ msg.msg_control = optval.kernel;
+ } else {
+ msg.msg_control_is_user = true;
+ msg.msg_control_user = optval.user;
+ }
msg.msg_controllen = len;
- msg.msg_flags = flags;
- msg.msg_control_is_user = true;
+ msg.msg_flags = 0;
- lock_sock(sk);
+ sockopt_lock_sock(sk);
skb = np->pktoptions;
if (skb)
ip6_datagram_recv_ctl(sk, &msg, skb);
- release_sock(sk);
+ sockopt_release_sock(sk);
if (!skb) {
if (np->rxopt.bits.rxinfo) {
struct in6_pktinfo src_info;
@@ -1210,7 +1220,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
}
len -= msg.msg_controllen;
- return put_user(len, optlen);
+ return copy_to_sockptr(optlen, &len, sizeof(int));
}
case IPV6_MTU:
{
@@ -1262,15 +1272,15 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
{
struct ipv6_txoptions *opt;
- lock_sock(sk);
+ sockopt_lock_sock(sk);
opt = rcu_dereference_protected(np->opt,
lockdep_sock_is_held(sk));
len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len);
- release_sock(sk);
+ sockopt_release_sock(sk);
/* check if ipv6_getsockopt_sticky() returned an error code */
if (len < 0)
return len;
- return put_user(len, optlen);
+ return copy_to_sockptr(optlen, &len, sizeof(int));
}
case IPV6_RECVHOPOPTS:
@@ -1324,9 +1334,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
if (!mtuinfo.ip6m_mtu)
return -ENOTCONN;
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, &mtuinfo, len))
+ if (copy_to_sockptr(optval, &mtuinfo, len))
return -EFAULT;
return 0;
@@ -1403,7 +1413,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
if (len < sizeof(freq))
return -EINVAL;
- if (copy_from_user(&freq, optval, sizeof(freq)))
+ if (copy_from_sockptr(&freq, optval, sizeof(freq)))
return -EFAULT;
if (freq.flr_action != IPV6_FL_A_GET)
@@ -1418,9 +1428,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
if (val < 0)
return val;
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, &freq, len))
+ if (copy_to_sockptr(optval, &freq, len))
return -EFAULT;
return 0;
@@ -1472,9 +1482,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
return -ENOPROTOOPT;
}
len = min_t(unsigned int, sizeof(int), len);
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, &val, len))
+ if (copy_to_sockptr(optval, &val, len))
return -EFAULT;
return 0;
}
@@ -1490,7 +1500,8 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
if (level != SOL_IPV6)
return -ENOPROTOOPT;
- err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, 0);
+ err = do_ipv6_getsockopt(sk, level, optname,
+ USER_SOCKPTR(optval), USER_SOCKPTR(optlen));
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index bed8155508c8..7860383295d8 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -460,10 +460,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
newpsl->sl_addr[i] = psl->sl_addr[i];
atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
&sk->sk_omem_alloc);
- kfree_rcu(psl, rcu);
}
+ rcu_assign_pointer(pmc->sflist, newpsl);
+ kfree_rcu(psl, rcu);
psl = newpsl;
- rcu_assign_pointer(pmc->sflist, psl);
}
rv = 1; /* > 0 for insert logic below if sl_count is 0 */
for (i = 0; i < psl->sl_count; i++) {
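
[Editor's note] The reordering in the hunk above matters for RCU correctness: the new source list is published with rcu_assign_pointer() before the old one is handed to kfree_rcu(), so a concurrent reader under rcu_read_lock() always sees either the old or the new list, never memory about to be recycled. The general publish-then-free pattern, as a minimal sketch:

	/* Publish the replacement first ... */
	rcu_assign_pointer(pmc->sflist, newpsl);
	/* ... then defer freeing the old list until after a grace
	 * period, when no reader can still be traversing it. */
	kfree_rcu(psl, rcu);
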
@@ -565,12 +565,12 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
psl->sl_count, psl->sl_addr, 0);
atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
&sk->sk_omem_alloc);
- kfree_rcu(psl, rcu);
} else {
ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
}
- mutex_unlock(&idev->mc_lock);
rcu_assign_pointer(pmc->sflist, newpsl);
+ mutex_unlock(&idev->mc_lock);
+ kfree_rcu(psl, rcu);
pmc->sfmode = gsf->gf_fmode;
err = 0;
done:
@@ -580,7 +580,7 @@ done:
}
int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
- struct sockaddr_storage __user *p)
+ sockptr_t optval, size_t ss_offset)
{
struct ipv6_pinfo *inet6 = inet6_sk(sk);
const struct in6_addr *group;
@@ -612,8 +612,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
gsf->gf_numsrc = count;
-
- for (i = 0; i < copycount; i++, p++) {
+ for (i = 0; i < copycount; i++) {
struct sockaddr_in6 *psin6;
struct sockaddr_storage ss;
@@ -621,8 +620,9 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
memset(&ss, 0, sizeof(ss));
psin6->sin6_family = AF_INET6;
psin6->sin6_addr = psl->sl_addr[i];
- if (copy_to_user(p, &ss, sizeof(ss)))
+ if (copy_to_sockptr_offset(optval, ss_offset, &ss, sizeof(ss)))
return -EFAULT;
+ ss_offset += sizeof(ss);
}
return 0;
}
@@ -1050,7 +1050,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
/* called with mc_lock */
static void mld_gq_start_work(struct inet6_dev *idev)
{
- unsigned long tv = prandom_u32() % idev->mc_maxdelay;
+ unsigned long tv = prandom_u32_max(idev->mc_maxdelay);
idev->mc_gq_running = 1;
if (!mod_delayed_work(mld_wq, &idev->mc_gq_work, tv + 2))
@@ -1068,7 +1068,7 @@ static void mld_gq_stop_work(struct inet6_dev *idev)
/* called with mc_lock */
static void mld_ifc_start_work(struct inet6_dev *idev, unsigned long delay)
{
- unsigned long tv = prandom_u32() % delay;
+ unsigned long tv = prandom_u32_max(delay);
if (!mod_delayed_work(mld_wq, &idev->mc_ifc_work, tv + 2))
in6_dev_hold(idev);
@@ -1085,7 +1085,7 @@ static void mld_ifc_stop_work(struct inet6_dev *idev)
/* called with mc_lock */
static void mld_dad_start_work(struct inet6_dev *idev, unsigned long delay)
{
- unsigned long tv = prandom_u32() % delay;
+ unsigned long tv = prandom_u32_max(delay);
if (!mod_delayed_work(mld_wq, &idev->mc_dad_work, tv + 2))
in6_dev_hold(idev);
@@ -1130,7 +1130,7 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
}
if (delay >= resptime)
- delay = prandom_u32() % resptime;
+ delay = prandom_u32_max(resptime);
if (!mod_delayed_work(mld_wq, &ma->mca_work, delay))
refcount_inc(&ma->mca_refcnt);
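
[Editor's note] The prandom_u32() % n -> prandom_u32_max(n) conversions in this file keep the same [0, n) range but replace the modulo with a multiply-shift reduction, avoiding a division on these timer paths. Sketch of the helper, matching its include/linux/prandom.h definition at the time:

	static inline u32 prandom_u32_max(u32 ep_ro)
	{
		/* (u64)rand * ep_ro lies in [0, ep_ro << 32), so its top
		 * 32 bits are a (nearly) uniform value in [0, ep_ro). */
		return (u32)(((u64)prandom_u32() * ep_ro) >> 32);
	}
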
@@ -1371,27 +1371,23 @@ static void mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
}
/* called with rcu_read_lock() */
-int igmp6_event_query(struct sk_buff *skb)
+void igmp6_event_query(struct sk_buff *skb)
{
struct inet6_dev *idev = __in6_dev_get(skb->dev);
- if (!idev)
- return -EINVAL;
-
- if (idev->dead) {
- kfree_skb(skb);
- return -ENODEV;
- }
+ if (!idev || idev->dead)
+ goto out;
spin_lock_bh(&idev->mc_query_lock);
if (skb_queue_len(&idev->mc_query_queue) < MLD_MAX_SKBS) {
__skb_queue_tail(&idev->mc_query_queue, skb);
if (!mod_delayed_work(mld_wq, &idev->mc_query_work, 0))
in6_dev_hold(idev);
+ skb = NULL;
}
spin_unlock_bh(&idev->mc_query_lock);
-
- return 0;
+out:
+ kfree_skb(skb);
}
static void __mld_query_work(struct sk_buff *skb)
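
[Editor's note] The void conversion above also tightens skb ownership: once the packet is queued, the local pointer is NULLed so the shared exit's kfree_skb() becomes a no-op (kfree_skb(NULL) is safe), and one exit path covers the queued, dropped and no-idev cases alike. The consume-or-free idiom, as a generic sketch (enqueue_ok and queue are hypothetical stand-ins):

	if (enqueue_ok) {
		__skb_queue_tail(queue, skb);
		skb = NULL;		/* the queue now owns the skb */
	}
	kfree_skb(skb);			/* no-op if the skb was queued */

The same shape is used for igmp6_event_report() below.
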
@@ -1526,7 +1522,6 @@ static void mld_query_work(struct work_struct *work)
if (++cnt >= MLD_MAX_QUEUE) {
rework = true;
- schedule_delayed_work(&idev->mc_query_work, 0);
break;
}
}
@@ -1537,32 +1532,30 @@ static void mld_query_work(struct work_struct *work)
__mld_query_work(skb);
mutex_unlock(&idev->mc_lock);
- if (!rework)
- in6_dev_put(idev);
+ if (rework && queue_delayed_work(mld_wq, &idev->mc_query_work, 0))
+ return;
+
+ in6_dev_put(idev);
}
/* called with rcu_read_lock() */
-int igmp6_event_report(struct sk_buff *skb)
+void igmp6_event_report(struct sk_buff *skb)
{
struct inet6_dev *idev = __in6_dev_get(skb->dev);
- if (!idev)
- return -EINVAL;
-
- if (idev->dead) {
- kfree_skb(skb);
- return -ENODEV;
- }
+ if (!idev || idev->dead)
+ goto out;
spin_lock_bh(&idev->mc_report_lock);
if (skb_queue_len(&idev->mc_report_queue) < MLD_MAX_SKBS) {
__skb_queue_tail(&idev->mc_report_queue, skb);
if (!mod_delayed_work(mld_wq, &idev->mc_report_work, 0))
in6_dev_hold(idev);
+ skb = NULL;
}
spin_unlock_bh(&idev->mc_report_lock);
-
- return 0;
+out:
+ kfree_skb(skb);
}
static void __mld_report_work(struct sk_buff *skb)
@@ -1632,7 +1625,6 @@ static void mld_report_work(struct work_struct *work)
if (++cnt >= MLD_MAX_QUEUE) {
rework = true;
- schedule_delayed_work(&idev->mc_report_work, 0);
break;
}
}
@@ -1643,8 +1635,10 @@ static void mld_report_work(struct work_struct *work)
__mld_report_work(skb);
mutex_unlock(&idev->mc_lock);
- if (!rework)
- in6_dev_put(idev);
+ if (rework && queue_delayed_work(mld_wq, &idev->mc_report_work, 0))
+ return;
+
+ in6_dev_put(idev);
}
static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
@@ -1759,7 +1753,7 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
skb_reserve(skb, hlen);
skb_tailroom_reserve(skb, mtu, tlen);
- if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
+ if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
* use unspecified address as the source address
* when a valid link-local address is not available.
@@ -2580,7 +2574,7 @@ static void igmp6_join_group(struct ifmcaddr6 *ma)
igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
- delay = prandom_u32() % unsolicited_report_interval(ma->idev);
+ delay = prandom_u32_max(unsolicited_report_interval(ma->idev));
if (cancel_delayed_work(&ma->mca_work)) {
refcount_dec(&ma->mca_refcnt);
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index aeb35d26e474..83d2a8be263f 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -247,15 +247,14 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb,
return err;
}
-static int mip6_destopt_init_state(struct xfrm_state *x)
+static int mip6_destopt_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack)
{
if (x->id.spi) {
- pr_info("%s: spi is not 0: %u\n", __func__, x->id.spi);
+ NL_SET_ERR_MSG(extack, "SPI must be 0");
return -EINVAL;
}
if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
- pr_info("%s: state's mode is not %u: %u\n",
- __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
+ NL_SET_ERR_MSG(extack, "XFRM mode must be XFRM_MODE_ROUTEOPTIMIZATION");
return -EINVAL;
}
@@ -333,15 +332,14 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
return 0;
}
-static int mip6_rthdr_init_state(struct xfrm_state *x)
+static int mip6_rthdr_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack)
{
if (x->id.spi) {
- pr_info("%s: spi is not 0: %u\n", __func__, x->id.spi);
+ NL_SET_ERR_MSG(extack, "SPI must be 0");
return -EINVAL;
}
if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
- pr_info("%s: state's mode is not %u: %u\n",
- __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
+ NL_SET_ERR_MSG(extack, "XFRM mode must be XFRM_MODE_ROUTEOPTIMIZATION");
return -EINVAL;
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f03b597e4121..3a553494ff16 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -128,6 +128,7 @@ struct neigh_table nd_tbl = {
[NEIGH_VAR_RETRANS_TIME] = ND_RETRANS_TIMER,
[NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME,
[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
+ [NEIGH_VAR_INTERVAL_PROBE_TIME_MS] = 5 * HZ,
[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
[NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
[NEIGH_VAR_PROXY_QLEN] = 64,
@@ -466,9 +467,8 @@ static void ip6_nd_hdr(struct sk_buff *skb,
hdr->daddr = *daddr;
}
-static void ndisc_send_skb(struct sk_buff *skb,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr)
+void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
struct dst_entry *dst = skb_dst(skb);
struct net *net = dev_net(skb->dev);
@@ -515,6 +515,7 @@ static void ndisc_send_skb(struct sk_buff *skb,
rcu_read_unlock();
}
+EXPORT_SYMBOL(ndisc_send_skb);
void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
const struct in6_addr *solicited_addr,
@@ -598,22 +599,16 @@ static void ndisc_send_unsol_na(struct net_device *dev)
in6_dev_put(idev);
}
-void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
- const struct in6_addr *daddr, const struct in6_addr *saddr,
- u64 nonce)
+struct sk_buff *ndisc_ns_create(struct net_device *dev, const struct in6_addr *solicit,
+ const struct in6_addr *saddr, u64 nonce)
{
- struct sk_buff *skb;
- struct in6_addr addr_buf;
int inc_opt = dev->addr_len;
- int optlen = 0;
+ struct sk_buff *skb;
struct nd_msg *msg;
+ int optlen = 0;
- if (!saddr) {
- if (ipv6_get_lladdr(dev, &addr_buf,
- (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
- return;
- saddr = &addr_buf;
- }
+ if (!saddr)
+ return NULL;
if (ipv6_addr_any(saddr))
inc_opt = false;
@@ -625,7 +620,7 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
if (!skb)
- return;
+ return NULL;
msg = skb_put(skb, sizeof(*msg));
*msg = (struct nd_msg) {
@@ -647,7 +642,28 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
memcpy(opt + 2, &nonce, 6);
}
- ndisc_send_skb(skb, daddr, saddr);
+ return skb;
+}
+EXPORT_SYMBOL(ndisc_ns_create);
+
+void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
+ const struct in6_addr *daddr, const struct in6_addr *saddr,
+ u64 nonce)
+{
+ struct in6_addr addr_buf;
+ struct sk_buff *skb;
+
+ if (!saddr) {
+ if (ipv6_get_lladdr(dev, &addr_buf,
+ (IFA_F_TENTATIVE | IFA_F_OPTIMISTIC)))
+ return;
+ saddr = &addr_buf;
+ }
+
+ skb = ndisc_ns_create(dev, solicit, saddr, nonce);
+
+ if (skb)
+ ndisc_send_skb(skb, daddr, saddr);
}
void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
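
[Editor's note] Splitting ndisc_send_ns() into ndisc_ns_create() plus the now-exported ndisc_send_skb() lets an outside caller (apparently the bonding driver's NS/NA link monitor, per the surrounding series) build a neighbour solicitation and transmit it in two steps. A hedged usage sketch, with dev, target, saddr and daddr assumed to be set up by the caller:

	struct sk_buff *skb;

	skb = ndisc_ns_create(dev, &target, &saddr, 0 /* no nonce */);
	if (skb)
		ndisc_send_skb(skb, &daddr, &saddr);
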
@@ -951,6 +967,25 @@ out:
in6_dev_put(idev);
}
+static int accept_untracked_na(struct net_device *dev, struct in6_addr *saddr)
+{
+ struct inet6_dev *idev = __in6_dev_get(dev);
+
+ switch (idev->cnf.accept_untracked_na) {
+ case 0: /* Don't accept untracked na (absent in neighbor cache) */
+ return 0;
+ case 1: /* Create new entries from na if currently untracked */
+ return 1;
+ case 2: /* Create new entries from untracked na only if saddr is in the
+ * same subnet as an address configured on the interface that
+ * received the na
+ */
+ return !!ipv6_chk_prefix(saddr, dev);
+ default:
+ return 0;
+ }
+}
+
static void ndisc_recv_na(struct sk_buff *skb)
{
struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
@@ -964,6 +999,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
struct inet6_dev *idev = __in6_dev_get(dev);
struct inet6_ifaddr *ifp;
struct neighbour *neigh;
+ u8 new_state;
if (skb->len < sizeof(struct nd_msg)) {
ND_PRINTK(2, warn, "NA: packet too short\n");
@@ -984,6 +1020,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
/* For some 802.11 wireless deployments (and possibly other networks),
* there will be a NA proxy and unsolicited packets are attacks
* and thus should not be accepted.
+ * drop_unsolicited_na takes precedence over accept_untracked_na
*/
if (!msg->icmph.icmp6_solicited && idev &&
idev->cnf.drop_unsolicited_na)
@@ -1024,9 +1061,33 @@ static void ndisc_recv_na(struct sk_buff *skb)
in6_ifa_put(ifp);
return;
}
+
neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
- if (neigh) {
+ /* RFC 9131 updates the original Neighbour Discovery RFC 4861.
+ * NAs with Target LL Address option without a corresponding
+ * entry in the neighbour cache can now create a STALE neighbour
+ * cache entry on routers.
+ *
+ * entry accept fwding solicited behaviour
+ * ------- ------ ------ --------- ----------------------
+ * present X X 0 Set state to STALE
+ * present X X 1 Set state to REACHABLE
+ * absent 0 X X Do nothing
+ * absent 1 0 X Do nothing
+ * absent 1 1 X Add a new STALE entry
+ *
+ * Note that we don't do a (daddr == all-routers-mcast) check.
+ */
+ new_state = msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE;
+ if (!neigh && lladdr && idev && idev->cnf.forwarding) {
+ if (accept_untracked_na(dev, saddr)) {
+ neigh = neigh_create(&nd_tbl, &msg->target, dev);
+ new_state = NUD_STALE;
+ }
+ }
+
+ if (neigh && !IS_ERR(neigh)) {
u8 old_flags = neigh->flags;
struct net *net = dev_net(dev);
@@ -1046,7 +1107,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
}
ndisc_update(dev, neigh, lladdr,
- msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
+ new_state,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
(msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
@@ -1317,6 +1378,9 @@ static void ndisc_router_discovery(struct sk_buff *skb)
if (!rt && lifetime) {
ND_PRINTK(3, info, "RA: adding default router\n");
+ if (neigh)
+ neigh_release(neigh);
+
rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr,
skb->dev, pref, defrtr_usr_metric);
if (!rt) {
@@ -1337,8 +1401,12 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
neigh->flags |= NTF_ROUTER;
- } else if (rt) {
+ } else if (rt && IPV6_EXTRACT_PREF(rt->fib6_flags) != pref) {
+ struct nl_info nlinfo = {
+ .nl_net = net,
+ };
rt->fib6_flags = (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
+ inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE);
}
if (rt)
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 6ab710b5a1a8..857713d7a38a 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -24,14 +24,14 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct sock *sk = sk_to_full_sk(sk_partial);
+ struct net_device *dev = skb_dst(skb)->dev;
struct flow_keys flkeys;
unsigned int hh_len;
struct dst_entry *dst;
int strict = (ipv6_addr_type(&iph->daddr) &
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
struct flowi6 fl6 = {
- .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if :
- strict ? skb_dst(skb)->dev->ifindex : 0,
+ .flowi6_l3mdev = l3mdev_master_ifindex(dev),
.flowi6_mark = skb->mark,
.flowi6_uid = sock_net_uid(net, sk),
.daddr = iph->daddr,
@@ -39,6 +39,11 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
};
int err;
+ if (sk && sk->sk_bound_dev_if)
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ else if (strict)
+ fl6.flowi6_oif = dev->ifindex;
+
fib6_rules_early_flow_dissect(net, skb, &fl6, &flkeys);
dst = ip6_route_output(net, sk, &fl6);
err = dst->error;
@@ -121,6 +126,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct sk_buff *))
{
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
+ bool mono_delivery_time = skb->mono_delivery_time;
ktime_t tstamp = skb->tstamp;
struct ip6_frag_state state;
u8 *prevhdr, nexthdr = 0;
@@ -186,7 +192,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (iter.frag)
ip6_fraglist_prepare(skb, &iter);
- skb->tstamp = tstamp;
+ skb_set_delivery_time(skb, tstamp, mono_delivery_time);
err = output(net, sk, data, skb);
if (err || !iter.frag)
break;
@@ -219,7 +225,7 @@ slow_path:
goto blackhole;
}
- skb2->tstamp = tstamp;
+ skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
err = output(net, sk, data, skb2);
if (err)
goto blackhole;
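
[Editor's note] Both fragmentation paths now restore the timestamp with skb_set_delivery_time() instead of a raw skb->tstamp store, preserving whether the value is a CLOCK_MONOTONIC delivery time (e.g. TCP EDT pacing) or a real-time receive stamp. Sketch of the helper as it looked in this era (abridged from include/linux/skbuff.h; treat the exact form as an assumption):

	static inline void skb_set_delivery_time(struct sk_buff *skb,
						 ktime_t kt, bool mono)
	{
		skb->tstamp = kt;
		/* the mono flag is only meaningful for a non-zero stamp */
		skb->mono_delivery_time = kt && mono;
	}
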
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 97d3d1b36dbc..0ba62f4868f9 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -47,10 +47,6 @@ config NFT_FIB_IPV6
endif # NF_TABLES_IPV6
endif # NF_TABLES
-config NF_FLOW_TABLE_IPV6
- tristate
- select NF_FLOW_TABLE_INET
-
config NF_DUP_IPV6
tristate "Netfilter IPv6 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index b85383606df7..b8d6dc9aeeb6 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -28,9 +28,6 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
-# flow table support
-obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o
-
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index d800801a5dd2..a01d9b842bd0 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -37,8 +37,10 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
bool ret = false;
struct flowi6 fl6 = {
.flowi6_iif = LOOPBACK_IFINDEX,
+ .flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev),
.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
.flowi6_proto = iph->nexthdr,
+ .flowi6_uid = sock_net_uid(net, NULL),
.daddr = iph->saddr,
};
int lookup_flags;
@@ -55,9 +57,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
if (rpfilter_addr_linklocal(&iph->saddr)) {
lookup_flags |= RT6_LOOKUP_F_IFACE;
fl6.flowi6_oif = dev->ifindex;
- /* Set flowi6_oif for vrf devices to lookup route in l3mdev domain. */
- } else if (netif_is_l3_master(dev) || netif_is_l3_slave(dev) ||
- (flags & XT_RPFILTER_LOOSE) == 0)
+ } else if ((flags & XT_RPFILTER_LOOSE) == 0)
fl6.flowi6_oif = dev->ifindex;
rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
@@ -72,9 +72,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
goto out;
}
- if (rt->rt6i_idev->dev == dev ||
- l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == dev->ifindex ||
- (flags & XT_RPFILTER_LOOSE))
+ if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE))
ret = true;
out:
ip6_rt_put(rt);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 5c47be29b9ee..38db0064d661 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -86,7 +86,6 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
table[1].extra2 = &nf_frag->fqdir->high_thresh;
table[2].data = &nf_frag->fqdir->high_thresh;
table[2].extra1 = &nf_frag->fqdir->low_thresh;
- table[2].extra2 = &nf_frag->fqdir->high_thresh;
hdr = register_net_sysctl(net, "net/netfilter", table);
if (hdr == NULL)
@@ -264,6 +263,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->iif = dev->ifindex;
fq->q.stamp = skb->tstamp;
+ fq->q.mono_delivery_time = skb->mono_delivery_time;
fq->q.meat += skb->len;
fq->ecn |= ecn;
if (payload_len > fq->q.max_size)
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ /dev/null
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index dffeaaaadcde..f61d4f18e1cf 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -31,7 +31,7 @@ static bool nf_reject_v6_csum_ok(struct sk_buff *skb, int hook)
if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
return false;
- if (!nf_reject_verify_csum(proto))
+ if (!nf_reject_verify_csum(skb, thoff, proto))
return true;
return nf_ip6_checksum(skb, hook, thoff, proto) == 0;
@@ -388,7 +388,7 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook)
if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
return false;
- if (!nf_reject_verify_csum(proto))
+ if (!nf_reject_verify_csum(skb, thoff, proto))
return true;
return nf_ip6_checksum(skb, hook, thoff, proto) == 0;
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
index aa5bb8789ba0..a7690ec62325 100644
--- a/net/ipv6/netfilter/nf_socket_ipv6.c
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -83,8 +83,8 @@ nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
{
switch (protocol) {
case IPPROTO_TCP:
- return inet6_lookup(net, &tcp_hashinfo, skb, doff,
- saddr, sport, daddr, dport,
+ return inet6_lookup(net, net->ipv4.tcp_death_row.hashinfo,
+ skb, doff, saddr, sport, daddr, dport,
in->ifindex);
case IPPROTO_UDP:
return udp6_lib_lookup(net, saddr, sport, daddr, dport,
diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c
index 6bac68fb27a3..929502e51203 100644
--- a/net/ipv6/netfilter/nf_tproxy_ipv6.c
+++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c
@@ -80,6 +80,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
+ struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo;
struct sock *sk;
switch (protocol) {
@@ -93,7 +94,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff,
switch (lookup_type) {
case NF_TPROXY_LOOKUP_LISTENER:
- sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
+ sk = inet6_lookup_listener(net, hinfo, skb,
thoff + __tcp_hdrlen(hp),
saddr, sport,
daddr, ntohs(dport),
@@ -108,9 +109,8 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff,
*/
break;
case NF_TPROXY_LOOKUP_ESTABLISHED:
- sk = __inet6_lookup_established(net, &tcp_hashinfo,
- saddr, sport, daddr, ntohs(dport),
- in->ifindex, 0);
+ sk = __inet6_lookup_established(net, hinfo, saddr, sport, daddr,
+ ntohs(dport), in->ifindex, 0);
break;
default:
BUG();
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
index 3a00d95e964e..70a405b4006f 100644
--- a/net/ipv6/netfilter/nft_dup_ipv6.c
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -73,6 +73,7 @@ static const struct nft_expr_ops nft_dup_ipv6_ops = {
.eval = nft_dup_ipv6_eval,
.init = nft_dup_ipv6_init,
.dump = nft_dup_ipv6_dump,
+ .reduce = NFT_REDUCE_READONLY,
};
static const struct nla_policy nft_dup_ipv6_policy[NFTA_DUP_MAX + 1] = {
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 92f3235fa287..36dc14b34388 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -30,6 +30,10 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
fl6->daddr = iph->daddr;
fl6->saddr = iph->saddr;
} else {
+ if (nft_hook(pkt) == NF_INET_FORWARD &&
+ priv->flags & NFTA_FIB_F_IIF)
+ fl6->flowi6_iif = nft_out(pkt)->ifindex;
+
fl6->daddr = iph->saddr;
fl6->saddr = iph->daddr;
}
@@ -37,6 +41,8 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) {
lookup_flags |= RT6_LOOKUP_F_IFACE;
fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev);
+ } else if (priv->flags & NFTA_FIB_F_IIF) {
+ fl6->flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev);
}
if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST)
@@ -60,6 +66,7 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
struct flowi6 fl6 = {
.flowi6_iif = LOOPBACK_IFINDEX,
.flowi6_proto = pkt->tprot,
+ .flowi6_uid = sock_net_uid(nft_net(pkt), NULL),
};
u32 ret = 0;
@@ -157,6 +164,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
struct flowi6 fl6 = {
.flowi6_iif = LOOPBACK_IFINDEX,
.flowi6_proto = pkt->tprot,
+ .flowi6_uid = sock_net_uid(nft_net(pkt), NULL),
};
struct rt6_info *rt;
int lookup_flags;
@@ -193,7 +201,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
goto put_rt_err;
- if (oif && oif != rt->rt6i_idev->dev)
+ if (oif && oif != rt->rt6i_idev->dev &&
+ l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) != oif->ifindex)
goto put_rt_err;
nft_fib_store_result(dest, priv, rt->rt6i_idev->dev);
@@ -211,6 +220,7 @@ static const struct nft_expr_ops nft_fib6_type_ops = {
.init = nft_fib_init,
.dump = nft_fib_dump,
.validate = nft_fib_validate,
+ .reduce = nft_fib_reduce,
};
static const struct nft_expr_ops nft_fib6_ops = {
@@ -220,6 +230,7 @@ static const struct nft_expr_ops nft_fib6_ops = {
.init = nft_fib_init,
.dump = nft_fib_dump,
.validate = nft_fib_validate,
+ .reduce = nft_fib_reduce,
};
static const struct nft_expr_ops *
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
index ed69c768797e..5c61294f410e 100644
--- a/net/ipv6/netfilter/nft_reject_ipv6.c
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -46,6 +46,7 @@ static const struct nft_expr_ops nft_reject_ipv6_ops = {
.init = nft_reject_init,
.dump = nft_reject_dump,
.validate = nft_reject_validate,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_reject_ipv6_type __read_mostly = {
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 2880dc7d9a49..2685c3f15e9d 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -18,7 +18,7 @@ static u32 __ipv6_select_ident(struct net *net,
u32 id;
do {
- id = prandom_u32();
+ id = get_random_u32();
} while (!id);
return id;
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 9256f6ba87ef..86c26e48d065 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -20,8 +20,14 @@
#include <net/udp.h>
#include <net/transp_v6.h>
#include <linux/proc_fs.h>
+#include <linux/bpf-cgroup.h>
#include <net/ping.h>
+static void ping_v6_destroy(struct sock *sk)
+{
+ inet6_destroy_sock(sk);
+}
+
/* Compatibility glue so we can support IPv6 when it's compiled as a module */
static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
int *addr_len)
@@ -44,6 +50,20 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
return 0;
}
+static int ping_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len)
+{
+ /* This check is replicated from __ip6_datagram_connect() and
+ * intended to prevent the BPF program called below from accessing
+ * bytes that are out of the bounds specified by the user in addr_len.
+ */
+
+ if (addr_len < SIN6_LEN_RFC2133)
+ return -EINVAL;
+
+ return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);
+}
+
static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct inet_sock *inet = inet_sk(sk);
@@ -59,13 +79,13 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct pingfakehdr pfh;
struct ipcm6_cookie ipc6;
- pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
-
err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph,
sizeof(user_icmph));
if (err)
return err;
+ memset(&fl6, 0, sizeof(fl6));
+
if (msg->msg_name) {
DECLARE_SOCKADDR(struct sockaddr_in6 *, u, msg->msg_name);
if (msg->msg_namelen < sizeof(*u))
@@ -74,12 +94,15 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return -EAFNOSUPPORT;
}
daddr = &(u->sin6_addr);
+ if (np->sndflow)
+ fl6.flowlabel = u->sin6_flowinfo & IPV6_FLOWINFO_MASK;
if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr)))
oif = u->sin6_scope_id;
} else {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
daddr = &sk->sk_v6_daddr;
+ fl6.flowlabel = np->flow_label;
}
if (!oif)
@@ -99,22 +122,37 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
(oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if))
return -EINVAL;
- /* TODO: use ip6_datagram_send_ctl to get options from cmsg */
+ ipcm6_init_sk(&ipc6, np);
+ ipc6.sockc.tsflags = sk->sk_tsflags;
+ ipc6.sockc.mark = sk->sk_mark;
- memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = oif;
+
+ if (msg->msg_controllen) {
+ struct ipv6_txoptions opt = {};
+
+ opt.tot_len = sizeof(opt);
+ ipc6.opt = &opt;
+
+ err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6);
+ if (err < 0)
+ return err;
+
+ /* Changes to txoptions and flow info are not implemented yet.
+ * Drop the options.
+ */
+ ipc6.opt = NULL;
+ }
fl6.flowi6_proto = IPPROTO_ICMPV6;
fl6.saddr = np->saddr;
fl6.daddr = *daddr;
- fl6.flowi6_oif = oif;
- fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_mark = ipc6.sockc.mark;
fl6.flowi6_uid = sk->sk_uid;
fl6.fl6_icmp_type = user_icmph.icmp6_type;
fl6.fl6_icmp_code = user_icmph.icmp6_code;
security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
- ipcm6_init_sk(&ipc6, np);
- ipc6.sockc.mark = sk->sk_mark;
fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false);
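
[Editor's note] With the ip6_datagram_send_ctl() call above, unconnected ICMPv6 (ping) sockets now honour per-packet ancillary data such as traffic class and hop limit, plus the socket mark via ipc6.sockc, even though sticky txoptions are still dropped. A userspace-side sketch of what this enables, assuming sock, dst and iov are already prepared (the tclass value is hypothetical):

	int tclass = 0x10;
	char cbuf[CMSG_SPACE(sizeof(tclass))] = { 0 };
	struct msghdr msg = {
		.msg_name	= &dst,
		.msg_namelen	= sizeof(dst),
		.msg_iov	= &iov,
		.msg_iovlen	= 1,
		.msg_control	= cbuf,
		.msg_controllen	= sizeof(cbuf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_IPV6;
	cmsg->cmsg_type	 = IPV6_TCLASS;
	cmsg->cmsg_len	 = CMSG_LEN(sizeof(tclass));
	memcpy(CMSG_DATA(cmsg), &tclass, sizeof(tclass));
	sendmsg(sock, &msg, 0);	/* per-packet tclass on a ping socket */
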
@@ -136,11 +174,12 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
pfh.wcheck = 0;
pfh.family = AF_INET6;
- ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ if (ipc6.hlimit < 0)
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
lock_sock(sk);
err = ip6_append_data(sk, ping_getfrag, &pfh, len,
- 0, &ipc6, &fl6, rt,
+ sizeof(struct icmp6hdr), &ipc6, &fl6, rt,
MSG_DONTWAIT);
if (err) {
@@ -166,6 +205,8 @@ struct proto pingv6_prot = {
.owner = THIS_MODULE,
.init = ping_init_sock,
.close = ping_close,
+ .destroy = ping_v6_destroy,
+ .pre_connect = ping_v6_pre_connect,
.connect = ip6_datagram_connect_v6_only,
.disconnect = __udp_disconnect,
.setsockopt = ipv6_setsockopt,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index c51d5ce3711c..722de9dd0ff7 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -61,46 +61,30 @@
#define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */
-struct raw_hashinfo raw_v6_hashinfo = {
- .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock),
-};
+struct raw_hashinfo raw_v6_hashinfo;
EXPORT_SYMBOL_GPL(raw_v6_hashinfo);
-struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
- unsigned short num, const struct in6_addr *loc_addr,
- const struct in6_addr *rmt_addr, int dif, int sdif)
+bool raw_v6_match(struct net *net, struct sock *sk, unsigned short num,
+ const struct in6_addr *loc_addr,
+ const struct in6_addr *rmt_addr, int dif, int sdif)
{
- bool is_multicast = ipv6_addr_is_multicast(loc_addr);
-
- sk_for_each_from(sk)
- if (inet_sk(sk)->inet_num == num) {
-
- if (!net_eq(sock_net(sk), net))
- continue;
-
- if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
- !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
- continue;
-
- if (!raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
- dif, sdif))
- continue;
-
- if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
- if (ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
- goto found;
- if (is_multicast &&
- inet6_mc_check(sk, loc_addr, rmt_addr))
- goto found;
- continue;
- }
- goto found;
- }
- sk = NULL;
-found:
- return sk;
+ if (inet_sk(sk)->inet_num != num ||
+ !net_eq(sock_net(sk), net) ||
+ (!ipv6_addr_any(&sk->sk_v6_daddr) &&
+ !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
+ !raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
+ dif, sdif))
+ return false;
+
+ if (ipv6_addr_any(&sk->sk_v6_rcv_saddr) ||
+ ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr) ||
+ (ipv6_addr_is_multicast(loc_addr) &&
+ inet6_mc_check(sk, loc_addr, rmt_addr)))
+ return true;
+
+ return false;
}
-EXPORT_SYMBOL_GPL(__raw_v6_lookup);
+EXPORT_SYMBOL_GPL(raw_v6_match);
/*
* 0 - deliver
@@ -156,31 +140,27 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister);
*/
static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
{
+ struct net *net = dev_net(skb->dev);
+ struct hlist_nulls_head *hlist;
+ struct hlist_nulls_node *hnode;
const struct in6_addr *saddr;
const struct in6_addr *daddr;
struct sock *sk;
bool delivered = false;
__u8 hash;
- struct net *net;
saddr = &ipv6_hdr(skb)->saddr;
daddr = saddr + 1;
hash = nexthdr & (RAW_HTABLE_SIZE - 1);
-
- read_lock(&raw_v6_hashinfo.lock);
- sk = sk_head(&raw_v6_hashinfo.ht[hash]);
-
- if (!sk)
- goto out;
-
- net = dev_net(skb->dev);
- sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr,
- inet6_iif(skb), inet6_sdif(skb));
-
- while (sk) {
+ hlist = &raw_v6_hashinfo.ht[hash];
+ rcu_read_lock();
+ sk_nulls_for_each(sk, hnode, hlist) {
int filtered;
+ if (!raw_v6_match(net, sk, nexthdr, daddr, saddr,
+ inet6_iif(skb), inet6_sdif(skb)))
+ continue;
delivered = true;
switch (nexthdr) {
case IPPROTO_ICMPV6:
@@ -219,23 +199,14 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
rawv6_rcv(sk, clone);
}
}
- sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr,
- inet6_iif(skb), inet6_sdif(skb));
}
-out:
- read_unlock(&raw_v6_hashinfo.lock);
+ rcu_read_unlock();
return delivered;
}
bool raw6_local_deliver(struct sk_buff *skb, int nexthdr)
{
- struct sock *raw_sk;
-
- raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (RAW_HTABLE_SIZE - 1)]);
- if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
- raw_sk = NULL;
-
- return raw_sk != NULL;
+ return ipv6_raw_deliver(skb, nexthdr);
}
/* This cleans up af_inet6 a bit. -DaveM */
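
[Editor's note] The raw-socket delivery path above drops the global rwlock in favour of RCU plus an hlist_nulls hash: receive-side lookups now take no lock at all, with raw_v6_match() doing the per-socket filtering inside the iteration. Condensed, the resulting pattern is:

	rcu_read_lock();
	sk_nulls_for_each(sk, hnode, &raw_v6_hashinfo.ht[hash]) {
		if (!raw_v6_match(net, sk, nexthdr, daddr, saddr,
				  dif, sdif))
			continue;
		/* clone and deliver to this matching socket */
	}
	rcu_read_unlock();
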
@@ -361,30 +332,25 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
u8 type, u8 code, int inner_offset, __be32 info)
{
+ struct net *net = dev_net(skb->dev);
+ struct hlist_nulls_head *hlist;
+ struct hlist_nulls_node *hnode;
struct sock *sk;
int hash;
- const struct in6_addr *saddr, *daddr;
- struct net *net;
hash = nexthdr & (RAW_HTABLE_SIZE - 1);
-
- read_lock(&raw_v6_hashinfo.lock);
- sk = sk_head(&raw_v6_hashinfo.ht[hash]);
- if (sk) {
+ hlist = &raw_v6_hashinfo.ht[hash];
+ rcu_read_lock();
+ sk_nulls_for_each(sk, hnode, hlist) {
/* Note: ipv6_hdr(skb) != skb->data */
const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data;
- saddr = &ip6h->saddr;
- daddr = &ip6h->daddr;
- net = dev_net(skb->dev);
-
- while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr,
- inet6_iif(skb), inet6_iif(skb)))) {
- rawv6_err(sk, skb, NULL, type, code,
- inner_offset, info);
- sk = sk_next(sk);
- }
+
+ if (!raw_v6_match(net, sk, nexthdr, &ip6h->saddr, &ip6h->daddr,
+ inet6_iif(skb), inet6_iif(skb)))
+ continue;
+ rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
}
- read_unlock(&raw_v6_hashinfo.lock);
+ rcu_read_unlock();
}
static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
@@ -460,7 +426,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
*/
static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
@@ -477,7 +443,7 @@ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (np->rxpmtu && np->rxopt.bits.rxpmtu)
return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
@@ -512,7 +478,7 @@ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
*addr_len = sizeof(*sin6);
}
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (np->rxopt.all)
ip6_datagram_recv_ctl(sk, msg, skb);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 28e44782c94d..ff866f2a879e 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -194,6 +194,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->iif = dev->ifindex;
fq->q.stamp = skb->tstamp;
+ fq->q.mono_delivery_time = skb->mono_delivery_time;
fq->q.meat += skb->len;
fq->ecn |= ecn;
add_frag_mem_limit(fq->q.fqdir, skb->truesize);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e6de94203c13..2f355f0ec32a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -130,6 +130,7 @@ static struct fib6_info *rt6_get_route_info(struct net *net,
struct uncached_list {
spinlock_t lock;
struct list_head head;
+ struct list_head quarantine;
};
static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
@@ -149,43 +150,46 @@ void rt6_uncached_list_del(struct rt6_info *rt)
{
if (!list_empty(&rt->rt6i_uncached)) {
struct uncached_list *ul = rt->rt6i_uncached_list;
- struct net *net = dev_net(rt->dst.dev);
spin_lock_bh(&ul->lock);
- list_del(&rt->rt6i_uncached);
- atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
+ list_del_init(&rt->rt6i_uncached);
spin_unlock_bh(&ul->lock);
}
}
-static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
+static void rt6_uncached_list_flush_dev(struct net_device *dev)
{
- struct net_device *loopback_dev = net->loopback_dev;
int cpu;
- if (dev == loopback_dev)
- return;
-
for_each_possible_cpu(cpu) {
struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
- struct rt6_info *rt;
+ struct rt6_info *rt, *safe;
+
+ if (list_empty(&ul->head))
+ continue;
spin_lock_bh(&ul->lock);
- list_for_each_entry(rt, &ul->head, rt6i_uncached) {
+ list_for_each_entry_safe(rt, safe, &ul->head, rt6i_uncached) {
struct inet6_dev *rt_idev = rt->rt6i_idev;
struct net_device *rt_dev = rt->dst.dev;
+ bool handled = false;
if (rt_idev->dev == dev) {
- rt->rt6i_idev = in6_dev_get(loopback_dev);
+ rt->rt6i_idev = in6_dev_get(blackhole_netdev);
in6_dev_put(rt_idev);
+ handled = true;
}
if (rt_dev == dev) {
rt->dst.dev = blackhole_netdev;
- dev_replace_track(rt_dev, blackhole_netdev,
- &rt->dst.dev_tracker,
- GFP_ATOMIC);
+ netdev_ref_replace(rt_dev, blackhole_netdev,
+ &rt->dst.dev_tracker,
+ GFP_ATOMIC);
+ handled = true;
}
+ if (handled)
+ list_move(&rt->rt6i_uncached,
+ &ul->quarantine);
}
spin_unlock_bh(&ul->lock);
}
@@ -373,13 +377,12 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
{
struct rt6_info *rt = (struct rt6_info *)dst;
struct inet6_dev *idev = rt->rt6i_idev;
- struct net_device *loopback_dev =
- dev_net(dev)->loopback_dev;
- if (idev && idev->dev != loopback_dev) {
- struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
- if (loopback_idev) {
- rt->rt6i_idev = loopback_idev;
+ if (idev && idev->dev != blackhole_netdev) {
+ struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev);
+
+ if (blackhole_idev) {
+ rt->rt6i_idev = blackhole_idev;
in6_dev_put(idev);
}
}
@@ -604,7 +607,7 @@ static void rt6_probe_deferred(struct work_struct *w)
addrconf_addr_solict_mult(&work->target, &mcaddr);
ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
- dev_put_track(work->dev, &work->dev_tracker);
+ netdev_put(work->dev, &work->dev_tracker);
kfree(work);
}
@@ -658,7 +661,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
} else {
INIT_WORK(&work->work, rt6_probe_deferred);
work->target = *nh_gw;
- dev_hold_track(dev, &work->dev_tracker, GFP_ATOMIC);
+ netdev_hold(dev, &work->dev_tracker, GFP_ATOMIC);
work->dev = dev;
schedule_work(&work->work);
}
@@ -1206,9 +1209,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_node *fn;
struct rt6_info *rt;
- if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
- flags &= ~RT6_LOOKUP_F_IFACE;
-
rcu_read_lock();
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
@@ -2178,9 +2178,6 @@ int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
saved_fn = fn;
- if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
- oif = 0;
-
redo_rt6_select:
rt6_select(net, fn, oif, res, strict);
if (res->f6i == net->ipv6.fib6_null_entry) {
@@ -2244,7 +2241,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
* if caller sets RT6_LOOKUP_F_DST_NOREF flag.
*/
rt6_uncached_list_add(rt);
- atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
rcu_read_unlock();
return rt;
@@ -2802,7 +2798,7 @@ static void ip6_link_failure(struct sk_buff *skb)
if (from) {
fn = rcu_dereference(from->fib6_node);
if (fn && (rt->rt6i_flags & RTF_DEFAULT))
- fn->fn_sernum = -1;
+ WRITE_ONCE(fn->fn_sernum, -1);
}
}
rcu_read_unlock();
@@ -3056,12 +3052,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net,
struct fib6_info *rt;
struct fib6_node *fn;
- /* l3mdev_update_flow overrides oif if the device is enslaved; in
- * this case we must match on the real ingress device, so reset it
- */
- if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
- fl6->flowi6_oif = skb->dev->ifindex;
-
/* Get the "current" route for this destination and
* check if the redirect has come from appropriate router.
*
@@ -3287,7 +3277,6 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
* do proper release of the net_device
*/
rt6_uncached_list_add(rt);
- atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
@@ -3303,6 +3292,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
+ unsigned int val;
int entries;
entries = dst_entries_get_fast(ops);
@@ -3313,13 +3303,13 @@ static int ip6_dst_gc(struct dst_ops *ops)
entries <= rt_max_size)
goto out;
- net->ipv6.ip6_rt_gc_expire++;
- fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
+ fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true);
entries = dst_entries_get_slow(ops);
if (entries < ops->gc_thresh)
- net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
+ atomic_set(&net->ipv6.ip6_rt_gc_expire, rt_gc_timeout >> 1);
out:
- net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
+ val = atomic_read(&net->ipv6.ip6_rt_gc_expire);
+ atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity));
return entries > rt_max_size;
}
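
[Editor's note] ip6_rt_gc_expire becomes an atomic_t because ip6_dst_gc() can run on several CPUs at once; the final statement is an exponential decay of the effective GC interval. Per pass it computes, in effect:

	val -= val >> rt_elasticity;	/* val *= (1 - 2^-rt_elasticity) */

so pressure ramps up under load and relaxes gradually when idle. The read-then-set is intentionally tolerant of races: a lost decay step is harmless here.
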
@@ -4493,9 +4483,10 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
struct dst_entry *dst = skb_dst(skb);
struct net *net = dev_net(dst->dev);
struct inet6_dev *idev;
+ SKB_DR(reason);
int type;
- if (netif_is_l3_master(skb->dev) &&
+ if (netif_is_l3_master(skb->dev) ||
dst->dev == net->loopback_dev)
idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
else
@@ -4505,11 +4496,14 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
case IPSTATS_MIB_INNOROUTES:
type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
if (type == IPV6_ADDR_ANY) {
+ SKB_DR_SET(reason, IP_INADDRERRORS);
IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
break;
}
+ SKB_DR_SET(reason, IP_INNOROUTES);
fallthrough;
case IPSTATS_MIB_OUTNOROUTES:
+ SKB_DR_OR(reason, IP_OUTNOROUTES);
IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
break;
}
@@ -4519,7 +4513,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
skb_dst_drop(skb);
icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return 0;
}
@@ -4575,8 +4569,15 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
}
f6i = ip6_route_info_create(&cfg, gfp_flags, NULL);
- if (!IS_ERR(f6i))
+ if (!IS_ERR(f6i)) {
f6i->dst_nocount = true;
+
+ if (!anycast &&
+ (net->ipv6.devconf_all->disable_policy ||
+ idev->cnf.disable_policy))
+ f6i->dst_nopolicy = true;
+ }
+
return f6i;
}
@@ -4896,7 +4897,7 @@ void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
void rt6_disable_ip(struct net_device *dev, unsigned long event)
{
rt6_sync_down_dev(dev, event);
- rt6_uncached_list_flush_dev(dev_net(dev), dev);
+ rt6_uncached_list_flush_dev(dev);
neigh_ifdown(&nd_tbl, dev);
}
@@ -5009,6 +5010,12 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
err = -EINVAL;
rtm = nlmsg_data(nlh);
+ if (rtm->rtm_tos) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid dsfield (tos): option not available for IPv6");
+ goto errout;
+ }
+
*cfg = (struct fib6_config){
.fc_table = rtm->rtm_table,
.fc_dst_len = rtm->rtm_dst_len,
@@ -5734,7 +5741,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
if (nexthop_is_blackhole(rt->nh))
rtm->rtm_type = RTN_BLACKHOLE;
- if (net->ipv4.sysctl_nexthop_compat_mode &&
+ if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) &&
rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
goto nla_put_failure;
@@ -5753,11 +5760,11 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
}
if (!dst) {
- if (rt->offload)
+ if (READ_ONCE(rt->offload))
rtm->rtm_flags |= RTM_F_OFFLOAD;
- if (rt->trap)
+ if (READ_ONCE(rt->trap))
rtm->rtm_flags |= RTM_F_TRAP;
- if (rt->offload_failed)
+ if (READ_ONCE(rt->offload_failed))
rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED;
}
@@ -5934,7 +5941,7 @@ int rt6_dump_route(struct fib6_info *rt, void *p_arg, unsigned int skip)
rcu_read_unlock();
if (err)
- return count += w.count;
+ return count + w.count;
}
return -1;
@@ -6215,19 +6222,20 @@ void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
struct sk_buff *skb;
int err;
- if (f6i->offload == offload && f6i->trap == trap &&
- f6i->offload_failed == offload_failed)
+ if (READ_ONCE(f6i->offload) == offload &&
+ READ_ONCE(f6i->trap) == trap &&
+ READ_ONCE(f6i->offload_failed) == offload_failed)
return;
- f6i->offload = offload;
- f6i->trap = trap;
+ WRITE_ONCE(f6i->offload, offload);
+ WRITE_ONCE(f6i->trap, trap);
/* 2 means send notifications only if offload_failed was changed. */
if (net->ipv6.sysctl.fib_notify_on_flag_change == 2 &&
- f6i->offload_failed == offload_failed)
+ READ_ONCE(f6i->offload_failed) == offload_failed)
return;
- f6i->offload_failed = offload_failed;
+ WRITE_ONCE(f6i->offload_failed, offload_failed);
if (!rcu_access_pointer(f6i->fib6_node))
/* The route was removed from the tree, do not send
@@ -6513,7 +6521,7 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
net->ipv6.sysctl.skip_notify_on_dev_down = 0;
- net->ipv6.ip6_rt_gc_expire = 30*HZ;
+ atomic_set(&net->ipv6.ip6_rt_gc_expire, 30*HZ);
ret = 0;
out:
@@ -6547,10 +6555,16 @@ static void __net_exit ip6_route_net_exit(struct net *net)
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
- proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
- sizeof(struct ipv6_route_iter));
- proc_create_net_single("rt6_stats", 0444, net->proc_net,
- rt6_stats_seq_show, NULL);
+ if (!proc_create_net("ipv6_route", 0, net->proc_net,
+ &ipv6_route_seq_ops,
+ sizeof(struct ipv6_route_iter)))
+ return -ENOMEM;
+
+ if (!proc_create_net_single("rt6_stats", 0444, net->proc_net,
+ rt6_stats_seq_show, NULL)) {
+ remove_proc_entry("ipv6_route", net->proc_net);
+ return -ENOMEM;
+ }
#endif
return 0;
}
@@ -6730,6 +6744,7 @@ int __init ip6_route_init(void)
struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
INIT_LIST_HEAD(&ul->head);
+ INIT_LIST_HEAD(&ul->quarantine);
spin_lock_init(&ul->lock);
}
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 73aaabf0e966..29346a6eec9f 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -191,6 +191,11 @@ static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info)
goto out_unlock;
}
+ if (slen > nla_len(info->attrs[SEG6_ATTR_SECRET])) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
if (hinfo) {
err = seg6_hmac_info_del(net, hmackeyid);
if (err)
@@ -499,6 +504,7 @@ static struct genl_family seg6_genl_family __ro_after_init = {
.parallel_ops = true,
.ops = seg6_genl_ops,
.n_ops = ARRAY_SIZE(seg6_genl_ops),
+ .resv_start_op = SEG6_CMD_GET_TUNSRC + 1,
.module = THIS_MODULE,
};
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index 29bc4e7c3046..d43c50a7310d 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -399,7 +399,6 @@ int __init seg6_hmac_init(void)
{
return seg6_hmac_init_algo();
}
-EXPORT_SYMBOL(seg6_hmac_init);
int __net_init seg6_hmac_net_init(struct net *net)
{
@@ -407,7 +406,6 @@ int __net_init seg6_hmac_net_init(struct net *net)
return rhashtable_init(&sdata->hmac_infos, &rht_params);
}
-EXPORT_SYMBOL(seg6_hmac_net_init);
void seg6_hmac_exit(void)
{
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index d64855010948..34db881204d2 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -36,9 +36,11 @@ static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
case SEG6_IPTUN_MODE_INLINE:
break;
case SEG6_IPTUN_MODE_ENCAP:
+ case SEG6_IPTUN_MODE_ENCAP_RED:
head = sizeof(struct ipv6hdr);
break;
case SEG6_IPTUN_MODE_L2ENCAP:
+ case SEG6_IPTUN_MODE_L2ENCAP_RED:
return 0;
}
@@ -189,12 +191,132 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
}
#endif
+ hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
skb_postpush_rcsum(skb, hdr, tot_len);
return 0;
}
EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
+/* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */
+static int seg6_do_srh_encap_red(struct sk_buff *skb,
+ struct ipv6_sr_hdr *osrh, int proto)
+{
+ __u8 first_seg = osrh->first_segment;
+ struct dst_entry *dst = skb_dst(skb);
+ struct net *net = dev_net(dst->dev);
+ struct ipv6hdr *hdr, *inner_hdr;
+ int hdrlen = ipv6_optlen(osrh);
+ int red_tlv_offset, tlv_offset;
+ struct ipv6_sr_hdr *isrh;
+ bool skip_srh = false;
+ __be32 flowlabel;
+ int tot_len, err;
+ int red_hdrlen;
+ int tlvs_len;
+
+ if (first_seg > 0) {
+ red_hdrlen = hdrlen - sizeof(struct in6_addr);
+ } else {
+ /* NOTE: if tag/flags and/or other TLVs are introduced in the
+ * seg6_iptunnel infrastructure, they should be considered when
+ * deciding to skip the SRH.
+ */
+ skip_srh = !sr_has_hmac(osrh);
+
+ red_hdrlen = skip_srh ? 0 : hdrlen;
+ }
+
+ tot_len = red_hdrlen + sizeof(struct ipv6hdr);
+
+ err = skb_cow_head(skb, tot_len + skb->mac_len);
+ if (unlikely(err))
+ return err;
+
+ inner_hdr = ipv6_hdr(skb);
+ flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);
+
+ skb_push(skb, tot_len);
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+ hdr = ipv6_hdr(skb);
+
+ /* based on seg6_do_srh_encap() */
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
+ flowlabel);
+ hdr->hop_limit = inner_hdr->hop_limit;
+ } else {
+ ip6_flow_hdr(hdr, 0, flowlabel);
+ hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
+
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ IP6CB(skb)->iif = skb->skb_iif;
+ }
+
+ /* Whether or not the SRH is skipped, the first segment always
+ * goes into the destination address of the pushed IPv6 header.
+ */
+ hdr->daddr = osrh->segments[first_seg];
+
+ if (skip_srh) {
+ hdr->nexthdr = proto;
+
+ set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
+ goto out;
+ }
+
+ /* we cannot skip the SRH, slow path */
+
+ hdr->nexthdr = NEXTHDR_ROUTING;
+ isrh = (void *)hdr + sizeof(struct ipv6hdr);
+
+ if (unlikely(!first_seg)) {
+ /* this is a very rare case; we have only one SID but
+ * we cannot skip the SRH since we are carrying some
+ * other info.
+ */
+ memcpy(isrh, osrh, hdrlen);
+ goto srcaddr;
+ }
+
+ tlv_offset = sizeof(*osrh) + (first_seg + 1) * sizeof(struct in6_addr);
+ red_tlv_offset = tlv_offset - sizeof(struct in6_addr);
+
+ memcpy(isrh, osrh, red_tlv_offset);
+
+ tlvs_len = hdrlen - tlv_offset;
+ if (unlikely(tlvs_len > 0)) {
+ const void *s = (const void *)osrh + tlv_offset;
+ void *d = (void *)isrh + red_tlv_offset;
+
+ memcpy(d, s, tlvs_len);
+ }
+
+ --isrh->first_segment;
+ isrh->hdrlen -= 2;
+
+srcaddr:
+ isrh->nexthdr = proto;
+ set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ if (unlikely(!skip_srh && sr_has_hmac(isrh))) {
+ err = seg6_push_hmac(net, &hdr->saddr, isrh);
+ if (unlikely(err))
+ return err;
+ }
+#endif
+
+out:
+ hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
+ skb_postpush_rcsum(skb, hdr, tot_len);
+
+ return 0;
+}
+
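
[Editor's note] In the reduced modes the first SID is carried only in the outer destination address and is elided from the SRH on the wire. A worked size example for a three-segment list with no TLVs (first_seg == 2), following the code above:

	/* hdrlen     = ipv6_optlen(osrh) = 8 + 3 * 16 = 56 bytes
	 * red_hdrlen = hdrlen - sizeof(struct in6_addr) = 40 bytes
	 * hdr->daddr = osrh->segments[2];   first SID to visit
	 * isrh->first_segment: 2 -> 1;      one SID fewer in the list
	 * isrh->hdrlen -= 2;                SRH length is in 8-byte units
	 */
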
/* insert an SRH within an IPv6 packet, just after the IPv6 header */
int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
{
@@ -241,6 +363,8 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
}
#endif
+ hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);
return 0;
@@ -265,6 +389,7 @@ static int seg6_do_srh(struct sk_buff *skb)
return err;
break;
case SEG6_IPTUN_MODE_ENCAP:
+ case SEG6_IPTUN_MODE_ENCAP_RED:
err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
if (err)
return err;
@@ -276,7 +401,11 @@ static int seg6_do_srh(struct sk_buff *skb)
else
return -EINVAL;
- err = seg6_do_srh_encap(skb, tinfo->srh, proto);
+ if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP)
+ err = seg6_do_srh_encap(skb, tinfo->srh, proto);
+ else
+ err = seg6_do_srh_encap_red(skb, tinfo->srh, proto);
+
if (err)
return err;
@@ -285,6 +414,7 @@ static int seg6_do_srh(struct sk_buff *skb)
skb->protocol = htons(ETH_P_IPV6);
break;
case SEG6_IPTUN_MODE_L2ENCAP:
+ case SEG6_IPTUN_MODE_L2ENCAP_RED:
if (!skb_mac_header_was_set(skb))
return -EINVAL;
@@ -294,7 +424,13 @@ static int seg6_do_srh(struct sk_buff *skb)
skb_mac_header_rebuild(skb);
skb_push(skb, skb->mac_len);
- err = seg6_do_srh_encap(skb, tinfo->srh, IPPROTO_ETHERNET);
+ if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP)
+ err = seg6_do_srh_encap(skb, tinfo->srh,
+ IPPROTO_ETHERNET);
+ else
+ err = seg6_do_srh_encap_red(skb, tinfo->srh,
+ IPPROTO_ETHERNET);
+
if (err)
return err;
@@ -302,7 +438,6 @@ static int seg6_do_srh(struct sk_buff *skb)
break;
}
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
nf_reset_ct(skb);
@@ -514,6 +649,10 @@ static int seg6_build_state(struct net *net, struct nlattr *nla,
break;
case SEG6_IPTUN_MODE_L2ENCAP:
break;
+ case SEG6_IPTUN_MODE_ENCAP_RED:
+ break;
+ case SEG6_IPTUN_MODE_L2ENCAP_RED:
+ break;
default:
return -EINVAL;
}
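The two _RED modes accepted here are otherwise handled exactly like their plain counterparts; only the encapsulation helper differs. Assuming a matching iproute2 (the mode keywords below are an assumption and may differ), a reduced encapsulation route would look something like:

    # hypothetical iproute2 usage for the reduced modes
    ip -6 route add 2001:db8::1/128 encap seg6 mode encap.red segs 2001:db8::2,2001:db8::3 dev eth0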
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 9fbe243a0e81..8370726ae7bf 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -73,6 +73,55 @@ struct bpf_lwt_prog {
char *name;
};
+/* default length values (expressed in bits) for both Locator-Block and
+ * Locator-Node Function.
+ *
+ * Both SEG6_LOCAL_LCBLOCK_DBITS and SEG6_LOCAL_LCNODE_FN_DBITS *must* be:
+ * i) greater than 0;
+ * ii) evenly divisible by 8. In other words, the lengths of the
+ * Locator-Block and Locator-Node Function must be byte-aligned (we can
+ * relax this constraint in the future if really needed).
+ *
+ * Moreover, a third condition must hold:
+ * iii) SEG6_LOCAL_LCBLOCK_DBITS + SEG6_LOCAL_LCNODE_FN_DBITS <= 128.
+ *
+ * The correctness of the SEG6_LOCAL_LCBLOCK_DBITS and SEG6_LOCAL_LCNODE_FN_DBITS
+ * values is checked at kernel compile time. If the compilation stops,
+ * check the value of these parameters to see if they meet conditions (i), (ii)
+ * and (iii).
+ */
+#define SEG6_LOCAL_LCBLOCK_DBITS 32
+#define SEG6_LOCAL_LCNODE_FN_DBITS 16
+
+/* The following next_csid_chk_{cntr,lcblock,lcnode_fn}_bits macros can be
+ * used directly to check whether the lengths (in bits) of Locator-Block and
+ * Locator-Node Function are valid according to (i), (ii), (iii).
+ */
+#define next_csid_chk_cntr_bits(blen, flen) \
+ ((blen) + (flen) > 128)
+
+#define next_csid_chk_lcblock_bits(blen) \
+({ \
+ typeof(blen) __tmp = blen; \
+ (!__tmp || __tmp > 120 || (__tmp & 0x07)); \
+})
+
+#define next_csid_chk_lcnode_fn_bits(flen) \
+ next_csid_chk_lcblock_bits(flen)
+
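For the defaults above, all three conditions hold: 32 + 16 = 48 <= 128, and both values are non-zero multiples of 8. A minimal standalone C11 sketch of the same checks (illustration only, not kernel code):

    #include <assert.h>

    #define LCBLOCK_DBITS   32
    #define LCNODE_FN_DBITS 16

    static_assert(LCBLOCK_DBITS + LCNODE_FN_DBITS <= 128,
                  "(iii) must fit a 128-bit C-SID container");
    static_assert(LCBLOCK_DBITS > 0 && LCBLOCK_DBITS <= 120 &&
                  (LCBLOCK_DBITS & 0x07) == 0,
                  "(i)/(ii) block: non-zero, byte-aligned, room left over");
    static_assert(LCNODE_FN_DBITS > 0 && LCNODE_FN_DBITS <= 120 &&
                  (LCNODE_FN_DBITS & 0x07) == 0,
                  "(i)/(ii) node function: non-zero, byte-aligned");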
+/* Supported Flavor operations are reported in this bitmask */
+#define SEG6_LOCAL_FLV_SUPP_OPS (BIT(SEG6_LOCAL_FLV_OP_NEXT_CSID))
+
+struct seg6_flavors_info {
+ /* Flavor operations */
+ __u32 flv_ops;
+
+ /* Locator-Block length, expressed in bits */
+ __u8 lcblock_bits;
+ /* Locator-Node Function length, expressed in bits */
+ __u8 lcnode_func_bits;
+};
+
enum seg6_end_dt_mode {
DT_INVALID_MODE = -EINVAL,
DT_LEGACY_MODE = 0,
@@ -136,6 +185,8 @@ struct seg6_local_lwt {
#ifdef CONFIG_NET_L3_MASTER_DEV
struct seg6_end_dt_info dt_info;
#endif
+ struct seg6_flavors_info flv_info;
+
struct pcpu_seg6_local_counters __percpu *pcpu_counters;
int headroom;
@@ -218,6 +269,7 @@ seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
struct flowi6 fl6;
int dev_flags = 0;
+ memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_iif = skb->dev->ifindex;
fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
fl6.saddr = hdr->saddr;
@@ -270,8 +322,50 @@ int seg6_lookup_nexthop(struct sk_buff *skb,
return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false);
}
-/* regular endpoint function */
-static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+static __u8 seg6_flv_lcblock_octects(const struct seg6_flavors_info *finfo)
+{
+ return finfo->lcblock_bits >> 3;
+}
+
+static __u8 seg6_flv_lcnode_func_octects(const struct seg6_flavors_info *finfo)
+{
+ return finfo->lcnode_func_bits >> 3;
+}
+
+static bool seg6_next_csid_is_arg_zero(const struct in6_addr *addr,
+ const struct seg6_flavors_info *finfo)
+{
+ __u8 fnc_octects = seg6_flv_lcnode_func_octects(finfo);
+ __u8 blk_octects = seg6_flv_lcblock_octects(finfo);
+ __u8 arg_octects;
+ int i;
+
+ arg_octects = 16 - blk_octects - fnc_octects;
+ for (i = 0; i < arg_octects; ++i) {
+ if (addr->s6_addr[blk_octects + fnc_octects + i] != 0x00)
+ return false;
+ }
+
+ return true;
+}
+
+/* assume that DA.Argument length > 0 */
+static void seg6_next_csid_advance_arg(struct in6_addr *addr,
+ const struct seg6_flavors_info *finfo)
+{
+ __u8 fnc_octects = seg6_flv_lcnode_func_octects(finfo);
+ __u8 blk_octects = seg6_flv_lcblock_octects(finfo);
+
+ /* advance DA.Argument */
+ memmove(&addr->s6_addr[blk_octects],
+ &addr->s6_addr[blk_octects + fnc_octects],
+ 16 - blk_octects - fnc_octects);
+
+ memset(&addr->s6_addr[16 - fnc_octects], 0x00, fnc_octects);
+}
+
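The shift above is easiest to see on a concrete address. A userspace sketch (illustration only; the layout assumes the default 32-bit Locator-Block and 16-bit Node Function) of the same memmove/memset on a 16-byte DA:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            unsigned char da[16] = {
                    0xfc, 0x00, 0x00, 0x00, /* Locator-Block (32 bits) */
                    0x01, 0x00,             /* current Node Function (16 bits) */
                    0x02, 0x00, 0x03, 0x00, /* remaining C-SIDs (argument) */
            };
            const int blk = 32 / 8, fnc = 16 / 8;

            /* slide the argument left; zero-fill the freed tail */
            memmove(&da[blk], &da[blk + fnc], 16 - blk - fnc);
            memset(&da[16 - fnc], 0, fnc);

            for (int i = 0; i < 16; i++)
                    printf("%02x%s", da[i], i % 2 ? " " : "");
            printf("\n"); /* prints: fc00 0000 0200 0300 0000 ... */
            return 0;
    }

The next 16-bit C-SID (0x0200) lands in the Node Function position, which is exactly what lets each NEXT-C-SID endpoint consume one C-SID per hop without rewriting the SRH.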
+static int input_action_end_core(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
{
struct ipv6_sr_hdr *srh;
@@ -290,6 +384,38 @@ drop:
return -EINVAL;
}
+static int end_next_csid_core(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ const struct seg6_flavors_info *finfo = &slwt->flv_info;
+ struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
+
+ if (seg6_next_csid_is_arg_zero(daddr, finfo))
+ return input_action_end_core(skb, slwt);
+
+ /* update DA */
+ seg6_next_csid_advance_arg(daddr, finfo);
+
+ seg6_lookup_nexthop(skb, NULL, 0);
+
+ return dst_input(skb);
+}
+
+static bool seg6_next_csid_enabled(__u32 fops)
+{
+ return fops & BIT(SEG6_LOCAL_FLV_OP_NEXT_CSID);
+}
+
+/* regular endpoint function */
+static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ const struct seg6_flavors_info *finfo = &slwt->flv_info;
+
+ if (seg6_next_csid_enabled(finfo->flv_ops))
+ return end_next_csid_core(skb, slwt);
+
+ return input_action_end_core(skb, slwt);
+}
+
/* regular endpoint, and forward to specified nexthop */
static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
@@ -825,7 +951,6 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
if (err)
goto drop;
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
seg6_lookup_nexthop(skb, NULL, 0);
@@ -857,7 +982,6 @@ static int input_action_end_b6_encap(struct sk_buff *skb,
if (err)
goto drop;
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
seg6_lookup_nexthop(skb, NULL, 0);
@@ -952,7 +1076,8 @@ static struct seg6_action_desc seg6_action_table[] = {
{
.action = SEG6_LOCAL_ACTION_END,
.attrs = 0,
- .optattrs = SEG6_F_LOCAL_COUNTERS,
+ .optattrs = SEG6_F_LOCAL_COUNTERS |
+ SEG6_F_ATTR(SEG6_LOCAL_FLAVORS),
.input = input_action_end,
},
{
@@ -1133,9 +1258,11 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_OIF] = { .type = NLA_U32 },
[SEG6_LOCAL_BPF] = { .type = NLA_NESTED },
[SEG6_LOCAL_COUNTERS] = { .type = NLA_NESTED },
+ [SEG6_LOCAL_FLAVORS] = { .type = NLA_NESTED },
};
-static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt,
+ struct netlink_ext_ack *extack)
{
struct ipv6_sr_hdr *srh;
int len;
@@ -1192,7 +1319,8 @@ static void destroy_attr_srh(struct seg6_local_lwt *slwt)
kfree(slwt->srh);
}
-static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt,
+ struct netlink_ext_ack *extack)
{
slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]);
@@ -1226,7 +1354,8 @@ seg6_end_dt_info *seg6_possible_end_dt_info(struct seg6_local_lwt *slwt)
}
static int parse_nla_vrftable(struct nlattr **attrs,
- struct seg6_local_lwt *slwt)
+ struct seg6_local_lwt *slwt,
+ struct netlink_ext_ack *extack)
{
struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);
@@ -1262,7 +1391,8 @@ static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
return 0;
}
-static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt,
+ struct netlink_ext_ack *extack)
{
memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
sizeof(struct in_addr));
@@ -1288,7 +1418,8 @@ static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr));
}
-static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt,
+ struct netlink_ext_ack *extack)
{
memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]),
sizeof(struct in6_addr));
@@ -1314,7 +1445,8 @@ static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr));
}
-static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt,
+ struct netlink_ext_ack *extack)
{
slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]);
@@ -1337,7 +1469,8 @@ static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
return 0;
}
-static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt,
+ struct netlink_ext_ack *extack)
{
slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]);
@@ -1367,7 +1500,8 @@ static const struct nla_policy bpf_prog_policy[SEG6_LOCAL_BPF_PROG_MAX + 1] = {
.len = MAX_PROG_NAME },
};
-static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[SEG6_LOCAL_BPF_PROG_MAX + 1];
struct bpf_prog *p;
@@ -1445,7 +1579,8 @@ nla_policy seg6_local_counters_policy[SEG6_LOCAL_CNT_MAX + 1] = {
};
static int parse_nla_counters(struct nlattr **attrs,
- struct seg6_local_lwt *slwt)
+ struct seg6_local_lwt *slwt,
+ struct netlink_ext_ack *extack)
{
struct pcpu_seg6_local_counters __percpu *pcounters;
struct nlattr *tb[SEG6_LOCAL_CNT_MAX + 1];
@@ -1543,8 +1678,195 @@ static void destroy_attr_counters(struct seg6_local_lwt *slwt)
free_percpu(slwt->pcpu_counters);
}
+static const
+struct nla_policy seg6_local_flavors_policy[SEG6_LOCAL_FLV_MAX + 1] = {
+ [SEG6_LOCAL_FLV_OPERATION] = { .type = NLA_U32 },
+ [SEG6_LOCAL_FLV_LCBLOCK_BITS] = { .type = NLA_U8 },
+ [SEG6_LOCAL_FLV_LCNODE_FN_BITS] = { .type = NLA_U8 },
+};
+
+/* check whether the lengths of the Locator-Block and Locator-Node Function
+ * are compatible with the size of a C-SID container.
+ */
+static int seg6_chk_next_csid_cfg(__u8 block_len, __u8 func_len)
+{
+ /* Locator-Block and Locator-Node Function cannot exceed 128 bits
+ * (i.e. the C-SID container length).
+ */
+ if (next_csid_chk_cntr_bits(block_len, func_len))
+ return -EINVAL;
+
+ /* Locator-Block length must be greater than zero and evenly divisible
+ * by 8. There must be room for a Locator-Node Function, at least.
+ */
+ if (next_csid_chk_lcblock_bits(block_len))
+ return -EINVAL;
+
+ /* Locator-Node Function length must be greater than zero and evenly
+ * divisible by 8. There must be room for the Locator-Block.
+ */
+ if (next_csid_chk_lcnode_fn_bits(func_len))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int seg6_parse_nla_next_csid_cfg(struct nlattr **tb,
+ struct seg6_flavors_info *finfo,
+ struct netlink_ext_ack *extack)
+{
+ __u8 func_len = SEG6_LOCAL_LCNODE_FN_DBITS;
+ __u8 block_len = SEG6_LOCAL_LCBLOCK_DBITS;
+ int rc;
+
+ if (tb[SEG6_LOCAL_FLV_LCBLOCK_BITS])
+ block_len = nla_get_u8(tb[SEG6_LOCAL_FLV_LCBLOCK_BITS]);
+
+ if (tb[SEG6_LOCAL_FLV_LCNODE_FN_BITS])
+ func_len = nla_get_u8(tb[SEG6_LOCAL_FLV_LCNODE_FN_BITS]);
+
+ rc = seg6_chk_next_csid_cfg(block_len, func_len);
+ if (rc < 0) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid Locator Block/Node Function lengths");
+ return rc;
+ }
+
+ finfo->lcblock_bits = block_len;
+ finfo->lcnode_func_bits = func_len;
+
+ return 0;
+}
+
+static int parse_nla_flavors(struct nlattr **attrs, struct seg6_local_lwt *slwt,
+ struct netlink_ext_ack *extack)
+{
+ struct seg6_flavors_info *finfo = &slwt->flv_info;
+ struct nlattr *tb[SEG6_LOCAL_FLV_MAX + 1];
+ unsigned long fops;
+ int rc;
+
+ rc = nla_parse_nested_deprecated(tb, SEG6_LOCAL_FLV_MAX,
+ attrs[SEG6_LOCAL_FLAVORS],
+ seg6_local_flavors_policy, NULL);
+ if (rc < 0)
+ return rc;
+
+ /* this attribute MUST always be present since it represents the Flavor
+ * operation(s) to be carried out.
+ */
+ if (!tb[SEG6_LOCAL_FLV_OPERATION])
+ return -EINVAL;
+
+ fops = nla_get_u32(tb[SEG6_LOCAL_FLV_OPERATION]);
+ if (fops & ~SEG6_LOCAL_FLV_SUPP_OPS) {
+ NL_SET_ERR_MSG(extack, "Unsupported Flavor operation(s)");
+ return -EOPNOTSUPP;
+ }
+
+ finfo->flv_ops = fops;
+
+ if (seg6_next_csid_enabled(fops)) {
+ /* Locator-Block and Locator-Node Function lengths can be
+ * provided by user space. Otherwise, default values are
+ * applied.
+ */
+ rc = seg6_parse_nla_next_csid_cfg(tb, finfo, extack);
+ if (rc < 0)
+ return rc;
+ }
+
+ return 0;
+}
+
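For reference, a hedged userspace sketch of the nested attribute layout this parser expects (assuming libmnl and the UAPI constants introduced by this series in <linux/seg6_local.h>; the snippet only fills a prepared nlmsghdr and is not a complete route request):

    #include <libmnl/libmnl.h>
    #include <linux/seg6_local.h>

    static void put_flavors(struct nlmsghdr *nlh)
    {
            struct nlattr *nest;

            nest = mnl_attr_nest_start(nlh, SEG6_LOCAL_FLAVORS);
            /* mandatory: the Flavor operation(s) bitmask */
            mnl_attr_put_u32(nlh, SEG6_LOCAL_FLV_OPERATION,
                             1 << SEG6_LOCAL_FLV_OP_NEXT_CSID);
            /* optional: override the default Block/Node Function lengths */
            mnl_attr_put_u8(nlh, SEG6_LOCAL_FLV_LCBLOCK_BITS, 32);
            mnl_attr_put_u8(nlh, SEG6_LOCAL_FLV_LCNODE_FN_BITS, 16);
            mnl_attr_nest_end(nlh, nest);
    }

With iproute2 this would presumably surface on an "action End" route as something like "flavors next-csid lblen 32 nflen 16", though the exact keywords are an assumption here.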
+static int seg6_fill_nla_next_csid_cfg(struct sk_buff *skb,
+ struct seg6_flavors_info *finfo)
+{
+ if (nla_put_u8(skb, SEG6_LOCAL_FLV_LCBLOCK_BITS, finfo->lcblock_bits))
+ return -EMSGSIZE;
+
+ if (nla_put_u8(skb, SEG6_LOCAL_FLV_LCNODE_FN_BITS,
+ finfo->lcnode_func_bits))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int put_nla_flavors(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct seg6_flavors_info *finfo = &slwt->flv_info;
+ __u32 fops = finfo->flv_ops;
+ struct nlattr *nest;
+ int rc;
+
+ nest = nla_nest_start(skb, SEG6_LOCAL_FLAVORS);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, SEG6_LOCAL_FLV_OPERATION, fops)) {
+ rc = -EMSGSIZE;
+ goto err;
+ }
+
+ if (seg6_next_csid_enabled(fops)) {
+ rc = seg6_fill_nla_next_csid_cfg(skb, finfo);
+ if (rc < 0)
+ goto err;
+ }
+
+ return nla_nest_end(skb, nest);
+
+err:
+ nla_nest_cancel(skb, nest);
+ return rc;
+}
+
+static int seg6_cmp_nla_next_csid_cfg(struct seg6_flavors_info *finfo_a,
+ struct seg6_flavors_info *finfo_b)
+{
+ if (finfo_a->lcblock_bits != finfo_b->lcblock_bits)
+ return 1;
+
+ if (finfo_a->lcnode_func_bits != finfo_b->lcnode_func_bits)
+ return 1;
+
+ return 0;
+}
+
+static int cmp_nla_flavors(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+ struct seg6_flavors_info *finfo_a = &a->flv_info;
+ struct seg6_flavors_info *finfo_b = &b->flv_info;
+
+ if (finfo_a->flv_ops != finfo_b->flv_ops)
+ return 1;
+
+ if (seg6_next_csid_enabled(finfo_a->flv_ops)) {
+ if (seg6_cmp_nla_next_csid_cfg(finfo_a, finfo_b))
+ return 1;
+ }
+
+ return 0;
+}
+
+static int encap_size_flavors(struct seg6_local_lwt *slwt)
+{
+ struct seg6_flavors_info *finfo = &slwt->flv_info;
+ int nlsize;
+
+ nlsize = nla_total_size(0) + /* nest SEG6_LOCAL_FLAVORS */
+ nla_total_size(4); /* SEG6_LOCAL_FLV_OPERATION */
+
+ if (seg6_next_csid_enabled(finfo->flv_ops))
+ nlsize += nla_total_size(1) + /* SEG6_LOCAL_FLV_LCBLOCK_BITS */
+ nla_total_size(1); /* SEG6_LOCAL_FLV_LCNODE_FN_BITS */
+
+ return nlsize;
+}
+
struct seg6_action_param {
- int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
+ int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt,
+ struct netlink_ext_ack *extack);
int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);
@@ -1594,6 +1916,10 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
.put = put_nla_counters,
.cmp = cmp_nla_counters,
.destroy = destroy_attr_counters },
+
+ [SEG6_LOCAL_FLAVORS] = { .parse = parse_nla_flavors,
+ .put = put_nla_flavors,
+ .cmp = cmp_nla_flavors },
};
/* call the destroy() callback (if available) for each set attribute in
@@ -1615,7 +1941,7 @@ static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed,
* callback. If the callback is not available, then we skip to the next
* attribute; otherwise, we call the destroy() callback.
*/
- for (i = 0; i < max_parsed; ++i) {
+ for (i = SEG6_LOCAL_SRH; i < max_parsed; ++i) {
if (!(parsed_attrs & SEG6_F_ATTR(i)))
continue;
@@ -1637,14 +1963,15 @@ static void destroy_attrs(struct seg6_local_lwt *slwt)
}
static int parse_nla_optional_attrs(struct nlattr **attrs,
- struct seg6_local_lwt *slwt)
+ struct seg6_local_lwt *slwt,
+ struct netlink_ext_ack *extack)
{
struct seg6_action_desc *desc = slwt->desc;
unsigned long parsed_optattrs = 0;
struct seg6_action_param *param;
int err, i;
- for (i = 0; i < SEG6_LOCAL_MAX + 1; ++i) {
+ for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; ++i) {
if (!(desc->optattrs & SEG6_F_ATTR(i)) || !attrs[i])
continue;
@@ -1653,7 +1980,7 @@ static int parse_nla_optional_attrs(struct nlattr **attrs,
*/
param = &seg6_action_params[i];
- err = param->parse(attrs, slwt);
+ err = param->parse(attrs, slwt, extack);
if (err < 0)
goto parse_optattrs_err;
@@ -1706,7 +2033,8 @@ static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt)
ops->destroy_state(slwt);
}
-static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt,
+ struct netlink_ext_ack *extack)
{
struct seg6_action_param *param;
struct seg6_action_desc *desc;
@@ -1743,21 +2071,21 @@ static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
}
/* parse the required attributes */
- for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
+ for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
if (desc->attrs & SEG6_F_ATTR(i)) {
if (!attrs[i])
return -EINVAL;
param = &seg6_action_params[i];
- err = param->parse(attrs, slwt);
+ err = param->parse(attrs, slwt, extack);
if (err < 0)
goto parse_attrs_err;
}
}
/* parse the optional attributes, if any */
- err = parse_nla_optional_attrs(attrs, slwt);
+ err = parse_nla_optional_attrs(attrs, slwt, extack);
if (err < 0)
goto parse_attrs_err;
@@ -1801,7 +2129,7 @@ static int seg6_local_build_state(struct net *net, struct nlattr *nla,
slwt = seg6_local_lwtunnel(newts);
slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]);
- err = parse_nla_action(tb, slwt);
+ err = parse_nla_action(tb, slwt, extack);
if (err < 0)
goto out_free;
@@ -1848,7 +2176,7 @@ static int seg6_local_fill_encap(struct sk_buff *skb,
attrs = slwt->desc->attrs | slwt->parsed_optattrs;
- for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
+ for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
if (attrs & SEG6_F_ATTR(i)) {
param = &seg6_action_params[i];
err = param->put(skb, slwt);
@@ -1905,6 +2233,9 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
/* SEG6_LOCAL_CNT_ERRORS */
nla_total_size_64bit(sizeof(__u64));
+ if (attrs & SEG6_F_ATTR(SEG6_LOCAL_FLAVORS))
+ nlsize += encap_size_flavors(slwt);
+
return nlsize;
}
@@ -1928,7 +2259,7 @@ static int seg6_local_cmp_encap(struct lwtunnel_state *a,
if (attrs_a != attrs_b)
return 1;
- for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
+ for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
if (attrs_a & SEG6_F_ATTR(i)) {
param = &seg6_action_params[i];
if (param->cmp(slwt_a, slwt_b))
@@ -1960,6 +2291,15 @@ int __init seg6_local_init(void)
*/
BUILD_BUG_ON(SEG6_LOCAL_MAX + 1 > BITS_PER_TYPE(unsigned long));
+ /* If the default NEXT-C-SID Locator-Block/Node Function lengths (in
+ * bits) have been changed to invalid values, the kernel build stops
+ * here.
+ */
+ BUILD_BUG_ON(next_csid_chk_cntr_bits(SEG6_LOCAL_LCBLOCK_DBITS,
+ SEG6_LOCAL_LCNODE_FN_DBITS));
+ BUILD_BUG_ON(next_csid_chk_lcblock_bits(SEG6_LOCAL_LCBLOCK_DBITS));
+ BUILD_BUG_ON(next_csid_chk_lcnode_fn_bits(SEG6_LOCAL_LCNODE_FN_DBITS));
+
return lwtunnel_encap_add_ops(&seg6_local_ops,
LWTUNNEL_ENCAP_SEG6_LOCAL);
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index a618dce7e0bc..5703d3cbea9b 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -254,7 +254,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
if (parms->name[0]) {
if (!dev_valid_name(parms->name))
goto failed;
- strlcpy(name, parms->name, IFNAMSIZ);
+ strscpy(name, parms->name, IFNAMSIZ);
} else {
strcpy(name, "sit%d");
}
@@ -323,8 +323,6 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u
kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) :
NULL;
- rcu_read_lock();
-
ca = min(t->prl_count, cmax);
if (!kp) {
@@ -341,7 +339,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u
}
}
- c = 0;
+ rcu_read_lock();
for_each_prl_rcu(t->prl) {
if (c >= cmax)
break;
@@ -353,7 +351,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u
if (kprl.addr != htonl(INADDR_ANY))
break;
}
-out:
+
rcu_read_unlock();
len = sizeof(*kp) * c;
@@ -362,7 +360,7 @@ out:
ret = -EFAULT;
kfree(kp);
-
+out:
return ret;
}
@@ -521,7 +519,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
ipip6_tunnel_del_prl(tunnel, NULL);
}
dst_cache_reset(&tunnel->dst_cache);
- dev_put_track(dev, &tunnel->dev_tracker);
+ netdev_put(dev, &tunnel->dev_tracker);
}
static int ipip6_err(struct sk_buff *skb, u32 info)
@@ -686,8 +684,6 @@ static int ipip6_rcv(struct sk_buff *skb)
tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
iph->saddr, iph->daddr, sifindex);
if (tunnel) {
- struct pcpu_sw_netstats *tstats;
-
if (tunnel->parms.iph.protocol != IPPROTO_IPV6 &&
tunnel->parms.iph.protocol != 0)
goto out;
@@ -724,11 +720,7 @@ static int ipip6_rcv(struct sk_buff *skb)
}
}
- tstats = this_cpu_ptr(tunnel->dev->tstats);
- u64_stats_update_begin(&tstats->syncp);
- tstats->rx_packets++;
- tstats->rx_bytes += skb->len;
- u64_stats_update_end(&tstats->syncp);
+ dev_sw_netstats_rx_add(tunnel->dev, skb->len);
netif_rx(skb);
@@ -956,7 +948,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr);
}
- if (rt->rt_type != RTN_UNICAST) {
+ if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
ip_rt_put(rt);
dev->stats.tx_carrier_errors++;
goto tx_error_icmp;
@@ -1132,10 +1124,12 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
if (tdev && !netif_is_l3_master(tdev)) {
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+ int mtu;
- dev->mtu = tdev->mtu - t_hlen;
- if (dev->mtu < IPV6_MIN_MTU)
- dev->mtu = IPV6_MIN_MTU;
+ mtu = tdev->mtu - t_hlen;
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ WRITE_ONCE(dev->mtu, mtu);
}
}
@@ -1463,7 +1457,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
dev->tstats = NULL;
return err;
}
- dev_hold_track(dev, &tunnel->dev_tracker, GFP_KERNEL);
+ netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL);
return 0;
}
@@ -1511,71 +1505,12 @@ static void ipip6_netlink_parms(struct nlattr *data[],
if (!data)
return;
- if (data[IFLA_IPTUN_LINK])
- parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
-
- if (data[IFLA_IPTUN_LOCAL])
- parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
-
- if (data[IFLA_IPTUN_REMOTE])
- parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
-
- if (data[IFLA_IPTUN_TTL]) {
- parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
- if (parms->iph.ttl)
- parms->iph.frag_off = htons(IP_DF);
- }
-
- if (data[IFLA_IPTUN_TOS])
- parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
-
- if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
- parms->iph.frag_off = htons(IP_DF);
-
- if (data[IFLA_IPTUN_FLAGS])
- parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
-
- if (data[IFLA_IPTUN_PROTO])
- parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+ ip_tunnel_netlink_parms(data, parms);
if (data[IFLA_IPTUN_FWMARK])
*fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
}
-/* This function returns true when ENCAP attributes are present in the nl msg */
-static bool ipip6_netlink_encap_parms(struct nlattr *data[],
- struct ip_tunnel_encap *ipencap)
-{
- bool ret = false;
-
- memset(ipencap, 0, sizeof(*ipencap));
-
- if (!data)
- return ret;
-
- if (data[IFLA_IPTUN_ENCAP_TYPE]) {
- ret = true;
- ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
- }
-
- if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
- ret = true;
- ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
- }
-
- if (data[IFLA_IPTUN_ENCAP_SPORT]) {
- ret = true;
- ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
- }
-
- if (data[IFLA_IPTUN_ENCAP_DPORT]) {
- ret = true;
- ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
- }
-
- return ret;
-}
-
#ifdef CONFIG_IPV6_SIT_6RD
/* This function returns true when 6RD attributes are present in the nl msg */
static bool ipip6_netlink_6rd_parms(struct nlattr *data[],
@@ -1627,7 +1562,7 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
nt = netdev_priv(dev);
- if (ipip6_netlink_encap_parms(data, &ipencap)) {
+ if (ip_tunnel_netlink_encap_parms(data, &ipencap)) {
err = ip_tunnel_encap_setup(nt, &ipencap);
if (err < 0)
return err;
@@ -1679,7 +1614,7 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
if (dev == sitn->fb_tunnel_dev)
return -EINVAL;
- if (ipip6_netlink_encap_parms(data, &ipencap)) {
+ if (ip_tunnel_netlink_encap_parms(data, &ipencap)) {
err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0)
return err;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index d1b61d00368e..5014aa663452 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -141,7 +141,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
__u8 rcv_wscale;
u32 tsoff = 0;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
+ !th->ack || th->rst)
goto out;
if (tcp_synq_no_recent_overflow(sk))
@@ -170,7 +171,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb);
+ req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops,
+ &tcp_request_sock_ipv6_ops, sk, skb);
if (!req)
goto out;
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index d53dd142bf87..94a0a294c6a1 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -23,8 +23,6 @@
#endif
#include <linux/ioam6.h>
-static int two = 2;
-static int three = 3;
static int flowlabel_reflect_max = 0x7;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
static u32 rt6_multipath_hash_fields_all_mask =
@@ -172,7 +170,7 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_rt6_multipath_hash_policy,
.extra1 = SYSCTL_ZERO,
- .extra2 = &three,
+ .extra2 = SYSCTL_THREE,
},
{
.procname = "fib_multipath_hash_fields",
@@ -197,7 +195,7 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "ioam6_id",
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 075ee8a2df3b..2a3f9296df1e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -146,17 +146,18 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
- struct inet_sock *inet = inet_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct in6_addr *saddr = NULL, *final_p, final;
+ struct inet_timewait_death_row *tcp_death_row;
struct ipv6_pinfo *np = tcp_inet6_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- struct in6_addr *saddr = NULL, *final_p, final;
+ struct net *net = sock_net(sk);
struct ipv6_txoptions *opt;
- struct flowi6 fl6;
struct dst_entry *dst;
+ struct flowi6 fl6;
int addr_type;
int err;
- struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
@@ -230,14 +231,15 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
u32 exthdrlen = icsk->icsk_ext_hdr_len;
struct sockaddr_in sin;
- if (__ipv6_only_sock(sk))
+ if (ipv6_only_sock(sk))
return -ENETUNREACH;
sin.sin_family = AF_INET;
sin.sin_port = usin->sin6_port;
sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
- icsk->icsk_af_ops = &ipv6_mapped;
+ /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
+ WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
if (sk_is_mptcp(sk))
mptcpv6_handle_mapped(sk, true);
sk->sk_backlog_rcv = tcp_v4_do_rcv;
@@ -249,7 +251,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (err) {
icsk->icsk_ext_hdr_len = exthdrlen;
- icsk->icsk_af_ops = &ipv6_specific;
+ /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
+ WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
if (sk_is_mptcp(sk))
mptcpv6_handle_mapped(sk, false);
sk->sk_backlog_rcv = tcp_v6_do_rcv;
@@ -280,15 +283,33 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
- dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto failure;
}
+ tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
+
if (!saddr) {
+ struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
+ struct in6_addr prev_v6_rcv_saddr;
+
+ if (icsk->icsk_bind2_hash) {
+ prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo,
+ sk, net, inet->inet_num);
+ prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
+ }
saddr = &fl6.saddr;
sk->sk_v6_rcv_saddr = *saddr;
+
+ if (prev_addr_hashbucket) {
+ err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
+ if (err) {
+ sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr;
+ goto failure;
+ }
+ }
}
/* set the source address */
@@ -321,8 +342,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->sk_v6_daddr.s6_addr32,
inet->inet_sport,
inet->inet_dport));
- tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
- np->saddr.s6_addr32,
+ tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
sk->sk_v6_daddr.s6_addr32);
}
@@ -385,7 +405,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
bool fatal;
int err;
- sk = __inet6_lookup_established(net, &tcp_hashinfo,
+ sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
&hdr->daddr, th->dest,
&hdr->saddr, ntohs(th->source),
skb->dev->ifindex, inet6_sdif(skb));
@@ -545,7 +565,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
if (np->repflow && ireq->pktopts)
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
- tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
+ tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
(np->tclass & INET_ECN_MASK) :
np->tclass;
@@ -772,57 +792,6 @@ clear_hash_noput:
#endif
-static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb,
- int dif, int sdif)
-{
-#ifdef CONFIG_TCP_MD5SIG
- const __u8 *hash_location = NULL;
- struct tcp_md5sig_key *hash_expected;
- const struct ipv6hdr *ip6h = ipv6_hdr(skb);
- const struct tcphdr *th = tcp_hdr(skb);
- int genhash, l3index;
- u8 newhash[16];
-
- /* sdif set, means packet ingressed via a device
- * in an L3 domain and dif is set to the l3mdev
- */
- l3index = sdif ? dif : 0;
-
- hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
- hash_location = tcp_parse_md5sig_option(th);
-
- /* We've parsed the options - do we have a hash? */
- if (!hash_expected && !hash_location)
- return false;
-
- if (hash_expected && !hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
- return true;
- }
-
- if (!hash_expected && hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
- return true;
- }
-
- /* check the signature */
- genhash = tcp_v6_md5_hash_skb(newhash,
- hash_expected,
- NULL, skb);
-
- if (genhash || memcmp(hash_location, newhash, 16) != 0) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
- net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
- genhash ? "failed" : "mismatch",
- &ip6h->saddr, ntohs(th->source),
- &ip6h->daddr, ntohs(th->dest), l3index);
- return true;
- }
-#endif
- return false;
-}
-
static void tcp_v6_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
@@ -891,7 +860,7 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr,
int oif, struct tcp_md5sig_key *key, int rst,
- u8 tclass, __be32 label, u32 priority)
+ u8 tclass, __be32 label, u32 priority, u32 txhash)
{
const struct tcphdr *th = tcp_hdr(skb);
struct tcphdr *t1;
@@ -920,12 +889,11 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
}
#endif
- buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
- GFP_ATOMIC);
+ buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
if (!buff)
return;
- skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
+ skb_reserve(buff, MAX_TCP_HEADER);
t1 = skb_push(buff, tot_len);
skb_reset_transport_header(buff);
@@ -983,15 +951,15 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
}
if (sk) {
- if (sk->sk_state == TCP_TIME_WAIT) {
+ if (sk->sk_state == TCP_TIME_WAIT)
mark = inet_twsk(sk)->tw_mark;
- /* autoflowlabel relies on buff->hash */
- skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
- PKT_HASH_TYPE_L4);
- } else {
+ else
mark = sk->sk_mark;
- }
- buff->tstamp = tcp_transmit_time(sk);
+ skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
+ }
+ if (txhash) {
+ /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
+ skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
}
fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
fl6.fl6_dport = t1->dest;
@@ -1003,7 +971,10 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
* Underlying function will use this to retrieve the network
* namespace
*/
- dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
+ if (sk && sk->sk_state != TCP_TIME_WAIT)
+ dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /* sk's xfrm_policy can be referred to */
+ else
+ dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
if (!IS_ERR(dst)) {
skb_dst_set(buff, dst);
ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
@@ -1032,6 +1003,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
__be32 label = 0;
u32 priority = 0;
struct net *net;
+ u32 txhash = 0;
int oif = 0;
if (th->rst)
@@ -1067,11 +1039,10 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
* Incoming packet is checked with md5 hash with finding key,
* no RST generated if md5 hash doesn't match.
*/
- sk1 = inet6_lookup_listener(net,
- &tcp_hashinfo, NULL, 0,
- &ipv6h->saddr,
- th->source, &ipv6h->daddr,
- ntohs(th->source), dif, sdif);
+ sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
+ NULL, 0, &ipv6h->saddr, th->source,
+ &ipv6h->daddr, ntohs(th->source),
+ dif, sdif);
if (!sk1)
goto out;
@@ -1105,10 +1076,12 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
if (np->repflow)
label = ip6_flowlabel(ipv6h);
priority = sk->sk_priority;
+ txhash = sk->sk_hash;
}
if (sk->sk_state == TCP_TIME_WAIT) {
label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
priority = inet_twsk(sk)->tw_priority;
+ txhash = inet_twsk(sk)->tw_txhash;
}
} else {
if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
@@ -1116,7 +1089,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
}
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
- ipv6_get_dsfield(ipv6h), label, priority);
+ ipv6_get_dsfield(ipv6h), label, priority, txhash);
#ifdef CONFIG_TCP_MD5SIG
out:
@@ -1127,10 +1100,10 @@ out:
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key, u8 tclass,
- __be32 label, u32 priority)
+ __be32 label, u32 priority, u32 txhash)
{
tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
- tclass, label, priority);
+ tclass, label, priority, txhash);
}
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -1142,7 +1115,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
- tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
+ tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
+ tw->tw_txhash);
inet_twsk_put(tw);
}
@@ -1169,7 +1143,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent, sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
- ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
+ ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
+ tcp_rsk(req)->txhash);
}
@@ -1365,7 +1340,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
/* Set ToS of the new socket based upon the value of incoming SYN.
* ECT bits are set later in tcp_init_transfer().
*/
- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
/* Clone native IPv6 options from listening socket (if any)
@@ -1471,6 +1446,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct sk_buff *opt_skb = NULL;
+ enum skb_drop_reason reason;
struct tcp_sock *tp;
/* Imagine: socket is IPv6. IPv4 packet arrives,
@@ -1505,6 +1481,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (np->rxopt.all)
opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
struct dst_entry *dst;
@@ -1558,9 +1535,10 @@ reset:
discard:
if (opt_skb)
__kfree_skb(opt_skb);
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return 0;
csum_err:
+ reason = SKB_DROP_REASON_TCP_CSUM;
trace_tcp_bad_csum(skb);
TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
@@ -1626,6 +1604,7 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
+ enum skb_drop_reason drop_reason;
int sdif = inet6_sdif(skb);
int dif = inet6_iif(skb);
const struct tcphdr *th;
@@ -1635,6 +1614,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
int ret;
struct net *net = dev_net(skb->dev);
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (skb->pkt_type != PACKET_HOST)
goto discard_it;
@@ -1648,8 +1628,10 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
th = (const struct tcphdr *)skb->data;
- if (unlikely(th->doff < sizeof(struct tcphdr)/4))
+ if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
+ drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
goto bad_packet;
+ }
if (!pskb_may_pull(skb, th->doff*4))
goto discard_it;
@@ -1660,7 +1642,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
hdr = ipv6_hdr(skb);
lookup:
- sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
+ sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
th->source, th->dest, inet6_iif(skb), sdif,
&refcounted);
if (!sk)
@@ -1676,7 +1658,10 @@ process:
struct sock *nsk;
sk = req->rsk_listener;
- if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
+ drop_reason = tcp_inbound_md5_hash(sk, skb,
+ &hdr->saddr, &hdr->daddr,
+ AF_INET6, dif, sdif);
+ if (drop_reason) {
sk_drops_add(sk, skb);
reqsk_put(req);
goto discard_it;
@@ -1705,6 +1690,8 @@ process:
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
+ } else {
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
}
if (!nsk) {
reqsk_put(req);
@@ -1740,14 +1727,20 @@ process:
}
}
- if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+ if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
goto discard_and_relse;
+ }
- if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
+ drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
+ AF_INET6, dif, sdif);
+ if (drop_reason)
goto discard_and_relse;
- if (tcp_filter(sk, skb))
+ if (tcp_filter(sk, skb)) {
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
goto discard_and_relse;
+ }
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
@@ -1761,14 +1754,13 @@ process:
sk_incoming_cpu_update(sk);
- sk_defer_free_flush(sk);
bh_lock_sock_nested(sk);
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
if (!sock_owned_by_user(sk)) {
ret = tcp_v6_do_rcv(sk, skb);
} else {
- if (tcp_add_backlog(sk, skb))
+ if (tcp_add_backlog(sk, skb, &drop_reason))
goto discard_and_relse;
}
bh_unlock_sock(sk);
@@ -1778,6 +1770,7 @@ put_and_return:
return ret ? -1 : 0;
no_tcp_socket:
+ drop_reason = SKB_DROP_REASON_NO_SOCKET;
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;
@@ -1785,6 +1778,7 @@ no_tcp_socket:
if (tcp_checksum_complete(skb)) {
csum_error:
+ drop_reason = SKB_DROP_REASON_TCP_CSUM;
trace_tcp_bad_csum(skb);
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
@@ -1794,7 +1788,8 @@ bad_packet:
}
discard_it:
- kfree_skb(skb);
+ SKB_DR_OR(drop_reason, NOT_SPECIFIED);
+ kfree_skb_reason(skb, drop_reason);
return 0;
discard_and_relse:
@@ -1805,6 +1800,7 @@ discard_and_relse:
do_time_wait:
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
inet_twsk_put(inet_twsk(sk));
goto discard_it;
}
@@ -1821,7 +1817,7 @@ do_time_wait:
{
struct sock *sk2;
- sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
+ sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
skb, __tcp_hdrlen(th),
&ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr,
@@ -1852,8 +1848,9 @@ do_time_wait:
goto discard_it;
}
-INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
+void tcp_v6_early_demux(struct sk_buff *skb)
{
+ struct net *net = dev_net(skb->dev);
const struct ipv6hdr *hdr;
const struct tcphdr *th;
struct sock *sk;
@@ -1871,7 +1868,7 @@ INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
return;
/* Note : We use inet6_iif() here, not tcp_v6_iif() */
- sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
+ sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
&hdr->saddr, th->source,
&hdr->daddr, ntohs(th->dest),
inet6_iif(skb), inet6_sdif(skb));
@@ -2074,7 +2071,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
jiffies_to_clock_t(icsk->icsk_rto),
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
- tp->snd_cwnd,
+ tcp_snd_cwnd(tp),
state == TCP_LISTEN ?
fastopenq->max_qlen :
(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
@@ -2189,7 +2186,10 @@ struct proto tcpv6_prot = {
.leave_memory_pressure = tcp_leave_memory_pressure,
.stream_memory_free = tcp_stream_memory_free,
.sockets_allocated = &tcp_sockets_allocated,
+
.memory_allocated = &tcp_memory_allocated,
+ .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc,
+
.memory_pressure = &tcp_memory_pressure,
.orphan_count = &tcp_orphan_count,
.sysctl_mem = sysctl_tcp_mem,
@@ -2200,18 +2200,13 @@ struct proto tcpv6_prot = {
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.twsk_prot = &tcp6_timewait_sock_ops,
.rsk_prot = &tcp6_request_sock_ops,
- .h.hashinfo = &tcp_hashinfo,
+ .h.hashinfo = NULL,
.no_autobind = true,
.diag_destroy = tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct inet6_protocol tcpv6_protocol = {
- .early_demux = tcp_v6_early_demux,
- .early_demux_handler = tcp_v6_early_demux,
+static const struct inet6_protocol tcpv6_protocol = {
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
@@ -2239,7 +2234,7 @@ static void __net_exit tcpv6_net_exit(struct net *net)
static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
- inet_twsk_purge(&tcp_hashinfo, AF_INET6);
+ tcp_twsk_purge(net_exit_list, AF_INET6);
}
static struct pernet_operations tcpv6_net_ops = {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 528b81ef19c9..bc65e5b7195b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -56,6 +56,20 @@
#include <trace/events/skb.h>
#include "udp_impl.h"
+static void udpv6_destruct_sock(struct sock *sk)
+{
+ udp_destruct_common(sk);
+ inet6_sock_destruct(sk);
+}
+
+int udpv6_init_sock(struct sock *sk)
+{
+ skb_queue_head_init(&udp_sk(sk)->reader_queue);
+ sk->sk_destruct = udpv6_destruct_sock;
+ set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
+ return 0;
+}
+
static u32 udp6_ehashfn(const struct net *net,
const struct in6_addr *laddr,
const u16 lport,
@@ -105,7 +119,7 @@ static int compute_score(struct sock *sk, struct net *net,
const struct in6_addr *daddr, unsigned short hnum,
int dif, int sdif)
{
- int score;
+ int bound_dev_if, score;
struct inet_sock *inet;
bool dev_match;
@@ -132,10 +146,11 @@ static int compute_score(struct sock *sk, struct net *net,
score++;
}
- dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif);
+ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ dev_match = udp_sk_bound_dev_eq(net, bound_dev_if, dif, sdif);
if (!dev_match)
return -1;
- if (sk->sk_bound_dev_if)
+ if (bound_dev_if)
score++;
if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
@@ -181,7 +196,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
result = lookup_reuseport(net, sk, skb,
saddr, sport, daddr, hnum);
/* Fall back to scoring if group has connections */
- if (result && !reuseport_has_conns(sk, false))
+ if (result && !reuseport_has_conns(sk))
return result;
result = result ? : sk;
@@ -322,7 +337,7 @@ static int udp6_skb_len(struct sk_buff *skb)
*/
int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
@@ -342,7 +357,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
try_again:
off = sk_peek_offset(sk, flags);
- skb = __skb_recv_udp(sk, flags, noblock, &off, &err);
+ skb = __skb_recv_udp(sk, flags, &off, &err);
if (!skb)
return err;
@@ -391,7 +406,7 @@ try_again:
if (!peeking)
SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS);
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
/* Copy the address. */
if (msg->msg_name) {
@@ -615,8 +630,11 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
/* Tunnels don't have an application socket: don't pass errors back */
- if (tunnel)
+ if (tunnel) {
+ if (udp_sk(sk)->encap_err_rcv)
+ udp_sk(sk)->encap_err_rcv(sk, skb, offset);
goto out;
+ }
if (!np->recverr) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
@@ -646,16 +664,20 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
rc = __udp_enqueue_schedule_skb(sk, skb);
if (rc < 0) {
int is_udplite = IS_UDPLITE(sk);
+ enum skb_drop_reason drop_reason;
/* Note that an ENOMEM error is charged twice */
- if (rc == -ENOMEM)
+ if (rc == -ENOMEM) {
UDP6_INC_STATS(sock_net(sk),
UDP_MIB_RCVBUFERRORS, is_udplite);
- else
+ drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
+ } else {
UDP6_INC_STATS(sock_net(sk),
UDP_MIB_MEMERRORS, is_udplite);
+ drop_reason = SKB_DROP_REASON_PROTO_MEM;
+ }
UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
return -1;
}
@@ -671,11 +693,14 @@ static __inline__ int udpv6_err(struct sk_buff *skb,
static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
{
+ enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct udp_sock *up = udp_sk(sk);
int is_udplite = IS_UDPLITE(sk);
- if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+ if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop;
+ }
if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) {
int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
@@ -734,8 +759,10 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
udp_lib_checksum_complete(skb))
goto csum_error;
- if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr)))
+ if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) {
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
goto drop;
+ }
udp_csum_pull_header(skb);
@@ -744,11 +771,12 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
return __udpv6_queue_rcv_skb(sk, skb);
csum_error:
+ drop_reason = SKB_DROP_REASON_UDP_CSUM;
__UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
drop:
__UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
return -1;
}
@@ -789,7 +817,7 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
(inet->inet_dport && inet->inet_dport != rmt_port) ||
(!ipv6_addr_any(&sk->sk_v6_daddr) &&
!ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
- !udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif) ||
+ !udp_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, sdif) ||
(!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)))
return false;
@@ -912,6 +940,7 @@ static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb,
int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int proto)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
const struct in6_addr *saddr, *daddr;
struct net *net = dev_net(skb->dev);
struct udphdr *uh;
@@ -988,6 +1017,8 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
return udp6_unicast_rcv_skb(sk, skb, uh);
}
+ reason = SKB_DROP_REASON_NO_SOCKET;
+
if (!uh->check)
goto report_csum_error;
@@ -1000,10 +1031,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
__UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return 0;
short_packet:
+ if (reason == SKB_DROP_REASON_NOT_SPECIFIED)
+ reason = SKB_DROP_REASON_PKT_TOO_SMALL;
net_dbg_ratelimited("UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n",
proto == IPPROTO_UDPLITE ? "-Lite" : "",
saddr, ntohs(uh->source),
@@ -1014,10 +1047,12 @@ short_packet:
report_csum_error:
udp6_csum_zero_error(skb);
csum_error:
+ if (reason == SKB_DROP_REASON_NOT_SPECIFIED)
+ reason = SKB_DROP_REASON_UDP_CSUM;
__UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
discard:
__UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return 0;
}
@@ -1036,7 +1071,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
if (sk->sk_state == TCP_ESTABLISHED &&
- INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif, sdif))
+ inet6_match(net, sk, rmt_addr, loc_addr, ports, dif, sdif))
return sk;
/* Only check first socket in chain */
break;
@@ -1044,7 +1079,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
return NULL;
}
-INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb)
+void udp_v6_early_demux(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
const struct udphdr *uh;
@@ -1116,7 +1151,7 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
* bytes that are out of the bound specified by user in addr_len.
*/
if (uaddr->sa_family == AF_INET) {
- if (__ipv6_only_sock(sk))
+ if (ipv6_only_sock(sk))
return -EAFNOSUPPORT;
return udp_pre_connect(sk, uaddr, addr_len);
}
@@ -1266,23 +1301,17 @@ static int udp_v6_push_pending_frames(struct sock *sk)
{
struct sk_buff *skb;
struct udp_sock *up = udp_sk(sk);
- struct flowi6 fl6;
int err = 0;
if (up->pending == AF_INET)
return udp_push_pending_frames(sk);
- /* ip6_finish_skb will release the cork, so make a copy of
- * fl6 here.
- */
- fl6 = inet_sk(sk)->cork.fl.u.ip6;
-
skb = ip6_finish_skb(sk);
if (!skb)
goto out;
- err = udp_v6_send_skb(skb, &fl6, &inet_sk(sk)->cork.base);
-
+ err = udp_v6_send_skb(skb, &inet_sk(sk)->cork.fl.u.ip6,
+ &inet_sk(sk)->cork.base);
out:
up->len = 0;
up->pending = 0;
@@ -1300,7 +1329,8 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct ipv6_txoptions *opt = NULL;
struct ipv6_txoptions *opt_to_free = NULL;
struct ip6_flowlabel *flowlabel = NULL;
- struct flowi6 fl6;
+ struct inet_cork_full cork;
+ struct flowi6 *fl6 = &cork.fl.u.ip6;
struct dst_entry *dst;
struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
@@ -1357,15 +1387,12 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
msg->msg_name = &sin;
msg->msg_namelen = sizeof(sin);
do_udp_sendmsg:
- if (__ipv6_only_sock(sk))
+ if (ipv6_only_sock(sk))
return -ENETUNREACH;
return udp_sendmsg(sk, msg, len);
}
}
- if (up->pending == AF_INET)
- return udp_sendmsg(sk, msg, len);
-
/* Rough check on arithmetic overflow,
better check is made in ip6_append_data().
*/
@@ -1374,6 +1401,8 @@ do_udp_sendmsg:
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
if (up->pending) {
+ if (up->pending == AF_INET)
+ return udp_sendmsg(sk, msg, len);
/*
* There are pending frames.
* The socket lock must be held while it's corked.
@@ -1391,19 +1420,19 @@ do_udp_sendmsg:
}
ulen += sizeof(struct udphdr);
- memset(&fl6, 0, sizeof(fl6));
+ memset(fl6, 0, sizeof(*fl6));
if (sin6) {
if (sin6->sin6_port == 0)
return -EINVAL;
- fl6.fl6_dport = sin6->sin6_port;
+ fl6->fl6_dport = sin6->sin6_port;
daddr = &sin6->sin6_addr;
if (np->sndflow) {
- fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
- if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
- flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
+ fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+ if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
+ flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
if (IS_ERR(flowlabel))
return -EINVAL;
}
@@ -1420,24 +1449,24 @@ do_udp_sendmsg:
if (addr_len >= sizeof(struct sockaddr_in6) &&
sin6->sin6_scope_id &&
__ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))
- fl6.flowi6_oif = sin6->sin6_scope_id;
+ fl6->flowi6_oif = sin6->sin6_scope_id;
} else {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
- fl6.fl6_dport = inet->inet_dport;
+ fl6->fl6_dport = inet->inet_dport;
daddr = &sk->sk_v6_daddr;
- fl6.flowlabel = np->flow_label;
+ fl6->flowlabel = np->flow_label;
connected = true;
}
- if (!fl6.flowi6_oif)
- fl6.flowi6_oif = sk->sk_bound_dev_if;
+ if (!fl6->flowi6_oif)
+ fl6->flowi6_oif = READ_ONCE(sk->sk_bound_dev_if);
- if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
+ if (!fl6->flowi6_oif)
+ fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
- fl6.flowi6_uid = sk->sk_uid;
+ fl6->flowi6_uid = sk->sk_uid;
if (msg->msg_controllen) {
opt = &opt_space;
@@ -1447,14 +1476,14 @@ do_udp_sendmsg:
err = udp_cmsg_send(sk, msg, &ipc6.gso_size);
if (err > 0)
- err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6,
+ err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6,
&ipc6);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
}
- if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
- flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
+ if ((fl6->flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
+ flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
if (IS_ERR(flowlabel))
return -EINVAL;
}
@@ -1471,16 +1500,17 @@ do_udp_sendmsg:
opt = ipv6_fixup_options(&opt_space, opt);
ipc6.opt = opt;
- fl6.flowi6_proto = sk->sk_protocol;
- fl6.flowi6_mark = ipc6.sockc.mark;
- fl6.daddr = *daddr;
- if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
- fl6.saddr = np->saddr;
- fl6.fl6_sport = inet->inet_sport;
+ fl6->flowi6_proto = sk->sk_protocol;
+ fl6->flowi6_mark = ipc6.sockc.mark;
+ fl6->daddr = *daddr;
+ if (ipv6_addr_any(&fl6->saddr) && !ipv6_addr_any(&np->saddr))
+ fl6->saddr = np->saddr;
+ fl6->fl6_sport = inet->inet_sport;
if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
- (struct sockaddr *)sin6, &fl6.saddr);
+ (struct sockaddr *)sin6,
+ &fl6->saddr);
if (err)
goto out_no_dst;
if (sin6) {
@@ -1496,32 +1526,32 @@ do_udp_sendmsg:
err = -EINVAL;
goto out_no_dst;
}
- fl6.fl6_dport = sin6->sin6_port;
- fl6.daddr = sin6->sin6_addr;
+ fl6->fl6_dport = sin6->sin6_port;
+ fl6->daddr = sin6->sin6_addr;
}
}
- if (ipv6_addr_any(&fl6.daddr))
- fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
+ if (ipv6_addr_any(&fl6->daddr))
+ fl6->daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
- final_p = fl6_update_dst(&fl6, opt, &final);
+ final_p = fl6_update_dst(fl6, opt, &final);
if (final_p)
connected = false;
- if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) {
- fl6.flowi6_oif = np->mcast_oif;
+ if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) {
+ fl6->flowi6_oif = np->mcast_oif;
connected = false;
- } else if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->ucast_oif;
+ } else if (!fl6->flowi6_oif)
+ fl6->flowi6_oif = np->ucast_oif;
- security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
if (ipc6.tclass < 0)
ipc6.tclass = np->tclass;
- fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
+ fl6->flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6->flowlabel);
- dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, connected);
+ dst = ip6_sk_dst_lookup_flow(sk, fl6, final_p, connected);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
dst = NULL;
@@ -1529,7 +1559,7 @@ do_udp_sendmsg:
}
if (ipc6.hlimit < 0)
- ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, fl6, dst);
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
@@ -1537,17 +1567,17 @@ back_from_confirm:
/* Lockless fast path for the non-corking case */
if (!corkreq) {
- struct inet_cork_full cork;
struct sk_buff *skb;
skb = ip6_make_skb(sk, getfrag, msg, ulen,
sizeof(struct udphdr), &ipc6,
- &fl6, (struct rt6_info *)dst,
+ (struct rt6_info *)dst,
msg->msg_flags, &cork);
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
- err = udp_v6_send_skb(skb, &fl6, &cork.base);
- goto out;
+ err = udp_v6_send_skb(skb, fl6, &cork.base);
+ /* ip6_make_skb steals dst reference */
+ goto out_no_dst;
}
lock_sock(sk);
@@ -1568,7 +1598,7 @@ do_append_data:
ipc6.dontfrag = np->dontfrag;
up->len += ulen;
err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
- &ipc6, &fl6, (struct rt6_info *)dst,
+ &ipc6, fl6, (struct rt6_info *)dst,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err)
udp_v6_flush_pending_frames(sk);
@@ -1603,7 +1633,7 @@ out_no_dst:
do_confirm:
if (msg->msg_flags & MSG_PROBE)
- dst_confirm_neigh(dst, &fl6.daddr);
+ dst_confirm_neigh(dst, &fl6->daddr);
if (!(msg->msg_flags&MSG_PROBE) || len)
goto back_from_confirm;
err = 0;
@@ -1657,12 +1687,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname,
return ipv6_getsockopt(sk, level, optname, optval, optlen);
}
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct inet6_protocol udpv6_protocol = {
- .early_demux = udp_v6_early_demux,
- .early_demux_handler = udp_v6_early_demux,
+static const struct inet6_protocol udpv6_protocol = {
.handler = udpv6_rcv,
.err_handler = udpv6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
@@ -1722,7 +1747,7 @@ struct proto udpv6_prot = {
.connect = ip6_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
- .init = udp_init_sock,
+ .init = udpv6_init_sock,
.destroy = udpv6_destroy_sock,
.setsockopt = udpv6_setsockopt,
.getsockopt = udpv6_getsockopt,
@@ -1737,7 +1762,10 @@ struct proto udpv6_prot = {
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = udp_bpf_update_proto,
#endif
+
.memory_allocated = &udp_memory_allocated,
+ .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc,
+
.sysctl_mem = sysctl_udp_mem,
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
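
The udpv6_sendmsg() change above stops building a flowi6 on the stack and instead aliases it into the inet_cork_full that already travels through the corked path, so ip6_make_skb() and udp_v6_send_skb() operate on the same storage. A minimal userspace sketch of that aliasing pattern; all type and function names below are local stand-ins, not the kernel's:

#include <stdio.h>
#include <string.h>

struct flow6 { int oif; unsigned int dport; };	/* stand-in for flowi6 */
struct cork_full { struct flow6 fl; /* ...per-cork state... */ };

static void send_one(const struct flow6 *fl6)
{
	printf("oif=%d dport=%u\n", fl6->oif, fl6->dport);
}

int main(void)
{
	struct cork_full cork;			/* one stack object */
	struct flow6 *fl6 = &cork.fl;		/* aliased, never copied */

	memset(fl6, 0, sizeof(*fl6));
	fl6->oif = 2;
	fl6->dport = 53;
	send_one(fl6);		/* the same storage reaches both the corked
				 * and the uncorked (ip6_make_skb) paths */
	return 0;
}
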
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index b2fcc46c1630..0590f566379d 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -12,6 +12,7 @@ int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int);
int __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int,
__be32, struct udp_table *);
+int udpv6_init_sock(struct sock *sk);
int udp_v6_get_port(struct sock *sk, unsigned short snum);
void udp_v6_rehash(struct sock *sk);
@@ -20,8 +21,8 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname,
int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
unsigned int optlen);
int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
-int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len);
+int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len);
void udpv6_destroy_sock(struct sock *sk);
#ifdef CONFIG_PROC_FS
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index fbb700d3f437..67eaf3ca14ce 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -12,6 +12,13 @@
#include <linux/proc_fs.h>
#include "udp_impl.h"
+static int udplitev6_sk_init(struct sock *sk)
+{
+ udpv6_init_sock(sk);
+ udp_sk(sk)->pcflag = UDPLITE_BIT;
+ return 0;
+}
+
static int udplitev6_rcv(struct sk_buff *skb)
{
return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE);
@@ -38,7 +45,7 @@ struct proto udplitev6_prot = {
.connect = ip6_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
- .init = udplite_sk_init,
+ .init = udplitev6_sk_init,
.destroy = udpv6_destroy_sock,
.setsockopt = udpv6_setsockopt,
.getsockopt = udpv6_getsockopt,
@@ -48,7 +55,10 @@ struct proto udplitev6_prot = {
.unhash = udp_lib_unhash,
.rehash = udp_v6_rehash,
.get_port = udp_v6_get_port,
+
.memory_allocated = &udp_memory_allocated,
+ .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc,
+
.sysctl_mem = sysctl_udp_mem,
.obj_size = sizeof(struct udp6_sock),
.h.udp_table = &udplite_table,
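
udplitev6_sk_init() above layers the UDP-Lite flag on top of the common IPv6 UDP initializer instead of reusing the generic udplite_sk_init(). The wrap-then-tag shape, sketched with stand-in types:

#include <stdio.h>

#define UDPLITE_BIT 0x1			/* stand-in flag value */

struct usock { unsigned int pcflag; };

static int base_init(struct usock *sk)	/* plays udpv6_init_sock() */
{
	sk->pcflag = 0;
	return 0;
}

static int lite_init(struct usock *sk)	/* plays udplitev6_sk_init() */
{
	base_init(sk);
	sk->pcflag |= UDPLITE_BIT;	/* tag the socket after common init */
	return 0;
}

int main(void)
{
	struct usock sk;

	lite_init(&sk);
	printf("pcflag=%#x\n", sk.pcflag);
	return 0;
}
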
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index d0d280077721..ad07904642ca 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -45,6 +45,19 @@ static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buf
return xfrm_output(sk, skb);
}
+static int xfrm6_noneed_fragment(struct sk_buff *skb)
+{
+ struct frag_hdr *fh;
+ u8 prevhdr = ipv6_hdr(skb)->nexthdr;
+
+ if (prevhdr != NEXTHDR_FRAGMENT)
+ return 0;
+ fh = (struct frag_hdr *)(skb->data + sizeof(struct ipv6hdr));
+ if (fh->nexthdr == NEXTHDR_ESP || fh->nexthdr == NEXTHDR_AUTH)
+ return 1;
+ return 0;
+}
+
static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -73,6 +86,9 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
xfrm6_local_rxpmtu(skb, mtu);
kfree_skb(skb);
return -EMSGSIZE;
+ } else if (toobig && xfrm6_noneed_fragment(skb)) {
+ skb->ignore_df = 1;
+ goto skip_frag;
} else if (!skb->ignore_df && toobig && skb->sk) {
xfrm_local_error(skb, mtu);
kfree_skb(skb);
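
xfrm6_noneed_fragment() keys off the first two next-header fields: if the packet already carries a fragment header whose payload is ESP or AH, fragmenting it again locally is pointless, so ignore_df is set instead. A compressed userspace model of the check; the single-byte header stubs stand in for the real 40-byte ipv6hdr and 8-byte frag_hdr:

#include <stdint.h>
#include <stdio.h>

/* protocol numbers matching the kernel's NEXTHDR_* constants */
#define NEXTHDR_FRAGMENT 44
#define NEXTHDR_ESP      50
#define NEXTHDR_AUTH     51

struct ip6hdr_stub  { uint8_t nexthdr; };
struct fraghdr_stub { uint8_t nexthdr; };

/* mirrors the hunk above: a packet that is already a fragment of an
 * ESP/AH payload must not be fragmented again by the local stack */
static int noneed_fragment(const uint8_t *pkt)
{
	const struct ip6hdr_stub *ip6 = (const void *)pkt;
	const struct fraghdr_stub *fh;

	if (ip6->nexthdr != NEXTHDR_FRAGMENT)
		return 0;
	fh = (const void *)(pkt + sizeof(*ip6));
	return fh->nexthdr == NEXTHDR_ESP || fh->nexthdr == NEXTHDR_AUTH;
}

int main(void)
{
	uint8_t pkt[2] = { NEXTHDR_FRAGMENT, NEXTHDR_ESP };

	printf("skip refragmentation: %d\n", noneed_fragment(pkt));
	return 0;
}
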
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index fad687ee6dd8..4a4b0e49ec92 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -33,8 +33,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
int err;
memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif);
- fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
+ fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(net, oif);
fl6.flowi6_mark = mark;
memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
if (saddr)
@@ -74,11 +73,11 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
struct rt6_info *rt = (struct rt6_info *)xdst->route;
xdst->u.dst.dev = dev;
- dev_hold_track(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC);
+ netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC);
xdst->u.rt6.rt6i_idev = in6_dev_get(dev);
if (!xdst->u.rt6.rt6i_idev) {
- dev_put_track(dev, &xdst->u.dst.dev_tracker);
+ netdev_put(dev, &xdst->u.dst.dev_tracker);
return -ENODEV;
}
@@ -92,7 +91,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt6.rt6i_src = rt->rt6i_src;
INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached);
rt6_uncached_list_add(&xdst->u.rt6);
- atomic_inc(&dev_net(dev)->ipv6.rt6_stats->fib_rt_uncache);
return 0;
}
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 2b31112c0856..1323f2f6928e 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -270,13 +270,17 @@ static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return 0;
}
-static int xfrm6_tunnel_init_state(struct xfrm_state *x)
+static int xfrm6_tunnel_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack)
{
- if (x->props.mode != XFRM_MODE_TUNNEL)
+ if (x->props.mode != XFRM_MODE_TUNNEL) {
+ NL_SET_ERR_MSG(extack, "IPv6 tunnel can only be used with tunnel mode");
return -EINVAL;
+ }
- if (x->encap)
+ if (x->encap) {
+ NL_SET_ERR_MSG(extack, "IPv6 tunnel is not compatible with encapsulation");
return -EINVAL;
+ }
x->props.header_len = sizeof(struct ipv6hdr);
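
The init_state() change threads a netlink_ext_ack through so userspace gets a readable message rather than a bare -EINVAL. The pattern, modelled with a stand-in ack struct:

#include <stdio.h>

struct ext_ack { const char *msg; };	/* stand-in for netlink_ext_ack */

#define SET_ERR_MSG(ack, m) do { if (ack) (ack)->msg = (m); } while (0)

static int init_state(int tunnel_mode, int has_encap, struct ext_ack *ack)
{
	if (!tunnel_mode) {
		SET_ERR_MSG(ack, "IPv6 tunnel can only be used with tunnel mode");
		return -22;	/* -EINVAL */
	}
	if (has_encap) {
		SET_ERR_MSG(ack, "IPv6 tunnel is not compatible with encapsulation");
		return -22;	/* -EINVAL */
	}
	return 0;
}

int main(void)
{
	struct ext_ack ack = { NULL };

	if (init_state(0, 0, &ack))
		printf("rejected: %s\n", ack.msg);
	return 0;
}
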
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index a1760add5bf1..498a0c35b7bb 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -278,8 +278,6 @@ static void iucv_sock_destruct(struct sock *sk)
skb_queue_purge(&sk->sk_receive_queue);
skb_queue_purge(&sk->sk_error_queue);
- sk_mem_reclaim(sk);
-
if (!sock_flag(sk, SOCK_DEAD)) {
pr_err("Attempt to release alive iucv socket %p\n", sk);
return;
@@ -1223,7 +1221,6 @@ static void iucv_process_message_q(struct sock *sk)
static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
- int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
struct iucv_sock *iucv = iucv_sk(sk);
unsigned int copied, rlen;
@@ -1242,7 +1239,7 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
/* receive/dequeue next skb:
* the function understands MSG_PEEK and, thus, does not dequeue skb */
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb) {
if (sk->sk_shutdown & RCV_SHUTDOWN)
return 0;
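
skb_recv_datagram() lost its separate noblock argument across the tree; callers simply leave MSG_DONTWAIT in flags and the callee derives blocking behaviour itself. Roughly, in userspace terms:

#include <stdio.h>

#define MSG_DONTWAIT 0x40	/* as in <sys/socket.h> on Linux */
#define EAGAIN 11

/* the callee now derives non-blocking behaviour from flags itself */
static int recv_datagram(int flags)
{
	int noblock = flags & MSG_DONTWAIT;

	return noblock ? -EAGAIN : 0;	/* pretend the queue is empty */
}

int main(void)
{
	printf("%d\n", recv_datagram(MSG_DONTWAIT));	/* -11 */
	printf("%d\n", recv_datagram(0));		/* 0: would block */
	return 0;
}
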
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 8f4d49a7d3e8..eb0295d90039 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -319,7 +319,7 @@ static inline int iucv_call_b2f0(int command, union iucv_param *parm)
*/
static int __iucv_query_maxconn(void *param, unsigned long *max_pathid)
{
- unsigned long reg1 = (unsigned long)param;
+ unsigned long reg1 = virt_to_phys(param);
int cc;
asm volatile (
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 71899e5a5a11..a5004228111d 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -162,7 +162,8 @@ static void kcm_rcv_ready(struct kcm_sock *kcm)
/* Buffer limit is okay now, add to ready list */
list_add_tail(&kcm->wait_rx_list,
&kcm->mux->kcm_rx_waiters);
- kcm->rx_wait = true;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_wait, true);
}
static void kcm_rfree(struct sk_buff *skb)
@@ -178,7 +179,7 @@ static void kcm_rfree(struct sk_buff *skb)
/* For reading rx_wait and rx_psock without holding lock */
smp_mb__after_atomic();
- if (!kcm->rx_wait && !kcm->rx_psock &&
+ if (!READ_ONCE(kcm->rx_wait) && !READ_ONCE(kcm->rx_psock) &&
sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) {
spin_lock_bh(&mux->rx_lock);
kcm_rcv_ready(kcm);
@@ -237,7 +238,8 @@ try_again:
if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
/* Should mean socket buffer full */
list_del(&kcm->wait_rx_list);
- kcm->rx_wait = false;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_wait, false);
/* Commit rx_wait to read in kcm_free */
smp_wmb();
@@ -280,10 +282,12 @@ static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock,
kcm = list_first_entry(&mux->kcm_rx_waiters,
struct kcm_sock, wait_rx_list);
list_del(&kcm->wait_rx_list);
- kcm->rx_wait = false;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_wait, false);
psock->rx_kcm = kcm;
- kcm->rx_psock = psock;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_psock, psock);
spin_unlock_bh(&mux->rx_lock);
@@ -310,7 +314,8 @@ static void unreserve_rx_kcm(struct kcm_psock *psock,
spin_lock_bh(&mux->rx_lock);
psock->rx_kcm = NULL;
- kcm->rx_psock = NULL;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_psock, NULL);
/* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with
* kcm_rfree
@@ -834,7 +839,7 @@ static ssize_t kcm_sendpage(struct socket *sock, struct page *page,
}
get_page(page);
- skb_fill_page_desc(skb, i, page, offset, size);
+ skb_fill_page_desc_noacc(skb, i, page, offset, size);
skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
coalesced:
@@ -1240,7 +1245,8 @@ static void kcm_recv_disable(struct kcm_sock *kcm)
if (!kcm->rx_psock) {
if (kcm->rx_wait) {
list_del(&kcm->wait_rx_list);
- kcm->rx_wait = false;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_wait, false);
}
requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
@@ -1412,12 +1418,6 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
psock->sk = csk;
psock->bpf_prog = prog;
- err = strp_init(&psock->strp, csk, &cb);
- if (err) {
- kmem_cache_free(kcm_psockp, psock);
- goto out;
- }
-
write_lock_bh(&csk->sk_callback_lock);
/* Check if sk_user_data is already used by KCM or someone else.
@@ -1425,13 +1425,18 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
*/
if (csk->sk_user_data) {
write_unlock_bh(&csk->sk_callback_lock);
- strp_stop(&psock->strp);
- strp_done(&psock->strp);
kmem_cache_free(kcm_psockp, psock);
err = -EALREADY;
goto out;
}
+ err = strp_init(&psock->strp, csk, &cb);
+ if (err) {
+ write_unlock_bh(&csk->sk_callback_lock);
+ kmem_cache_free(kcm_psockp, psock);
+ goto out;
+ }
+
psock->save_data_ready = csk->sk_data_ready;
psock->save_write_space = csk->sk_write_space;
psock->save_state_change = csk->sk_state_change;
@@ -1794,7 +1799,8 @@ static void kcm_done(struct kcm_sock *kcm)
if (kcm->rx_wait) {
list_del(&kcm->wait_rx_list);
- kcm->rx_wait = false;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_wait, false);
}
/* Move any pending receive messages to other kcm sockets */
requeue_rx_msgs(mux, &sk->sk_receive_queue);
@@ -1839,10 +1845,10 @@ static int kcm_release(struct socket *sock)
kcm = kcm_sk(sk);
mux = kcm->mux;
+ lock_sock(sk);
sock_orphan(sk);
kfree_skb(kcm->seq_skb);
- lock_sock(sk);
/* Purge queue under lock to avoid race condition with tx_work trying
* to act when queue is nonempty. If tx_work runs after this point
* it will just return.
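
The kcm hunks pair every lockless read of rx_wait/rx_psock in kcm_rfree() with WRITE_ONCE() at the writers, so the compiler can neither tear nor cache the accesses. A userspace model of the pairing; the macros below mimic the kernel's but are local definitions (build with cc -O2 -pthread):

#include <pthread.h>
#include <stdio.h>

/* force a single untorn access and stop the compiler caching the value */
#define WRITE_ONCE(x, v) (*(volatile __typeof__(x) *)&(x) = (v))
#define READ_ONCE(x)     (*(volatile __typeof__(x) *)&(x))

static int rx_wait;

static void *reader(void *arg)
{
	(void)arg;
	while (!READ_ONCE(rx_wait))	/* a plain load could be hoisted,
					 * turning this into an endless loop */
		;
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, reader, NULL);
	WRITE_ONCE(rx_wait, 1);		/* paired with the lockless read */
	pthread_join(t, NULL);
	puts("reader observed the flag");
	return 0;
}
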
diff --git a/net/key/af_key.c b/net/key/af_key.c
index de24a7d474df..c85df5b958d2 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1697,9 +1697,12 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad
pfk->registered |= (1<<hdr->sadb_msg_satype);
}
+ mutex_lock(&pfkey_mutex);
xfrm_probe_algs();
- supp_skb = compose_sadb_supported(hdr, GFP_KERNEL);
+ supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO);
+ mutex_unlock(&pfkey_mutex);
+
if (!supp_skb) {
if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC)
pfk->registered &= ~(1<<hdr->sadb_msg_satype);
@@ -2623,7 +2626,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
}
return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i,
- kma ? &k : NULL, net, NULL);
+ kma ? &k : NULL, net, NULL, 0);
out:
return err;
@@ -2826,6 +2829,10 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb
void *ext_hdrs[SADB_EXT_MAX];
int err;
+ /* A non-zero return value from pfkey_broadcast() does not always signal
+ * an error, and even on an actual error we may still want to process
+ * the message, so simply ignore the return value.
+ */
pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
@@ -2898,7 +2905,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t)
break;
if (!aalg->pfkey_supported)
continue;
- if (aalg_tmpl_set(t, aalg))
+ if (aalg_tmpl_set(t, aalg) && aalg->available)
sz += sizeof(struct sadb_comb);
}
return sz + sizeof(struct sadb_prop);
@@ -2916,7 +2923,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
if (!ealg->pfkey_supported)
continue;
- if (!(ealg_tmpl_set(t, ealg)))
+ if (!(ealg_tmpl_set(t, ealg) && ealg->available))
continue;
for (k = 1; ; k++) {
@@ -2927,7 +2934,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
if (!aalg->pfkey_supported)
continue;
- if (aalg_tmpl_set(t, aalg))
+ if (aalg_tmpl_set(t, aalg) && aalg->available)
sz += sizeof(struct sadb_comb);
}
}
@@ -3696,7 +3703,7 @@ static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
goto out;
- skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (skb == NULL)
goto out;
@@ -3711,7 +3718,7 @@ static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (err)
goto out_free;
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
err = (flags & MSG_TRUNC) ? skb->len : copied;
@@ -3890,14 +3897,10 @@ static int __init ipsec_pfkey_init(void)
err = sock_register(&pfkey_family_ops);
if (err != 0)
goto out_unregister_pernet;
- err = xfrm_register_km(&pfkeyv2_mgr);
- if (err != 0)
- goto out_sock_unregister;
+ xfrm_register_km(&pfkeyv2_mgr);
out:
return err;
-out_sock_unregister:
- sock_unregister(PF_KEY);
out_unregister_pernet:
unregister_pernet_subsys(&pfkey_net_ops);
out_unregister_key_proto:
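
pfkey_register() now serializes xfrm_probe_algs() and the snapshot taken by compose_sadb_supported() under pfkey_mutex, since probing mutates shared algorithm state. The shape of the fix, reduced to a toy:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t probe_mutex = PTHREAD_MUTEX_INITIALIZER;
static int alg_available;		/* shared table written by probing */

static void probe_algs(void)		/* not reentrant, like xfrm_probe_algs */
{
	alg_available = 1;
}

static int register_sa_type(void)
{
	int snapshot;

	pthread_mutex_lock(&probe_mutex);	/* probe + snapshot atomically */
	probe_algs();
	snapshot = alg_available;
	pthread_mutex_unlock(&probe_mutex);
	return snapshot;
}

int main(void)
{
	printf("available=%d\n", register_sa_type());
	return 0;
}
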
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 9d1aafe75f92..4595b56d175d 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -184,7 +184,7 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
session->pwtype == L2TP_PWTYPE_PPP ? "PPP" :
"");
if (session->send_seq || session->recv_seq)
- seq_printf(m, " nr %hu, ns %hu\n", session->nr, session->ns);
+ seq_printf(m, " nr %u, ns %u\n", session->nr, session->ns);
seq_printf(m, " refcnt %d\n", refcount_read(&session->ref_count));
seq_printf(m, " config 0/0/%c/%c/-/%s %08x %u\n",
session->recv_seq ? 'R' : '-',
@@ -192,7 +192,7 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
session->lns_mode ? "LNS" : "LAC",
0,
jiffies_to_msecs(session->reorder_timeout));
- seq_printf(m, " offset 0 l2specific %hu/%hu\n",
+ seq_printf(m, " offset 0 l2specific %hu/%d\n",
session->l2specific_type, l2tp_get_l2specific_len(session));
if (session->cookie_len) {
seq_printf(m, " cookie %02x%02x%02x%02x",
@@ -215,7 +215,7 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
seq_puts(m, "\n");
}
- seq_printf(m, " %hu/%hu tx %ld/%ld/%ld rx %ld/%ld/%ld\n",
+ seq_printf(m, " %u/%u tx %ld/%ld/%ld rx %ld/%ld/%ld\n",
session->nr, session->ns,
atomic_long_read(&session->stats.tx_packets),
atomic_long_read(&session->stats.tx_bytes),
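
The format-string fixes track the widening of session->nr/ns from u16 to u32; %hu would silently print a truncated value. For instance:

#include <stdio.h>

struct session { unsigned int nr, ns; };	/* widened from u16 to u32 */

int main(void)
{
	struct session s = { 70000, 70001 };	/* above USHRT_MAX */

	/* "%hu" would print the value truncated to 16 bits (4464 here);
	 * "%u" matches the new field width */
	printf("nr %u, ns %u\n", s.nr, s.ns);
	return 0;
}
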
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 6cd97c75445c..f2ae03c40473 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -254,7 +254,7 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
int rc;
if (cfg->ifname) {
- strlcpy(name, cfg->ifname, IFNAMSIZ);
+ strscpy(name, cfg->ifname, IFNAMSIZ);
name_assign_type = NET_NAME_USER;
} else {
strcpy(name, L2TP_ETH_DEV_NAME);
@@ -314,7 +314,7 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
return rc;
}
- strlcpy(session->ifname, dev->name, IFNAMSIZ);
+ strscpy(session->ifname, dev->name, IFNAMSIZ);
rcu_assign_pointer(spriv->dev, dev);
rtnl_unlock();
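
strscpy() differs from the deprecated strlcpy() in that it returns the number of bytes copied, or -E2BIG on truncation, rather than the source length that callers routinely misused. A userspace model of those semantics (a sketch, not the kernel implementation):

#include <stdio.h>
#include <string.h>

static long strscpy_model(char *dst, const char *src, size_t size)
{
	size_t len = strnlen(src, size);

	if (!size)
		return -7;	/* -E2BIG */
	if (len == size) {		/* source would not fit with the NUL */
		len = size - 1;
		memcpy(dst, src, len);
		dst[len] = '\0';
		return -7;	/* -E2BIG: caller can detect truncation */
	}
	memcpy(dst, src, len + 1);
	return (long)len;
}

int main(void)
{
	char name[8];
	long rc = strscpy_model(name, "l2tpeth12345", sizeof(name));

	printf("%ld %s\n", rc, name);	/* -7 l2tpeth */
	return 0;
}
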
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index b3edafa5fba4..4db5a554bdbd 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -50,11 +50,13 @@ static struct sock *__l2tp_ip_bind_lookup(const struct net *net, __be32 laddr,
sk_for_each_bound(sk, &l2tp_ip_bind_table) {
const struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk);
const struct inet_sock *inet = inet_sk(sk);
+ int bound_dev_if;
if (!net_eq(sock_net(sk), net))
continue;
- if (sk->sk_bound_dev_if && dif && sk->sk_bound_dev_if != dif)
+ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ if (bound_dev_if && dif && bound_dev_if != dif)
continue;
if (inet->inet_rcv_saddr && laddr &&
@@ -515,7 +517,7 @@ no_route:
}
static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg,
- size_t len, int noblock, int flags, int *addr_len)
+ size_t len, int flags, int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
size_t copied = 0;
@@ -526,7 +528,7 @@ static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg,
if (flags & MSG_OOB)
goto out;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 96f975777438..9dbd801ddb98 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -62,11 +62,13 @@ static struct sock *__l2tp_ip6_bind_lookup(const struct net *net,
const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk);
const struct in6_addr *sk_raddr = &sk->sk_v6_daddr;
const struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk);
+ int bound_dev_if;
if (!net_eq(sock_net(sk), net))
continue;
- if (sk->sk_bound_dev_if && dif && sk->sk_bound_dev_if != dif)
+ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ if (bound_dev_if && dif && bound_dev_if != dif)
continue;
if (sk_laddr && !ipv6_addr_any(sk_laddr) &&
@@ -445,7 +447,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
lsa->l2tp_conn_id = lsk->conn_id;
}
if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL)
- lsa->l2tp_scope_id = sk->sk_bound_dev_if;
+ lsa->l2tp_scope_id = READ_ONCE(sk->sk_bound_dev_if);
return sizeof(*lsa);
}
@@ -502,14 +504,15 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
int transhdrlen = 4; /* zero session-id */
- int ulen = len + transhdrlen;
+ int ulen;
int err;
/* Rough check on arithmetic overflow,
* better check is made in ip6_append_data().
*/
- if (len > INT_MAX)
+ if (len > INT_MAX - transhdrlen)
return -EMSGSIZE;
+ ulen = len + transhdrlen;
/* Mirror BSD error message compatibility */
if (msg->msg_flags & MSG_OOB)
@@ -560,7 +563,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
if (fl6.flowi6_oif == 0)
- fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.flowi6_oif = READ_ONCE(sk->sk_bound_dev_if);
if (msg->msg_controllen) {
opt = &opt_space;
@@ -657,7 +660,7 @@ do_confirm:
}
static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name);
@@ -671,7 +674,7 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (flags & MSG_ERRQUEUE)
return ipv6_recv_error(sk, msg, len, addr_len);
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
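
The l2tp_ip6_sendmsg() fix compares before adding, so `len + transhdrlen` can never wrap past INT_MAX. The guard in isolation:

#include <limits.h>
#include <stdio.h>

#define EMSGSIZE 90

/* compare first: the sum is only formed once it is provably in range */
static int check_len(size_t len, int transhdrlen)
{
	if (len > (size_t)(INT_MAX - transhdrlen))
		return -EMSGSIZE;
	return (int)(len + transhdrlen);	/* ulen */
}

int main(void)
{
	printf("%d\n", check_len((size_t)INT_MAX - 1, 4));	/* -90 */
	printf("%d\n", check_len(1400, 4));			/* 1404 */
	return 0;
}
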
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 96eb91be9238..a901fd14fe3b 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -989,6 +989,7 @@ static struct genl_family l2tp_nl_family __ro_after_init = {
.module = THIS_MODULE,
.small_ops = l2tp_nl_ops,
.n_small_ops = ARRAY_SIZE(l2tp_nl_ops),
+ .resv_start_op = L2TP_CMD_SESSION_GET + 1,
.mcgrps = l2tp_multicast_group,
.n_mcgrps = ARRAY_SIZE(l2tp_multicast_group),
};
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index bf35710127dd..db2e584c625e 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -191,8 +191,7 @@ static int pppol2tp_recvmsg(struct socket *sock, struct msghdr *msg,
goto end;
err = 0;
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto end;
@@ -1554,7 +1553,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
session->lns_mode ? "LNS" : "LAC",
0,
jiffies_to_msecs(session->reorder_timeout));
- seq_printf(m, " %hu/%hu %ld/%ld/%ld %ld/%ld/%ld\n",
+ seq_printf(m, " %u/%u %ld/%ld/%ld %ld/%ld/%ld\n",
session->nr, session->ns,
atomic_long_read(&session->stats.tx_packets),
atomic_long_read(&session->stats.tx_bytes),
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 17927966abb3..ca10916340b0 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -147,7 +147,7 @@ int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
dev = dev_get_by_index_rcu(net, ifindex);
while (dev && !netif_is_l3_master(dev))
- dev = netdev_master_upper_dev_get(dev);
+ dev = netdev_master_upper_dev_get_rcu(dev);
return dev ? dev->ifindex : 0;
}
@@ -250,25 +250,19 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
struct net_device *dev;
int rc = 0;
- rcu_read_lock();
+ /* update flow ensures flowi_l3mdev is set when relevant */
+ if (!fl->flowi_l3mdev)
+ return 0;
- dev = dev_get_by_index_rcu(net, fl->flowi_oif);
- if (dev && netif_is_l3_master(dev) &&
- dev->l3mdev_ops->l3mdev_fib_table) {
- arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
- rc = 1;
- goto out;
- }
+ rcu_read_lock();
- dev = dev_get_by_index_rcu(net, fl->flowi_iif);
+ dev = dev_get_by_index_rcu(net, fl->flowi_l3mdev);
if (dev && netif_is_l3_master(dev) &&
dev->l3mdev_ops->l3mdev_fib_table) {
arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
rc = 1;
- goto out;
}
-out:
rcu_read_unlock();
return rc;
@@ -277,31 +271,28 @@ out:
void l3mdev_update_flow(struct net *net, struct flowi *fl)
{
struct net_device *dev;
- int ifindex;
rcu_read_lock();
if (fl->flowi_oif) {
dev = dev_get_by_index_rcu(net, fl->flowi_oif);
if (dev) {
- ifindex = l3mdev_master_ifindex_rcu(dev);
- if (ifindex) {
- fl->flowi_oif = ifindex;
- fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF;
- goto out;
- }
+ if (!fl->flowi_l3mdev)
+ fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev);
+
+ /* oif set to L3mdev directs lookup to its table;
+ * reset to avoid oif match in fib_lookup
+ */
+ if (netif_is_l3_master(dev))
+ fl->flowi_oif = 0;
+ goto out;
}
}
- if (fl->flowi_iif) {
+ if (fl->flowi_iif > LOOPBACK_IFINDEX && !fl->flowi_l3mdev) {
dev = dev_get_by_index_rcu(net, fl->flowi_iif);
- if (dev) {
- ifindex = l3mdev_master_ifindex_rcu(dev);
- if (ifindex) {
- fl->flowi_iif = ifindex;
- fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF;
- }
- }
+ if (dev)
+ fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev);
}
out:
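
With flowi_l3mdev as a dedicated field, l3mdev_update_flow() no longer overwrites oif/iif or sets FLOWI_FLAG_SKIP_NH_OIF; it records the master once and clears oif only when oif is the l3mdev itself. A simplified model with hard-coded ifindexes (7 = enslaved device, 42 = VRF master), purely illustrative:

#include <stdio.h>

struct flow { int oif, iif, l3mdev; };

static int is_l3_master(int ifindex) { return ifindex == 42; }

static int master_ifindex(int ifindex)
{
	if (ifindex == 7 || ifindex == 42)	/* slave 7 -> VRF 42 */
		return 42;
	return 0;
}

static void update_flow(struct flow *fl)
{
	if (fl->oif) {
		if (!fl->l3mdev)
			fl->l3mdev = master_ifindex(fl->oif);
		/* oif set to the l3mdev itself directs the lookup to its
		 * table; clear it to avoid an oif match in fib_lookup */
		if (is_l3_master(fl->oif))
			fl->oif = 0;
	} else if (fl->iif > 1 && !fl->l3mdev) {
		fl->l3mdev = master_ifindex(fl->iif);
	}
}

int main(void)
{
	struct flow fl = { .oif = 7 };

	update_flow(&fl);
	printf("oif=%d l3mdev=%d\n", fl.oif, fl.l3mdev);	/* oif=7 l3mdev=42 */
	return 0;
}
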
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 26c00ebf4fba..da7fe94bea2e 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -224,7 +224,7 @@ static int llc_ui_release(struct socket *sock)
} else {
release_sock(sk);
}
- dev_put_track(llc->dev, &llc->dev_tracker);
+ netdev_put(llc->dev, &llc->dev_tracker);
sock_put(sk);
llc_sk_free(sk);
out:
@@ -275,6 +275,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
{
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
+ struct net_device *dev = NULL;
struct llc_sap *sap;
int rc = -EINVAL;
@@ -286,16 +287,15 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
goto out;
rc = -ENODEV;
if (sk->sk_bound_dev_if) {
- llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
- if (llc->dev && addr->sllc_arphrd != llc->dev->type) {
- dev_put(llc->dev);
- llc->dev = NULL;
+ dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
+ if (dev && addr->sllc_arphrd != dev->type) {
+ dev_put(dev);
+ dev = NULL;
}
} else
- llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd);
- if (!llc->dev)
+ dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd);
+ if (!dev)
goto out;
- netdev_tracker_alloc(llc->dev, &llc->dev_tracker, GFP_KERNEL);
rc = -EUSERS;
llc->laddr.lsap = llc_ui_autoport();
if (!llc->laddr.lsap)
@@ -304,6 +304,12 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
sap = llc_sap_open(llc->laddr.lsap, NULL);
if (!sap)
goto out;
+
+ /* Note: We do not expect errors from this point. */
+ llc->dev = dev;
+ netdev_tracker_alloc(llc->dev, &llc->dev_tracker, GFP_KERNEL);
+ dev = NULL;
+
memcpy(llc->laddr.mac, llc->dev->dev_addr, IFHWADDRLEN);
memcpy(&llc->addr, addr, sizeof(llc->addr));
/* assign new connection to its SAP */
@@ -311,6 +317,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
sock_reset_flag(sk, SOCK_ZAPPED);
rc = 0;
out:
+ dev_put(dev);
return rc;
}
@@ -333,6 +340,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr;
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
+ struct net_device *dev = NULL;
struct llc_sap *sap;
int rc = -EINVAL;
@@ -348,25 +356,27 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
rc = -ENODEV;
rcu_read_lock();
if (sk->sk_bound_dev_if) {
- llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if);
- if (llc->dev) {
+ dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if);
+ if (dev) {
if (is_zero_ether_addr(addr->sllc_mac))
- memcpy(addr->sllc_mac, llc->dev->dev_addr,
+ memcpy(addr->sllc_mac, dev->dev_addr,
IFHWADDRLEN);
- if (addr->sllc_arphrd != llc->dev->type ||
+ if (addr->sllc_arphrd != dev->type ||
!ether_addr_equal(addr->sllc_mac,
- llc->dev->dev_addr)) {
+ dev->dev_addr)) {
rc = -EINVAL;
- llc->dev = NULL;
+ dev = NULL;
}
}
- } else
- llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
+ } else {
+ dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
addr->sllc_mac);
- dev_hold_track(llc->dev, &llc->dev_tracker, GFP_ATOMIC);
+ }
+ dev_hold(dev);
rcu_read_unlock();
- if (!llc->dev)
+ if (!dev)
goto out;
+
if (!addr->sllc_sap) {
rc = -EUSERS;
addr->sllc_sap = llc_ui_autoport();
@@ -398,6 +408,12 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
goto out_put;
}
}
+
+ /* Note: We do not expect errors from this point. */
+ llc->dev = dev;
+ netdev_tracker_alloc(llc->dev, &llc->dev_tracker, GFP_KERNEL);
+ dev = NULL;
+
llc->laddr.lsap = addr->sllc_sap;
memcpy(llc->laddr.mac, addr->sllc_mac, IFHWADDRLEN);
memcpy(&llc->addr, addr, sizeof(llc->addr));
@@ -408,6 +424,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
out_put:
llc_sap_put(sap);
out:
+ dev_put(dev);
release_sock(sk);
return rc;
}
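
The llc_ui_bind()/llc_ui_autobind() rework operates on a local dev pointer and commits it to llc->dev only once no further error path remains, with a single dev_put() on exit covering every failure. The commit-point idiom, reduced to a refcount toy:

#include <stdio.h>
#include <stdlib.h>

struct dev  { int refs; };
struct usock { struct dev *dev; };

static struct dev *dev_get(void)
{
	struct dev *d = calloc(1, sizeof(*d));

	if (d)
		d->refs = 1;
	return d;
}

static void dev_put(struct dev *d)
{
	if (d && --d->refs == 0)
		free(d);
}

static int do_bind(struct usock *sk, int fail_validation)
{
	struct dev *dev = dev_get();	/* work on a local, not sk->dev */
	int rc = -1;

	if (!dev)
		return rc;
	if (fail_validation)
		goto out;		/* sk->dev never saw the bad dev */

	sk->dev = dev;			/* commit point: no errors past here */
	dev = NULL;			/* ownership moved to the socket */
	rc = 0;
out:
	dev_put(dev);			/* drops the ref only on failure */
	return rc;
}

int main(void)
{
	struct usock sk = { NULL };
	int rc = do_bind(&sk, 0);

	printf("bind: %d, dev set: %s\n", rc, sk.dev ? "yes" : "no");
	dev_put(sk.dev);
	return 0;
}
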
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 23d25e8b2358..b8de44da1fb8 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -16,6 +16,7 @@ mac80211-y := \
s1g.o \
ibss.o \
iface.o \
+ link.o \
rate.o \
michael.o \
tkip.o \
@@ -34,7 +35,8 @@ mac80211-y := \
trace.o mlme.o \
tdls.o \
ocb.o \
- airtime.o
+ airtime.o \
+ eht.o
mac80211-$(CONFIG_MAC80211_LEDS) += led.o
mac80211-$(CONFIG_MAC80211_DEBUGFS) += \
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 7d2925bb966e..9414d3bbd65f 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
* Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*/
/**
@@ -180,7 +180,8 @@ static void sta_rx_agg_reorder_timer_expired(struct timer_list *t)
static void ieee80211_add_addbaext(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb,
- const struct ieee80211_addba_ext_ie *req)
+ const struct ieee80211_addba_ext_ie *req,
+ u16 buf_size)
{
struct ieee80211_supported_band *sband;
struct ieee80211_addba_ext_ie *resp;
@@ -210,6 +211,8 @@ static void ieee80211_add_addbaext(struct ieee80211_sub_if_data *sdata,
frag_level = cap_frag_level;
resp->data |= u8_encode_bits(frag_level,
IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK);
+ resp->data |= u8_encode_bits(buf_size >> IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT,
+ IEEE80211_ADDBA_EXT_BUF_SIZE_MASK);
}
static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid,
@@ -239,7 +242,7 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid,
sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
else if (sdata->vif.type == NL80211_IFTYPE_STATION)
- memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN);
+ memcpy(mgmt->bssid, sdata->deflink.u.mgd.bssid, ETH_ALEN);
else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN);
@@ -260,8 +263,8 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid,
mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
- if (sta->sta.he_cap.has_he && addbaext)
- ieee80211_add_addbaext(sdata, skb, addbaext);
+ if (sta->sta.deflink.he_cap.has_he && addbaext)
+ ieee80211_add_addbaext(sdata, skb, addbaext, buf_size);
ieee80211_tx_skb(sdata, skb);
}
@@ -293,7 +296,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
goto end;
}
- if (!sta->sta.ht_cap.ht_supported &&
+ if (!sta->sta.deflink.ht_cap.ht_supported &&
sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ) {
ht_dbg(sta->sdata,
"STA %pM erroneously requests BA session on tid %d w/o QoS\n",
@@ -309,8 +312,10 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
goto end;
}
- if (sta->sta.he_cap.has_he)
- max_buf_size = IEEE80211_MAX_AMPDU_BUF;
+ if (sta->sta.deflink.eht_cap.has_eht)
+ max_buf_size = IEEE80211_MAX_AMPDU_BUF_EHT;
+ else if (sta->sta.deflink.he_cap.has_he)
+ max_buf_size = IEEE80211_MAX_AMPDU_BUF_HE;
else
max_buf_size = IEEE80211_MAX_AMPDU_BUF_HT;
@@ -319,7 +324,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
* and if buffer size does not exceed max value */
/* XXX: check own ht delayed BA capability?? */
if (((ba_policy != 1) &&
- (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) ||
+ (!(sta->sta.deflink.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) ||
(buf_size > max_buf_size)) {
status = WLAN_STATUS_INVALID_QOS_PARAM;
ht_dbg_ratelimited(sta->sdata,
@@ -497,11 +502,18 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
u.action.u.addba_req.variable);
if (ies_len) {
elems = ieee802_11_parse_elems(mgmt->u.action.u.addba_req.variable,
- ies_len, true, mgmt->bssid, NULL);
+ ies_len, true, NULL);
if (!elems || elems->parse_error)
goto free;
}
+ if (sta->sta.deflink.eht_cap.has_eht && elems && elems->addba_ext_ie) {
+ u8 buf_size_1k = u8_get_bits(elems->addba_ext_ie->data,
+ IEEE80211_ADDBA_EXT_BUF_SIZE_MASK);
+
+ buf_size |= buf_size_1k << IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT;
+ }
+
__ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
start_seq_num, ba_policy, tid,
buf_size, true, false,
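
For EHT peers the ADDBA buffer size can exceed the 10 bits available in the base ADDBA capability field, so bits 10..11 ride in the ADDBA extension element; the hunk above ORs them back in on receive. A decoding sketch; the mask and shift values are taken to match the kernel's IEEE80211_ADDBA_EXT_BUF_SIZE_* definitions and are assumptions here:

#include <stdint.h>
#include <stdio.h>

#define ADDBA_EXT_BUF_SIZE_MASK  0xC0	/* bits 6..7 of the ext element */
#define ADDBA_EXT_BUF_SIZE_SHIFT 10	/* their weight in the full size */

static uint16_t decode_buf_size(uint16_t base_bits, uint8_t ext_data)
{
	uint16_t buf_size = base_bits;			/* bits 0..9 */
	uint8_t hi = (ext_data & ADDBA_EXT_BUF_SIZE_MASK) >> 6;

	buf_size |= (uint16_t)hi << ADDBA_EXT_BUF_SIZE_SHIFT;
	return buf_size;
}

int main(void)
{
	/* 1024-frame agreement: base field 0, extension carries the 1k bit */
	printf("buf_size=%u\n", decode_buf_size(0, 1 << 6));
	return 0;
}
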
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 74a878f213d3..07c892aa8c73 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
* Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2021 Intel Corporation
+ * Copyright (C) 2018 - 2022 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -82,7 +82,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
else if (sdata->vif.type == NL80211_IFTYPE_STATION)
- memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN);
+ memcpy(mgmt->bssid, sdata->deflink.u.mgd.bssid, ETH_ALEN);
else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN);
@@ -106,7 +106,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
mgmt->u.action.u.addba_req.start_seq_num =
cpu_to_le16(start_seq_num << 4);
- ieee80211_tx_skb_tid(sdata, skb, tid);
+ ieee80211_tx_skb_tid(sdata, skb, tid, -1);
}
void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn)
@@ -135,7 +135,7 @@ void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn)
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
IEEE80211_TX_CTL_REQ_TX_STATUS;
- ieee80211_tx_skb_tid(sdata, skb, tid);
+ ieee80211_tx_skb_tid(sdata, skb, tid, -1);
}
EXPORT_SYMBOL(ieee80211_send_bar);
@@ -467,7 +467,7 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta,
sta->ampdu_mlme.addba_req_num[tid]++;
spin_unlock_bh(&sta->lock);
- if (sta->sta.he_cap.has_he) {
+ if (sta->sta.deflink.he_cap.has_he) {
buf_size = local->hw.max_tx_aggregation_subframes;
} else {
/*
@@ -594,7 +594,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
"Requested to start BA session on reserved tid=%d", tid))
return -EINVAL;
- if (!pubsta->ht_cap.ht_supported &&
+ if (!pubsta->deflink.ht_cap.ht_supported &&
sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ)
return -EINVAL;
@@ -626,6 +626,14 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
return -EINVAL;
}
+ if (test_sta_flag(sta, WLAN_STA_MFP) &&
+ !test_sta_flag(sta, WLAN_STA_AUTHORIZED)) {
+ ht_dbg(sdata,
+ "MFP STA not authorized - deny BA session request %pM tid %d\n",
+ sta->sta.addr, tid);
+ return -EINVAL;
+ }
+
/*
* 802.11n-2009 11.5.1.1: If the initiating STA is an HT STA, is a
* member of an IBSS, and has no other existing Block Ack agreement
@@ -639,7 +647,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
* is set when we receive a bss info from a probe response or a beacon.
*/
if (sta->sdata->vif.type == NL80211_IFTYPE_ADHOC &&
- !sta->sta.ht_cap.ht_supported) {
+ !sta->sta.deflink.ht_cap.ht_supported) {
ht_dbg(sdata,
"BA request denied - IBSS STA %pM does not advertise HT support\n",
pubsta->addr);
diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c
index 26d2f8ba7029..2e66598fac79 100644
--- a/net/mac80211/airtime.c
+++ b/net/mac80211/airtime.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: ISC
/*
* Copyright (C) 2019 Felix Fietkau <nbd@nbd.name>
+ * Copyright (C) 2021-2022 Intel Corporation
*/
#include <net/mac80211.h>
@@ -67,17 +68,11 @@
#define IEEE80211_VHT_STREAM_GROUPS 8 /* BW(=4) * SGI(=2) */
#define IEEE80211_HE_MAX_STREAMS 8
-#define IEEE80211_HE_STREAM_GROUPS 12 /* BW(=4) * GI(=3) */
#define IEEE80211_HT_GROUPS_NB (IEEE80211_MAX_STREAMS * \
IEEE80211_HT_STREAM_GROUPS)
#define IEEE80211_VHT_GROUPS_NB (IEEE80211_MAX_STREAMS * \
IEEE80211_VHT_STREAM_GROUPS)
-#define IEEE80211_HE_GROUPS_NB (IEEE80211_HE_MAX_STREAMS * \
- IEEE80211_HE_STREAM_GROUPS)
-#define IEEE80211_GROUPS_NB (IEEE80211_HT_GROUPS_NB + \
- IEEE80211_VHT_GROUPS_NB + \
- IEEE80211_HE_GROUPS_NB)
#define IEEE80211_HT_GROUP_0 0
#define IEEE80211_VHT_GROUP_0 (IEEE80211_HT_GROUP_0 + IEEE80211_HT_GROUPS_NB)
@@ -477,7 +472,9 @@ u32 ieee80211_calc_rx_airtime(struct ieee80211_hw *hw,
bool sp = status->enc_flags & RX_ENC_FLAG_SHORTPRE;
bool cck;
- if (WARN_ON_ONCE(status->band > NL80211_BAND_5GHZ))
+ /* on 60GHz or sub-1GHz band, there are no legacy rates */
+ if (WARN_ON_ONCE(status->band == NL80211_BAND_60GHZ ||
+ status->band == NL80211_BAND_S1GHZ))
return 0;
sband = hw->wiphy->bands[status->band];
@@ -640,7 +637,7 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
len += 38; /* Ethernet header length */
- conf = rcu_dereference(vif->chanctx_conf);
+ conf = rcu_dereference(vif->bss_conf.chanctx_conf);
if (conf) {
band = conf->def.chan->band;
shift = ieee80211_chandef_get_shift(&conf->def);
@@ -650,12 +647,12 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
struct sta_info *sta = container_of(pubsta, struct sta_info,
sta);
struct ieee80211_rx_status stat;
- struct ieee80211_tx_rate *rate = &sta->tx_stats.last_rate;
- struct rate_info *ri = &sta->tx_stats.last_rate_info;
+ struct ieee80211_tx_rate *tx_rate = &sta->deflink.tx_stats.last_rate;
+ struct rate_info *ri = &sta->deflink.tx_stats.last_rate_info;
u32 duration, overhead;
u8 agg_shift;
- if (ieee80211_fill_rx_status(&stat, hw, rate, ri, band, len))
+ if (ieee80211_fill_rx_status(&stat, hw, tx_rate, ri, band, len))
return 0;
if (stat.encoding == RX_ENC_LEGACY || !ampdu)
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 87a208089caf..687b4c878d4a 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -23,6 +23,30 @@
#include "mesh.h"
#include "wme.h"
+static struct ieee80211_link_data *
+ieee80211_link_or_deflink(struct ieee80211_sub_if_data *sdata, int link_id,
+ bool require_valid)
+{
+ struct ieee80211_link_data *link;
+
+ if (link_id < 0) {
+ /*
+ * For keys, if sdata is not an MLD, we might not use
+ * the return value at all (if it's not a pairwise key),
+ * so in that case (require_valid==false) don't error.
+ */
+ if (require_valid && sdata->vif.valid_links)
+ return ERR_PTR(-EINVAL);
+
+ return &sdata->deflink;
+ }
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (!link)
+ return ERR_PTR(-ENOLINK);
+ return link;
+}
+
static void ieee80211_set_mu_mimo_follow(struct ieee80211_sub_if_data *sdata,
struct vif_params *params)
{
@@ -39,7 +63,8 @@ static void ieee80211_set_mu_mimo_follow(struct ieee80211_sub_if_data *sdata,
memcpy(sdata->vif.bss_conf.mu_group.position,
params->vht_mumimo_groups + WLAN_MEMBERSHIP_LEN,
WLAN_USER_POSITION_LEN);
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_MU_GROUPS);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ BSS_CHANGED_MU_GROUPS);
/* don't care about endianness - just check for 0 */
memcpy(&membership, params->vht_mumimo_groups,
WLAN_MEMBERSHIP_LEN);
@@ -53,7 +78,7 @@ static void ieee80211_set_mu_mimo_follow(struct ieee80211_sub_if_data *sdata,
params->vht_mumimo_follow_addr);
}
- sdata->vif.mu_mimo_owner = mu_mimo_groups || mu_mimo_follow;
+ sdata->vif.bss_conf.mu_mimo_owner = mu_mimo_groups || mu_mimo_follow;
}
static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata,
@@ -113,14 +138,15 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata,
}
static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_mbssid_config params)
+ struct cfg80211_mbssid_config params,
+ struct ieee80211_bss_conf *link_conf)
{
struct ieee80211_sub_if_data *tx_sdata;
sdata->vif.mbssid_tx_vif = NULL;
- sdata->vif.bss_conf.bssid_index = 0;
- sdata->vif.bss_conf.nontransmitted = false;
- sdata->vif.bss_conf.ema_ap = false;
+ link_conf->bssid_index = 0;
+ link_conf->nontransmitted = false;
+ link_conf->ema_ap = false;
if (sdata->vif.type != NL80211_IFTYPE_AP || !params.tx_wdev)
return -EINVAL;
@@ -133,11 +159,11 @@ static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata,
sdata->vif.mbssid_tx_vif = &sdata->vif;
} else {
sdata->vif.mbssid_tx_vif = &tx_sdata->vif;
- sdata->vif.bss_conf.nontransmitted = true;
- sdata->vif.bss_conf.bssid_index = params.index;
+ link_conf->nontransmitted = true;
+ link_conf->bssid_index = params.index;
}
if (params.ema)
- sdata->vif.bss_conf.ema_ap = true;
+ link_conf->ema_ap = true;
return 0;
}
@@ -200,12 +226,16 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
if (params->use_4addr == ifmgd->use_4addr)
return 0;
+ /* FIXME: no support for 4-addr MLO yet */
+ if (sdata->vif.valid_links)
+ return -EOPNOTSUPP;
+
sdata->u.mgd.use_4addr = params->use_4addr;
if (!ifmgd->associated)
return 0;
mutex_lock(&local->sta_mtx);
- sta = sta_info_get(sdata, ifmgd->bssid);
+ sta = sta_info_get(sdata, sdata->deflink.u.mgd.bssid);
if (sta)
drv_sta_set_4addr(local, sdata, &sta->sta,
params->use_4addr);
@@ -432,19 +462,23 @@ static int ieee80211_set_tx(struct ieee80211_sub_if_data *sdata,
}
static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
- u8 key_idx, bool pairwise, const u8 *mac_addr,
- struct key_params *params)
+ int link_id, u8 key_idx, bool pairwise,
+ const u8 *mac_addr, struct key_params *params)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_link_data *link =
+ ieee80211_link_or_deflink(sdata, link_id, false);
struct ieee80211_local *local = sdata->local;
struct sta_info *sta = NULL;
- const struct ieee80211_cipher_scheme *cs = NULL;
struct ieee80211_key *key;
int err;
if (!ieee80211_sdata_running(sdata))
return -ENETDOWN;
+ if (IS_ERR(link))
+ return PTR_ERR(link);
+
if (pairwise && params->mode == NL80211_KEY_SET_TX)
return ieee80211_set_tx(sdata, mac_addr, key_idx);
@@ -453,29 +487,22 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
case WLAN_CIPHER_SUITE_WEP40:
case WLAN_CIPHER_SUITE_TKIP:
case WLAN_CIPHER_SUITE_WEP104:
+ if (link_id >= 0)
+ return -EINVAL;
if (WARN_ON_ONCE(fips_enabled))
return -EINVAL;
break;
- case WLAN_CIPHER_SUITE_CCMP:
- case WLAN_CIPHER_SUITE_CCMP_256:
- case WLAN_CIPHER_SUITE_AES_CMAC:
- case WLAN_CIPHER_SUITE_BIP_CMAC_256:
- case WLAN_CIPHER_SUITE_BIP_GMAC_128:
- case WLAN_CIPHER_SUITE_BIP_GMAC_256:
- case WLAN_CIPHER_SUITE_GCMP:
- case WLAN_CIPHER_SUITE_GCMP_256:
- break;
default:
- cs = ieee80211_cs_get(local, params->cipher, sdata->vif.type);
break;
}
key = ieee80211_key_alloc(params->cipher, key_idx, params->key_len,
- params->key, params->seq_len, params->seq,
- cs);
+ params->key, params->seq_len, params->seq);
if (IS_ERR(key))
return PTR_ERR(key);
+ key->conf.link_id = link_id;
+
if (pairwise)
key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE;
@@ -537,10 +564,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
break;
}
- if (sta)
- sta->cipher_scheme = cs;
-
- err = ieee80211_key_link(key, sdata, sta);
+ err = ieee80211_key_link(key, link, sta);
out_unlock:
mutex_unlock(&local->sta_mtx);
@@ -548,32 +572,80 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
return err;
}
-static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
- u8 key_idx, bool pairwise, const u8 *mac_addr)
+static struct ieee80211_key *
+ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, int link_id,
+ u8 key_idx, bool pairwise, const u8 *mac_addr)
{
- struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
- struct sta_info *sta;
- struct ieee80211_key *key = NULL;
- int ret;
+ struct ieee80211_link_data *link = &sdata->deflink;
+ struct ieee80211_key *key;
- mutex_lock(&local->sta_mtx);
- mutex_lock(&local->key_mtx);
+ if (link_id >= 0) {
+ link = rcu_dereference_check(sdata->link[link_id],
+ lockdep_is_held(&sdata->wdev.mtx));
+ if (!link)
+ return NULL;
+ }
if (mac_addr) {
- ret = -ENOENT;
+ struct sta_info *sta;
+ struct link_sta_info *link_sta;
sta = sta_info_get_bss(sdata, mac_addr);
if (!sta)
- goto out_unlock;
+ return NULL;
- if (pairwise)
- key = key_mtx_dereference(local, sta->ptk[key_idx]);
- else
- key = key_mtx_dereference(local, sta->gtk[key_idx]);
- } else
- key = key_mtx_dereference(local, sdata->keys[key_idx]);
+ if (link_id >= 0) {
+ link_sta = rcu_dereference_check(sta->link[link_id],
+ lockdep_is_held(&local->sta_mtx));
+ if (!link_sta)
+ return NULL;
+ } else {
+ link_sta = &sta->deflink;
+ }
+
+ if (pairwise && key_idx < NUM_DEFAULT_KEYS)
+ return rcu_dereference_check_key_mtx(local,
+ sta->ptk[key_idx]);
+
+ if (!pairwise &&
+ key_idx < NUM_DEFAULT_KEYS +
+ NUM_DEFAULT_MGMT_KEYS +
+ NUM_DEFAULT_BEACON_KEYS)
+ return rcu_dereference_check_key_mtx(local,
+ link_sta->gtk[key_idx]);
+ return NULL;
+ }
+
+ if (pairwise && key_idx < NUM_DEFAULT_KEYS)
+ return rcu_dereference_check_key_mtx(local,
+ sdata->keys[key_idx]);
+
+ key = rcu_dereference_check_key_mtx(local, link->gtk[key_idx]);
+ if (key)
+ return key;
+
+ /* or maybe it was a WEP key */
+ if (key_idx < NUM_DEFAULT_KEYS)
+ return rcu_dereference_check_key_mtx(local, sdata->keys[key_idx]);
+
+ return NULL;
+}
+
+static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
+ int link_id, u8 key_idx, bool pairwise,
+ const u8 *mac_addr)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_key *key;
+ int ret;
+
+ mutex_lock(&local->sta_mtx);
+ mutex_lock(&local->key_mtx);
+
+ key = ieee80211_lookup_key(sdata, link_id, key_idx, pairwise, mac_addr);
if (!key) {
ret = -ENOENT;
goto out_unlock;
@@ -590,16 +662,15 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
}
static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
- u8 key_idx, bool pairwise, const u8 *mac_addr,
- void *cookie,
+ int link_id, u8 key_idx, bool pairwise,
+ const u8 *mac_addr, void *cookie,
void (*callback)(void *cookie,
struct key_params *params))
{
struct ieee80211_sub_if_data *sdata;
- struct sta_info *sta = NULL;
u8 seq[6] = {0};
struct key_params params;
- struct ieee80211_key *key = NULL;
+ struct ieee80211_key *key;
u64 pn64;
u32 iv32;
u16 iv16;
@@ -610,20 +681,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
rcu_read_lock();
- if (mac_addr) {
- sta = sta_info_get_bss(sdata, mac_addr);
- if (!sta)
- goto out;
-
- if (pairwise && key_idx < NUM_DEFAULT_KEYS)
- key = rcu_dereference(sta->ptk[key_idx]);
- else if (!pairwise &&
- key_idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS +
- NUM_DEFAULT_BEACON_KEYS)
- key = rcu_dereference(sta->gtk[key_idx]);
- } else
- key = rcu_dereference(sdata->keys[key_idx]);
-
+ key = ieee80211_lookup_key(sdata, link_id, key_idx, pairwise, mac_addr);
if (!key)
goto out;
@@ -710,34 +768,49 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
static int ieee80211_config_default_key(struct wiphy *wiphy,
struct net_device *dev,
- u8 key_idx, bool uni,
+ int link_id, u8 key_idx, bool uni,
bool multi)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_link_data *link =
+ ieee80211_link_or_deflink(sdata, link_id, false);
+
+ if (IS_ERR(link))
+ return PTR_ERR(link);
- ieee80211_set_default_key(sdata, key_idx, uni, multi);
+ ieee80211_set_default_key(link, key_idx, uni, multi);
return 0;
}
static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy,
struct net_device *dev,
- u8 key_idx)
+ int link_id, u8 key_idx)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_link_data *link =
+ ieee80211_link_or_deflink(sdata, link_id, true);
+
+ if (IS_ERR(link))
+ return PTR_ERR(link);
- ieee80211_set_default_mgmt_key(sdata, key_idx);
+ ieee80211_set_default_mgmt_key(link, key_idx);
return 0;
}
static int ieee80211_config_default_beacon_key(struct wiphy *wiphy,
struct net_device *dev,
- u8 key_idx)
+ int link_id, u8 key_idx)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_link_data *link =
+ ieee80211_link_or_deflink(sdata, link_id, true);
- ieee80211_set_default_beacon_key(sdata, key_idx);
+ if (IS_ERR(link))
+ return PTR_ERR(link);
+
+ ieee80211_set_default_beacon_key(link, key_idx);
return 0;
}
@@ -844,9 +917,10 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
sdata = wiphy_dereference(local->hw.wiphy,
local->monitor_sdata);
if (sdata) {
- ieee80211_vif_release_channel(sdata);
- ret = ieee80211_vif_use_channel(sdata, chandef,
- IEEE80211_CHANCTX_EXCLUSIVE);
+ ieee80211_link_release_channel(&sdata->deflink);
+ ret = ieee80211_link_use_channel(&sdata->deflink,
+ chandef,
+ IEEE80211_CHANCTX_EXCLUSIVE);
}
} else if (local->open_count == local->monitors) {
local->_oper_chandef = *chandef;
@@ -864,14 +938,15 @@ static int
ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
const u8 *resp, size_t resp_len,
const struct ieee80211_csa_settings *csa,
- const struct ieee80211_color_change_settings *cca)
+ const struct ieee80211_color_change_settings *cca,
+ struct ieee80211_link_data *link)
{
struct probe_resp *new, *old;
if (!resp || !resp_len)
return 1;
- old = sdata_dereference(sdata->u.ap.probe_resp, sdata);
+ old = sdata_dereference(link->u.ap.probe_resp, sdata);
new = kzalloc(sizeof(struct probe_resp) + resp_len, GFP_KERNEL);
if (!new)
@@ -887,7 +962,7 @@ ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
else if (cca)
new->cntdwn_counter_offsets[0] = cca->counter_offset_presp;
- rcu_assign_pointer(sdata->u.ap.probe_resp, new);
+ rcu_assign_pointer(link->u.ap.probe_resp, new);
if (old)
kfree_rcu(old, rcu_head);
@@ -895,7 +970,9 @@ ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
}
static int ieee80211_set_fils_discovery(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_fils_discovery *params)
+ struct cfg80211_fils_discovery *params,
+ struct ieee80211_link_data *link,
+ struct ieee80211_bss_conf *link_conf)
{
struct fils_discovery_data *new, *old = NULL;
struct ieee80211_fils_discovery *fd;
@@ -903,17 +980,17 @@ static int ieee80211_set_fils_discovery(struct ieee80211_sub_if_data *sdata,
if (!params->tmpl || !params->tmpl_len)
return -EINVAL;
- fd = &sdata->vif.bss_conf.fils_discovery;
+ fd = &link_conf->fils_discovery;
fd->min_interval = params->min_interval;
fd->max_interval = params->max_interval;
- old = sdata_dereference(sdata->u.ap.fils_discovery, sdata);
+ old = sdata_dereference(link->u.ap.fils_discovery, sdata);
new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL);
if (!new)
return -ENOMEM;
new->len = params->tmpl_len;
memcpy(new->data, params->tmpl, params->tmpl_len);
- rcu_assign_pointer(sdata->u.ap.fils_discovery, new);
+ rcu_assign_pointer(link->u.ap.fils_discovery, new);
if (old)
kfree_rcu(old, rcu_head);
@@ -923,26 +1000,27 @@ static int ieee80211_set_fils_discovery(struct ieee80211_sub_if_data *sdata,
static int
ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_unsol_bcast_probe_resp *params)
+ struct cfg80211_unsol_bcast_probe_resp *params,
+ struct ieee80211_link_data *link,
+ struct ieee80211_bss_conf *link_conf)
{
struct unsol_bcast_probe_resp_data *new, *old = NULL;
if (!params->tmpl || !params->tmpl_len)
return -EINVAL;
- old = sdata_dereference(sdata->u.ap.unsol_bcast_probe_resp, sdata);
+ old = sdata_dereference(link->u.ap.unsol_bcast_probe_resp, sdata);
new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL);
if (!new)
return -ENOMEM;
new->len = params->tmpl_len;
memcpy(new->data, params->tmpl, params->tmpl_len);
- rcu_assign_pointer(sdata->u.ap.unsol_bcast_probe_resp, new);
+ rcu_assign_pointer(link->u.ap.unsol_bcast_probe_resp, new);
if (old)
kfree_rcu(old, rcu_head);
- sdata->vif.bss_conf.unsol_bcast_probe_resp_interval =
- params->interval;
+ link_conf->unsol_bcast_probe_resp_interval = params->interval;
return 0;
}
@@ -950,18 +1028,17 @@ ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata,
static int ieee80211_set_ftm_responder_params(
struct ieee80211_sub_if_data *sdata,
const u8 *lci, size_t lci_len,
- const u8 *civicloc, size_t civicloc_len)
+ const u8 *civicloc, size_t civicloc_len,
+ struct ieee80211_bss_conf *link_conf)
{
struct ieee80211_ftm_responder_params *new, *old;
- struct ieee80211_bss_conf *bss_conf;
u8 *pos;
int len;
if (!lci_len && !civicloc_len)
return 0;
- bss_conf = &sdata->vif.bss_conf;
- old = bss_conf->ftmr_params;
+ old = link_conf->ftmr_params;
len = lci_len + civicloc_len;
new = kzalloc(sizeof(*new) + len, GFP_KERNEL);
@@ -983,24 +1060,43 @@ static int ieee80211_set_ftm_responder_params(
pos += civicloc_len;
}
- bss_conf->ftmr_params = new;
+ link_conf->ftmr_params = new;
kfree(old);
return 0;
}
+static int
+ieee80211_copy_mbssid_beacon(u8 *pos, struct cfg80211_mbssid_elems *dst,
+ struct cfg80211_mbssid_elems *src)
+{
+ int i, offset = 0;
+
+ for (i = 0; i < src->cnt; i++) {
+ memcpy(pos + offset, src->elem[i].data, src->elem[i].len);
+ dst->elem[i].len = src->elem[i].len;
+ dst->elem[i].data = pos + offset;
+ offset += dst->elem[i].len;
+ }
+ dst->cnt = src->cnt;
+
+ return offset;
+}
+
static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
struct cfg80211_beacon_data *params,
const struct ieee80211_csa_settings *csa,
const struct ieee80211_color_change_settings *cca)
{
+ struct cfg80211_mbssid_elems *mbssid = NULL;
struct beacon_data *new, *old;
int new_head_len, new_tail_len;
int size, err;
u32 changed = BSS_CHANGED_BEACON;
+ struct ieee80211_bss_conf *link_conf = link->conf;
- old = sdata_dereference(sdata->u.ap.beacon, sdata);
-
+ old = sdata_dereference(link->u.ap.beacon, sdata);
/* Need to have a beacon head if we don't have one yet */
if (!params->head && !old)
@@ -1021,6 +1117,17 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
size = sizeof(*new) + new_head_len + new_tail_len;
+ /* new or old multiple BSSID elements? */
+ if (params->mbssid_ies) {
+ mbssid = params->mbssid_ies;
+ size += struct_size(new->mbssid_ies, elem, mbssid->cnt);
+ size += ieee80211_get_mbssid_beacon_len(mbssid);
+ } else if (old && old->mbssid_ies) {
+ mbssid = old->mbssid_ies;
+ size += struct_size(new->mbssid_ies, elem, mbssid->cnt);
+ size += ieee80211_get_mbssid_beacon_len(mbssid);
+ }
+
new = kzalloc(size, GFP_KERNEL);
if (!new)
return -ENOMEM;
@@ -1029,12 +1136,23 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
/*
* pointers go into the block we allocated,
- * memory is | beacon_data | head | tail |
+ * memory is | beacon_data | head | tail | mbssid_ies
*/
new->head = ((u8 *) new) + sizeof(*new);
new->tail = new->head + new_head_len;
new->head_len = new_head_len;
new->tail_len = new_tail_len;
+ /* copy in optional mbssid_ies */
+ if (mbssid) {
+ u8 *pos = new->tail + new->tail_len;
+
+ new->mbssid_ies = (void *)pos;
+ pos += struct_size(new->mbssid_ies, elem, mbssid->cnt);
+ ieee80211_copy_mbssid_beacon(pos, new->mbssid_ies, mbssid);
+ /* update bssid_indicator */
+ link_conf->bssid_indicator =
+ ilog2(__roundup_pow_of_two(mbssid->cnt + 1));
+ }
if (csa) {
new->cntdwn_current_counter = csa->count;
@@ -1060,7 +1178,7 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
memcpy(new->tail, old->tail, new_tail_len);
err = ieee80211_set_probe_resp(sdata, params->probe_resp,
- params->probe_resp_len, csa, cca);
+ params->probe_resp_len, csa, cca, link);
if (err < 0) {
kfree(new);
return err;
@@ -1069,12 +1187,13 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
changed |= BSS_CHANGED_AP_PROBE_RESP;
if (params->ftm_responder != -1) {
- sdata->vif.bss_conf.ftm_responder = params->ftm_responder;
+ link_conf->ftm_responder = params->ftm_responder;
err = ieee80211_set_ftm_responder_params(sdata,
params->lci,
params->lci_len,
params->civicloc,
- params->civicloc_len);
+ params->civicloc_len,
+ link_conf);
if (err < 0) {
kfree(new);
@@ -1084,7 +1203,8 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
changed |= BSS_CHANGED_FTM_RESPONDER;
}
- rcu_assign_pointer(sdata->u.ap.beacon, new);
+ rcu_assign_pointer(link->u.ap.beacon, new);
+ sdata->u.ap.active = true;
if (old)
kfree_rcu(old, rcu_head);
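The beacon stays in one allocation with the layout from the comment above
(struct header, then head, tail and the MBSSID block), so a single
kfree_rcu() on the beacon_data releases everything. The bssid_indicator is
the n in "2^n advertised BSSIDs": ilog2(__roundup_pow_of_two(cnt + 1)) is
the ceiling log2 of the profile count including the transmitting BSS. A
small standalone check of that arithmetic, in plain C outside the kernel:

	#include <stdio.h>

	static unsigned int bssid_indicator(unsigned int mbssid_cnt)
	{
		unsigned int total = mbssid_cnt + 1; /* + transmitting BSS */
		unsigned int n = 0;

		while ((1u << n) < total) /* ilog2(roundup_pow_of_two(total)) */
			n++;
		return n;
	}

	int main(void)
	{
		/* 5 nontransmitted profiles -> 6 BSSIDs -> 2^3 = 8 >= 6 */
		printf("%u\n", bssid_indicator(5)); /* prints 3 */
		return 0;
	}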
@@ -1102,57 +1222,66 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
u32 changed = BSS_CHANGED_BEACON_INT |
BSS_CHANGED_BEACON_ENABLED |
BSS_CHANGED_BEACON |
- BSS_CHANGED_SSID |
BSS_CHANGED_P2P_PS |
BSS_CHANGED_TXPOWER |
BSS_CHANGED_TWT;
int i, err;
int prev_beacon_int;
+ unsigned int link_id = params->beacon.link_id;
+ struct ieee80211_link_data *link;
+ struct ieee80211_bss_conf *link_conf;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (!link)
+ return -ENOLINK;
+
+ link_conf = link->conf;
- old = sdata_dereference(sdata->u.ap.beacon, sdata);
+ old = sdata_dereference(link->u.ap.beacon, sdata);
if (old)
return -EALREADY;
if (params->smps_mode != NL80211_SMPS_OFF)
return -ENOTSUPP;
- sdata->smps_mode = IEEE80211_SMPS_OFF;
+ link->smps_mode = IEEE80211_SMPS_OFF;
- sdata->needed_rx_chains = sdata->local->rx_chains;
+ link->needed_rx_chains = sdata->local->rx_chains;
- prev_beacon_int = sdata->vif.bss_conf.beacon_int;
- sdata->vif.bss_conf.beacon_int = params->beacon_interval;
+ prev_beacon_int = link_conf->beacon_int;
+ link_conf->beacon_int = params->beacon_interval;
if (params->he_cap && params->he_oper) {
- sdata->vif.bss_conf.he_support = true;
- sdata->vif.bss_conf.htc_trig_based_pkt_ext =
+ link_conf->he_support = true;
+ link_conf->htc_trig_based_pkt_ext =
le32_get_bits(params->he_oper->he_oper_params,
IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK);
- sdata->vif.bss_conf.frame_time_rts_th =
+ link_conf->frame_time_rts_th =
le32_get_bits(params->he_oper->he_oper_params,
IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK);
changed |= BSS_CHANGED_HE_OBSS_PD;
- if (params->he_bss_color.enabled)
+ if (params->beacon.he_bss_color.enabled)
changed |= BSS_CHANGED_HE_BSS_COLOR;
}
if (sdata->vif.type == NL80211_IFTYPE_AP &&
params->mbssid_config.tx_wdev) {
err = ieee80211_set_ap_mbssid_options(sdata,
- params->mbssid_config);
+ params->mbssid_config,
+ link_conf);
if (err)
return err;
}
mutex_lock(&local->mtx);
- err = ieee80211_vif_use_channel(sdata, &params->chandef,
- IEEE80211_CHANCTX_SHARED);
+ err = ieee80211_link_use_channel(link, &params->chandef,
+ IEEE80211_CHANCTX_SHARED);
if (!err)
- ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
+ ieee80211_link_copy_chanctx_to_vlans(link, false);
mutex_unlock(&local->mtx);
if (err) {
- sdata->vif.bss_conf.beacon_int = prev_beacon_int;
+ link_conf->beacon_int = prev_beacon_int;
return err;
}
@@ -1166,9 +1295,6 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
params->crypto.control_port_over_nl80211;
sdata->control_port_no_preauth =
params->crypto.control_port_no_preauth;
- sdata->encrypt_headroom = ieee80211_cs_headroom(sdata->local,
- &params->crypto,
- sdata->vif.type);
list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) {
vlan->control_port_protocol =
@@ -1179,34 +1305,30 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
params->crypto.control_port_over_nl80211;
vlan->control_port_no_preauth =
params->crypto.control_port_no_preauth;
- vlan->encrypt_headroom =
- ieee80211_cs_headroom(sdata->local,
- &params->crypto,
- vlan->vif.type);
- }
-
- sdata->vif.bss_conf.dtim_period = params->dtim_period;
- sdata->vif.bss_conf.enable_beacon = true;
- sdata->vif.bss_conf.allow_p2p_go_ps = sdata->vif.p2p;
- sdata->vif.bss_conf.twt_responder = params->twt_responder;
- sdata->vif.bss_conf.he_obss_pd = params->he_obss_pd;
- sdata->vif.bss_conf.he_bss_color = params->he_bss_color;
- sdata->vif.bss_conf.s1g = params->chandef.chan->band ==
+ }
+
+ link_conf->dtim_period = params->dtim_period;
+ link_conf->enable_beacon = true;
+ link_conf->allow_p2p_go_ps = sdata->vif.p2p;
+ link_conf->twt_responder = params->twt_responder;
+ link_conf->he_obss_pd = params->he_obss_pd;
+ link_conf->he_bss_color = params->beacon.he_bss_color;
+ sdata->vif.cfg.s1g = params->chandef.chan->band ==
NL80211_BAND_S1GHZ;
- sdata->vif.bss_conf.ssid_len = params->ssid_len;
+ sdata->vif.cfg.ssid_len = params->ssid_len;
if (params->ssid_len)
- memcpy(sdata->vif.bss_conf.ssid, params->ssid,
+ memcpy(sdata->vif.cfg.ssid, params->ssid,
params->ssid_len);
- sdata->vif.bss_conf.hidden_ssid =
+ link_conf->hidden_ssid =
(params->hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE);
- memset(&sdata->vif.bss_conf.p2p_noa_attr, 0,
- sizeof(sdata->vif.bss_conf.p2p_noa_attr));
- sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow =
+ memset(&link_conf->p2p_noa_attr, 0,
+ sizeof(link_conf->p2p_noa_attr));
+ link_conf->p2p_noa_attr.oppps_ctwindow =
params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK;
if (params->p2p_opp_ps)
- sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |=
+ link_conf->p2p_noa_attr.oppps_ctwindow |=
IEEE80211_P2P_OPPPS_ENABLE_BIT;
sdata->beacon_rate_set = false;
@@ -1221,16 +1343,17 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
}
if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL))
- sdata->vif.bss_conf.beacon_tx_rate = params->beacon_rate;
+ link_conf->beacon_tx_rate = params->beacon_rate;
- err = ieee80211_assign_beacon(sdata, &params->beacon, NULL, NULL);
+ err = ieee80211_assign_beacon(sdata, link, &params->beacon, NULL, NULL);
if (err < 0)
goto error;
changed |= err;
if (params->fils_discovery.max_interval) {
err = ieee80211_set_fils_discovery(sdata,
- &params->fils_discovery);
+ &params->fils_discovery,
+ link, link_conf);
if (err < 0)
goto error;
changed |= BSS_CHANGED_FILS_DISCOVERY;
@@ -1238,24 +1361,27 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
if (params->unsol_bcast_probe_resp.interval) {
err = ieee80211_set_unsol_bcast_probe_resp(sdata,
- &params->unsol_bcast_probe_resp);
+ &params->unsol_bcast_probe_resp,
+ link, link_conf);
if (err < 0)
goto error;
changed |= BSS_CHANGED_UNSOL_BCAST_PROBE_RESP;
}
- err = drv_start_ap(sdata->local, sdata);
+ err = drv_start_ap(sdata->local, sdata, link_conf);
if (err) {
- old = sdata_dereference(sdata->u.ap.beacon, sdata);
+ old = sdata_dereference(link->u.ap.beacon, sdata);
if (old)
kfree_rcu(old, rcu_head);
- RCU_INIT_POINTER(sdata->u.ap.beacon, NULL);
+ RCU_INIT_POINTER(link->u.ap.beacon, NULL);
+ sdata->u.ap.active = false;
goto error;
}
ieee80211_recalc_dtim(local, sdata);
- ieee80211_bss_info_change_notify(sdata, changed);
+ ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_SSID);
+ ieee80211_link_info_change_notify(sdata, link, changed);
netif_carrier_on(dev);
list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
@@ -1265,7 +1391,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
error:
mutex_lock(&local->mtx);
- ieee80211_vif_release_channel(sdata);
+ ieee80211_link_release_channel(link);
mutex_unlock(&local->mtx);
return err;
@@ -1274,31 +1400,56 @@ error:
static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_beacon_data *params)
{
- struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_link_data *link;
struct beacon_data *old;
int err;
+ struct ieee80211_bss_conf *link_conf;
- sdata = IEEE80211_DEV_TO_SUB_IF(dev);
sdata_assert_lock(sdata);
+ link = sdata_dereference(sdata->link[params->link_id], sdata);
+ if (!link)
+ return -ENOLINK;
+
+ link_conf = link->conf;
+
/* don't allow changing the beacon while a countdown is in place - the
* offset of the channel switch counter may change
*/
- if (sdata->vif.csa_active || sdata->vif.color_change_active)
+ if (link_conf->csa_active || link_conf->color_change_active)
return -EBUSY;
- old = sdata_dereference(sdata->u.ap.beacon, sdata);
+ old = sdata_dereference(link->u.ap.beacon, sdata);
if (!old)
return -ENOENT;
- err = ieee80211_assign_beacon(sdata, params, NULL, NULL);
+ err = ieee80211_assign_beacon(sdata, link, params, NULL, NULL);
if (err < 0)
return err;
- ieee80211_bss_info_change_notify(sdata, err);
+
+ if (params->he_bss_color_valid &&
+ params->he_bss_color.enabled != link_conf->he_bss_color.enabled) {
+ link_conf->he_bss_color.enabled = params->he_bss_color.enabled;
+ err |= BSS_CHANGED_HE_BSS_COLOR;
+ }
+
+ ieee80211_link_info_change_notify(sdata, link, err);
return 0;
}
-static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
+static void ieee80211_free_next_beacon(struct ieee80211_link_data *link)
+{
+ if (!link->u.ap.next_beacon)
+ return;
+
+ kfree(link->u.ap.next_beacon->mbssid_ies);
+ kfree(link->u.ap.next_beacon);
+ link->u.ap.next_beacon = NULL;
+}
+
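The two kfree() calls mirror how next_beacon is built: unlike
ieee80211_assign_beacon(), which embeds the MBSSID block in the beacon's
single allocation, cfg80211_beacon_dup() further down in this patch
allocates mbssid_ies as a separate kzalloc(), so the helper has to release
both objects and clear the pointer before the next countdown can stage a
beacon. The same ownership shape in plain C:

	/* outer object owns an optional separately-allocated inner table,
	 * so freeing takes two steps (names here are illustrative)
	 */
	#include <stdlib.h>

	struct elems { int cnt; /* ... */ };
	struct beacon { struct elems *mbssid_ies; /* trailing data ... */ };

	static void beacon_free(struct beacon **slot)
	{
		if (!*slot)
			return;
		free((*slot)->mbssid_ies); /* may be NULL; free(NULL) is a no-op */
		free(*slot);
		*slot = NULL;
	}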
+static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
+ unsigned int link_id)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_sub_if_data *vlan;
@@ -1308,32 +1459,35 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
struct fils_discovery_data *old_fils_discovery;
struct unsol_bcast_probe_resp_data *old_unsol_bcast_probe_resp;
struct cfg80211_chan_def chandef;
+ struct ieee80211_link_data *link =
+ sdata_dereference(sdata->link[link_id], sdata);
+ struct ieee80211_bss_conf *link_conf = link->conf;
sdata_assert_lock(sdata);
- old_beacon = sdata_dereference(sdata->u.ap.beacon, sdata);
+ old_beacon = sdata_dereference(link->u.ap.beacon, sdata);
if (!old_beacon)
return -ENOENT;
- old_probe_resp = sdata_dereference(sdata->u.ap.probe_resp, sdata);
- old_fils_discovery = sdata_dereference(sdata->u.ap.fils_discovery,
+ old_probe_resp = sdata_dereference(link->u.ap.probe_resp,
+ sdata);
+ old_fils_discovery = sdata_dereference(link->u.ap.fils_discovery,
sdata);
old_unsol_bcast_probe_resp =
- sdata_dereference(sdata->u.ap.unsol_bcast_probe_resp,
+ sdata_dereference(link->u.ap.unsol_bcast_probe_resp,
sdata);
/* abort any running channel switch */
mutex_lock(&local->mtx);
- sdata->vif.csa_active = false;
- if (sdata->csa_block_tx) {
+ link_conf->csa_active = false;
+ if (link->csa_block_tx) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->csa_block_tx = false;
+ link->csa_block_tx = false;
}
mutex_unlock(&local->mtx);
- kfree(sdata->u.ap.next_beacon);
- sdata->u.ap.next_beacon = NULL;
+ ieee80211_free_next_beacon(link);
/* turn off carrier for this interface and dependent VLANs */
list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
@@ -1341,10 +1495,11 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
netif_carrier_off(dev);
/* remove beacon and probe response */
- RCU_INIT_POINTER(sdata->u.ap.beacon, NULL);
- RCU_INIT_POINTER(sdata->u.ap.probe_resp, NULL);
- RCU_INIT_POINTER(sdata->u.ap.fils_discovery, NULL);
- RCU_INIT_POINTER(sdata->u.ap.unsol_bcast_probe_resp, NULL);
+ sdata->u.ap.active = false;
+ RCU_INIT_POINTER(link->u.ap.beacon, NULL);
+ RCU_INIT_POINTER(link->u.ap.probe_resp, NULL);
+ RCU_INIT_POINTER(link->u.ap.fils_discovery, NULL);
+ RCU_INIT_POINTER(link->u.ap.unsol_bcast_probe_resp, NULL);
kfree_rcu(old_beacon, rcu_head);
if (old_probe_resp)
kfree_rcu(old_probe_resp, rcu_head);
@@ -1353,35 +1508,36 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
if (old_unsol_bcast_probe_resp)
kfree_rcu(old_unsol_bcast_probe_resp, rcu_head);
- kfree(sdata->vif.bss_conf.ftmr_params);
- sdata->vif.bss_conf.ftmr_params = NULL;
+ kfree(link_conf->ftmr_params);
+ link_conf->ftmr_params = NULL;
__sta_info_flush(sdata, true);
ieee80211_free_keys(sdata, true);
- sdata->vif.bss_conf.enable_beacon = false;
+ link_conf->enable_beacon = false;
sdata->beacon_rate_set = false;
- sdata->vif.bss_conf.ssid_len = 0;
+ sdata->vif.cfg.ssid_len = 0;
clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
+ ieee80211_link_info_change_notify(sdata, link,
+ BSS_CHANGED_BEACON_ENABLED);
if (sdata->wdev.cac_started) {
- chandef = sdata->vif.bss_conf.chandef;
- cancel_delayed_work_sync(&sdata->dfs_cac_timer_work);
+ chandef = link_conf->chandef;
+ cancel_delayed_work_sync(&link->dfs_cac_timer_work);
cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_ABORTED,
GFP_KERNEL);
}
- drv_stop_ap(sdata->local, sdata);
+ drv_stop_ap(sdata->local, sdata, link_conf);
/* free all potentially still buffered bcast frames */
local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf);
ieee80211_purge_tx_queue(&local->hw, &sdata->u.ap.ps.bc_buf);
mutex_lock(&local->mtx);
- ieee80211_vif_copy_chanctx_to_vlans(sdata, true);
- ieee80211_vif_release_channel(sdata);
+ ieee80211_link_copy_chanctx_to_vlans(link, true);
+ ieee80211_link_release_channel(link);
mutex_unlock(&local->mtx);
return 0;
@@ -1512,50 +1668,111 @@ static void sta_apply_mesh_params(struct ieee80211_local *local,
#endif
}
-static void sta_apply_airtime_params(struct ieee80211_local *local,
- struct sta_info *sta,
- struct station_parameters *params)
+static int sta_link_apply_parameters(struct ieee80211_local *local,
+ struct sta_info *sta, bool new_link,
+ struct link_station_parameters *params)
{
- u8 ac;
+ int ret = 0;
+ struct ieee80211_supported_band *sband;
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+ u32 link_id = params->link_id < 0 ? 0 : params->link_id;
+ struct ieee80211_link_data *link =
+ sdata_dereference(sdata->link[link_id], sdata);
+ struct link_sta_info *link_sta =
+ rcu_dereference_protected(sta->link[link_id],
+ lockdep_is_held(&local->sta_mtx));
- for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- struct airtime_sched_info *air_sched = &local->airtime[ac];
- struct airtime_info *air_info = &sta->airtime[ac];
- struct txq_info *txqi;
- u8 tid;
-
- spin_lock_bh(&air_sched->lock);
- for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) {
- if (air_info->weight == params->airtime_weight ||
- !sta->sta.txq[tid] ||
- ac != ieee80211_ac_from_tid(tid))
- continue;
+ /*
+ * If there are no changes, then accept a link that doesn't exist,
+ * unless it's a new link.
+ */
+ if (params->link_id < 0 && !new_link &&
+ !params->link_mac && !params->txpwr_set &&
+ !params->supported_rates_len &&
+ !params->ht_capa && !params->vht_capa &&
+ !params->he_capa && !params->eht_capa &&
+ !params->opmode_notif_used)
+ return 0;
- airtime_weight_set(air_info, params->airtime_weight);
+ if (!link || !link_sta)
+ return -EINVAL;
- txqi = to_txq_info(sta->sta.txq[tid]);
- if (RB_EMPTY_NODE(&txqi->schedule_order))
- continue;
+ sband = ieee80211_get_link_sband(link);
+ if (!sband)
+ return -EINVAL;
- ieee80211_update_airtime_weight(local, air_sched,
- 0, true);
+ if (params->link_mac) {
+ if (new_link) {
+ memcpy(link_sta->addr, params->link_mac, ETH_ALEN);
+ memcpy(link_sta->pub->addr, params->link_mac, ETH_ALEN);
+ } else if (!ether_addr_equal(link_sta->addr,
+ params->link_mac)) {
+ return -EINVAL;
}
- spin_unlock_bh(&air_sched->lock);
+ } else if (new_link) {
+ return -EINVAL;
+ }
+
+ if (params->txpwr_set) {
+ link_sta->pub->txpwr.type = params->txpwr.type;
+ if (params->txpwr.type == NL80211_TX_POWER_LIMITED)
+ link_sta->pub->txpwr.power = params->txpwr.power;
+ ret = drv_sta_set_txpwr(local, sdata, sta);
+ if (ret)
+ return ret;
+ }
+
+ if (params->supported_rates &&
+ params->supported_rates_len) {
+ ieee80211_parse_bitrates(link->conf->chandef.width,
+ sband, params->supported_rates,
+ params->supported_rates_len,
+ &link_sta->pub->supp_rates[sband->band]);
}
+
+ if (params->ht_capa)
+ ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
+ params->ht_capa, link_sta);
+
+ /* VHT can override some HT caps such as the A-MSDU max length */
+ if (params->vht_capa)
+ ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
+ params->vht_capa, link_sta);
+
+ if (params->he_capa)
+ ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
+ (void *)params->he_capa,
+ params->he_capa_len,
+ (void *)params->he_6ghz_capa,
+ link_sta);
+
+ if (params->eht_capa)
+ ieee80211_eht_cap_ie_to_sta_eht_cap(sdata, sband,
+ (u8 *)params->he_capa,
+ params->he_capa_len,
+ params->eht_capa,
+ params->eht_capa_len,
+ link_sta);
+
+ if (params->opmode_notif_used) {
+ /* returned value is only needed for rc update, but the
+ * rc isn't initialized here yet, so ignore it
+ */
+ __ieee80211_vht_handle_opmode(sdata, link_sta,
+ params->opmode_notif,
+ sband->band);
+ }
+
+ return ret;
}
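sta_link_apply_parameters() treats a negative link_id as "unspecified" and
falls back to link 0, the default link of a non-MLO interface, but per the
early-return block it only insists the link exists when something is
actually being configured. The address rules it enforces reduce to a small
predicate; "link_mac_ok" is a hypothetical name, not mac80211 API:

	/* new link: link_mac is mandatory and becomes the link address;
	 * existing link: link_mac is optional but must match if given
	 */
	static bool link_mac_ok(bool new_link, const u8 *link_mac,
				const u8 *cur_addr)
	{
		if (new_link)
			return link_mac != NULL;
		return !link_mac || ether_addr_equal(cur_addr, link_mac);
	}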
static int sta_apply_parameters(struct ieee80211_local *local,
struct sta_info *sta,
struct station_parameters *params)
{
- int ret = 0;
- struct ieee80211_supported_band *sband;
struct ieee80211_sub_if_data *sdata = sta->sdata;
u32 mask, set;
-
- sband = ieee80211_get_sband(sdata);
- if (!sband)
- return -EINVAL;
+ int ret = 0;
mask = params->sta_flags_mask;
set = params->sta_flags_set;
@@ -1621,7 +1838,7 @@ static int sta_apply_parameters(struct ieee80211_local *local,
/* mark TDLS channel switch support, if the AP allows it */
if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
- !sdata->u.mgd.tdls_chan_switch_prohibited &&
+ !sdata->deflink.u.mgd.tdls_chan_switch_prohibited &&
params->ext_capab_len >= 4 &&
params->ext_capab[3] & WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH)
set_sta_flag(sta, WLAN_STA_TDLS_CHAN_SWITCH);
@@ -1638,33 +1855,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
sta->sta.max_sp = params->max_sp;
}
- /* The sender might not have sent the last bit, consider it to be 0 */
- if (params->ext_capab_len >= 8) {
- u8 val = (params->ext_capab[7] &
- WLAN_EXT_CAPA8_MAX_MSDU_IN_AMSDU_LSB) >> 7;
-
- /* we did get all the bits, take the MSB as well */
- if (params->ext_capab_len >= 9) {
- u8 val_msb = params->ext_capab[8] &
- WLAN_EXT_CAPA9_MAX_MSDU_IN_AMSDU_MSB;
- val_msb <<= 1;
- val |= val_msb;
- }
-
- switch (val) {
- case 1:
- sta->sta.max_amsdu_subframes = 32;
- break;
- case 2:
- sta->sta.max_amsdu_subframes = 16;
- break;
- case 3:
- sta->sta.max_amsdu_subframes = 8;
- break;
- default:
- sta->sta.max_amsdu_subframes = 0;
- }
- }
+ ieee80211_sta_set_max_amsdu_subframes(sta, params->ext_capab,
+ params->ext_capab_len);
/*
* cfg80211 validates this (1-2007) and allows setting the AID
@@ -1684,45 +1876,10 @@ static int sta_apply_parameters(struct ieee80211_local *local,
if (params->listen_interval >= 0)
sta->listen_interval = params->listen_interval;
- if (params->sta_modify_mask & STATION_PARAM_APPLY_STA_TXPOWER) {
- sta->sta.txpwr.type = params->txpwr.type;
- if (params->txpwr.type == NL80211_TX_POWER_LIMITED)
- sta->sta.txpwr.power = params->txpwr.power;
- ret = drv_sta_set_txpwr(local, sdata, sta);
- if (ret)
- return ret;
- }
-
- if (params->supported_rates && params->supported_rates_len) {
- ieee80211_parse_bitrates(&sdata->vif.bss_conf.chandef,
- sband, params->supported_rates,
- params->supported_rates_len,
- &sta->sta.supp_rates[sband->band]);
- }
-
- if (params->ht_capa)
- ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
- params->ht_capa, sta);
-
- /* VHT can override some HT caps such as the A-MSDU max length */
- if (params->vht_capa)
- ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
- params->vht_capa, sta);
-
- if (params->he_capa)
- ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
- (void *)params->he_capa,
- params->he_capa_len,
- (void *)params->he_6ghz_capa,
- sta);
-
- if (params->opmode_notif_used) {
- /* returned value is only needed for rc update, but the
- * rc isn't initialized here yet, so ignore it
- */
- __ieee80211_vht_handle_opmode(sdata, sta, params->opmode_notif,
- sband->band);
- }
+ ret = sta_link_apply_parameters(local, sta, false,
+ &params->link_sta_params);
+ if (ret)
+ return ret;
if (params->support_p2p_ps >= 0)
sta->sta.support_p2p_ps = params->support_p2p_ps;
@@ -1731,8 +1888,7 @@ static int sta_apply_parameters(struct ieee80211_local *local,
sta_apply_mesh_params(local, sta, params);
if (params->airtime_weight)
- sta_apply_airtime_params(local, sta, params);
-
+ sta->airtime_weight = params->airtime_weight;
/* set the STA state after all sta info from usermode has been set */
if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) ||
@@ -1742,6 +1898,10 @@ static int sta_apply_parameters(struct ieee80211_local *local,
return ret;
}
+ /* Mark the STA as MLO if MLD MAC address is available */
+ if (params->link_sta_params.mld_mac)
+ sta->sta.mlo = true;
+
return 0;
}
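The open-coded extended-capabilities parsing removed above moves into
ieee80211_sta_set_max_amsdu_subframes(); the two-bit field split across
octets 8 and 9 (LSB in bit 7 of ext_capab[7], MSB in bit 0 of ext_capab[8])
still decodes the same way: 1 -> 32, 2 -> 16, 3 -> 8 subframes, 0 -> no
limit. A standalone decode of the removed logic:

	#include <stdint.h>
	#include <stdio.h>

	static unsigned int max_amsdu_subframes(const uint8_t *ext_capab,
						size_t len)
	{
		unsigned int val;

		if (len < 8)
			return 0;	/* field absent: no limit */
		val = (ext_capab[7] >> 7) & 1;
		if (len >= 9)	/* sender may omit the MSB: treat it as 0 */
			val |= (ext_capab[8] & 1) << 1;

		switch (val) {
		case 1: return 32;
		case 2: return 16;
		case 3: return 8;
		default: return 0;	/* no limit advertised */
		}
	}

	int main(void)
	{
		uint8_t caps[9] = { [7] = 0x80, [8] = 0x01 };	/* val = 3 */

		printf("%u\n", max_amsdu_subframes(caps, sizeof(caps))); /* 8 */
		return 0;
	}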
@@ -1774,14 +1934,32 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
!sdata->u.mgd.associated)
return -EINVAL;
- sta = sta_info_alloc(sdata, mac, GFP_KERNEL);
+ /*
+ * If we have a link ID, it can be a non-MLO station on an AP MLD,
+ * but we still need a link_mac in that case, so fall back to the
+ * STA's own MAC address.
+ */
+ if (params->link_sta_params.link_id >= 0)
+ sta = sta_info_alloc_with_link(sdata, mac,
+ params->link_sta_params.link_id,
+ params->link_sta_params.link_mac ?: mac,
+ GFP_KERNEL);
+ else
+ sta = sta_info_alloc(sdata, mac, GFP_KERNEL);
+
if (!sta)
return -ENOMEM;
if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
sta->sta.tdls = true;
+ /* Though the mutex is not needed here (since the station is not
+ * visible yet), sta_apply_parameters (and inner functions) require
+ * the mutex due to other paths.
+ */
+ mutex_lock(&local->sta_mtx);
err = sta_apply_parameters(local, sta, params);
+ mutex_unlock(&local->sta_mtx);
if (err) {
sta_info_free(local, sta);
return err;
@@ -1901,7 +2079,14 @@ static int ieee80211_change_station(struct wiphy *wiphy,
}
}
- err = sta_apply_parameters(local, sta, params);
+ /* we use sta_info_get_bss() so this might be different */
+ if (sdata != sta->sdata) {
+ mutex_lock_nested(&sta->sdata->wdev.mtx, 1);
+ err = sta_apply_parameters(local, sta, params);
+ mutex_unlock(&sta->sdata->wdev.mtx);
+ } else {
+ err = sta_apply_parameters(local, sta, params);
+ }
if (err)
goto out_err;
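sta_info_get_bss() can return a station owned by a different interface in
the same BSS (an AP_VLAN sdata, for instance), and sta_apply_parameters()
expects the owning wdev mutex to be held; taking that second mutex with
mutex_lock_nested(..., 1) tells lockdep this is intentional nesting of two
locks from the same lock class rather than a self-deadlock. The shape of
the annotation, as a hedged sketch with hypothetical wdev_a/wdev_b:

	static void apply_on_other_wdev(struct wireless_dev *wdev_a,
					struct wireless_dev *wdev_b)
	{
		lockdep_assert_held(&wdev_a->mtx);	/* outer, subclass 0 */
		mutex_lock_nested(&wdev_b->mtx, 1);	/* same class, subclass 1 */
		/* ... operate on wdev_b's interface ... */
		mutex_unlock(&wdev_b->mtx);
	}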
@@ -2148,14 +2333,12 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
const struct mesh_setup *setup)
{
u8 *new_ie;
- const u8 *old_ie;
struct ieee80211_sub_if_data *sdata = container_of(ifmsh,
struct ieee80211_sub_if_data, u.mesh);
int i;
/* allocate information elements */
new_ie = NULL;
- old_ie = ifmsh->ie;
if (setup->ie_len) {
new_ie = kmemdup(setup->ie, setup->ie_len,
@@ -2165,7 +2348,6 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
}
ifmsh->ie_len = setup->ie_len;
ifmsh->ie = new_ie;
- kfree(old_ie);
/* now copy the rest of the setup parameters */
ifmsh->mesh_id_len = setup->mesh_id_len;
@@ -2291,7 +2473,8 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy,
if (_chg_mesh_attr(NL80211_MESHCONF_HT_OPMODE, mask)) {
conf->ht_opmode = nconf->ht_opmode;
sdata->vif.bss_conf.ht_operation_mode = nconf->ht_opmode;
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_HT);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ BSS_CHANGED_HT);
}
if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, mask))
conf->dot11MeshHWMPactivePathToRootTimeout =
@@ -2339,12 +2522,12 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev,
sdata->control_port_over_nl80211 = setup->control_port_over_nl80211;
/* can mesh use other SMPS modes? */
- sdata->smps_mode = IEEE80211_SMPS_OFF;
- sdata->needed_rx_chains = sdata->local->rx_chains;
+ sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
+ sdata->deflink.needed_rx_chains = sdata->local->rx_chains;
mutex_lock(&sdata->local->mtx);
- err = ieee80211_vif_use_channel(sdata, &setup->chandef,
- IEEE80211_CHANCTX_SHARED);
+ err = ieee80211_link_use_channel(&sdata->deflink, &setup->chandef,
+ IEEE80211_CHANCTX_SHARED);
mutex_unlock(&sdata->local->mtx);
if (err)
return err;
@@ -2358,7 +2541,7 @@ static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev)
ieee80211_stop_mesh(sdata);
mutex_lock(&sdata->local->mtx);
- ieee80211_vif_release_channel(sdata);
+ ieee80211_link_release_channel(&sdata->deflink);
kfree(sdata->u.mesh.ie);
mutex_unlock(&sdata->local->mtx);
@@ -2374,7 +2557,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
struct ieee80211_supported_band *sband;
u32 changed = 0;
- if (!sdata_dereference(sdata->u.ap.beacon, sdata))
+ if (!sdata_dereference(sdata->deflink.u.ap.beacon, sdata))
return -ENOENT;
sband = ieee80211_get_sband(sdata);
@@ -2405,13 +2588,13 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
}
if (params->basic_rates) {
- ieee80211_parse_bitrates(&sdata->vif.bss_conf.chandef,
+ ieee80211_parse_bitrates(sdata->vif.bss_conf.chandef.width,
wiphy->bands[sband->band],
params->basic_rates,
params->basic_rates_len,
&sdata->vif.bss_conf.basic_rates);
changed |= BSS_CHANGED_BASIC_RATES;
- ieee80211_check_rate_mask(sdata);
+ ieee80211_check_rate_mask(&sdata->deflink);
}
if (params->ap_isolate >= 0) {
@@ -2446,7 +2629,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
changed |= BSS_CHANGED_P2P_PS;
}
- ieee80211_bss_info_change_notify(sdata, changed);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed);
return 0;
}
@@ -2457,6 +2640,8 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy,
{
struct ieee80211_local *local = wiphy_priv(wiphy);
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_link_data *link =
+ ieee80211_link_or_deflink(sdata, params->link_id, true);
struct ieee80211_tx_queue_params p;
if (!local->ops->conf_tx)
@@ -2465,6 +2650,9 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy,
if (local->hw.queues < IEEE80211_NUM_ACS)
return -EOPNOTSUPP;
+ if (IS_ERR(link))
+ return PTR_ERR(link);
+
memset(&p, 0, sizeof(p));
p.aifs = params->aifs;
p.cw_max = params->cwmax;
@@ -2479,15 +2667,16 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy,
ieee80211_regulatory_limit_wmm_params(sdata, &p, params->ac);
- sdata->tx_conf[params->ac] = p;
- if (drv_conf_tx(local, sdata, params->ac, &p)) {
+ link->tx_conf[params->ac] = p;
+ if (drv_conf_tx(local, link, params->ac, &p)) {
wiphy_debug(local->hw.wiphy,
"failed to set TX queue parameters for AC %d\n",
params->ac);
return -EINVAL;
}
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_QOS);
+ ieee80211_link_info_change_notify(sdata, link,
+ BSS_CHANGED_QOS);
return 0;
}
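ieee80211_link_or_deflink(), new in this series, resolves a link_id to the
matching link, falls back to the default link for a negative id, and
reports failure through the pointer itself, hence the IS_ERR()/PTR_ERR()
check instead of a NULL test. A plausible shape for such a lookup, stated
as an assumption since the helper's body is not in this hunk (the real one
also takes the "require valid link" flag passed as the third argument):

	static struct ieee80211_link_data *
	link_or_deflink(struct ieee80211_sub_if_data *sdata, int link_id)
	{
		if (link_id < 0)
			return &sdata->deflink;
		if (!(sdata->vif.valid_links & BIT(link_id)))
			return ERR_PTR(-ENOLINK);
		return sdata_dereference(sdata->link[link_id], sdata);
	}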
@@ -2539,7 +2728,7 @@ static int ieee80211_scan(struct wiphy *wiphy,
* the frames sent while scanning on other channel will be
* lost)
*/
- if (sdata->u.ap.beacon &&
+ if (sdata->deflink.u.ap.beacon &&
(!(wiphy->features & NL80211_FEATURE_AP_SCAN) ||
!(req->flags & NL80211_SCAN_FLAG_AP)))
return -EOPNOTSUPP;
@@ -2636,7 +2825,8 @@ static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev,
memcpy(sdata->vif.bss_conf.mcast_rate, rate,
sizeof(int) * NUM_NL80211_BANDS);
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_MCAST_RATE);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ BSS_CHANGED_MCAST_RATE);
return 0;
}
@@ -2720,14 +2910,15 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
switch (type) {
case NL80211_TX_POWER_AUTOMATIC:
- sdata->user_power_level = IEEE80211_UNSET_POWER_LEVEL;
+ sdata->deflink.user_power_level =
+ IEEE80211_UNSET_POWER_LEVEL;
txp_type = NL80211_TX_POWER_LIMITED;
break;
case NL80211_TX_POWER_LIMITED:
case NL80211_TX_POWER_FIXED:
if (mbm < 0 || (mbm % 100))
return -EOPNOTSUPP;
- sdata->user_power_level = MBM_TO_DBM(mbm);
+ sdata->deflink.user_power_level = MBM_TO_DBM(mbm);
break;
}
@@ -2760,7 +2951,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
has_monitor = true;
continue;
}
- sdata->user_power_level = local->user_power_level;
+ sdata->deflink.user_power_level = local->user_power_level;
if (txp_type != sdata->vif.bss_conf.txpower_type)
update_txp_type = true;
sdata->vif.bss_conf.txpower_type = txp_type;
@@ -2776,7 +2967,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
sdata = wiphy_dereference(local->hw.wiphy,
local->monitor_sdata);
if (sdata) {
- sdata->user_power_level = local->user_power_level;
+ sdata->deflink.user_power_level = local->user_power_level;
if (txp_type != sdata->vif.bss_conf.txpower_type)
update_txp_type = true;
sdata->vif.bss_conf.txpower_type = txp_type;
@@ -2850,6 +3041,7 @@ static int ieee80211_testmode_dump(struct wiphy *wiphy,
#endif
int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
enum ieee80211_smps_mode smps_mode)
{
const u8 *ap;
@@ -2863,8 +3055,8 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata,
if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION))
return -EINVAL;
- old_req = sdata->u.mgd.req_smps;
- sdata->u.mgd.req_smps = smps_mode;
+ old_req = link->u.mgd.req_smps;
+ link->u.mgd.req_smps = smps_mode;
if (old_req == smps_mode &&
smps_mode != IEEE80211_SMPS_AUTOMATIC)
@@ -2876,10 +3068,10 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata,
* the new value until we associate.
*/
if (!sdata->u.mgd.associated ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT)
+ link->conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT)
return 0;
- ap = sdata->u.mgd.associated->bssid;
+ ap = link->u.mgd.bssid;
rcu_read_lock();
list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) {
@@ -2903,7 +3095,7 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata,
err = ieee80211_send_smps_action(sdata, smps_mode,
ap, ap);
if (err)
- sdata->u.mgd.req_smps = old_req;
+ link->u.mgd.req_smps = old_req;
else if (smps_mode != IEEE80211_SMPS_OFF && tdls_peer_found)
ieee80211_teardown_tdls_peers(sdata);
@@ -2915,6 +3107,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+ unsigned int link_id;
if (sdata->vif.type != NL80211_IFTYPE_STATION)
return -EOPNOTSUPP;
@@ -2931,7 +3124,16 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
/* no change, but if automatic follow powersave */
sdata_lock(sdata);
- __ieee80211_request_smps_mgd(sdata, sdata->u.mgd.req_smps);
+ for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
+ struct ieee80211_link_data *link;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+
+ if (!link)
+ continue;
+ __ieee80211_request_smps_mgd(sdata, link,
+ link->u.mgd.req_smps);
+ }
sdata_unlock(sdata);
if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
@@ -2964,12 +3166,13 @@ static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy,
bss_conf->cqm_rssi_hyst = rssi_hyst;
bss_conf->cqm_rssi_low = 0;
bss_conf->cqm_rssi_high = 0;
- sdata->u.mgd.last_cqm_event_signal = 0;
+ sdata->deflink.u.mgd.last_cqm_event_signal = 0;
/* tell the driver upon association, unless already associated */
if (sdata->u.mgd.associated &&
sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_CQM);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ BSS_CHANGED_CQM);
return 0;
}
@@ -2989,18 +3192,20 @@ static int ieee80211_set_cqm_rssi_range_config(struct wiphy *wiphy,
bss_conf->cqm_rssi_high = rssi_high;
bss_conf->cqm_rssi_thold = 0;
bss_conf->cqm_rssi_hyst = 0;
- sdata->u.mgd.last_cqm_event_signal = 0;
+ sdata->deflink.u.mgd.last_cqm_event_signal = 0;
/* tell the driver upon association, unless already associated */
if (sdata->u.mgd.associated &&
sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_CQM);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ BSS_CHANGED_CQM);
return 0;
}
static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
struct net_device *dev,
+ unsigned int link_id,
const u8 *addr,
const struct cfg80211_bitrate_mask *mask)
{
@@ -3017,7 +3222,7 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
* to send something, and if we're an AP we have to be able to do
* so at a basic rate so that all clients can receive it.
*/
- if (rcu_access_pointer(sdata->vif.chanctx_conf) &&
+ if (rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) &&
sdata->vif.bss_conf.chandef.chan) {
u32 basic_rates = sdata->vif.bss_conf.basic_rates;
enum nl80211_band band = sdata->vif.bss_conf.chandef.chan->band;
@@ -3082,16 +3287,16 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy,
}
/* whatever, but channel contexts should not complain about that one */
- sdata->smps_mode = IEEE80211_SMPS_OFF;
- sdata->needed_rx_chains = local->rx_chains;
+ sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
+ sdata->deflink.needed_rx_chains = local->rx_chains;
- err = ieee80211_vif_use_channel(sdata, chandef,
- IEEE80211_CHANCTX_SHARED);
+ err = ieee80211_link_use_channel(&sdata->deflink, chandef,
+ IEEE80211_CHANCTX_SHARED);
if (err)
goto out_unlock;
ieee80211_queue_delayed_work(&sdata->local->hw,
- &sdata->dfs_cac_timer_work,
+ &sdata->deflink.dfs_cac_timer_work,
msecs_to_jiffies(cac_time_ms));
out_unlock:
@@ -3111,10 +3316,10 @@ static void ieee80211_end_cac(struct wiphy *wiphy,
* by the time it gets it, sdata->wdev.cac_started
* will no longer be true
*/
- cancel_delayed_work(&sdata->dfs_cac_timer_work);
+ cancel_delayed_work(&sdata->deflink.dfs_cac_timer_work);
if (sdata->wdev.cac_started) {
- ieee80211_vif_release_channel(sdata);
+ ieee80211_link_release_channel(&sdata->deflink);
sdata->wdev.cac_started = false;
}
}
@@ -3130,12 +3335,24 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon)
len = beacon->head_len + beacon->tail_len + beacon->beacon_ies_len +
beacon->proberesp_ies_len + beacon->assocresp_ies_len +
- beacon->probe_resp_len + beacon->lci_len + beacon->civicloc_len;
+ beacon->probe_resp_len + beacon->lci_len + beacon->civicloc_len +
+ ieee80211_get_mbssid_beacon_len(beacon->mbssid_ies);
new_beacon = kzalloc(sizeof(*new_beacon) + len, GFP_KERNEL);
if (!new_beacon)
return NULL;
+ if (beacon->mbssid_ies && beacon->mbssid_ies->cnt) {
+ new_beacon->mbssid_ies =
+ kzalloc(struct_size(new_beacon->mbssid_ies,
+ elem, beacon->mbssid_ies->cnt),
+ GFP_KERNEL);
+ if (!new_beacon->mbssid_ies) {
+ kfree(new_beacon);
+ return NULL;
+ }
+ }
+
pos = (u8 *)(new_beacon + 1);
if (beacon->head_len) {
new_beacon->head_len = beacon->head_len;
@@ -3173,6 +3390,10 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon)
memcpy(pos, beacon->probe_resp, beacon->probe_resp_len);
pos += beacon->probe_resp_len;
}
+ if (beacon->mbssid_ies && beacon->mbssid_ies->cnt)
+ pos += ieee80211_copy_mbssid_beacon(pos,
+ new_beacon->mbssid_ies,
+ beacon->mbssid_ies);
/* might copy -1, meaning no changes requested */
new_beacon->ftm_responder = beacon->ftm_responder;
@@ -3195,9 +3416,31 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon)
void ieee80211_csa_finish(struct ieee80211_vif *vif)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_local *local = sdata->local;
- ieee80211_queue_work(&sdata->local->hw,
- &sdata->csa_finalize_work);
+ rcu_read_lock();
+
+ if (vif->mbssid_tx_vif == vif) {
+ /* Trigger ieee80211_csa_finish() on the non-transmitting
+ * interfaces when channel switch is received on
+ * transmitting interface
+ */
+ struct ieee80211_sub_if_data *iter;
+
+ list_for_each_entry_rcu(iter, &local->interfaces, list) {
+ if (!ieee80211_sdata_running(iter))
+ continue;
+
+ if (iter == sdata || iter->vif.mbssid_tx_vif != vif)
+ continue;
+
+ ieee80211_queue_work(&iter->local->hw,
+ &iter->deflink.csa_finalize_work);
+ }
+ }
+ ieee80211_queue_work(&local->hw, &sdata->deflink.csa_finalize_work);
+
+ rcu_read_unlock();
}
EXPORT_SYMBOL(ieee80211_csa_finish);
@@ -3207,7 +3450,7 @@ void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, bool block_t
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_local *local = sdata->local;
- sdata->csa_block_tx = block_tx;
+ sdata->deflink.csa_block_tx = block_tx;
sdata_info(sdata, "channel switch failed, disconnecting\n");
ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work);
}
@@ -3220,10 +3463,13 @@ static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata,
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
- err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon,
+ if (!sdata->deflink.u.ap.next_beacon)
+ return -EINVAL;
+
+ err = ieee80211_assign_beacon(sdata, &sdata->deflink,
+ sdata->deflink.u.ap.next_beacon,
NULL, NULL);
- kfree(sdata->u.ap.next_beacon);
- sdata->u.ap.next_beacon = NULL;
+ ieee80211_free_next_beacon(&sdata->deflink);
if (err < 0)
return err;
@@ -3268,41 +3514,41 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
* completed successfully
*/
- if (sdata->reserved_chanctx) {
+ if (sdata->deflink.reserved_chanctx) {
/*
* with multi-vif csa driver may call ieee80211_csa_finish()
* many times while waiting for other interfaces to use their
* reservations
*/
- if (sdata->reserved_ready)
+ if (sdata->deflink.reserved_ready)
return 0;
- return ieee80211_vif_use_reserved_context(sdata);
+ return ieee80211_link_use_reserved_context(&sdata->deflink);
}
if (!cfg80211_chandef_identical(&sdata->vif.bss_conf.chandef,
- &sdata->csa_chandef))
+ &sdata->deflink.csa_chandef))
return -EINVAL;
- sdata->vif.csa_active = false;
+ sdata->vif.bss_conf.csa_active = false;
err = ieee80211_set_after_csa_beacon(sdata, &changed);
if (err)
return err;
- ieee80211_bss_info_change_notify(sdata, changed);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed);
- if (sdata->csa_block_tx) {
+ if (sdata->deflink.csa_block_tx) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->csa_block_tx = false;
+ sdata->deflink.csa_block_tx = false;
}
err = drv_post_channel_switch(sdata);
if (err)
return err;
- cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef);
+ cfg80211_ch_switch_notify(sdata->dev, &sdata->deflink.csa_chandef, 0);
return 0;
}
@@ -3320,7 +3566,7 @@ void ieee80211_csa_finalize_work(struct work_struct *work)
{
struct ieee80211_sub_if_data *sdata =
container_of(work, struct ieee80211_sub_if_data,
- csa_finalize_work);
+ deflink.csa_finalize_work);
struct ieee80211_local *local = sdata->local;
sdata_lock(sdata);
@@ -3328,7 +3574,7 @@ void ieee80211_csa_finalize_work(struct work_struct *work)
mutex_lock(&local->chanctx_mtx);
/* AP might have been stopped while waiting for the lock. */
- if (!sdata->vif.csa_active)
+ if (!sdata->vif.bss_conf.csa_active)
goto unlock;
if (!ieee80211_sdata_running(sdata))
@@ -3351,9 +3597,9 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
- sdata->u.ap.next_beacon =
+ sdata->deflink.u.ap.next_beacon =
cfg80211_beacon_dup(&params->beacon_after);
- if (!sdata->u.ap.next_beacon)
+ if (!sdata->deflink.u.ap.next_beacon)
return -ENOMEM;
/*
@@ -3378,8 +3624,10 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
if ((params->n_counter_offsets_beacon >
IEEE80211_MAX_CNTDWN_COUNTERS_NUM) ||
(params->n_counter_offsets_presp >
- IEEE80211_MAX_CNTDWN_COUNTERS_NUM))
+ IEEE80211_MAX_CNTDWN_COUNTERS_NUM)) {
+ ieee80211_free_next_beacon(&sdata->deflink);
return -EINVAL;
+ }
csa.counter_offsets_beacon = params->counter_offsets_beacon;
csa.counter_offsets_presp = params->counter_offsets_presp;
@@ -3387,16 +3635,18 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
csa.n_counter_offsets_presp = params->n_counter_offsets_presp;
csa.count = params->count;
- err = ieee80211_assign_beacon(sdata, &params->beacon_csa, &csa, NULL);
+ err = ieee80211_assign_beacon(sdata, &sdata->deflink,
+ &params->beacon_csa, &csa,
+ NULL);
if (err < 0) {
- kfree(sdata->u.ap.next_beacon);
+ ieee80211_free_next_beacon(&sdata->deflink);
return err;
}
*changed |= err;
break;
case NL80211_IFTYPE_ADHOC:
- if (!sdata->vif.bss_conf.ibss_joined)
+ if (!sdata->vif.cfg.ibss_joined)
return -EINVAL;
if (params->chandef.width != sdata->u.ibss.chandef.width)
@@ -3437,9 +3687,6 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
case NL80211_IFTYPE_MESH_POINT: {
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- if (params->chandef.width != sdata->vif.bss_conf.chandef.width)
- return -EINVAL;
-
/* changes into another band are not supported */
if (sdata->vif.bss_conf.chandef.chan->band !=
params->chandef.chan->band)
@@ -3478,9 +3725,9 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
static void ieee80211_color_change_abort(struct ieee80211_sub_if_data *sdata)
{
- sdata->vif.color_change_active = false;
- kfree(sdata->u.ap.next_beacon);
- sdata->u.ap.next_beacon = NULL;
+ sdata->vif.bss_conf.color_change_active = false;
+
+ ieee80211_free_next_beacon(&sdata->deflink);
cfg80211_color_change_aborted_notify(sdata->dev);
}
@@ -3511,11 +3758,11 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
return -EINVAL;
/* don't allow another channel switch if one is already active. */
- if (sdata->vif.csa_active)
+ if (sdata->vif.bss_conf.csa_active)
return -EBUSY;
mutex_lock(&local->chanctx_mtx);
- conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+ conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf,
lockdep_is_held(&local->chanctx_mtx));
if (!conf) {
err = -EBUSY;
@@ -3540,42 +3787,44 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
if (err)
goto out;
- err = ieee80211_vif_reserve_chanctx(sdata, &params->chandef,
- chanctx->mode,
- params->radar_required);
+ err = ieee80211_link_reserve_chanctx(&sdata->deflink, &params->chandef,
+ chanctx->mode,
+ params->radar_required);
if (err)
goto out;
/* if reservation is invalid then this will fail */
err = ieee80211_check_combinations(sdata, NULL, chanctx->mode, 0);
if (err) {
- ieee80211_vif_unreserve_chanctx(sdata);
+ ieee80211_link_unreserve_chanctx(&sdata->deflink);
goto out;
}
/* if there is a color change in progress, abort it */
- if (sdata->vif.color_change_active)
+ if (sdata->vif.bss_conf.color_change_active)
ieee80211_color_change_abort(sdata);
err = ieee80211_set_csa_beacon(sdata, params, &changed);
if (err) {
- ieee80211_vif_unreserve_chanctx(sdata);
+ ieee80211_link_unreserve_chanctx(&sdata->deflink);
goto out;
}
- sdata->csa_chandef = params->chandef;
- sdata->csa_block_tx = params->block_tx;
- sdata->vif.csa_active = true;
+ sdata->deflink.csa_chandef = params->chandef;
+ sdata->deflink.csa_block_tx = params->block_tx;
+ sdata->vif.bss_conf.csa_active = true;
- if (sdata->csa_block_tx)
+ if (sdata->deflink.csa_block_tx)
ieee80211_stop_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- cfg80211_ch_switch_started_notify(sdata->dev, &sdata->csa_chandef,
+ cfg80211_ch_switch_started_notify(sdata->dev,
+ &sdata->deflink.csa_chandef, 0,
params->count, params->block_tx);
if (changed) {
- ieee80211_bss_info_change_notify(sdata, changed);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ changed);
drv_channel_switch_beacon(sdata, &params->chandef);
} else {
/* if the beacon didn't change, we can finalize immediately */
@@ -3734,7 +3983,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
mutex_lock(&local->mtx);
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
if (WARN_ON(!chanctx_conf)) {
ret = -EINVAL;
goto unlock;
@@ -3808,17 +4057,25 @@ unlock:
static int ieee80211_cfg_get_channel(struct wiphy *wiphy,
struct wireless_dev *wdev,
+ unsigned int link_id,
struct cfg80211_chan_def *chandef)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
struct ieee80211_local *local = wiphy_priv(wiphy);
struct ieee80211_chanctx_conf *chanctx_conf;
+ struct ieee80211_link_data *link;
int ret = -ENODATA;
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ link = rcu_dereference(sdata->link[link_id]);
+ if (!link) {
+ ret = -ENOLINK;
+ goto out;
+ }
+
+ chanctx_conf = rcu_dereference(link->conf->chanctx_conf);
if (chanctx_conf) {
- *chandef = sdata->vif.bss_conf.chandef;
+ *chandef = link->conf->chandef;
ret = 0;
} else if (local->open_count > 0 &&
local->open_count == local->monitors &&
@@ -3829,6 +4086,7 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy,
*chandef = local->_oper_chandef;
ret = 0;
}
+out:
rcu_read_unlock();
return ret;
@@ -3868,15 +4126,19 @@ static int ieee80211_set_qos_map(struct wiphy *wiphy,
static int ieee80211_set_ap_chanwidth(struct wiphy *wiphy,
struct net_device *dev,
+ unsigned int link_id,
struct cfg80211_chan_def *chandef)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_link_data *link;
int ret;
u32 changed = 0;
- ret = ieee80211_vif_change_bandwidth(sdata, chandef, &changed);
+ link = sdata_dereference(sdata->link[link_id], sdata);
+
+ ret = ieee80211_link_change_bandwidth(link, chandef, &changed);
if (ret == 0)
- ieee80211_bss_info_change_notify(sdata, changed);
+ ieee80211_link_info_change_notify(sdata, link, changed);
return ret;
}
@@ -4216,10 +4478,13 @@ ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata,
case NL80211_IFTYPE_AP: {
int ret;
- ret = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon,
+ if (!sdata->deflink.u.ap.next_beacon)
+ return -EINVAL;
+
+ ret = ieee80211_assign_beacon(sdata, &sdata->deflink,
+ sdata->deflink.u.ap.next_beacon,
NULL, NULL);
- kfree(sdata->u.ap.next_beacon);
- sdata->u.ap.next_beacon = NULL;
+ ieee80211_free_next_beacon(&sdata->deflink);
if (ret < 0)
return ret;
@@ -4245,9 +4510,9 @@ ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata,
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
- sdata->u.ap.next_beacon =
+ sdata->deflink.u.ap.next_beacon =
cfg80211_beacon_dup(&params->beacon_next);
- if (!sdata->u.ap.next_beacon)
+ if (!sdata->deflink.u.ap.next_beacon)
return -ENOMEM;
if (params->count <= 1)
@@ -4259,10 +4524,11 @@ ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata,
params->counter_offset_presp;
color_change.count = params->count;
- err = ieee80211_assign_beacon(sdata, &params->beacon_color_change,
+ err = ieee80211_assign_beacon(sdata, &sdata->deflink,
+ &params->beacon_color_change,
NULL, &color_change);
if (err < 0) {
- kfree(sdata->u.ap.next_beacon);
+ ieee80211_free_next_beacon(&sdata->deflink);
return err;
}
*changed |= err;
@@ -4282,7 +4548,7 @@ ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata,
sdata->vif.bss_conf.he_bss_color.enabled = enable;
changed |= BSS_CHANGED_HE_BSS_COLOR;
- ieee80211_bss_info_change_notify(sdata, changed);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed);
if (!sdata->vif.bss_conf.nontransmitted && sdata->vif.mbssid_tx_vif) {
struct ieee80211_sub_if_data *child;
@@ -4292,8 +4558,9 @@ ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata,
if (child != sdata && child->vif.mbssid_tx_vif == &sdata->vif) {
child->vif.bss_conf.he_bss_color.color = color;
child->vif.bss_conf.he_bss_color.enabled = enable;
- ieee80211_bss_info_change_notify(child,
- BSS_CHANGED_HE_BSS_COLOR);
+ ieee80211_link_info_change_notify(child,
+ &child->deflink,
+ BSS_CHANGED_HE_BSS_COLOR);
}
}
mutex_unlock(&sdata->local->iflist_mtx);
@@ -4309,7 +4576,7 @@ static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata)
sdata_assert_lock(sdata);
lockdep_assert_held(&local->mtx);
- sdata->vif.color_change_active = false;
+ sdata->vif.bss_conf.color_change_active = false;
err = ieee80211_set_after_color_change_beacon(sdata, &changed);
if (err) {
@@ -4318,7 +4585,7 @@ static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata)
}
ieee80211_color_change_bss_config_notify(sdata,
- sdata->vif.color_change_color,
+ sdata->vif.bss_conf.color_change_color,
1, changed);
cfg80211_color_change_notify(sdata->dev);
@@ -4329,14 +4596,14 @@ void ieee80211_color_change_finalize_work(struct work_struct *work)
{
struct ieee80211_sub_if_data *sdata =
container_of(work, struct ieee80211_sub_if_data,
- color_change_finalize_work);
+ deflink.color_change_finalize_work);
struct ieee80211_local *local = sdata->local;
sdata_lock(sdata);
mutex_lock(&local->mtx);
/* AP might have been stopped while waiting for the lock. */
- if (!sdata->vif.color_change_active)
+ if (!sdata->vif.bss_conf.color_change_active)
goto unlock;
if (!ieee80211_sdata_running(sdata))
@@ -4354,20 +4621,20 @@ void ieee80211_color_change_finish(struct ieee80211_vif *vif)
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
ieee80211_queue_work(&sdata->local->hw,
- &sdata->color_change_finalize_work);
+ &sdata->deflink.color_change_finalize_work);
}
EXPORT_SYMBOL_GPL(ieee80211_color_change_finish);
void
ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
- u64 color_bitmap)
+ u64 color_bitmap, gfp_t gfp)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
- if (sdata->vif.color_change_active || sdata->vif.csa_active)
+ if (sdata->vif.bss_conf.color_change_active || sdata->vif.bss_conf.csa_active)
return;
- cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap);
+ cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap, gfp);
}
EXPORT_SYMBOL_GPL(ieeee80211_obss_color_collision_notify);
@@ -4390,7 +4657,7 @@ ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev,
/* don't allow another color change if one is already active or if csa
* is active
*/
- if (sdata->vif.color_change_active || sdata->vif.csa_active) {
+ if (sdata->vif.bss_conf.color_change_active || sdata->vif.bss_conf.csa_active) {
err = -EBUSY;
goto out;
}
@@ -4399,8 +4666,8 @@ ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev,
if (err)
goto out;
- sdata->vif.color_change_active = true;
- sdata->vif.color_change_color = params->color;
+ sdata->vif.bss_conf.color_change_active = true;
+ sdata->vif.bss_conf.color_change_color = params->color;
cfg80211_color_change_started_notify(sdata->dev, params->count);
@@ -4428,6 +4695,139 @@ ieee80211_set_radar_background(struct wiphy *wiphy,
return local->ops->set_radar_background(&local->hw, chandef);
}
+static int ieee80211_add_intf_link(struct wiphy *wiphy,
+ struct wireless_dev *wdev,
+ unsigned int link_id)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+
+ if (wdev->use_4addr)
+ return -EOPNOTSUPP;
+
+ return ieee80211_vif_set_links(sdata, wdev->valid_links);
+}
+
+static void ieee80211_del_intf_link(struct wiphy *wiphy,
+ struct wireless_dev *wdev,
+ unsigned int link_id)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+
+ ieee80211_vif_set_links(sdata, wdev->valid_links);
+}
+
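cfg80211 updates wdev->valid_links before invoking either hook, so add and
remove both reduce to resyncing the driver-side link set from that bitmap;
the only policy here is rejecting 4-address mode, which has no MLO
semantics yet. valid_links is an ordinary per-link bitmap, and the same
idiom appears below in sta_del_link_station(), where
valid_links == BIT(link_id) detects the last remaining link. In plain C:

	#include <stdint.h>
	#include <stdio.h>

	#define BIT(n)	(1u << (n))

	int main(void)
	{
		uint16_t valid_links = 0;

		valid_links |= BIT(0) | BIT(2);	/* links 0 and 2 active */
		printf("link 2 valid: %d\n", !!(valid_links & BIT(2)));

		valid_links &= ~BIT(2);		/* remove link 2 */
		printf("only link 0 left: %d\n", valid_links == BIT(0));
		return 0;
	}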
+static int sta_add_link_station(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct link_station_parameters *params)
+{
+ struct sta_info *sta;
+ int ret;
+
+ sta = sta_info_get_bss(sdata, params->mld_mac);
+ if (!sta)
+ return -ENOENT;
+
+ if (!sta->sta.valid_links)
+ return -EINVAL;
+
+ if (sta->sta.valid_links & BIT(params->link_id))
+ return -EALREADY;
+
+ ret = ieee80211_sta_allocate_link(sta, params->link_id);
+ if (ret)
+ return ret;
+
+ ret = sta_link_apply_parameters(local, sta, true, params);
+ if (ret) {
+ ieee80211_sta_free_link(sta, params->link_id);
+ return ret;
+ }
+
+ /* ieee80211_sta_activate_link frees the link upon failure */
+ return ieee80211_sta_activate_link(sta, params->link_id);
+}
+
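sta_add_link_station() is careful about ordering: the link is allocated
first, parameters are applied while it is still invisible, and only then is
it activated; a parameter failure is rolled back explicitly, while
activation cleans up after itself on failure, per the comment above. The
same three-phase shape abstracted into a hedged sketch, all names
hypothetical:

	struct ctx;
	struct params;
	int obj_allocate(struct ctx *ctx, int id);
	int obj_configure(struct ctx *ctx, int id, const struct params *p);
	void obj_free(struct ctx *ctx, int id);
	int obj_activate(struct ctx *ctx, int id);

	static int add_object(struct ctx *ctx, int id, const struct params *p)
	{
		int ret;

		ret = obj_allocate(ctx, id);	/* not visible to readers yet */
		if (ret)
			return ret;

		ret = obj_configure(ctx, id, p);
		if (ret) {
			obj_free(ctx, id);	/* explicit rollback */
			return ret;
		}

		/* publishes the object; frees it itself on failure */
		return obj_activate(ctx, id);
	}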
+static int
+ieee80211_add_link_station(struct wiphy *wiphy, struct net_device *dev,
+ struct link_station_parameters *params)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = wiphy_priv(wiphy);
+ int ret;
+
+ mutex_lock(&sdata->local->sta_mtx);
+ ret = sta_add_link_station(local, sdata, params);
+ mutex_unlock(&sdata->local->sta_mtx);
+
+ return ret;
+}
+
+static int sta_mod_link_station(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct link_station_parameters *params)
+{
+ struct sta_info *sta;
+
+ sta = sta_info_get_bss(sdata, params->mld_mac);
+ if (!sta)
+ return -ENOENT;
+
+ if (!(sta->sta.valid_links & BIT(params->link_id)))
+ return -EINVAL;
+
+ return sta_link_apply_parameters(local, sta, false, params);
+}
+
+static int
+ieee80211_mod_link_station(struct wiphy *wiphy, struct net_device *dev,
+ struct link_station_parameters *params)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = wiphy_priv(wiphy);
+ int ret;
+
+ mutex_lock(&sdata->local->sta_mtx);
+ ret = sta_mod_link_station(local, sdata, params);
+ mutex_unlock(&sdata->local->sta_mtx);
+
+ return ret;
+}
+
+static int sta_del_link_station(struct ieee80211_sub_if_data *sdata,
+ struct link_station_del_parameters *params)
+{
+ struct sta_info *sta;
+
+ sta = sta_info_get_bss(sdata, params->mld_mac);
+ if (!sta)
+ return -ENOENT;
+
+ if (!(sta->sta.valid_links & BIT(params->link_id)))
+ return -EINVAL;
+
+ /* must not create a STA without links */
+ if (sta->sta.valid_links == BIT(params->link_id))
+ return -EINVAL;
+
+ ieee80211_sta_remove_link(sta, params->link_id);
+
+ return 0;
+}
+
+static int
+ieee80211_del_link_station(struct wiphy *wiphy, struct net_device *dev,
+ struct link_station_del_parameters *params)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ int ret;
+
+ mutex_lock(&sdata->local->sta_mtx);
+ ret = sta_del_link_station(sdata, params);
+ mutex_unlock(&sdata->local->sta_mtx);
+
+ return ret;
+}
+
const struct cfg80211_ops mac80211_config_ops = {
.add_virtual_intf = ieee80211_add_iface,
.del_virtual_intf = ieee80211_del_iface,
@@ -4533,4 +4933,9 @@ const struct cfg80211_ops mac80211_config_ops = {
.set_sar_specs = ieee80211_set_sar_specs,
.color_change = ieee80211_color_change,
.set_radar_background = ieee80211_set_radar_background,
+ .add_intf_link = ieee80211_add_intf_link,
+ .del_intf_link = ieee80211_del_intf_link,
+ .add_link_station = ieee80211_add_link_station,
+ .mod_link_station = ieee80211_mod_link_station,
+ .del_link_station = ieee80211_del_link_station,
};
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 76fc36a68750..e72cf0749d49 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* mac80211 - channel management
- * Copyright 2020 - 2021 Intel Corporation
+ * Copyright 2020 - 2022 Intel Corporation
*/
#include <linux/nl80211.h>
@@ -15,12 +15,12 @@
static int ieee80211_chanctx_num_assigned(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx)
{
- struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_link_data *link;
int num = 0;
lockdep_assert_held(&local->chanctx_mtx);
- list_for_each_entry(sdata, &ctx->assigned_vifs, assigned_chanctx_list)
+ list_for_each_entry(link, &ctx->assigned_links, assigned_chanctx_list)
num++;
return num;
@@ -29,12 +29,12 @@ static int ieee80211_chanctx_num_assigned(struct ieee80211_local *local,
static int ieee80211_chanctx_num_reserved(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx)
{
- struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_link_data *link;
int num = 0;
lockdep_assert_held(&local->chanctx_mtx);
- list_for_each_entry(sdata, &ctx->reserved_vifs, reserved_chanctx_list)
+ list_for_each_entry(link, &ctx->reserved_links, reserved_chanctx_list)
num++;
return num;
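
Both counters walk their list purely to count entries; made explicit as a hypothetical generic helper, the idiom is:

static int chanctx_list_count(struct list_head *head)
{
	struct list_head *pos;
	int n = 0;

	list_for_each(pos, head)
		n++;
	return n;
}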
@@ -67,12 +67,12 @@ static bool ieee80211_can_create_new_chanctx(struct ieee80211_local *local)
}
static struct ieee80211_chanctx *
-ieee80211_vif_get_chanctx(struct ieee80211_sub_if_data *sdata)
+ieee80211_link_get_chanctx(struct ieee80211_link_data *link)
{
- struct ieee80211_local *local __maybe_unused = sdata->local;
+ struct ieee80211_local *local __maybe_unused = link->sdata->local;
struct ieee80211_chanctx_conf *conf;
- conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+ conf = rcu_dereference_protected(link->conf->chanctx_conf,
lockdep_is_held(&local->chanctx_mtx));
if (!conf)
return NULL;
@@ -85,16 +85,16 @@ ieee80211_chanctx_reserved_chandef(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
const struct cfg80211_chan_def *compat)
{
- struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_link_data *link;
lockdep_assert_held(&local->chanctx_mtx);
- list_for_each_entry(sdata, &ctx->reserved_vifs,
+ list_for_each_entry(link, &ctx->reserved_links,
reserved_chanctx_list) {
if (!compat)
- compat = &sdata->reserved_chandef;
+ compat = &link->reserved_chandef;
- compat = cfg80211_chandef_compatible(&sdata->reserved_chandef,
+ compat = cfg80211_chandef_compatible(&link->reserved_chandef,
compat);
if (!compat)
break;
@@ -108,20 +108,22 @@ ieee80211_chanctx_non_reserved_chandef(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
const struct cfg80211_chan_def *compat)
{
- struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_link_data *link;
lockdep_assert_held(&local->chanctx_mtx);
- list_for_each_entry(sdata, &ctx->assigned_vifs,
+ list_for_each_entry(link, &ctx->assigned_links,
assigned_chanctx_list) {
- if (sdata->reserved_chanctx != NULL)
+ struct ieee80211_bss_conf *link_conf = link->conf;
+
+ if (link->reserved_chanctx)
continue;
if (!compat)
- compat = &sdata->vif.bss_conf.chandef;
+ compat = &link_conf->chandef;
compat = cfg80211_chandef_compatible(
- &sdata->vif.bss_conf.chandef, compat);
+ &link_conf->chandef, compat);
if (!compat)
break;
}
@@ -157,7 +159,7 @@ ieee80211_chanctx_can_reserve_chandef(struct ieee80211_local *local,
if (ieee80211_chanctx_combined_chandef(local, ctx, def))
return true;
- if (!list_empty(&ctx->reserved_vifs) &&
+ if (!list_empty(&ctx->reserved_links) &&
ieee80211_chanctx_reserved_chandef(local, ctx, def))
return true;
@@ -193,13 +195,23 @@ ieee80211_find_reservation_chanctx(struct ieee80211_local *local,
return NULL;
}
-static enum nl80211_chan_width ieee80211_get_sta_bw(struct sta_info *sta)
+static enum nl80211_chan_width ieee80211_get_sta_bw(struct sta_info *sta,
+ unsigned int link_id)
{
- enum ieee80211_sta_rx_bandwidth width = ieee80211_sta_cap_rx_bw(sta);
+ enum ieee80211_sta_rx_bandwidth width;
+ struct link_sta_info *link_sta;
+
+ link_sta = rcu_dereference(sta->link[link_id]);
+
+ /* no effect if this STA has no presence on this link */
+ if (!link_sta)
+ return NL80211_CHAN_WIDTH_20_NOHT;
+
+ width = ieee80211_sta_cap_rx_bw(link_sta);
switch (width) {
case IEEE80211_STA_RX_BW_20:
- if (sta->sta.ht_cap.ht_supported)
+ if (link_sta->pub->ht_cap.ht_supported)
return NL80211_CHAN_WIDTH_20;
else
return NL80211_CHAN_WIDTH_20_NOHT;
@@ -218,6 +230,8 @@ static enum nl80211_chan_width ieee80211_get_sta_bw(struct sta_info *sta)
* might be smaller than the configured bw (160).
*/
return NL80211_CHAN_WIDTH_160;
+ case IEEE80211_STA_RX_BW_320:
+ return NL80211_CHAN_WIDTH_320;
default:
WARN_ON(1);
return NL80211_CHAN_WIDTH_20;
@@ -225,46 +239,47 @@ static enum nl80211_chan_width ieee80211_get_sta_bw(struct sta_info *sta)
}
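
The NL80211_CHAN_WIDTH_20_NOHT early return for absent links is safe because the caller aggregates per-station results with max(), and among the widths this function can return the nl80211 enum values grow with bandwidth, so 20_NOHT is the neutral element. A toy illustration:

enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT;

max_bw = max(max_bw, NL80211_CHAN_WIDTH_40);		/* peer on link 0 */
max_bw = max(max_bw, NL80211_CHAN_WIDTH_160);		/* peer on link 1 */
max_bw = max(max_bw, NL80211_CHAN_WIDTH_20_NOHT);	/* absent link: no effect */
/* max_bw == NL80211_CHAN_WIDTH_160 */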
static enum nl80211_chan_width
-ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata)
+ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata,
+ unsigned int link_id)
{
enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT;
struct sta_info *sta;
- rcu_read_lock();
list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) {
if (sdata != sta->sdata &&
!(sta->sdata->bss && sta->sdata->bss == sdata->bss))
continue;
- max_bw = max(max_bw, ieee80211_get_sta_bw(sta));
+ max_bw = max(max_bw, ieee80211_get_sta_bw(sta, link_id));
}
- rcu_read_unlock();
return max_bw;
}
static enum nl80211_chan_width
-ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
- struct ieee80211_chanctx_conf *conf)
+ieee80211_get_chanctx_vif_max_required_bw(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_chanctx_conf *conf)
{
- struct ieee80211_sub_if_data *sdata;
enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT;
+ struct ieee80211_vif *vif = &sdata->vif;
+ int link_id;
rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- struct ieee80211_vif *vif = &sdata->vif;
+ for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
enum nl80211_chan_width width = NL80211_CHAN_WIDTH_20_NOHT;
+ struct ieee80211_bss_conf *link_conf =
+ rcu_dereference(sdata->vif.link_conf[link_id]);
- if (!ieee80211_sdata_running(sdata))
+ if (!link_conf)
continue;
- if (rcu_access_pointer(sdata->vif.chanctx_conf) != conf)
+ if (rcu_access_pointer(link_conf->chanctx_conf) != conf)
continue;
switch (vif->type) {
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_AP_VLAN:
- width = ieee80211_get_max_required_bw(sdata);
+ width = ieee80211_get_max_required_bw(sdata, link_id);
break;
case NL80211_IFTYPE_STATION:
/*
@@ -272,8 +287,8 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
* point, so take the width from the chandef, but
* account also for TDLS peers
*/
- width = max(vif->bss_conf.chandef.width,
- ieee80211_get_max_required_bw(sdata));
+ width = max(link_conf->chandef.width,
+ ieee80211_get_max_required_bw(sdata, link_id));
break;
case NL80211_IFTYPE_P2P_DEVICE:
case NL80211_IFTYPE_NAN:
@@ -281,7 +296,7 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
case NL80211_IFTYPE_ADHOC:
case NL80211_IFTYPE_MESH_POINT:
case NL80211_IFTYPE_OCB:
- width = vif->bss_conf.chandef.width;
+ width = link_conf->chandef.width;
break;
case NL80211_IFTYPE_WDS:
case NL80211_IFTYPE_UNSPECIFIED:
@@ -291,12 +306,37 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
case NL80211_IFTYPE_P2P_GO:
WARN_ON_ONCE(1);
}
+
+ max_bw = max(max_bw, width);
+ }
+ rcu_read_unlock();
+
+ return max_bw;
+}
+
+static enum nl80211_chan_width
+ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
+ struct ieee80211_chanctx_conf *conf)
+{
+ struct ieee80211_sub_if_data *sdata;
+ enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ enum nl80211_chan_width width;
+
+ if (!ieee80211_sdata_running(sdata))
+ continue;
+
+ width = ieee80211_get_chanctx_vif_max_required_bw(sdata, conf);
+
max_bw = max(max_bw, width);
}
/* use the configured bandwidth in case of monitor interface */
sdata = rcu_dereference(local->monitor_sdata);
- if (sdata && rcu_access_pointer(sdata->vif.chanctx_conf) == conf)
+ if (sdata &&
+ rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) == conf)
max_bw = max(max_bw, conf->def.width);
rcu_read_unlock();
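
The same conversion pattern recurs throughout this file: instead of reading the single vif.chanctx_conf pointer, iterate the vif's per-link config array under RCU and skip links that are absent or bound to a different channel context. Condensed:

rcu_read_lock();
for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
	struct ieee80211_bss_conf *link_conf =
		rcu_dereference(sdata->vif.link_conf[link_id]);

	if (!link_conf)
		continue;	/* link not configured on this vif */
	if (rcu_access_pointer(link_conf->chanctx_conf) != conf)
		continue;	/* link bound to another chanctx */
	/* ... per-link work ... */
}
rcu_read_unlock();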
@@ -348,7 +388,7 @@ static u32 _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
}
/* calling this function assumes that the station vif is updated to the
- * lates changes by calling ieee80211_vif_update_chandef
+ * latest changes by calling ieee80211_link_update_chandef
*/
static void ieee80211_chan_bw_change(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
@@ -361,29 +401,43 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local,
rcu_read_lock();
list_for_each_entry_rcu(sta, &local->sta_list,
list) {
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
enum ieee80211_sta_rx_bandwidth new_sta_bw;
+ unsigned int link_id;
if (!ieee80211_sdata_running(sta->sdata))
continue;
- if (rcu_access_pointer(sta->sdata->vif.chanctx_conf) !=
- &ctx->conf)
- continue;
+ for (link_id = 0; link_id < ARRAY_SIZE(sta->sdata->link); link_id++) {
+ struct ieee80211_bss_conf *link_conf =
+ rcu_dereference(sdata->vif.link_conf[link_id]);
+ struct link_sta_info *link_sta;
- new_sta_bw = ieee80211_sta_cur_vht_bw(sta);
+ if (!link_conf)
+ continue;
- /* nothing change */
- if (new_sta_bw == sta->sta.bandwidth)
- continue;
+ if (rcu_access_pointer(link_conf->chanctx_conf) != &ctx->conf)
+ continue;
- /* vif changed to narrow BW and narrow BW for station wasn't
- * requested or vise versa */
- if ((new_sta_bw < sta->sta.bandwidth) == !narrowed)
- continue;
+ link_sta = rcu_dereference(sta->link[link_id]);
+ if (!link_sta)
+ continue;
+
+ new_sta_bw = ieee80211_sta_cur_vht_bw(link_sta);
+
+ /* nothing changed */
+ if (new_sta_bw == link_sta->pub->bandwidth)
+ continue;
+
+ /* vif changed to narrow BW and narrow BW for the station wasn't
+ * requested, or vice versa */
+ if ((new_sta_bw < link_sta->pub->bandwidth) == !narrowed)
+ continue;
- sta->sta.bandwidth = new_sta_bw;
- rate_control_rate_update(local, sband, sta,
- IEEE80211_RC_BW_CHANGED);
+ link_sta->pub->bandwidth = new_sta_bw;
+ rate_control_rate_update(local, sband, sta, link_id,
+ IEEE80211_RC_BW_CHANGED);
+ }
}
rcu_read_unlock();
}
@@ -417,7 +471,7 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local,
{
u32 changed;
- /* expected to handle only 20/40/80/160 channel widths */
+ /* expected to handle only 20/40/80/160/320 channel widths */
switch (chandef->width) {
case NL80211_CHAN_WIDTH_20_NOHT:
case NL80211_CHAN_WIDTH_20:
@@ -425,6 +479,7 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local,
case NL80211_CHAN_WIDTH_80:
case NL80211_CHAN_WIDTH_80P80:
case NL80211_CHAN_WIDTH_160:
+ case NL80211_CHAN_WIDTH_320:
break;
default:
WARN_ON(1);
@@ -505,9 +560,17 @@ bool ieee80211_is_radar_required(struct ieee80211_local *local)
rcu_read_lock();
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- if (sdata->radar_required) {
- rcu_read_unlock();
- return true;
+ unsigned int link_id;
+
+ for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
+ struct ieee80211_link_data *link;
+
+ link = rcu_dereference(sdata->link[link_id]);
+
+ if (link && link->radar_required) {
+ rcu_read_unlock();
+ return true;
+ }
}
}
rcu_read_unlock();
@@ -528,15 +591,27 @@ ieee80211_chanctx_radar_required(struct ieee80211_local *local,
rcu_read_lock();
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ unsigned int link_id;
+
if (!ieee80211_sdata_running(sdata))
continue;
- if (rcu_access_pointer(sdata->vif.chanctx_conf) != conf)
- continue;
- if (!sdata->radar_required)
- continue;
+ for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
+ struct ieee80211_link_data *link;
- required = true;
- break;
+ link = rcu_dereference(sdata->link[link_id]);
+ if (!link)
+ continue;
+
+ if (rcu_access_pointer(link->conf->chanctx_conf) != conf)
+ continue;
+ if (!link->radar_required)
+ continue;
+ required = true;
+ break;
+ }
+
+ if (required)
+ break;
}
rcu_read_unlock();
@@ -556,8 +631,8 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local,
if (!ctx)
return NULL;
- INIT_LIST_HEAD(&ctx->assigned_vifs);
- INIT_LIST_HEAD(&ctx->reserved_vifs);
+ INIT_LIST_HEAD(&ctx->assigned_links);
+ INIT_LIST_HEAD(&ctx->reserved_links);
ctx->conf.def = *chandef;
ctx->conf.rx_chains_static = 1;
ctx->conf.rx_chains_dynamic = 1;
@@ -683,21 +758,32 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local,
rcu_read_lock();
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ int link_id;
if (!ieee80211_sdata_running(sdata))
continue;
- if (rcu_access_pointer(sdata->vif.chanctx_conf) != conf)
- continue;
+
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
continue;
- if (!compat)
- compat = &sdata->vif.bss_conf.chandef;
+ for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
+ struct ieee80211_bss_conf *link_conf =
+ rcu_dereference(sdata->vif.link_conf[link_id]);
- compat = cfg80211_chandef_compatible(
- &sdata->vif.bss_conf.chandef, compat);
- if (WARN_ON_ONCE(!compat))
- break;
+ if (!link_conf)
+ continue;
+
+ if (rcu_access_pointer(link_conf->chanctx_conf) != conf)
+ continue;
+
+ if (!compat)
+ compat = &link_conf->chandef;
+
+ compat = cfg80211_chandef_compatible(&link_conf->chandef,
+ compat);
+ if (WARN_ON_ONCE(!compat))
+ break;
+ }
}
/* TDLS peers can sometimes affect the chandef width */
@@ -745,9 +831,10 @@ static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local,
drv_change_chanctx(local, chanctx, IEEE80211_CHANCTX_CHANGE_RADAR);
}
-static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_chanctx *new_ctx)
+static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link,
+ struct ieee80211_chanctx *new_ctx)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *curr_ctx = NULL;
@@ -756,31 +843,31 @@ static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata,
if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_NAN))
return -ENOTSUPP;
- conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+ conf = rcu_dereference_protected(link->conf->chanctx_conf,
lockdep_is_held(&local->chanctx_mtx));
if (conf) {
curr_ctx = container_of(conf, struct ieee80211_chanctx, conf);
- drv_unassign_vif_chanctx(local, sdata, curr_ctx);
+ drv_unassign_vif_chanctx(local, sdata, link->conf, curr_ctx);
conf = NULL;
- list_del(&sdata->assigned_chanctx_list);
+ list_del(&link->assigned_chanctx_list);
}
if (new_ctx) {
- ret = drv_assign_vif_chanctx(local, sdata, new_ctx);
+ ret = drv_assign_vif_chanctx(local, sdata, link->conf, new_ctx);
if (ret)
goto out;
conf = &new_ctx->conf;
- list_add(&sdata->assigned_chanctx_list,
- &new_ctx->assigned_vifs);
+ list_add(&link->assigned_chanctx_list,
+ &new_ctx->assigned_links);
}
out:
- rcu_assign_pointer(sdata->vif.chanctx_conf, conf);
+ rcu_assign_pointer(link->conf->chanctx_conf, conf);
- sdata->vif.bss_conf.idle = !conf;
+ sdata->vif.cfg.idle = !conf;
if (curr_ctx && ieee80211_chanctx_num_assigned(local, curr_ctx) > 0) {
ieee80211_recalc_chanctx_chantype(local, curr_ctx);
@@ -796,8 +883,7 @@ out:
if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
sdata->vif.type != NL80211_IFTYPE_MONITOR)
- ieee80211_bss_info_change_notify(sdata,
- BSS_CHANGED_IDLE);
+ ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_IDLE);
ieee80211_check_fast_xmit_iface(sdata);
@@ -818,60 +904,64 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
rcu_read_lock();
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
u8 needed_static, needed_dynamic;
+ unsigned int link_id;
if (!ieee80211_sdata_running(sdata))
continue;
- if (rcu_access_pointer(sdata->vif.chanctx_conf) !=
- &chanctx->conf)
- continue;
-
switch (sdata->vif.type) {
- case NL80211_IFTYPE_P2P_DEVICE:
- case NL80211_IFTYPE_NAN:
- continue;
case NL80211_IFTYPE_STATION:
if (!sdata->u.mgd.associated)
continue;
break;
- case NL80211_IFTYPE_AP_VLAN:
- continue;
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_ADHOC:
case NL80211_IFTYPE_MESH_POINT:
case NL80211_IFTYPE_OCB:
break;
default:
- WARN_ON_ONCE(1);
+ continue;
}
- switch (sdata->smps_mode) {
- default:
- WARN_ONCE(1, "Invalid SMPS mode %d\n",
- sdata->smps_mode);
- fallthrough;
- case IEEE80211_SMPS_OFF:
- needed_static = sdata->needed_rx_chains;
- needed_dynamic = sdata->needed_rx_chains;
- break;
- case IEEE80211_SMPS_DYNAMIC:
- needed_static = 1;
- needed_dynamic = sdata->needed_rx_chains;
- break;
- case IEEE80211_SMPS_STATIC:
- needed_static = 1;
- needed_dynamic = 1;
- break;
- }
+ for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
+ struct ieee80211_link_data *link;
+
+ link = rcu_dereference(sdata->link[link_id]);
+
+ if (!link)
+ continue;
+
+ if (rcu_access_pointer(link->conf->chanctx_conf) != &chanctx->conf)
+ continue;
- rx_chains_static = max(rx_chains_static, needed_static);
- rx_chains_dynamic = max(rx_chains_dynamic, needed_dynamic);
+ switch (link->smps_mode) {
+ default:
+ WARN_ONCE(1, "Invalid SMPS mode %d\n",
+ link->smps_mode);
+ fallthrough;
+ case IEEE80211_SMPS_OFF:
+ needed_static = link->needed_rx_chains;
+ needed_dynamic = link->needed_rx_chains;
+ break;
+ case IEEE80211_SMPS_DYNAMIC:
+ needed_static = 1;
+ needed_dynamic = link->needed_rx_chains;
+ break;
+ case IEEE80211_SMPS_STATIC:
+ needed_static = 1;
+ needed_dynamic = 1;
+ break;
+ }
+
+ rx_chains_static = max(rx_chains_static, needed_static);
+ rx_chains_dynamic = max(rx_chains_dynamic, needed_dynamic);
+ }
}
/* Disable SMPS for the monitor interface */
sdata = rcu_dereference(local->monitor_sdata);
if (sdata &&
- rcu_access_pointer(sdata->vif.chanctx_conf) == &chanctx->conf)
+ rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) == &chanctx->conf)
rx_chains_dynamic = rx_chains_static = local->rx_chains;
rcu_read_unlock();
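
The SMPS handling is unchanged in substance, only keyed off per-link state now; the mode-to-chain mapping is equivalent to this hypothetical helper:

static void smps_needed_chains(enum ieee80211_smps_mode mode,
			       u8 needed_rx_chains,
			       u8 *needed_static, u8 *needed_dynamic)
{
	switch (mode) {
	case IEEE80211_SMPS_STATIC:
		*needed_static = 1;
		*needed_dynamic = 1;
		break;
	case IEEE80211_SMPS_DYNAMIC:
		*needed_static = 1;
		*needed_dynamic = needed_rx_chains;
		break;
	case IEEE80211_SMPS_OFF:
	default:
		*needed_static = needed_rx_chains;
		*needed_dynamic = needed_rx_chains;
		break;
	}
}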
@@ -896,9 +986,12 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
}
static void
-__ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
- bool clear)
+__ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link,
+ bool clear)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ unsigned int link_id = link->link_id;
+ struct ieee80211_bss_conf *link_conf = link->conf;
struct ieee80211_local *local __maybe_unused = sdata->local;
struct ieee80211_sub_if_data *vlan;
struct ieee80211_chanctx_conf *conf;
@@ -914,40 +1007,50 @@ __ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
* channel context pointer for a while, possibly pointing
* to a channel context that has already been freed.
*/
- conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+ conf = rcu_dereference_protected(link_conf->chanctx_conf,
lockdep_is_held(&local->chanctx_mtx));
WARN_ON(!conf);
if (clear)
conf = NULL;
- list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
- rcu_assign_pointer(vlan->vif.chanctx_conf, conf);
+ rcu_read_lock();
+ list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) {
+ struct ieee80211_bss_conf *vlan_conf;
+
+ vlan_conf = rcu_dereference(vlan->vif.link_conf[link_id]);
+ if (WARN_ON(!vlan_conf))
+ continue;
+
+ rcu_assign_pointer(vlan_conf->chanctx_conf, conf);
+ }
+ rcu_read_unlock();
}
-void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
- bool clear)
+void ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link,
+ bool clear)
{
- struct ieee80211_local *local = sdata->local;
+ struct ieee80211_local *local = link->sdata->local;
mutex_lock(&local->chanctx_mtx);
- __ieee80211_vif_copy_chanctx_to_vlans(sdata, clear);
+ __ieee80211_link_copy_chanctx_to_vlans(link, clear);
mutex_unlock(&local->chanctx_mtx);
}
-int ieee80211_vif_unreserve_chanctx(struct ieee80211_sub_if_data *sdata)
+int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link)
{
- struct ieee80211_chanctx *ctx = sdata->reserved_chanctx;
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ struct ieee80211_chanctx *ctx = link->reserved_chanctx;
lockdep_assert_held(&sdata->local->chanctx_mtx);
if (WARN_ON(!ctx))
return -EINVAL;
- list_del(&sdata->reserved_chanctx_list);
- sdata->reserved_chanctx = NULL;
+ list_del(&link->reserved_chanctx_list);
+ link->reserved_chanctx = NULL;
if (ieee80211_chanctx_refcount(sdata->local, ctx) == 0) {
if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER) {
@@ -972,17 +1075,18 @@ int ieee80211_vif_unreserve_chanctx(struct ieee80211_sub_if_data *sdata)
return 0;
}
-int ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata,
- const struct cfg80211_chan_def *chandef,
- enum ieee80211_chanctx_mode mode,
- bool radar_required)
+int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
+ const struct cfg80211_chan_def *chandef,
+ enum ieee80211_chanctx_mode mode,
+ bool radar_required)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx *new_ctx, *curr_ctx, *ctx;
lockdep_assert_held(&local->chanctx_mtx);
- curr_ctx = ieee80211_vif_get_chanctx(sdata);
+ curr_ctx = ieee80211_link_get_chanctx(link);
if (curr_ctx && local->use_chanctx && !local->ops->switch_vif_chanctx)
return -ENOTSUPP;
@@ -996,11 +1100,11 @@ int ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata,
if (!curr_ctx ||
(curr_ctx->replace_state ==
IEEE80211_CHANCTX_WILL_BE_REPLACED) ||
- !list_empty(&curr_ctx->reserved_vifs)) {
+ !list_empty(&curr_ctx->reserved_links)) {
/*
- * Another vif already requested this context
+ * Another link already requested this context
* for a reservation. Find another one hoping
- * all vifs assigned to it will also switch
+ * all links assigned to it will also switch
* soon enough.
*
* TODO: This needs a little more work as some
@@ -1009,13 +1113,13 @@ int ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata,
* provided some channel context juggling was
* performed.
*
- * Consider ctx1..3, vif1..6, each ctx has 2
- * vifs. vif1 and vif2 from ctx1 request new
+ * Consider ctx1..3, link1..6, each ctx has 2
+ * links. link1 and link2 from ctx1 request new
* different chandefs starting 2 in-place
* reservations with ctx4 and ctx5 replacing
- * ctx1 and ctx2 respectively. Next vif5 and
- * vif6 from ctx3 reserve ctx4. If vif3 and
- * vif4 remain on ctx2 as they are then this
+ * ctx1 and ctx2 respectively. Next link5 and
+ * link6 from ctx3 reserve ctx4. If link3 and
+ * link4 remain on ctx2 as they are then this
* fails unless `replace_ctx` from ctx5 is
* replaced with ctx3.
*/
@@ -1025,7 +1129,7 @@ int ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata,
IEEE80211_CHANCTX_REPLACE_NONE)
continue;
- if (!list_empty(&ctx->reserved_vifs))
+ if (!list_empty(&ctx->reserved_links))
continue;
curr_ctx = ctx;
@@ -1040,7 +1144,7 @@ int ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata,
if (!curr_ctx ||
(curr_ctx->replace_state ==
IEEE80211_CHANCTX_WILL_BE_REPLACED) ||
- !list_empty(&curr_ctx->reserved_vifs))
+ !list_empty(&curr_ctx->reserved_links))
return -EBUSY;
new_ctx = ieee80211_alloc_chanctx(local, chandef, mode);
@@ -1059,29 +1163,31 @@ int ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata,
}
}
- list_add(&sdata->reserved_chanctx_list, &new_ctx->reserved_vifs);
- sdata->reserved_chanctx = new_ctx;
- sdata->reserved_chandef = *chandef;
- sdata->reserved_radar_required = radar_required;
- sdata->reserved_ready = false;
+ list_add(&link->reserved_chanctx_list, &new_ctx->reserved_links);
+ link->reserved_chanctx = new_ctx;
+ link->reserved_chandef = *chandef;
+ link->reserved_radar_required = radar_required;
+ link->reserved_ready = false;
return 0;
}
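
A reservation made here is only bookkeeping; the switch is finalized once the link flips reserved_ready. A hedged caller sketch of the life cycle (locking elided; local->mtx and chanctx_mtx must be held as the lockdep asserts require):

err = ieee80211_link_reserve_chanctx(link, &new_chandef,
				     IEEE80211_CHANCTX_SHARED, false);
if (err)
	return err;

/* ... prepare the switch, e.g. channel switch announcement ... */

err = ieee80211_link_use_reserved_context(link);	/* sets reserved_ready */
if (err)
	return err;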
static void
-ieee80211_vif_chanctx_reservation_complete(struct ieee80211_sub_if_data *sdata)
+ieee80211_link_chanctx_reservation_complete(struct ieee80211_link_data *link)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+
switch (sdata->vif.type) {
case NL80211_IFTYPE_ADHOC:
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_MESH_POINT:
case NL80211_IFTYPE_OCB:
ieee80211_queue_work(&sdata->local->hw,
- &sdata->csa_finalize_work);
+ &link->csa_finalize_work);
break;
case NL80211_IFTYPE_STATION:
ieee80211_queue_work(&sdata->local->hw,
- &sdata->u.mgd.chswitch_work);
+ &link->u.mgd.chswitch_work);
break;
case NL80211_IFTYPE_UNSPECIFIED:
case NL80211_IFTYPE_AP_VLAN:
@@ -1098,23 +1204,36 @@ ieee80211_vif_chanctx_reservation_complete(struct ieee80211_sub_if_data *sdata)
}
static void
-ieee80211_vif_update_chandef(struct ieee80211_sub_if_data *sdata,
- const struct cfg80211_chan_def *chandef)
+ieee80211_link_update_chandef(struct ieee80211_link_data *link,
+ const struct cfg80211_chan_def *chandef)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ unsigned int link_id = link->link_id;
struct ieee80211_sub_if_data *vlan;
- sdata->vif.bss_conf.chandef = *chandef;
+ link->conf->chandef = *chandef;
if (sdata->vif.type != NL80211_IFTYPE_AP)
return;
- list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
- vlan->vif.bss_conf.chandef = *chandef;
+ rcu_read_lock();
+ list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) {
+ struct ieee80211_bss_conf *vlan_conf;
+
+ vlan_conf = rcu_dereference(vlan->vif.link_conf[link_id]);
+ if (WARN_ON(!vlan_conf))
+ continue;
+
+ vlan_conf->chandef = *chandef;
+ }
+ rcu_read_unlock();
}
static int
-ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
+ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ struct ieee80211_bss_conf *link_conf = link->conf;
struct ieee80211_local *local = sdata->local;
struct ieee80211_vif_chanctx_switch vif_chsw[1] = {};
struct ieee80211_chanctx *old_ctx, *new_ctx;
@@ -1125,10 +1244,10 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
lockdep_assert_held(&local->mtx);
lockdep_assert_held(&local->chanctx_mtx);
- new_ctx = sdata->reserved_chanctx;
- old_ctx = ieee80211_vif_get_chanctx(sdata);
+ new_ctx = link->reserved_chanctx;
+ old_ctx = ieee80211_link_get_chanctx(link);
- if (WARN_ON(!sdata->reserved_ready))
+ if (WARN_ON(!link->reserved_ready))
return -EBUSY;
if (WARN_ON(!new_ctx))
@@ -1142,23 +1261,24 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
return -EINVAL;
chandef = ieee80211_chanctx_non_reserved_chandef(local, new_ctx,
- &sdata->reserved_chandef);
+ &link->reserved_chandef);
if (WARN_ON(!chandef))
return -EINVAL;
- if (sdata->vif.bss_conf.chandef.width != sdata->reserved_chandef.width)
+ if (link_conf->chandef.width != link->reserved_chandef.width)
changed = BSS_CHANGED_BANDWIDTH;
- ieee80211_vif_update_chandef(sdata, &sdata->reserved_chandef);
+ ieee80211_link_update_chandef(link, &link->reserved_chandef);
ieee80211_change_chanctx(local, new_ctx, old_ctx, chandef);
vif_chsw[0].vif = &sdata->vif;
vif_chsw[0].old_ctx = &old_ctx->conf;
vif_chsw[0].new_ctx = &new_ctx->conf;
+ vif_chsw[0].link_conf = link->conf;
- list_del(&sdata->reserved_chanctx_list);
- sdata->reserved_chanctx = NULL;
+ list_del(&link->reserved_chanctx_list);
+ link->reserved_chanctx = NULL;
err = drv_switch_vif_chanctx(local, vif_chsw, 1,
CHANCTX_SWMODE_REASSIGN_VIF);
@@ -1169,11 +1289,11 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
goto out;
}
- list_move(&sdata->assigned_chanctx_list, &new_ctx->assigned_vifs);
- rcu_assign_pointer(sdata->vif.chanctx_conf, &new_ctx->conf);
+ list_move(&link->assigned_chanctx_list, &new_ctx->assigned_links);
+ rcu_assign_pointer(link_conf->chanctx_conf, &new_ctx->conf);
if (sdata->vif.type == NL80211_IFTYPE_AP)
- __ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
+ __ieee80211_link_copy_chanctx_to_vlans(link, false);
ieee80211_check_fast_xmit_iface(sdata);
@@ -1185,25 +1305,26 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
ieee80211_recalc_radar_chanctx(local, new_ctx);
if (changed)
- ieee80211_bss_info_change_notify(sdata, changed);
+ ieee80211_link_info_change_notify(sdata, link, changed);
out:
- ieee80211_vif_chanctx_reservation_complete(sdata);
+ ieee80211_link_chanctx_reservation_complete(link);
return err;
}
static int
-ieee80211_vif_use_reserved_assign(struct ieee80211_sub_if_data *sdata)
+ieee80211_link_use_reserved_assign(struct ieee80211_link_data *link)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx *old_ctx, *new_ctx;
const struct cfg80211_chan_def *chandef;
int err;
- old_ctx = ieee80211_vif_get_chanctx(sdata);
- new_ctx = sdata->reserved_chanctx;
+ old_ctx = ieee80211_link_get_chanctx(link);
+ new_ctx = link->reserved_chanctx;
- if (WARN_ON(!sdata->reserved_ready))
+ if (WARN_ON(!link->reserved_ready))
return -EINVAL;
if (WARN_ON(old_ctx))
@@ -1217,16 +1338,16 @@ ieee80211_vif_use_reserved_assign(struct ieee80211_sub_if_data *sdata)
return -EINVAL;
chandef = ieee80211_chanctx_non_reserved_chandef(local, new_ctx,
- &sdata->reserved_chandef);
+ &link->reserved_chandef);
if (WARN_ON(!chandef))
return -EINVAL;
ieee80211_change_chanctx(local, new_ctx, new_ctx, chandef);
- list_del(&sdata->reserved_chanctx_list);
- sdata->reserved_chanctx = NULL;
+ list_del(&link->reserved_chanctx_list);
+ link->reserved_chanctx = NULL;
- err = ieee80211_assign_vif_chanctx(sdata, new_ctx);
+ err = ieee80211_assign_link_chanctx(link, new_ctx);
if (err) {
if (ieee80211_chanctx_refcount(local, new_ctx) == 0)
ieee80211_free_chanctx(local, new_ctx);
@@ -1235,19 +1356,20 @@ ieee80211_vif_use_reserved_assign(struct ieee80211_sub_if_data *sdata)
}
out:
- ieee80211_vif_chanctx_reservation_complete(sdata);
+ ieee80211_link_chanctx_reservation_complete(link);
return err;
}
static bool
-ieee80211_vif_has_in_place_reservation(struct ieee80211_sub_if_data *sdata)
+ieee80211_link_has_in_place_reservation(struct ieee80211_link_data *link)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_chanctx *old_ctx, *new_ctx;
lockdep_assert_held(&sdata->local->chanctx_mtx);
- new_ctx = sdata->reserved_chanctx;
- old_ctx = ieee80211_vif_get_chanctx(sdata);
+ new_ctx = link->reserved_chanctx;
+ old_ctx = ieee80211_link_get_chanctx(link);
if (!old_ctx)
return false;
@@ -1287,7 +1409,7 @@ static int ieee80211_chsw_switch_vifs(struct ieee80211_local *local,
int n_vifs)
{
struct ieee80211_vif_chanctx_switch *vif_chsw;
- struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_link_data *link;
struct ieee80211_chanctx *ctx, *old_ctx;
int i, err;
@@ -1308,16 +1430,16 @@ static int ieee80211_chsw_switch_vifs(struct ieee80211_local *local,
goto out;
}
- list_for_each_entry(sdata, &ctx->reserved_vifs,
+ list_for_each_entry(link, &ctx->reserved_links,
reserved_chanctx_list) {
- if (!ieee80211_vif_has_in_place_reservation(
- sdata))
+ if (!ieee80211_link_has_in_place_reservation(link))
continue;
- old_ctx = ieee80211_vif_get_chanctx(sdata);
- vif_chsw[i].vif = &sdata->vif;
+ old_ctx = ieee80211_link_get_chanctx(link);
+ vif_chsw[i].vif = &link->sdata->vif;
vif_chsw[i].old_ctx = &old_ctx->conf;
vif_chsw[i].new_ctx = &ctx->conf;
+ vif_chsw[i].link_conf = link->conf;
i++;
}
@@ -1343,7 +1465,7 @@ static int ieee80211_chsw_switch_ctxs(struct ieee80211_local *local)
if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER)
continue;
- if (!list_empty(&ctx->replace_ctx->assigned_vifs))
+ if (!list_empty(&ctx->replace_ctx->assigned_links))
continue;
ieee80211_del_chanctx(local, ctx->replace_ctx);
@@ -1360,7 +1482,7 @@ err:
if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER)
continue;
- if (!list_empty(&ctx->replace_ctx->assigned_vifs))
+ if (!list_empty(&ctx->replace_ctx->assigned_links))
continue;
ieee80211_del_chanctx(local, ctx);
@@ -1372,7 +1494,6 @@ err:
static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
{
- struct ieee80211_sub_if_data *sdata, *sdata_tmp;
struct ieee80211_chanctx *ctx, *ctx_tmp, *old_ctx;
struct ieee80211_chanctx *new_ctx = NULL;
int err, n_assigned, n_reserved, n_ready;
@@ -1398,6 +1519,8 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
*/
list_for_each_entry(ctx, &local->chanctx_list, list) {
+ struct ieee80211_link_data *link;
+
if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER)
continue;
@@ -1415,12 +1538,12 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
n_reserved = 0;
n_ready = 0;
- list_for_each_entry(sdata, &ctx->replace_ctx->assigned_vifs,
+ list_for_each_entry(link, &ctx->replace_ctx->assigned_links,
assigned_chanctx_list) {
n_assigned++;
- if (sdata->reserved_chanctx) {
+ if (link->reserved_chanctx) {
n_reserved++;
- if (sdata->reserved_ready)
+ if (link->reserved_ready)
n_ready++;
}
}
@@ -1437,13 +1560,13 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
}
ctx->conf.radar_enabled = false;
- list_for_each_entry(sdata, &ctx->reserved_vifs,
+ list_for_each_entry(link, &ctx->reserved_links,
reserved_chanctx_list) {
- if (ieee80211_vif_has_in_place_reservation(sdata) &&
- !sdata->reserved_ready)
+ if (ieee80211_link_has_in_place_reservation(link) &&
+ !link->reserved_ready)
return -EAGAIN;
- old_ctx = ieee80211_vif_get_chanctx(sdata);
+ old_ctx = ieee80211_link_get_chanctx(link);
if (old_ctx) {
if (old_ctx->replace_state ==
IEEE80211_CHANCTX_WILL_BE_REPLACED)
@@ -1454,7 +1577,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
n_vifs_ctxless++;
}
- if (sdata->reserved_radar_required)
+ if (link->reserved_radar_required)
ctx->conf.radar_enabled = true;
}
}
@@ -1497,6 +1620,8 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
* context(s).
*/
list_for_each_entry(ctx, &local->chanctx_list, list) {
+ struct ieee80211_link_data *link, *link_tmp;
+
if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER)
continue;
@@ -1505,31 +1630,34 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
goto err;
}
- list_for_each_entry(sdata, &ctx->reserved_vifs,
+ list_for_each_entry(link, &ctx->reserved_links,
reserved_chanctx_list) {
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ struct ieee80211_bss_conf *link_conf = link->conf;
u32 changed = 0;
- if (!ieee80211_vif_has_in_place_reservation(sdata))
+ if (!ieee80211_link_has_in_place_reservation(link))
continue;
- rcu_assign_pointer(sdata->vif.chanctx_conf, &ctx->conf);
+ rcu_assign_pointer(link_conf->chanctx_conf,
+ &ctx->conf);
if (sdata->vif.type == NL80211_IFTYPE_AP)
- __ieee80211_vif_copy_chanctx_to_vlans(sdata,
- false);
+ __ieee80211_link_copy_chanctx_to_vlans(link,
+ false);
ieee80211_check_fast_xmit_iface(sdata);
- sdata->radar_required = sdata->reserved_radar_required;
+ link->radar_required = link->reserved_radar_required;
- if (sdata->vif.bss_conf.chandef.width !=
- sdata->reserved_chandef.width)
+ if (link_conf->chandef.width != link->reserved_chandef.width)
changed = BSS_CHANGED_BANDWIDTH;
- ieee80211_vif_update_chandef(sdata, &sdata->reserved_chandef);
+ ieee80211_link_update_chandef(link, &link->reserved_chandef);
if (changed)
- ieee80211_bss_info_change_notify(sdata,
- changed);
+ ieee80211_link_info_change_notify(sdata,
+ link,
+ changed);
ieee80211_recalc_txpower(sdata, false);
}
@@ -1539,17 +1667,17 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
ieee80211_recalc_radar_chanctx(local, ctx);
ieee80211_recalc_chanctx_min_def(local, ctx);
- list_for_each_entry_safe(sdata, sdata_tmp, &ctx->reserved_vifs,
+ list_for_each_entry_safe(link, link_tmp, &ctx->reserved_links,
reserved_chanctx_list) {
- if (ieee80211_vif_get_chanctx(sdata) != ctx)
+ if (ieee80211_link_get_chanctx(link) != ctx)
continue;
- list_del(&sdata->reserved_chanctx_list);
- list_move(&sdata->assigned_chanctx_list,
- &ctx->assigned_vifs);
- sdata->reserved_chanctx = NULL;
+ list_del(&link->reserved_chanctx_list);
+ list_move(&link->assigned_chanctx_list,
+ &ctx->assigned_links);
+ link->reserved_chanctx = NULL;
- ieee80211_vif_chanctx_reservation_complete(sdata);
+ ieee80211_link_chanctx_reservation_complete(link);
}
/*
@@ -1559,31 +1687,29 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
* reservation for originally requested interface has already
* succeeded at this point.
*/
- list_for_each_entry_safe(sdata, sdata_tmp, &ctx->reserved_vifs,
+ list_for_each_entry_safe(link, link_tmp, &ctx->reserved_links,
reserved_chanctx_list) {
- if (WARN_ON(ieee80211_vif_has_in_place_reservation(
- sdata)))
+ if (WARN_ON(ieee80211_link_has_in_place_reservation(link)))
continue;
- if (WARN_ON(sdata->reserved_chanctx != ctx))
+ if (WARN_ON(link->reserved_chanctx != ctx))
continue;
- if (!sdata->reserved_ready)
+ if (!link->reserved_ready)
continue;
- if (ieee80211_vif_get_chanctx(sdata))
- err = ieee80211_vif_use_reserved_reassign(
- sdata);
+ if (ieee80211_link_get_chanctx(link))
+ err = ieee80211_link_use_reserved_reassign(link);
else
- err = ieee80211_vif_use_reserved_assign(sdata);
+ err = ieee80211_link_use_reserved_assign(link);
if (err) {
- sdata_info(sdata,
- "failed to finalize (re-)assign reservation (err=%d)\n",
- err);
- ieee80211_vif_unreserve_chanctx(sdata);
+ link_info(link,
+ "failed to finalize (re-)assign reservation (err=%d)\n",
+ err);
+ ieee80211_link_unreserve_chanctx(link);
cfg80211_stop_iface(local->hw.wiphy,
- &sdata->wdev,
+ &link->sdata->wdev,
GFP_KERNEL);
}
}
@@ -1609,21 +1735,25 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
err:
list_for_each_entry(ctx, &local->chanctx_list, list) {
+ struct ieee80211_link_data *link, *link_tmp;
+
if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER)
continue;
- list_for_each_entry_safe(sdata, sdata_tmp, &ctx->reserved_vifs,
+ list_for_each_entry_safe(link, link_tmp, &ctx->reserved_links,
reserved_chanctx_list) {
- ieee80211_vif_unreserve_chanctx(sdata);
- ieee80211_vif_chanctx_reservation_complete(sdata);
+ ieee80211_link_unreserve_chanctx(link);
+ ieee80211_link_chanctx_reservation_complete(link);
}
}
return err;
}
-static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata)
+static void __ieee80211_link_release_channel(struct ieee80211_link_data *link)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ struct ieee80211_bss_conf *link_conf = link->conf;
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *ctx;
@@ -1631,38 +1761,37 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata)
lockdep_assert_held(&local->chanctx_mtx);
- conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+ conf = rcu_dereference_protected(link_conf->chanctx_conf,
lockdep_is_held(&local->chanctx_mtx));
if (!conf)
return;
ctx = container_of(conf, struct ieee80211_chanctx, conf);
- if (sdata->reserved_chanctx) {
- if (sdata->reserved_chanctx->replace_state ==
- IEEE80211_CHANCTX_REPLACES_OTHER &&
- ieee80211_chanctx_num_reserved(local,
- sdata->reserved_chanctx) > 1)
+ if (link->reserved_chanctx) {
+ if (link->reserved_chanctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER &&
+ ieee80211_chanctx_num_reserved(local, link->reserved_chanctx) > 1)
use_reserved_switch = true;
- ieee80211_vif_unreserve_chanctx(sdata);
+ ieee80211_link_unreserve_chanctx(link);
}
- ieee80211_assign_vif_chanctx(sdata, NULL);
+ ieee80211_assign_link_chanctx(link, NULL);
if (ieee80211_chanctx_refcount(local, ctx) == 0)
ieee80211_free_chanctx(local, ctx);
- sdata->radar_required = false;
+ link->radar_required = false;
/* Unreserving may ready an in-place reservation. */
if (use_reserved_switch)
ieee80211_vif_use_reserved_switch(local);
}
-int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
- const struct cfg80211_chan_def *chandef,
- enum ieee80211_chanctx_mode mode)
+int ieee80211_link_use_channel(struct ieee80211_link_data *link,
+ const struct cfg80211_chan_def *chandef,
+ enum ieee80211_chanctx_mode mode)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx *ctx;
u8 radar_detect_width = 0;
@@ -1670,7 +1799,11 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
lockdep_assert_held(&local->mtx);
- WARN_ON(sdata->dev && netif_carrier_ok(sdata->dev));
+ if (sdata->vif.active_links &&
+ !(sdata->vif.active_links & BIT(link->link_id))) {
+ ieee80211_link_update_chandef(link, chandef);
+ return 0;
+ }
mutex_lock(&local->chanctx_mtx);
@@ -1682,14 +1815,14 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
if (ret > 0)
radar_detect_width = BIT(chandef->width);
- sdata->radar_required = ret;
+ link->radar_required = ret;
ret = ieee80211_check_combinations(sdata, chandef, mode,
radar_detect_width);
if (ret < 0)
goto out;
- __ieee80211_vif_release_channel(sdata);
+ __ieee80211_link_release_channel(link);
ctx = ieee80211_find_chanctx(local, chandef, mode);
if (!ctx)
@@ -1699,9 +1832,9 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
goto out;
}
- ieee80211_vif_update_chandef(sdata, chandef);
+ ieee80211_link_update_chandef(link, chandef);
- ret = ieee80211_assign_vif_chanctx(sdata, ctx);
+ ret = ieee80211_assign_link_chanctx(link, ctx);
if (ret) {
/* if assign fails refcount stays the same */
if (ieee80211_chanctx_refcount(local, ctx) == 0)
@@ -1713,14 +1846,15 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
ieee80211_recalc_radar_chanctx(local, ctx);
out:
if (ret)
- sdata->radar_required = false;
+ link->radar_required = false;
mutex_unlock(&local->chanctx_mtx);
return ret;
}
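
Callers still pair use and release exactly as before, only per link now; a hedged sketch with error handling trimmed:

mutex_lock(&local->mtx);
ret = ieee80211_link_use_channel(link, &chandef,
				 IEEE80211_CHANCTX_SHARED);
mutex_unlock(&local->mtx);

/* ... link operates on the channel ... */

mutex_lock(&local->mtx);
ieee80211_link_release_channel(link);
mutex_unlock(&local->mtx);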
-int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata)
+int ieee80211_link_use_reserved_context(struct ieee80211_link_data *link)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx *new_ctx;
struct ieee80211_chanctx *old_ctx;
@@ -1729,8 +1863,8 @@ int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata)
lockdep_assert_held(&local->mtx);
lockdep_assert_held(&local->chanctx_mtx);
- new_ctx = sdata->reserved_chanctx;
- old_ctx = ieee80211_vif_get_chanctx(sdata);
+ new_ctx = link->reserved_chanctx;
+ old_ctx = ieee80211_link_get_chanctx(link);
if (WARN_ON(!new_ctx))
return -EINVAL;
@@ -1739,19 +1873,16 @@ int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata)
IEEE80211_CHANCTX_WILL_BE_REPLACED))
return -EINVAL;
- if (WARN_ON(sdata->reserved_ready))
+ if (WARN_ON(link->reserved_ready))
return -EINVAL;
- sdata->reserved_ready = true;
+ link->reserved_ready = true;
if (new_ctx->replace_state == IEEE80211_CHANCTX_REPLACE_NONE) {
if (old_ctx)
- err = ieee80211_vif_use_reserved_reassign(sdata);
- else
- err = ieee80211_vif_use_reserved_assign(sdata);
+ return ieee80211_link_use_reserved_reassign(link);
- if (err)
- return err;
+ return ieee80211_link_use_reserved_assign(link);
}
/*
@@ -1783,10 +1914,12 @@ int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata)
return 0;
}
-int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata,
- const struct cfg80211_chan_def *chandef,
- u32 *changed)
+int ieee80211_link_change_bandwidth(struct ieee80211_link_data *link,
+ const struct cfg80211_chan_def *chandef,
+ u32 *changed)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ struct ieee80211_bss_conf *link_conf = link->conf;
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *ctx;
@@ -1798,18 +1931,18 @@ int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata,
return -EINVAL;
mutex_lock(&local->chanctx_mtx);
- if (cfg80211_chandef_identical(chandef, &sdata->vif.bss_conf.chandef)) {
+ if (cfg80211_chandef_identical(chandef, &link_conf->chandef)) {
ret = 0;
goto out;
}
if (chandef->width == NL80211_CHAN_WIDTH_20_NOHT ||
- sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT) {
+ link_conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT) {
ret = -EINVAL;
goto out;
}
- conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+ conf = rcu_dereference_protected(link_conf->chanctx_conf,
lockdep_is_held(&local->chanctx_mtx));
if (!conf) {
ret = -EINVAL;
@@ -1844,7 +1977,7 @@ int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata,
break;
}
- ieee80211_vif_update_chandef(sdata, chandef);
+ ieee80211_link_update_chandef(link, chandef);
ieee80211_recalc_chanctx_chantype(local, ctx);
@@ -1855,19 +1988,24 @@ int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata,
return ret;
}
-void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata)
+void ieee80211_link_release_channel(struct ieee80211_link_data *link)
{
- WARN_ON(sdata->dev && netif_carrier_ok(sdata->dev));
-
- lockdep_assert_held(&sdata->local->mtx);
+ struct ieee80211_sub_if_data *sdata = link->sdata;
mutex_lock(&sdata->local->chanctx_mtx);
- __ieee80211_vif_release_channel(sdata);
+ if (rcu_access_pointer(link->conf->chanctx_conf)) {
+ lockdep_assert_held(&sdata->local->mtx);
+ __ieee80211_link_release_channel(link);
+ }
mutex_unlock(&sdata->local->chanctx_mtx);
}
-void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata)
+void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ unsigned int link_id = link->link_id;
+ struct ieee80211_bss_conf *link_conf = link->conf;
+ struct ieee80211_bss_conf *ap_conf;
struct ieee80211_local *local = sdata->local;
struct ieee80211_sub_if_data *ap;
struct ieee80211_chanctx_conf *conf;
@@ -1879,9 +2017,12 @@ void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata)
mutex_lock(&local->chanctx_mtx);
- conf = rcu_dereference_protected(ap->vif.chanctx_conf,
+ rcu_read_lock();
+ ap_conf = rcu_dereference(ap->vif.link_conf[link_id]);
+ conf = rcu_dereference_protected(ap_conf->chanctx_conf,
lockdep_is_held(&local->chanctx_mtx));
- rcu_assign_pointer(sdata->vif.chanctx_conf, conf);
+ rcu_assign_pointer(link_conf->chanctx_conf, conf);
+ rcu_read_unlock();
mutex_unlock(&local->chanctx_mtx);
}
diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h
index d90a8f9cc3fd..b4c20f5e778e 100644
--- a/net/mac80211/debug.h
+++ b/net/mac80211/debug.h
@@ -1,4 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Portions
+ * Copyright (C) 2022 Intel Corporation
+ */
#ifndef __MAC80211_DEBUG_H
#define __MAC80211_DEBUG_H
#include <net/cfg80211.h>
@@ -130,6 +134,35 @@ do { \
#define sdata_dbg(sdata, fmt, ...) \
_sdata_dbg(1, sdata, fmt, ##__VA_ARGS__)
+#define link_info(link, fmt, ...) \
+ do { \
+ if ((link)->sdata->vif.valid_links) \
+ _sdata_info((link)->sdata, "[link %d] " fmt, \
+ (link)->link_id, \
+ ##__VA_ARGS__); \
+ else \
+ _sdata_info((link)->sdata, fmt, ##__VA_ARGS__); \
+ } while (0)
+#define link_err(link, fmt, ...) \
+ do { \
+ if ((link)->sdata->vif.valid_links) \
+ _sdata_err((link)->sdata, "[link %d] " fmt, \
+ (link)->link_id, \
+ ##__VA_ARGS__); \
+ else \
+ _sdata_err((link)->sdata, fmt, ##__VA_ARGS__); \
+ } while (0)
+#define link_dbg(link, fmt, ...) \
+ do { \
+ if ((link)->sdata->vif.valid_links) \
+ _sdata_dbg(1, (link)->sdata, "[link %d] " fmt, \
+ (link)->link_id, \
+ ##__VA_ARGS__); \
+ else \
+ _sdata_dbg(1, (link)->sdata, fmt, \
+ ##__VA_ARGS__); \
+ } while (0)
+
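
Usage note: on an MLO interface the new macros prefix the link id, and they degrade to the plain per-sdata message otherwise. For example (interface name and message text illustrative):

link_err(link, "reservation failed (err=%d)\n", err);
/* vif.valid_links != 0: "wlan0: [link 1] reservation failed (err=-16)" */
/* vif.valid_links == 0: "wlan0: reservation failed (err=-16)"          */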
#define ht_dbg(sdata, fmt, ...) \
_sdata_dbg(MAC80211_HT_DEBUG, \
sdata, fmt, ##__VA_ARGS__)
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 8dbfe325ee66..78c7d60e8667 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -4,7 +4,7 @@
*
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright (C) 2018 - 2019, 2021 Intel Corporation
+ * Copyright (C) 2018 - 2019, 2021-2022 Intel Corporation
*/
#include <linux/debugfs.h>
@@ -201,6 +201,36 @@ static const struct file_operations airtime_flags_ops = {
.llseek = default_llseek,
};
+static ssize_t aql_pending_read(struct file *file,
+ char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct ieee80211_local *local = file->private_data;
+ char buf[400];
+ int len = 0;
+
+ len = scnprintf(buf, sizeof(buf),
+ "AC AQL pending\n"
+ "VO %u us\n"
+ "VI %u us\n"
+ "BE %u us\n"
+ "BK %u us\n"
+ "total %u us\n",
+ atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_VO]),
+ atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_VI]),
+ atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_BE]),
+ atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_BK]),
+ atomic_read(&local->aql_total_pending_airtime));
+ return simple_read_from_buffer(user_buf, count, ppos,
+ buf, len);
+}
+
+static const struct file_operations aql_pending_ops = {
+ .read = aql_pending_read,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
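
The new file reads like any other debugfs entry; a minimal userspace sketch (the phy0 path is an assumption and depends on the debugfs mount point and device name):

#include <stdio.h>

int main(void)
{
	char line[128];
	FILE *f = fopen("/sys/kernel/debug/ieee80211/phy0/aql_pending", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* "VO 0 us" etc. */
	fclose(f);
	return 0;
}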
+
static ssize_t aql_txq_limit_read(struct file *file,
char __user *user_buf,
size_t count,
@@ -216,14 +246,14 @@ static ssize_t aql_txq_limit_read(struct file *file,
"VI %u %u\n"
"BE %u %u\n"
"BK %u %u\n",
- local->airtime[IEEE80211_AC_VO].aql_txq_limit_low,
- local->airtime[IEEE80211_AC_VO].aql_txq_limit_high,
- local->airtime[IEEE80211_AC_VI].aql_txq_limit_low,
- local->airtime[IEEE80211_AC_VI].aql_txq_limit_high,
- local->airtime[IEEE80211_AC_BE].aql_txq_limit_low,
- local->airtime[IEEE80211_AC_BE].aql_txq_limit_high,
- local->airtime[IEEE80211_AC_BK].aql_txq_limit_low,
- local->airtime[IEEE80211_AC_BK].aql_txq_limit_high);
+ local->aql_txq_limit_low[IEEE80211_AC_VO],
+ local->aql_txq_limit_high[IEEE80211_AC_VO],
+ local->aql_txq_limit_low[IEEE80211_AC_VI],
+ local->aql_txq_limit_high[IEEE80211_AC_VI],
+ local->aql_txq_limit_low[IEEE80211_AC_BE],
+ local->aql_txq_limit_high[IEEE80211_AC_BE],
+ local->aql_txq_limit_low[IEEE80211_AC_BK],
+ local->aql_txq_limit_high[IEEE80211_AC_BK]);
return simple_read_from_buffer(user_buf, count, ppos,
buf, len);
}
@@ -255,11 +285,11 @@ static ssize_t aql_txq_limit_write(struct file *file,
if (ac >= IEEE80211_NUM_ACS)
return -EINVAL;
- q_limit_low_old = local->airtime[ac].aql_txq_limit_low;
- q_limit_high_old = local->airtime[ac].aql_txq_limit_high;
+ q_limit_low_old = local->aql_txq_limit_low[ac];
+ q_limit_high_old = local->aql_txq_limit_high[ac];
- local->airtime[ac].aql_txq_limit_low = q_limit_low;
- local->airtime[ac].aql_txq_limit_high = q_limit_high;
+ local->aql_txq_limit_low[ac] = q_limit_low;
+ local->aql_txq_limit_high[ac] = q_limit_high;
mutex_lock(&local->sta_mtx);
list_for_each_entry(sta, &local->sta_list, list) {
@@ -382,46 +412,6 @@ static const struct file_operations force_tx_status_ops = {
.llseek = default_llseek,
};
-static ssize_t airtime_read(struct file *file,
- char __user *user_buf,
- size_t count,
- loff_t *ppos)
-{
- struct ieee80211_local *local = file->private_data;
- char buf[200];
- u64 v_t[IEEE80211_NUM_ACS];
- u64 wt[IEEE80211_NUM_ACS];
- int len = 0, ac;
-
- for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- spin_lock_bh(&local->airtime[ac].lock);
- v_t[ac] = local->airtime[ac].v_t;
- wt[ac] = local->airtime[ac].weight_sum;
- spin_unlock_bh(&local->airtime[ac].lock);
- }
- len = scnprintf(buf, sizeof(buf),
- "\tVO VI BE BK\n"
- "Virt-t\t%-10llu %-10llu %-10llu %-10llu\n"
- "Weight\t%-10llu %-10llu %-10llu %-10llu\n",
- v_t[0],
- v_t[1],
- v_t[2],
- v_t[3],
- wt[0],
- wt[1],
- wt[2],
- wt[3]);
-
- return simple_read_from_buffer(user_buf, count, ppos,
- buf, len);
-}
-
-static const struct file_operations airtime_ops = {
- .read = airtime_read,
- .open = simple_open,
- .llseek = default_llseek,
-};
-
#ifdef CONFIG_PM
static ssize_t reset_write(struct file *file, const char __user *user_buf,
size_t count, loff_t *ppos)
@@ -504,6 +494,8 @@ static const char *hw_flag_names[] = {
FLAG(SUPPORTS_TX_ENCAP_OFFLOAD),
FLAG(SUPPORTS_RX_DECAP_OFFLOAD),
FLAG(SUPPORTS_CONC_MON_RX_DECAP),
+ FLAG(DETECTS_COLOR_COLLISION),
+ FLAG(MLO_MCAST_MULTI_LINK_TX),
#undef FLAG
};
@@ -634,8 +626,10 @@ static const struct file_operations stats_ ##name## _ops = { \
.llseek = generic_file_llseek, \
};
+#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
#define DEBUGFS_STATS_ADD(name) \
debugfs_create_u32(#name, 0400, statsd, &local->name);
+#endif
#define DEBUGFS_DEVSTATS_ADD(name) \
debugfs_create_file(#name, 0400, statsd, local, &stats_ ##name## _ops);
@@ -668,15 +662,12 @@ void debugfs_hw_add(struct ieee80211_local *local)
DEBUGFS_ADD(hw_conf);
DEBUGFS_ADD_MODE(force_tx_status, 0600);
DEBUGFS_ADD_MODE(aql_enable, 0600);
+ DEBUGFS_ADD(aql_pending);
if (local->ops->wake_tx_queue)
DEBUGFS_ADD_MODE(aqm, 0600);
- if (wiphy_ext_feature_isset(local->hw.wiphy,
- NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) {
- DEBUGFS_ADD_MODE(airtime, 0600);
- DEBUGFS_ADD_MODE(airtime_flags, 0600);
- }
+ DEBUGFS_ADD_MODE(airtime_flags, 0600);
DEBUGFS_ADD(aql_txq_limit);
debugfs_create_u32("aql_threshold", 0600,
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index f53dec8a3d5c..16a04330e7dc 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -4,6 +4,7 @@
* Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright (C) 2015 Intel Deutschland GmbH
+ * Copyright (C) 2021-2022 Intel Corporation
*/
#include <linux/kobject.h>
@@ -22,7 +23,6 @@ static ssize_t key_##name##_read(struct file *file, \
return mac80211_format_buffer(userbuf, count, ppos, \
format_string, key->prop); \
}
-#define KEY_READ_D(name) KEY_READ(name, name, "%d\n")
#define KEY_READ_X(name) KEY_READ(name, name, "0x%x\n")
#define KEY_OPS(name) \
@@ -395,9 +395,9 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)
debugfs_remove(sdata->debugfs.default_multicast_key);
sdata->debugfs.default_multicast_key = NULL;
- if (sdata->default_multicast_key) {
+ if (sdata->deflink.default_multicast_key) {
key = key_mtx_dereference(sdata->local,
- sdata->default_multicast_key);
+ sdata->deflink.default_multicast_key);
sprintf(buf, "../keys/%d", key->debugfs.cnt);
sdata->debugfs.default_multicast_key =
debugfs_create_symlink("default_multicast_key",
@@ -414,7 +414,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata)
return;
key = key_mtx_dereference(sdata->local,
- sdata->default_mgmt_key);
+ sdata->deflink.default_mgmt_key);
if (key) {
sprintf(buf, "../keys/%d", key->debugfs.cnt);
sdata->debugfs.default_mgmt_key =
@@ -443,7 +443,7 @@ ieee80211_debugfs_key_add_beacon_default(struct ieee80211_sub_if_data *sdata)
return;
key = key_mtx_dereference(sdata->local,
- sdata->default_beacon_key);
+ sdata->deflink.default_beacon_key);
if (key) {
sprintf(buf, "../keys/%d", key->debugfs.cnt);
sdata->debugfs.default_beacon_key =
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index db724fc10a5f..5b014786fd2d 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -2,7 +2,7 @@
/*
* Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2020 Intel Corporation
+ * Copyright (C) 2020-2022 Intel Corporation
*/
#include <linux/kernel.h>
@@ -77,8 +77,6 @@ static ssize_t ieee80211_if_fmt_##name( \
IEEE80211_IF_FMT(name, field, "%#x\n")
#define IEEE80211_IF_FMT_LHEX(name, field) \
IEEE80211_IF_FMT(name, field, "%#lx\n")
-#define IEEE80211_IF_FMT_SIZE(name, field) \
- IEEE80211_IF_FMT(name, field, "%zd\n")
#define IEEE80211_IF_FMT_HEXARRAY(name, field) \
static ssize_t ieee80211_if_fmt_##name( \
@@ -210,8 +208,8 @@ IEEE80211_IF_FILE_R(rc_rateidx_vht_mcs_mask_5ghz);
IEEE80211_IF_FILE(flags, flags, HEX);
IEEE80211_IF_FILE(state, state, LHEX);
IEEE80211_IF_FILE(txpower, vif.bss_conf.txpower, DEC);
-IEEE80211_IF_FILE(ap_power_level, ap_power_level, DEC);
-IEEE80211_IF_FILE(user_power_level, user_power_level, DEC);
+IEEE80211_IF_FILE(ap_power_level, deflink.ap_power_level, DEC);
+IEEE80211_IF_FILE(user_power_level, deflink.user_power_level, DEC);
static ssize_t
ieee80211_if_fmt_hw_queues(const struct ieee80211_sub_if_data *sdata,
@@ -234,8 +232,8 @@ ieee80211_if_fmt_hw_queues(const struct ieee80211_sub_if_data *sdata,
IEEE80211_IF_FILE_R(hw_queues);
/* STA attributes */
-IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC);
-IEEE80211_IF_FILE(aid, vif.bss_conf.aid, DEC);
+IEEE80211_IF_FILE(bssid, deflink.u.mgd.bssid, MAC);
+IEEE80211_IF_FILE(aid, vif.cfg.aid, DEC);
IEEE80211_IF_FILE(beacon_timeout, u.mgd.beacon_timeout, JIFFIES_TO_MS);
static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata,
@@ -258,7 +256,7 @@ static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata,
return -EOPNOTSUPP;
sdata_lock(sdata);
- err = __ieee80211_request_smps_mgd(sdata, smps_mode);
+ err = __ieee80211_request_smps_mgd(sdata, &sdata->deflink, smps_mode);
sdata_unlock(sdata);
return err;
@@ -276,8 +274,8 @@ static ssize_t ieee80211_if_fmt_smps(const struct ieee80211_sub_if_data *sdata,
{
if (sdata->vif.type == NL80211_IFTYPE_STATION)
return snprintf(buf, buflen, "request: %s\nused: %s\n",
- smps_modes[sdata->u.mgd.req_smps],
- smps_modes[sdata->smps_mode]);
+ smps_modes[sdata->deflink.u.mgd.req_smps],
+ smps_modes[sdata->deflink.smps_mode]);
return -EINVAL;
}
@@ -339,7 +337,7 @@ static ssize_t ieee80211_if_parse_tkip_mic_test(
dev_kfree_skb(skb);
return -ENOTCONN;
}
- memcpy(hdr->addr1, sdata->u.mgd.associated->bssid, ETH_ALEN);
+ memcpy(hdr->addr1, sdata->deflink.u.mgd.bssid, ETH_ALEN);
memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
memcpy(hdr->addr3, addr, ETH_ALEN);
sdata_unlock(sdata);
@@ -368,7 +366,7 @@ IEEE80211_IF_FILE_W(tkip_mic_test);
static ssize_t ieee80211_if_parse_beacon_loss(
struct ieee80211_sub_if_data *sdata, const char *buf, int buflen)
{
- if (!ieee80211_sdata_running(sdata) || !sdata->vif.bss_conf.assoc)
+ if (!ieee80211_sdata_running(sdata) || !sdata->vif.cfg.assoc)
return -ENOTCONN;
ieee80211_beacon_loss(&sdata->vif);
@@ -512,34 +510,6 @@ static ssize_t ieee80211_if_fmt_aqm(
}
IEEE80211_IF_FILE_R(aqm);
-static ssize_t ieee80211_if_fmt_airtime(
- const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
-{
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_txq *txq = sdata->vif.txq;
- struct airtime_info *air_info;
- int len;
-
- if (!txq)
- return 0;
-
- spin_lock_bh(&local->airtime[txq->ac].lock);
- air_info = to_airtime_info(txq);
- len = scnprintf(buf,
- buflen,
- "RX: %llu us\nTX: %llu us\nWeight: %u\n"
- "Virt-T: %lld us\n",
- air_info->rx_airtime,
- air_info->tx_airtime,
- air_info->weight,
- air_info->v_t);
- spin_unlock_bh(&local->airtime[txq->ac].lock);
-
- return len;
-}
-
-IEEE80211_IF_FILE_R(airtime);
-
IEEE80211_IF_FILE(multicast_to_unicast, u.ap.multicast_to_unicast, HEX);
/* IBSS attributes */
@@ -600,6 +570,30 @@ static ssize_t ieee80211_if_parse_tsf(
}
IEEE80211_IF_FILE_RW(tsf);
+static ssize_t ieee80211_if_fmt_valid_links(const struct ieee80211_sub_if_data *sdata,
+ char *buf, int buflen)
+{
+ return snprintf(buf, buflen, "0x%x\n", sdata->vif.valid_links);
+}
+IEEE80211_IF_FILE_R(valid_links);
+
+static ssize_t ieee80211_if_fmt_active_links(const struct ieee80211_sub_if_data *sdata,
+ char *buf, int buflen)
+{
+ return snprintf(buf, buflen, "0x%x\n", sdata->vif.active_links);
+}
+
+static ssize_t ieee80211_if_parse_active_links(struct ieee80211_sub_if_data *sdata,
+ const char *buf, int buflen)
+{
+ u16 active_links;
+
+ if (kstrtou16(buf, 0, &active_links))
+ return -EINVAL;
+
+ return ieee80211_set_active_links(&sdata->vif, active_links) ?: buflen;
+}
+IEEE80211_IF_FILE_RW(active_links);
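+/*
+ * kstrtou16() with base 0 accepts decimal or 0x-prefixed hex link
+ * bitmaps; the "?:" expression returns buflen (bytes consumed) when
+ * ieee80211_set_active_links() succeeds (returns 0), or passes the
+ * negative error code through otherwise.
+ */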
#ifdef CONFIG_MAC80211_MESH
IEEE80211_IF_FILE(estab_plinks, u.mesh.estab_plinks, ATOMIC);
@@ -685,10 +679,8 @@ static void add_common_files(struct ieee80211_sub_if_data *sdata)
if (sdata->local->ops->wake_tx_queue &&
sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
- sdata->vif.type != NL80211_IFTYPE_NAN) {
+ sdata->vif.type != NL80211_IFTYPE_NAN)
DEBUGFS_ADD(aqm);
- DEBUGFS_ADD(airtime);
- }
}
static void add_sta_files(struct ieee80211_sub_if_data *sdata)
@@ -702,6 +694,8 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
DEBUGFS_ADD_MODE(uapsd_queues, 0600);
DEBUGFS_ADD_MODE(uapsd_max_sp_len, 0600);
DEBUGFS_ADD_MODE(tdls_wider_bw, 0600);
+	DEBUGFS_ADD_MODE(valid_links, 0400);
+ DEBUGFS_ADD_MODE(active_links, 0600);
}
static void add_ap_files(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 9479f2787ea7..d3397c1248d3 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -202,7 +202,7 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
size_t bufsz = 400;
char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
u64 rx_airtime = 0, tx_airtime = 0;
- u64 v_t[IEEE80211_NUM_ACS];
+ s32 deficit[IEEE80211_NUM_ACS];
ssize_t rv;
int ac;
@@ -210,18 +210,18 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
return -ENOMEM;
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- spin_lock_bh(&local->airtime[ac].lock);
+ spin_lock_bh(&local->active_txq_lock[ac]);
rx_airtime += sta->airtime[ac].rx_airtime;
tx_airtime += sta->airtime[ac].tx_airtime;
- v_t[ac] = sta->airtime[ac].v_t;
- spin_unlock_bh(&local->airtime[ac].lock);
+ deficit[ac] = sta->airtime[ac].deficit;
+ spin_unlock_bh(&local->active_txq_lock[ac]);
}
p += scnprintf(p, bufsz + buf - p,
"RX: %llu us\nTX: %llu us\nWeight: %u\n"
- "Virt-T: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
- rx_airtime, tx_airtime, sta->airtime[0].weight,
- v_t[0], v_t[1], v_t[2], v_t[3]);
+ "Deficit: VO: %d us VI: %d us BE: %d us BK: %d us\n",
+ rx_airtime, tx_airtime, sta->airtime_weight,
+ deficit[0], deficit[1], deficit[2], deficit[3]);
rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
kfree(buf);
@@ -236,11 +236,11 @@ static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
int ac;
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- spin_lock_bh(&local->airtime[ac].lock);
+ spin_lock_bh(&local->active_txq_lock[ac]);
sta->airtime[ac].rx_airtime = 0;
sta->airtime[ac].tx_airtime = 0;
- sta->airtime[ac].v_t = 0;
- spin_unlock_bh(&local->airtime[ac].lock);
+ sta->airtime[ac].deficit = sta->airtime_weight;
+ spin_unlock_bh(&local->active_txq_lock[ac]);
}
return count;
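/*
 * Writing the airtime file resets the accounting: accumulated rx/tx
 * airtime is cleared and each AC's deficit is re-armed to the
 * station's airtime_weight, the starting budget of the deficit
 * round-robin airtime scheduler. The per-AC lock moves back to
 * local->active_txq_lock[] accordingly.
 */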
@@ -263,10 +263,10 @@ static ssize_t sta_aql_read(struct file *file, char __user *userbuf,
return -ENOMEM;
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- spin_lock_bh(&local->airtime[ac].lock);
+ spin_lock_bh(&local->active_txq_lock[ac]);
q_limit_l[ac] = sta->airtime[ac].aql_limit_low;
q_limit_h[ac] = sta->airtime[ac].aql_limit_high;
- spin_unlock_bh(&local->airtime[ac].lock);
+ spin_unlock_bh(&local->active_txq_lock[ac]);
q_depth[ac] = atomic_read(&sta->airtime[ac].aql_tx_pending);
}
@@ -441,13 +441,13 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf,
#define PRINT_HT_CAP(_cond, _str) \
do { \
if (_cond) \
- p += scnprintf(p, sizeof(buf)+buf-p, "\t" _str "\n"); \
+ p += scnprintf(p, bufsz + buf - p, "\t" _str "\n"); \
} while (0)
char *buf, *p;
int i;
ssize_t bufsz = 512;
struct sta_info *sta = file->private_data;
- struct ieee80211_sta_ht_cap *htc = &sta->sta.ht_cap;
+ struct ieee80211_sta_ht_cap *htc = &sta->sta.deflink.ht_cap;
ssize_t ret;
buf = kzalloc(bufsz, GFP_KERNEL);
@@ -531,7 +531,7 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
{
char *buf, *p;
struct sta_info *sta = file->private_data;
- struct ieee80211_sta_vht_cap *vhtc = &sta->sta.vht_cap;
+ struct ieee80211_sta_vht_cap *vhtc = &sta->sta.deflink.vht_cap;
ssize_t ret;
ssize_t bufsz = 512;
@@ -646,7 +646,7 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf,
char *buf, *p;
size_t buf_sz = PAGE_SIZE;
struct sta_info *sta = file->private_data;
- struct ieee80211_sta_he_cap *hec = &sta->sta.he_cap;
+ struct ieee80211_sta_he_cap *hec = &sta->sta.deflink.he_cap;
struct ieee80211_he_mcs_nss_supp *nss = &hec->he_mcs_nss_supp;
u8 ppe_size;
u8 *cap;
@@ -1052,9 +1052,9 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
DEBUGFS_ADD(vht_capa);
DEBUGFS_ADD(he_capa);
- DEBUGFS_ADD_COUNTER(rx_duplicates, rx_stats.num_duplicates);
- DEBUGFS_ADD_COUNTER(rx_fragments, rx_stats.fragments);
- DEBUGFS_ADD_COUNTER(tx_filtered, status_stats.filtered);
+ DEBUGFS_ADD_COUNTER(rx_duplicates, deflink.rx_stats.num_duplicates);
+ DEBUGFS_ADD_COUNTER(rx_fragments, deflink.rx_stats.fragments);
+ DEBUGFS_ADD_COUNTER(tx_filtered, deflink.status_stats.filtered);
if (local->ops->wake_tx_queue) {
DEBUGFS_ADD(aqm);
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index 48322e45e7dd..5392ffa18270 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2015 Intel Deutschland GmbH
+ * Copyright (C) 2022 Intel Corporation
*/
#include <net/mac80211.h>
#include "ieee80211_i.h"
@@ -180,9 +181,10 @@ void drv_sta_rc_update(struct ieee80211_local *local,
}
int drv_conf_tx(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata, u16 ac,
+ struct ieee80211_link_data *link, u16 ac,
const struct ieee80211_tx_queue_params *params)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
int ret = -EOPNOTSUPP;
might_sleep();
@@ -190,6 +192,10 @@ int drv_conf_tx(struct ieee80211_local *local,
if (!check_sdata_in_driver(sdata))
return -EIO;
+ if (sdata->vif.active_links &&
+ !(sdata->vif.active_links & BIT(link->link_id)))
+ return 0;
+
if (params->cw_min == 0 || params->cw_min > params->cw_max) {
/*
* If we can't configure hardware anyway, don't warn. We may
@@ -201,10 +207,10 @@ int drv_conf_tx(struct ieee80211_local *local,
return -EINVAL;
}
- trace_drv_conf_tx(local, sdata, ac, params);
+ trace_drv_conf_tx(local, sdata, link->link_id, ac, params);
if (local->ops->conf_tx)
ret = local->ops->conf_tx(&local->hw, &sdata->vif,
- ac, params);
+ link->link_id, ac, params);
trace_drv_return_int(local, ret);
return ret;
}
@@ -270,6 +276,60 @@ void drv_reset_tsf(struct ieee80211_local *local,
trace_drv_return_void(local);
}
+int drv_assign_vif_chanctx(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *link_conf,
+ struct ieee80211_chanctx *ctx)
+{
+ int ret = 0;
+
+ drv_verify_link_exists(sdata, link_conf);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
+
+ if (sdata->vif.active_links &&
+ !(sdata->vif.active_links & BIT(link_conf->link_id)))
+ return 0;
+
+ trace_drv_assign_vif_chanctx(local, sdata, link_conf, ctx);
+ if (local->ops->assign_vif_chanctx) {
+ WARN_ON_ONCE(!ctx->driver_present);
+ ret = local->ops->assign_vif_chanctx(&local->hw,
+ &sdata->vif,
+ link_conf,
+ &ctx->conf);
+ }
+ trace_drv_return_int(local, ret);
+
+ return ret;
+}
+
+void drv_unassign_vif_chanctx(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *link_conf,
+ struct ieee80211_chanctx *ctx)
+{
+ might_sleep();
+
+ drv_verify_link_exists(sdata, link_conf);
+ if (!check_sdata_in_driver(sdata))
+ return;
+
+ if (sdata->vif.active_links &&
+ !(sdata->vif.active_links & BIT(link_conf->link_id)))
+ return;
+
+ trace_drv_unassign_vif_chanctx(local, sdata, link_conf, ctx);
+ if (local->ops->unassign_vif_chanctx) {
+ WARN_ON_ONCE(!ctx->driver_present);
+ local->ops->unassign_vif_chanctx(&local->hw,
+ &sdata->vif,
+ link_conf,
+ &ctx->conf);
+ }
+ trace_drv_return_void(local);
+}
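+/*
+ * Both chanctx wrappers above use the MLO gating pattern seen
+ * throughout this file: a non-zero vif.active_links bitmap means the
+ * interface operates as an MLD, and calls targeting a link that is
+ * not currently active are silently skipped (success for assign,
+ * no-op for unassign), so drivers only ever see chanctx operations
+ * for active links.
+ */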
+
int drv_switch_vif_chanctx(struct ieee80211_local *local,
struct ieee80211_vif_chanctx_switch *vifs,
int n_vifs, enum ieee80211_chanctx_switch_mode mode)
@@ -344,3 +404,117 @@ int drv_ampdu_action(struct ieee80211_local *local,
return ret;
}
+
+void drv_link_info_changed(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *info,
+ int link_id, u64 changed)
+{
+ might_sleep();
+
+ if (WARN_ON_ONCE(changed & (BSS_CHANGED_BEACON |
+ BSS_CHANGED_BEACON_ENABLED) &&
+ sdata->vif.type != NL80211_IFTYPE_AP &&
+ sdata->vif.type != NL80211_IFTYPE_ADHOC &&
+ sdata->vif.type != NL80211_IFTYPE_MESH_POINT &&
+ sdata->vif.type != NL80211_IFTYPE_OCB))
+ return;
+
+ if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE ||
+ sdata->vif.type == NL80211_IFTYPE_NAN ||
+ (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+ !sdata->vif.bss_conf.mu_mimo_owner &&
+ !(changed & BSS_CHANGED_TXPOWER))))
+ return;
+
+ if (!check_sdata_in_driver(sdata))
+ return;
+
+ if (sdata->vif.active_links &&
+ !(sdata->vif.active_links & BIT(link_id)))
+ return;
+
+ trace_drv_link_info_changed(local, sdata, info, changed);
+ if (local->ops->link_info_changed)
+ local->ops->link_info_changed(&local->hw, &sdata->vif,
+ info, changed);
+ else if (local->ops->bss_info_changed)
+ local->ops->bss_info_changed(&local->hw, &sdata->vif,
+ info, changed);
+ trace_drv_return_void(local);
+}
+
+int drv_set_key(struct ieee80211_local *local,
+ enum set_key_cmd cmd,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta,
+ struct ieee80211_key_conf *key)
+{
+ int ret;
+
+ might_sleep();
+
+ sdata = get_bss_sdata(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
+
+ if (WARN_ON(key->link_id >= 0 && sdata->vif.active_links &&
+ !(sdata->vif.active_links & BIT(key->link_id))))
+ return -ENOLINK;
+
+ trace_drv_set_key(local, cmd, sdata, sta, key);
+ ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key);
+ trace_drv_return_int(local, ret);
+ return ret;
+}
+
+int drv_change_vif_links(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ u16 old_links, u16 new_links,
+ struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS])
+{
+ int ret = -EOPNOTSUPP;
+
+ might_sleep();
+
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
+
+ if (old_links == new_links)
+ return 0;
+
+ trace_drv_change_vif_links(local, sdata, old_links, new_links);
+ if (local->ops->change_vif_links)
+ ret = local->ops->change_vif_links(&local->hw, &sdata->vif,
+ old_links, new_links, old);
+ trace_drv_return_int(local, ret);
+
+ return ret;
+}
+
+int drv_change_sta_links(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta,
+ u16 old_links, u16 new_links)
+{
+ int ret = -EOPNOTSUPP;
+
+ might_sleep();
+
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
+
+ old_links &= sdata->vif.active_links;
+ new_links &= sdata->vif.active_links;
+
+ if (old_links == new_links)
+ return 0;
+
+ trace_drv_change_sta_links(local, sdata, sta, old_links, new_links);
+ if (local->ops->change_sta_links)
+ ret = local->ops->change_sta_links(&local->hw, &sdata->vif, sta,
+ old_links, new_links);
+ trace_drv_return_int(local, ret);
+
+ return ret;
+}
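+/*
+ * Masking both bitmaps with vif.active_links means the driver is
+ * only told about transitions between links it can currently see;
+ * when the masked sets are equal there is nothing to report and the
+ * op is skipped. Without a change_sta_links op the wrapper returns
+ * -EOPNOTSUPP.
+ */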
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 4e2fc1a08681..81e40b0a3b16 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -147,37 +147,29 @@ static inline int drv_config(struct ieee80211_local *local, u32 changed)
return ret;
}
-static inline void drv_bss_info_changed(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct ieee80211_bss_conf *info,
- u32 changed)
+static inline void drv_vif_cfg_changed(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ u64 changed)
{
might_sleep();
- if (WARN_ON_ONCE(changed & (BSS_CHANGED_BEACON |
- BSS_CHANGED_BEACON_ENABLED) &&
- sdata->vif.type != NL80211_IFTYPE_AP &&
- sdata->vif.type != NL80211_IFTYPE_ADHOC &&
- sdata->vif.type != NL80211_IFTYPE_MESH_POINT &&
- sdata->vif.type != NL80211_IFTYPE_OCB))
- return;
-
- if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE ||
- sdata->vif.type == NL80211_IFTYPE_NAN ||
- (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
- !sdata->vif.mu_mimo_owner &&
- !(changed & BSS_CHANGED_TXPOWER))))
- return;
-
if (!check_sdata_in_driver(sdata))
return;
- trace_drv_bss_info_changed(local, sdata, info, changed);
- if (local->ops->bss_info_changed)
- local->ops->bss_info_changed(&local->hw, &sdata->vif, info, changed);
+ trace_drv_vif_cfg_changed(local, sdata, changed);
+ if (local->ops->vif_cfg_changed)
+ local->ops->vif_cfg_changed(&local->hw, &sdata->vif, changed);
+ else if (local->ops->bss_info_changed)
+ local->ops->bss_info_changed(&local->hw, &sdata->vif,
+ &sdata->vif.bss_conf, changed);
trace_drv_return_void(local);
}
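/*
 * bss_info_changed() is split in two for MLO: drv_vif_cfg_changed()
 * covers interface-wide state (struct ieee80211_vif.cfg, e.g. assoc
 * and ssid) and drv_link_info_changed() covers the per-link
 * bss_conf. Both fall back to the old bss_info_changed op so that
 * non-MLO drivers keep working unchanged.
 */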
+void drv_link_info_changed(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *info,
+ int link_id, u64 changed);
+
static inline u64 drv_prepare_multicast(struct ieee80211_local *local,
struct netdev_hw_addr_list *mc_list)
{
@@ -234,25 +226,11 @@ static inline int drv_set_tim(struct ieee80211_local *local,
return ret;
}
-static inline int drv_set_key(struct ieee80211_local *local,
- enum set_key_cmd cmd,
- struct ieee80211_sub_if_data *sdata,
- struct ieee80211_sta *sta,
- struct ieee80211_key_conf *key)
-{
- int ret;
-
- might_sleep();
-
- sdata = get_bss_sdata(sdata);
- if (!check_sdata_in_driver(sdata))
- return -EIO;
-
- trace_drv_set_key(local, cmd, sdata, sta, key);
- ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key);
- trace_drv_return_int(local, ret);
- return ret;
-}
+int drv_set_key(struct ieee80211_local *local,
+ enum set_key_cmd cmd,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta,
+ struct ieee80211_key_conf *key);
static inline void drv_update_tkip_key(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
@@ -568,7 +546,7 @@ static inline void drv_sta_statistics(struct ieee80211_local *local,
}
int drv_conf_tx(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata, u16 ac,
+ struct ieee80211_link_data *link, u16 ac,
const struct ieee80211_tx_queue_params *params);
u64 drv_get_tsf(struct ieee80211_local *local,
@@ -915,76 +893,60 @@ static inline void drv_change_chanctx(struct ieee80211_local *local,
trace_drv_return_void(local);
}
-static inline int drv_assign_vif_chanctx(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct ieee80211_chanctx *ctx)
-{
- int ret = 0;
-
- if (!check_sdata_in_driver(sdata))
- return -EIO;
-
- trace_drv_assign_vif_chanctx(local, sdata, ctx);
- if (local->ops->assign_vif_chanctx) {
- WARN_ON_ONCE(!ctx->driver_present);
- ret = local->ops->assign_vif_chanctx(&local->hw,
- &sdata->vif,
- &ctx->conf);
- }
- trace_drv_return_int(local, ret);
-
- return ret;
-}
-
-static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct ieee80211_chanctx *ctx)
+static inline void drv_verify_link_exists(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *link_conf)
{
- might_sleep();
-
- if (!check_sdata_in_driver(sdata))
- return;
-
- trace_drv_unassign_vif_chanctx(local, sdata, ctx);
- if (local->ops->unassign_vif_chanctx) {
- WARN_ON_ONCE(!ctx->driver_present);
- local->ops->unassign_vif_chanctx(&local->hw,
- &sdata->vif,
- &ctx->conf);
- }
- trace_drv_return_void(local);
+	/* deflink always exists, so we only need to check the other links */
+ if (sdata->deflink.conf != link_conf)
+ sdata_assert_lock(sdata);
}
+int drv_assign_vif_chanctx(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *link_conf,
+ struct ieee80211_chanctx *ctx);
+void drv_unassign_vif_chanctx(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *link_conf,
+ struct ieee80211_chanctx *ctx);
int drv_switch_vif_chanctx(struct ieee80211_local *local,
struct ieee80211_vif_chanctx_switch *vifs,
int n_vifs, enum ieee80211_chanctx_switch_mode mode);
static inline int drv_start_ap(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *link_conf)
{
int ret = 0;
+ /* make sure link_conf is protected */
+ drv_verify_link_exists(sdata, link_conf);
+
might_sleep();
if (!check_sdata_in_driver(sdata))
return -EIO;
- trace_drv_start_ap(local, sdata, &sdata->vif.bss_conf);
+ trace_drv_start_ap(local, sdata, link_conf);
if (local->ops->start_ap)
- ret = local->ops->start_ap(&local->hw, &sdata->vif);
+ ret = local->ops->start_ap(&local->hw, &sdata->vif, link_conf);
trace_drv_return_int(local, ret);
return ret;
}
static inline void drv_stop_ap(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *link_conf)
{
+ /* make sure link_conf is protected */
+ drv_verify_link_exists(sdata, link_conf);
+
if (!check_sdata_in_driver(sdata))
return;
- trace_drv_stop_ap(local, sdata);
+ trace_drv_stop_ap(local, sdata, link_conf);
if (local->ops->stop_ap)
- local->ops->stop_ap(&local->hw, &sdata->vif);
+ local->ops->stop_ap(&local->hw, &sdata->vif, link_conf);
trace_drv_return_void(local);
}
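/*
 * start_ap()/stop_ap() now take the affected link's bss_conf
 * explicitly for MLO AP operation. drv_verify_link_exists() encodes
 * the locking contract: the embedded deflink.conf is always valid,
 * while any other link_conf may only be passed with the sdata lock
 * held.
 */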
@@ -1508,4 +1470,13 @@ static inline int drv_net_fill_forward_path(struct ieee80211_local *local,
return ret;
}
+int drv_change_vif_links(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ u16 old_links, u16 new_links,
+ struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS]);
+int drv_change_sta_links(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta,
+ u16 old_links, u16 new_links);
+
#endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c
new file mode 100644
index 000000000000..18bc6b78b267
--- /dev/null
+++ b/net/mac80211/eht.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * EHT handling
+ *
+ * Copyright(c) 2021-2022 Intel Corporation
+ */
+
+#include "ieee80211_i.h"
+
+void
+ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_supported_band *sband,
+ const u8 *he_cap_ie, u8 he_cap_len,
+ const struct ieee80211_eht_cap_elem *eht_cap_ie_elem,
+ u8 eht_cap_len,
+ struct link_sta_info *link_sta)
+{
+ struct ieee80211_sta_eht_cap *eht_cap = &link_sta->pub->eht_cap;
+ struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie;
+ u8 eht_ppe_size = 0;
+ u8 mcs_nss_size;
+ u8 eht_total_size = sizeof(eht_cap->eht_cap_elem);
+ u8 *pos = (u8 *)eht_cap_ie_elem;
+
+ memset(eht_cap, 0, sizeof(*eht_cap));
+
+ if (!eht_cap_ie_elem ||
+ !ieee80211_get_eht_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif)))
+ return;
+
+ mcs_nss_size = ieee80211_eht_mcs_nss_size(he_cap_ie_elem,
+ &eht_cap_ie_elem->fixed,
+ sdata->vif.type ==
+ NL80211_IFTYPE_STATION);
+
+ eht_total_size += mcs_nss_size;
+
+ /* Calculate the PPE thresholds length only if the header is present */
+ if (eht_cap_ie_elem->fixed.phy_cap_info[5] &
+ IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT) {
+ u16 eht_ppe_hdr;
+
+ if (eht_cap_len < eht_total_size + sizeof(u16))
+ return;
+
+ eht_ppe_hdr = get_unaligned_le16(eht_cap_ie_elem->optional + mcs_nss_size);
+ eht_ppe_size =
+ ieee80211_eht_ppe_size(eht_ppe_hdr,
+ eht_cap_ie_elem->fixed.phy_cap_info);
+ eht_total_size += eht_ppe_size;
+
+		/* we calculate as if NSS > 8 were valid, but don't handle that */
+ if (eht_ppe_size > sizeof(eht_cap->eht_ppe_thres))
+ return;
+ }
+
+ if (eht_cap_len < eht_total_size)
+ return;
+
+ /* Copy the static portion of the EHT capabilities */
+ memcpy(&eht_cap->eht_cap_elem, pos, sizeof(eht_cap->eht_cap_elem));
+ pos += sizeof(eht_cap->eht_cap_elem);
+
+ /* Copy MCS/NSS which depends on the peer capabilities */
+ memset(&eht_cap->eht_mcs_nss_supp, 0,
+ sizeof(eht_cap->eht_mcs_nss_supp));
+ memcpy(&eht_cap->eht_mcs_nss_supp, pos, mcs_nss_size);
+
+ if (eht_ppe_size)
+ memcpy(eht_cap->eht_ppe_thres,
+ &eht_cap_ie_elem->optional[mcs_nss_size],
+ eht_ppe_size);
+
+ eht_cap->has_eht = true;
+
+ link_sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(link_sta);
+ link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta);
+}
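+/*
+ * The parse above follows the EHT capabilities element layout: the
+ * fixed portion first, then the MCS/NSS set (whose size depends on
+ * the peer's HE PHY capabilities and our interface type), then the
+ * optional PPE thresholds, present only when the
+ * IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT PHY cap bit is set.
+ * Each copy is preceded by a length check against eht_cap_len, and
+ * an oversized PPE field (NSS > 8) aborts before eht_ppe_thres
+ * could overflow.
+ */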
diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c
index b2253df54413..a3830d925cc2 100644
--- a/net/mac80211/ethtool.c
+++ b/net/mac80211/ethtool.c
@@ -5,7 +5,7 @@
* Copied from cfg.c - originally
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2014 Intel Corporation (Author: Johannes Berg)
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018, 2022 Intel Corporation
*/
#include <linux/types.h>
#include <net/cfg80211.h>
@@ -83,17 +83,17 @@ static void ieee80211_get_stats(struct net_device *dev,
#define ADD_STA_STATS(sta) \
do { \
- data[i++] += sta->rx_stats.packets; \
- data[i++] += sta->rx_stats.bytes; \
- data[i++] += sta->rx_stats.num_duplicates; \
- data[i++] += sta->rx_stats.fragments; \
- data[i++] += sta->rx_stats.dropped; \
+ data[i++] += sinfo.rx_packets; \
+ data[i++] += sinfo.rx_bytes; \
+ data[i++] += (sta)->rx_stats.num_duplicates; \
+ data[i++] += (sta)->rx_stats.fragments; \
+ data[i++] += sinfo.rx_dropped_misc; \
\
data[i++] += sinfo.tx_packets; \
data[i++] += sinfo.tx_bytes; \
- data[i++] += sta->status_stats.filtered; \
- data[i++] += sta->status_stats.retry_failed; \
- data[i++] += sta->status_stats.retry_count; \
+ data[i++] += (sta)->status_stats.filtered; \
+ data[i++] += sinfo.tx_failed; \
+ data[i++] += sinfo.tx_retries; \
} while (0)
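/*
 * In the rewritten macro, per-link debug counters (duplicates,
 * fragments, filtered) are read directly from the link_sta argument
 * (invoked below as ADD_STA_STATS(&sta->deflink)), while the
 * packet/byte/retry totals come from the sinfo filled in by
 * sta_set_sinfo(), which aggregates across links. The data[i++]
 * order must stay in sync with the ethtool stats string table
 * (ieee80211_gstrings_sta_stats) earlier in this file.
 */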
/* For Managed stations, find the single station based on BSSID
@@ -105,7 +105,7 @@ static void ieee80211_get_stats(struct net_device *dev,
mutex_lock(&local->sta_mtx);
if (sdata->vif.type == NL80211_IFTYPE_STATION) {
- sta = sta_info_get_bss(sdata, sdata->u.mgd.bssid);
+ sta = sta_info_get_bss(sdata, sdata->deflink.u.mgd.bssid);
if (!(sta && !WARN_ON(sta->sdata->dev != dev)))
goto do_survey;
@@ -114,7 +114,7 @@ static void ieee80211_get_stats(struct net_device *dev,
sta_set_sinfo(sta, &sinfo, false);
i = 0;
- ADD_STA_STATS(sta);
+ ADD_STA_STATS(&sta->deflink);
data[i++] = sta->sta_state;
@@ -140,7 +140,7 @@ static void ieee80211_get_stats(struct net_device *dev,
memset(&sinfo, 0, sizeof(sinfo));
sta_set_sinfo(sta, &sinfo, false);
i = 0;
- ADD_STA_STATS(sta);
+ ADD_STA_STATS(&sta->deflink);
}
}
@@ -150,7 +150,7 @@ do_survey:
survey.filled = 0;
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
if (chanctx_conf)
channel = chanctx_conf->def.chan;
else
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index c05af7018f79..729f261520c7 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -3,15 +3,16 @@
* HE handling
*
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2019 - 2020 Intel Corporation
+ * Copyright(c) 2019 - 2022 Intel Corporation
*/
#include "ieee80211_i.h"
static void
ieee80211_update_from_he_6ghz_capa(const struct ieee80211_he_6ghz_capa *he_6ghz_capa,
- struct sta_info *sta)
+ struct link_sta_info *link_sta)
{
+ struct sta_info *sta = link_sta->sta;
enum ieee80211_smps_mode smps_mode;
if (sta->sdata->vif.type == NL80211_IFTYPE_AP ||
@@ -30,26 +31,28 @@ ieee80211_update_from_he_6ghz_capa(const struct ieee80211_he_6ghz_capa *he_6ghz_
break;
}
- sta->sta.smps_mode = smps_mode;
+ link_sta->pub->smps_mode = smps_mode;
} else {
- sta->sta.smps_mode = IEEE80211_SMPS_OFF;
+ link_sta->pub->smps_mode = IEEE80211_SMPS_OFF;
}
switch (le16_get_bits(he_6ghz_capa->capa,
IEEE80211_HE_6GHZ_CAP_MAX_MPDU_LEN)) {
case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454:
- sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454;
+ link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454;
break;
case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991:
- sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_7991;
+ link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_7991;
break;
case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895:
default:
- sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_3895;
+ link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_3895;
break;
}
- sta->sta.he_6ghz_capa = *he_6ghz_capa;
+ ieee80211_sta_recalc_aggregates(&sta->sta);
+
+ link_sta->pub->he_6ghz_capa = *he_6ghz_capa;
}
static void ieee80211_he_mcs_disable(__le16 *he_mcs)
@@ -108,9 +111,9 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
const u8 *he_cap_ie, u8 he_cap_len,
const struct ieee80211_he_6ghz_capa *he_6ghz_capa,
- struct sta_info *sta)
+ struct link_sta_info *link_sta)
{
- struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap;
+ struct ieee80211_sta_he_cap *he_cap = &link_sta->pub->he_cap;
struct ieee80211_sta_he_cap own_he_cap;
struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie;
u8 he_ppe_size;
@@ -153,11 +156,11 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
he_cap->has_he = true;
- sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(sta);
- sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta);
+ link_sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(link_sta);
+ link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta);
if (sband->band == NL80211_BAND_6GHZ && he_6ghz_capa)
- ieee80211_update_from_he_6ghz_capa(he_6ghz_capa, sta);
+ ieee80211_update_from_he_6ghz_capa(he_6ghz_capa, link_sta);
ieee80211_he_mcs_intersection(&own_he_cap.he_mcs_nss_supp.rx_mcs_80,
&he_cap->he_mcs_nss_supp.rx_mcs_80,
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 2eb7641f5556..83bc41346ae7 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
* Copyright 2017 Intel Deutschland GmbH
- * Copyright(c) 2020-2021 Intel Corporation
+ * Copyright(c) 2020-2022 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -138,13 +138,16 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
const struct ieee80211_ht_cap *ht_cap_ie,
- struct sta_info *sta)
+ struct link_sta_info *link_sta)
{
+ struct ieee80211_bss_conf *link_conf;
+ struct sta_info *sta = link_sta->sta;
struct ieee80211_sta_ht_cap ht_cap, own_cap;
u8 ampdu_info, tx_mcs_set_cap;
int i, max_tx_streams;
bool changed;
enum ieee80211_sta_rx_bandwidth bw;
+ enum nl80211_chan_width width;
memset(&ht_cap, 0, sizeof(ht_cap));
@@ -238,16 +241,25 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
ht_cap.mcs.rx_highest = ht_cap_ie->mcs.rx_highest;
if (ht_cap.cap & IEEE80211_HT_CAP_MAX_AMSDU)
- sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_7935;
+ link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_7935;
else
- sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_3839;
+ link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_3839;
+
+ ieee80211_sta_recalc_aggregates(&sta->sta);
apply:
- changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap));
+ changed = memcmp(&link_sta->pub->ht_cap, &ht_cap, sizeof(ht_cap));
+
+ memcpy(&link_sta->pub->ht_cap, &ht_cap, sizeof(ht_cap));
- memcpy(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap));
+ rcu_read_lock();
+ link_conf = rcu_dereference(sdata->vif.link_conf[link_sta->link_id]);
+ if (WARN_ON(!link_conf))
+ width = NL80211_CHAN_WIDTH_20_NOHT;
+ else
+ width = link_conf->chandef.width;
- switch (sdata->vif.bss_conf.chandef.width) {
+ switch (width) {
default:
WARN_ON_ONCE(1);
fallthrough;
@@ -263,10 +275,11 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20;
break;
}
+ rcu_read_unlock();
- sta->sta.bandwidth = bw;
+ link_sta->pub->bandwidth = bw;
- sta->cur_max_bandwidth =
+ link_sta->cur_max_bandwidth =
ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20;
@@ -288,12 +301,13 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
break;
}
- if (smps_mode != sta->sta.smps_mode)
+ if (smps_mode != link_sta->pub->smps_mode)
changed = true;
- sta->sta.smps_mode = smps_mode;
+ link_sta->pub->smps_mode = smps_mode;
} else {
- sta->sta.smps_mode = IEEE80211_SMPS_OFF;
+ link_sta->pub->smps_mode = IEEE80211_SMPS_OFF;
}
+
return changed;
}
@@ -433,7 +447,7 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
else if (sdata->vif.type == NL80211_IFTYPE_STATION)
- memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN);
+ memcpy(mgmt->bssid, sdata->deflink.u.mgd.bssid, ETH_ALEN);
else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN);
@@ -539,31 +553,27 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
return 0;
}
-void ieee80211_request_smps_mgd_work(struct work_struct *work)
-{
- struct ieee80211_sub_if_data *sdata =
- container_of(work, struct ieee80211_sub_if_data,
- u.mgd.request_smps_work);
-
- sdata_lock(sdata);
- __ieee80211_request_smps_mgd(sdata, sdata->u.mgd.driver_smps_mode);
- sdata_unlock(sdata);
-}
-
-void ieee80211_request_smps(struct ieee80211_vif *vif,
+void ieee80211_request_smps(struct ieee80211_vif *vif, unsigned int link_id,
enum ieee80211_smps_mode smps_mode)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_link_data *link;
if (WARN_ON_ONCE(vif->type != NL80211_IFTYPE_STATION))
return;
- if (sdata->u.mgd.driver_smps_mode == smps_mode)
- return;
+ rcu_read_lock();
+ link = rcu_dereference(sdata->link[link_id]);
+ if (WARN_ON(!link))
+ goto out;
+
+ if (link->u.mgd.driver_smps_mode == smps_mode)
+ goto out;
- sdata->u.mgd.driver_smps_mode = smps_mode;
- ieee80211_queue_work(&sdata->local->hw,
- &sdata->u.mgd.request_smps_work);
+ link->u.mgd.driver_smps_mode = smps_mode;
+ ieee80211_queue_work(&sdata->local->hw, &link->u.mgd.request_smps_work);
+out:
+ rcu_read_unlock();
}
/* this might change ... don't want non-open drivers using it */
EXPORT_SYMBOL_GPL(ieee80211_request_smps);
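/*
 * ieee80211_request_smps() is now per-link: the link is looked up
 * under RCU (links can come and go at runtime), the requested mode
 * is recorded, and the actual SMPS change is deferred to the link's
 * request_smps_work item so that it runs with the sdata lock held,
 * as __ieee80211_request_smps_mgd() requires.
 */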
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 0416c4d22292..9dffc3079588 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -9,7 +9,7 @@
* Copyright 2009, Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2016 Intel Deutschland GmbH
- * Copyright(c) 2018-2021 Intel Corporation
+ * Copyright(c) 2018-2022 Intel Corporation
*/
#include <linux/delay.h>
@@ -244,9 +244,9 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
sta_info_flush(sdata);
/* if merging, indicate to driver that we leave the old IBSS */
- if (sdata->vif.bss_conf.ibss_joined) {
- sdata->vif.bss_conf.ibss_joined = false;
- sdata->vif.bss_conf.ibss_creator = false;
+ if (sdata->vif.cfg.ibss_joined) {
+ sdata->vif.cfg.ibss_joined = false;
+ sdata->vif.cfg.ibss_creator = false;
sdata->vif.bss_conf.enable_beacon = false;
netif_carrier_off(sdata->dev);
ieee80211_bss_info_change_notify(sdata,
@@ -255,8 +255,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
drv_leave_ibss(local, sdata);
}
- presp = rcu_dereference_protected(ifibss->presp,
- lockdep_is_held(&sdata->wdev.mtx));
+ presp = sdata_dereference(ifibss->presp, sdata);
RCU_INIT_POINTER(ifibss->presp, NULL);
if (presp)
kfree_rcu(presp, rcu_head);
@@ -301,15 +300,15 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
radar_required = err;
mutex_lock(&local->mtx);
- if (ieee80211_vif_use_channel(sdata, &chandef,
- ifibss->fixed_channel ?
+ if (ieee80211_link_use_channel(&sdata->deflink, &chandef,
+ ifibss->fixed_channel ?
IEEE80211_CHANCTX_SHARED :
IEEE80211_CHANCTX_EXCLUSIVE)) {
sdata_info(sdata, "Failed to join IBSS, no channel context\n");
mutex_unlock(&local->mtx);
return;
}
- sdata->radar_required = radar_required;
+ sdata->deflink.radar_required = radar_required;
mutex_unlock(&local->mtx);
memcpy(ifibss->bssid, bssid, ETH_ALEN);
@@ -326,8 +325,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
sdata->vif.bss_conf.enable_beacon = true;
sdata->vif.bss_conf.beacon_int = beacon_int;
sdata->vif.bss_conf.basic_rates = basic_rates;
- sdata->vif.bss_conf.ssid_len = ifibss->ssid_len;
- memcpy(sdata->vif.bss_conf.ssid, ifibss->ssid, ifibss->ssid_len);
+ sdata->vif.cfg.ssid_len = ifibss->ssid_len;
+ memcpy(sdata->vif.cfg.ssid, ifibss->ssid, ifibss->ssid_len);
bss_change = BSS_CHANGED_BEACON_INT;
bss_change |= ieee80211_reset_erp_info(sdata);
bss_change |= BSS_CHANGED_BSSID;
@@ -352,26 +351,24 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
bss_change |= BSS_CHANGED_ERP_SLOT;
/* cf. IEEE 802.11 9.2.12 */
- if (chan->band == NL80211_BAND_2GHZ && have_higher_than_11mbit)
- sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE;
- else
- sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE;
+ sdata->deflink.operating_11g_mode =
+ chan->band == NL80211_BAND_2GHZ && have_higher_than_11mbit;
- ieee80211_set_wmm_default(sdata, true, false);
+ ieee80211_set_wmm_default(&sdata->deflink, true, false);
- sdata->vif.bss_conf.ibss_joined = true;
- sdata->vif.bss_conf.ibss_creator = creator;
+ sdata->vif.cfg.ibss_joined = true;
+ sdata->vif.cfg.ibss_creator = creator;
err = drv_join_ibss(local, sdata);
if (err) {
- sdata->vif.bss_conf.ibss_joined = false;
- sdata->vif.bss_conf.ibss_creator = false;
+ sdata->vif.cfg.ibss_joined = false;
+ sdata->vif.cfg.ibss_creator = false;
sdata->vif.bss_conf.enable_beacon = false;
- sdata->vif.bss_conf.ssid_len = 0;
+ sdata->vif.cfg.ssid_len = 0;
RCU_INIT_POINTER(ifibss->presp, NULL);
kfree_rcu(presp, rcu_head);
mutex_lock(&local->mtx);
- ieee80211_vif_release_channel(sdata);
+ ieee80211_link_release_channel(&sdata->deflink);
mutex_unlock(&local->mtx);
sdata_info(sdata, "Failed to join IBSS, driver failure: %d\n",
err);
@@ -509,8 +506,7 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
rcu_read_unlock();
cfg80211_put_bss(sdata->local->hw.wiphy, cbss);
- old_presp = rcu_dereference_protected(ifibss->presp,
- lockdep_is_held(&sdata->wdev.mtx));
+ old_presp = sdata_dereference(ifibss->presp, sdata);
presp = ieee80211_ibss_build_presp(sdata,
sdata->vif.bss_conf.beacon_int,
@@ -534,6 +530,10 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
sdata_assert_lock(sdata);
+ /* When not connected/joined, sending CSA doesn't make sense. */
+ if (ifibss->state != IEEE80211_IBSS_MLME_JOINED)
+ return -ENOLINK;
+
/* update cfg80211 bss information with the new channel */
if (!is_zero_ether_addr(ifibss->bssid)) {
cbss = cfg80211_get_bss(sdata->local->hw.wiphy,
@@ -544,12 +544,12 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
IEEE80211_PRIVACY(ifibss->privacy));
/* XXX: should not really modify cfg80211 data */
if (cbss) {
- cbss->channel = sdata->csa_chandef.chan;
+ cbss->channel = sdata->deflink.csa_chandef.chan;
cfg80211_put_bss(sdata->local->hw.wiphy, cbss);
}
}
- ifibss->chandef = sdata->csa_chandef;
+ ifibss->chandef = sdata->deflink.csa_chandef;
/* generate the beacon */
return ieee80211_ibss_csa_beacon(sdata, NULL);
@@ -622,7 +622,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid,
}
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
if (WARN_ON_ONCE(!chanctx_conf))
return NULL;
band = chanctx_conf->def.chan->band;
@@ -637,7 +637,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid,
/* make sure mandatory rates are always added */
sband = local->hw.wiphy->bands[band];
- sta->sta.supp_rates[band] = supp_rates |
+ sta->sta.deflink.supp_rates[band] = supp_rates |
ieee80211_mandatory_rates(sband, scan_width);
return ieee80211_ibss_finish_sta(sta);
@@ -708,14 +708,13 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata)
netif_carrier_off(sdata->dev);
- sdata->vif.bss_conf.ibss_joined = false;
- sdata->vif.bss_conf.ibss_creator = false;
+ sdata->vif.cfg.ibss_joined = false;
+ sdata->vif.cfg.ibss_creator = false;
sdata->vif.bss_conf.enable_beacon = false;
- sdata->vif.bss_conf.ssid_len = 0;
+ sdata->vif.cfg.ssid_len = 0;
/* remove beacon */
- presp = rcu_dereference_protected(ifibss->presp,
- lockdep_is_held(&sdata->wdev.mtx));
+ presp = sdata_dereference(ifibss->presp, sdata);
RCU_INIT_POINTER(sdata->u.ibss.presp, NULL);
if (presp)
kfree_rcu(presp, rcu_head);
@@ -725,7 +724,7 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata)
BSS_CHANGED_IBSS);
drv_leave_ibss(local, sdata);
mutex_lock(&local->mtx);
- ieee80211_vif_release_channel(sdata);
+ ieee80211_link_release_channel(&sdata->deflink);
mutex_unlock(&local->mtx);
}
@@ -773,20 +772,21 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
enum nl80211_channel_type ch_type;
int err;
- u32 sta_flags;
+ ieee80211_conn_flags_t conn_flags;
u32 vht_cap_info = 0;
sdata_assert_lock(sdata);
- sta_flags = IEEE80211_STA_DISABLE_VHT;
+ conn_flags = IEEE80211_CONN_DISABLE_VHT;
+
switch (ifibss->chandef.width) {
case NL80211_CHAN_WIDTH_5:
case NL80211_CHAN_WIDTH_10:
case NL80211_CHAN_WIDTH_20_NOHT:
- sta_flags |= IEEE80211_STA_DISABLE_HT;
+ conn_flags |= IEEE80211_CONN_DISABLE_HT;
fallthrough;
case NL80211_CHAN_WIDTH_20:
- sta_flags |= IEEE80211_STA_DISABLE_40MHZ;
+ conn_flags |= IEEE80211_CONN_DISABLE_40MHZ;
break;
default:
break;
@@ -799,7 +799,7 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
err = ieee80211_parse_ch_switch_ie(sdata, elems,
ifibss->chandef.chan->band,
vht_cap_info,
- sta_flags, ifibss->bssid, &csa_ie);
+ conn_flags, ifibss->bssid, &csa_ie);
/* can't switch to destination channel, fail */
if (err < 0)
goto disconnect;
@@ -842,7 +842,7 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
}
break;
default:
- /* should not happen, sta_flags should prevent VHT modes. */
+ /* should not happen, conn_flags should prevent VHT modes. */
WARN_ON(1);
goto disconnect;
}
@@ -923,7 +923,7 @@ ieee80211_rx_mgmt_spectrum_mgmt(struct ieee80211_sub_if_data *sdata,
if (len < required_len)
return;
- if (!sdata->vif.csa_active)
+ if (!sdata->vif.bss_conf.csa_active)
ieee80211_ibss_process_chanswitch(sdata, elems, false);
}
@@ -1005,7 +1005,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
if (sta) {
u32 prev_rates;
- prev_rates = sta->sta.supp_rates[band];
+ prev_rates = sta->sta.deflink.supp_rates[band];
/* make sure mandatory rates are always added */
scan_width = NL80211_BSS_CHAN_WIDTH_20;
if (rx_status->bw == RATE_INFO_BW_5)
@@ -1013,13 +1013,13 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
else if (rx_status->bw == RATE_INFO_BW_10)
scan_width = NL80211_BSS_CHAN_WIDTH_10;
- sta->sta.supp_rates[band] = supp_rates |
+ sta->sta.deflink.supp_rates[band] = supp_rates |
ieee80211_mandatory_rates(sband, scan_width);
- if (sta->sta.supp_rates[band] != prev_rates) {
+ if (sta->sta.deflink.supp_rates[band] != prev_rates) {
ibss_dbg(sdata,
"updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n",
sta->sta.addr, prev_rates,
- sta->sta.supp_rates[band]);
+ sta->sta.deflink.supp_rates[band]);
rates_updated = true;
}
} else {
@@ -1043,7 +1043,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
/* we both use HT */
struct ieee80211_ht_cap htcap_ie;
struct cfg80211_chan_def chandef;
- enum ieee80211_sta_rx_bandwidth bw = sta->sta.bandwidth;
+ enum ieee80211_sta_rx_bandwidth bw = sta->sta.deflink.bandwidth;
cfg80211_chandef_create(&chandef, channel, NL80211_CHAN_NO_HT);
ieee80211_chandef_ht_oper(elems->ht_operation, &chandef);
@@ -1051,14 +1051,14 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
memcpy(&htcap_ie, elems->ht_cap_elem, sizeof(htcap_ie));
rates_updated |= ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
&htcap_ie,
- sta);
+ &sta->deflink);
if (elems->vht_operation && elems->vht_cap_elem &&
sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_20 &&
sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_40) {
/* we both use VHT */
struct ieee80211_vht_cap cap_ie;
- struct ieee80211_sta_vht_cap cap = sta->sta.vht_cap;
+ struct ieee80211_sta_vht_cap cap = sta->sta.deflink.vht_cap;
u32 vht_cap_info =
le32_to_cpu(elems->vht_cap_elem->vht_cap_info);
@@ -1068,12 +1068,13 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
&chandef);
memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie));
ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
- &cap_ie, sta);
- if (memcmp(&cap, &sta->sta.vht_cap, sizeof(cap)))
+ &cap_ie,
+ &sta->deflink);
+ if (memcmp(&cap, &sta->sta.deflink.vht_cap, sizeof(cap)))
rates_updated |= true;
}
- if (bw != sta->sta.bandwidth)
+ if (bw != sta->sta.deflink.bandwidth)
rates_updated |= true;
if (!cfg80211_chandef_compatible(&sdata->u.ibss.chandef,
@@ -1083,12 +1084,12 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
if (sta && rates_updated) {
u32 changed = IEEE80211_RC_SUPP_RATES_CHANGED;
- u8 rx_nss = sta->sta.rx_nss;
+ u8 rx_nss = sta->sta.deflink.rx_nss;
/* Force rx_nss recalculation */
- sta->sta.rx_nss = 0;
+ sta->sta.deflink.rx_nss = 0;
rate_control_rate_init(sta);
- if (sta->sta.rx_nss != rx_nss)
+ if (sta->sta.deflink.rx_nss != rx_nss)
changed |= IEEE80211_RC_NSS_CHANGED;
drv_sta_rc_update(local, sdata, &sta->sta, changed);
@@ -1143,7 +1144,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
goto put_bss;
/* process channel switch */
- if (sdata->vif.csa_active ||
+ if (sdata->vif.bss_conf.csa_active ||
ieee80211_ibss_process_chanswitch(sdata, elems, true))
goto put_bss;
@@ -1220,7 +1221,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
return;
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
if (WARN_ON_ONCE(!chanctx_conf)) {
rcu_read_unlock();
return;
@@ -1235,7 +1236,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
/* make sure mandatory rates are always added */
sband = local->hw.wiphy->bands[band];
- sta->sta.supp_rates[band] = supp_rates |
+ sta->sta.deflink.supp_rates[band] = supp_rates |
ieee80211_mandatory_rates(sband, scan_width);
spin_lock(&ifibss->incomplete_lock);
@@ -1349,10 +1350,10 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata)
capability, 0, true);
}
-static unsigned ibss_setup_channels(struct wiphy *wiphy,
- struct ieee80211_channel **channels,
- unsigned int channels_max,
- u32 center_freq, u32 width)
+static unsigned int ibss_setup_channels(struct wiphy *wiphy,
+ struct ieee80211_channel **channels,
+ unsigned int channels_max,
+ u32 center_freq, u32 width)
{
struct ieee80211_channel *chan = NULL;
unsigned int n_chan = 0;
@@ -1529,8 +1530,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
sdata_assert_lock(sdata);
- presp = rcu_dereference_protected(ifibss->presp,
- lockdep_is_held(&sdata->wdev.mtx));
+ presp = sdata_dereference(ifibss->presp, sdata);
if (ifibss->state != IEEE80211_IBSS_MLME_JOINED ||
len < 24 + 2 || !presp)
@@ -1603,8 +1603,7 @@ void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata,
return;
elems = ieee802_11_parse_elems(mgmt->u.probe_resp.variable,
- len - baselen, false,
- mgmt->bssid, NULL);
+ len - baselen, false, NULL);
if (elems) {
ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, elems);
@@ -1657,7 +1656,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
elems = ieee802_11_parse_elems(
mgmt->u.action.u.chan_switch.variable,
- ies_len, true, mgmt->bssid, NULL);
+ ies_len, true, NULL);
if (elems && !elems->parse_error)
ieee80211_rx_mgmt_spectrum_mgmt(sdata, mgmt,
@@ -1851,10 +1850,10 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
| IEEE80211_HT_PARAM_RIFS_MODE;
changed |= BSS_CHANGED_HT | BSS_CHANGED_MCAST_RATE;
- ieee80211_bss_info_change_notify(sdata, changed);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed);
- sdata->smps_mode = IEEE80211_SMPS_OFF;
- sdata->needed_rx_chains = local->rx_chains;
+ sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
+ sdata->deflink.needed_rx_chains = local->rx_chains;
sdata->control_port_over_nl80211 = params->control_port_over_nl80211;
ieee80211_queue_work(&local->hw, &sdata->work);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 330ea62231fa..a842f2e1c230 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*/
#ifndef IEEE80211_I_H
@@ -83,6 +83,13 @@ extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS];
#define IEEE80211_MAX_NAN_INSTANCE_ID 255
+
+/*
+ * Keep a station's queues on the active list for deficit accounting purposes
+ * if it was active or queued during the last 100ms
+ */
+#define AIRTIME_ACTIVE_DURATION (HZ / 10)
+
struct ieee80211_bss {
u32 device_ts_beacon, device_ts_presp;
@@ -204,7 +211,9 @@ struct ieee80211_rx_data {
struct sk_buff *skb;
struct ieee80211_local *local;
struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_link_data *link;
struct sta_info *sta;
+ struct link_sta_info *link_sta;
struct ieee80211_key *key;
unsigned int flags;
@@ -224,6 +233,8 @@ struct ieee80211_rx_data {
*/
int security_idx;
+ int link_id;
+
union {
struct {
u32 iv32;
@@ -257,6 +268,7 @@ struct beacon_data {
struct ieee80211_meshconf_ie *meshconf;
u16 cntdwn_counter_offsets[IEEE80211_MAX_CNTDWN_COUNTERS_NUM];
u8 cntdwn_current_counter;
+ struct cfg80211_mbssid_elems *mbssid_ies;
struct rcu_head rcu_head;
};
@@ -292,19 +304,13 @@ struct ps_data {
};
struct ieee80211_if_ap {
- struct beacon_data __rcu *beacon;
- struct probe_resp __rcu *probe_resp;
- struct fils_discovery_data __rcu *fils_discovery;
- struct unsol_bcast_probe_resp_data __rcu *unsol_bcast_probe_resp;
-
- /* to be used after channel switch. */
- struct cfg80211_beacon_data *next_beacon;
struct list_head vlans; /* write-protected with RTNL and local->mtx */
struct ps_data ps;
atomic_t num_mcast_sta; /* number of stations receiving multicast */
bool multicast_to_unicast;
+ bool active;
};
struct ieee80211_if_vlan {
@@ -354,18 +360,23 @@ struct ieee80211_roc_work {
enum ieee80211_sta_flags {
IEEE80211_STA_CONNECTION_POLL = BIT(1),
IEEE80211_STA_CONTROL_PORT = BIT(2),
- IEEE80211_STA_DISABLE_HT = BIT(4),
IEEE80211_STA_MFP_ENABLED = BIT(6),
IEEE80211_STA_UAPSD_ENABLED = BIT(7),
IEEE80211_STA_NULLFUNC_ACKED = BIT(8),
- IEEE80211_STA_RESET_SIGNAL_AVE = BIT(9),
- IEEE80211_STA_DISABLE_40MHZ = BIT(10),
- IEEE80211_STA_DISABLE_VHT = BIT(11),
- IEEE80211_STA_DISABLE_80P80MHZ = BIT(12),
- IEEE80211_STA_DISABLE_160MHZ = BIT(13),
- IEEE80211_STA_DISABLE_WMM = BIT(14),
IEEE80211_STA_ENABLE_RRM = BIT(15),
- IEEE80211_STA_DISABLE_HE = BIT(16),
+};
+
+typedef u32 __bitwise ieee80211_conn_flags_t;
+
+enum ieee80211_conn_flags {
+ IEEE80211_CONN_DISABLE_HT = (__force ieee80211_conn_flags_t)BIT(0),
+ IEEE80211_CONN_DISABLE_40MHZ = (__force ieee80211_conn_flags_t)BIT(1),
+ IEEE80211_CONN_DISABLE_VHT = (__force ieee80211_conn_flags_t)BIT(2),
+ IEEE80211_CONN_DISABLE_80P80MHZ = (__force ieee80211_conn_flags_t)BIT(3),
+ IEEE80211_CONN_DISABLE_160MHZ = (__force ieee80211_conn_flags_t)BIT(4),
+ IEEE80211_CONN_DISABLE_HE = (__force ieee80211_conn_flags_t)BIT(5),
+ IEEE80211_CONN_DISABLE_EHT = (__force ieee80211_conn_flags_t)BIT(6),
+ IEEE80211_CONN_DISABLE_320MHZ = (__force ieee80211_conn_flags_t)BIT(7),
};
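/*
 * ieee80211_conn_flags_t is a sparse __bitwise type: mixing it with
 * plain integers (or the old IEEE80211_STA_* flags) warns under
 * sparse, hence the __force cast on each BIT() value. The
 * per-connection DISABLE_* flags move out of the sta flags above so
 * they can be tracked per link (see conn_flags in
 * struct ieee80211_link_data_managed below).
 */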
struct ieee80211_mgd_auth_data {
@@ -376,41 +387,59 @@ struct ieee80211_mgd_auth_data {
u8 key[WLAN_KEY_LEN_WEP104];
u8 key_len, key_idx;
- bool done;
+ bool done, waiting;
bool peer_confirmed;
bool timeout_started;
+ u8 ap_addr[ETH_ALEN] __aligned(2);
+
u16 sae_trans, sae_status;
size_t data_len;
u8 data[];
};
struct ieee80211_mgd_assoc_data {
- struct cfg80211_bss *bss;
+ struct {
+ struct cfg80211_bss *bss;
+
+ u8 addr[ETH_ALEN] __aligned(2);
+
+ u8 ap_ht_param;
+
+ struct ieee80211_vht_cap ap_vht_cap;
+
+ size_t elems_len;
+ u8 *elems; /* pointing to inside ie[] below */
+
+ ieee80211_conn_flags_t conn_flags;
+ } link[IEEE80211_MLD_MAX_NUM_LINKS];
+
+ u8 ap_addr[ETH_ALEN] __aligned(2);
+
+ /* this is for a workaround, so we use it only for non-MLO */
const u8 *supp_rates;
+ u8 supp_rates_len;
unsigned long timeout;
int tries;
- u16 capability;
- u8 prev_bssid[ETH_ALEN];
+ u8 prev_ap_addr[ETH_ALEN];
u8 ssid[IEEE80211_MAX_SSID_LEN];
u8 ssid_len;
- u8 supp_rates_len;
bool wmm, uapsd;
bool need_beacon;
bool synced;
bool timeout_started;
+ bool s1g;
- u8 ap_ht_param;
-
- struct ieee80211_vht_cap ap_vht_cap;
+ unsigned int assoc_link_id;
u8 fils_nonces[2 * FILS_NONCE_LEN];
u8 fils_kek[FILS_MAX_KEK_LEN];
size_t fils_kek_len;
size_t ie_len;
+ u8 *ie_pos; /* used to fill ie[] with link[].elems */
u8 ie[];
};
@@ -438,9 +467,7 @@ struct ieee80211_if_managed {
struct timer_list timer;
struct timer_list conn_mon_timer;
struct timer_list bcn_mon_timer;
- struct timer_list chswitch_timer;
struct work_struct monitor_work;
- struct work_struct chswitch_work;
struct work_struct beacon_connection_loss_work;
struct work_struct csa_connection_drop_work;
@@ -450,31 +477,17 @@ struct ieee80211_if_managed {
bool nullfunc_failed;
u8 connection_loss:1,
driver_disconnect:1,
- reconnect:1;
+ reconnect:1,
+ associated:1;
- struct cfg80211_bss *associated;
struct ieee80211_mgd_auth_data *auth_data;
struct ieee80211_mgd_assoc_data *assoc_data;
- u8 bssid[ETH_ALEN] __aligned(2);
-
bool powersave; /* powersave requested for this iface */
bool broken_ap; /* AP is broken -- turn off powersave */
- bool have_beacon;
- u8 dtim_period;
- enum ieee80211_smps_mode req_smps, /* requested smps mode */
- driver_smps_mode; /* smps mode request */
-
- struct work_struct request_smps_work;
unsigned int flags;
- bool csa_waiting_bcn;
- bool csa_ignored_same_chan;
-
- bool beacon_crc_valid;
- u32 beacon_crc;
-
bool status_acked;
bool status_received;
__le16 status_fc;
@@ -499,39 +512,14 @@ struct ieee80211_if_managed {
*/
unsigned int uapsd_max_sp_len;
- int wmm_last_param_set;
- int mu_edca_last_param_set;
-
u8 use_4addr;
- s16 p2p_noa_index;
-
- struct ewma_beacon_signal ave_beacon_signal;
-
- /*
- * Number of Beacon frames used in ave_beacon_signal. This can be used
- * to avoid generating less reliable cqm events that would be based
- * only on couple of received frames.
- */
- unsigned int count_beacon_signal;
-
- /* Number of times beacon loss was invoked. */
- unsigned int beacon_loss_count;
-
- /*
- * Last Beacon frame signal strength average (ave_beacon_signal / 16)
- * that triggered a cqm event. 0 indicates that no event has been
- * generated for the current association.
- */
- int last_cqm_event_signal;
-
/*
* State variables for keeping track of RSSI of the AP currently
* connected to and informing driver when RSSI has gone
* below/above a certain threshold.
*/
int rssi_min_thold, rssi_max_thold;
- int last_ave_beacon_signal;
struct ieee80211_ht_cap ht_capa; /* configured ht-cap over-rides */
struct ieee80211_ht_cap ht_capa_mask; /* Valid parts of ht_capa */
@@ -546,7 +534,6 @@ struct ieee80211_if_managed {
struct sk_buff *orig_teardown_skb; /* The original teardown skb */
struct sk_buff *teardown_skb; /* A copy to send through the AP */
spinlock_t teardown_lock; /* To lock changing teardown_skb */
- bool tdls_chan_switch_prohibited;
bool tdls_wider_bw_prohibited;
/* WMM-AC TSPEC support */
@@ -759,19 +746,20 @@ struct ieee80211_if_mesh {
* enum ieee80211_sub_if_data_flags - virtual interface flags
*
* @IEEE80211_SDATA_ALLMULTI: interface wants all multicast packets
- * @IEEE80211_SDATA_OPERATING_GMODE: operating in G-only mode
* @IEEE80211_SDATA_DONT_BRIDGE_PACKETS: bridge packets between
* associated stations and deliver multicast frames both
* back to wireless media and to the local net stack.
* @IEEE80211_SDATA_DISCONNECT_RESUME: Disconnect after resume.
* @IEEE80211_SDATA_IN_DRIVER: indicates interface was added to driver
+ * @IEEE80211_SDATA_DISCONNECT_HW_RESTART: Disconnect after hardware restart
+ * recovery
*/
enum ieee80211_sub_if_data_flags {
IEEE80211_SDATA_ALLMULTI = BIT(0),
- IEEE80211_SDATA_OPERATING_GMODE = BIT(2),
IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3),
IEEE80211_SDATA_DISCONNECT_RESUME = BIT(4),
IEEE80211_SDATA_IN_DRIVER = BIT(5),
+ IEEE80211_SDATA_DISCONNECT_HW_RESTART = BIT(6),
};
/**
@@ -826,8 +814,8 @@ struct ieee80211_chanctx {
struct list_head list;
struct rcu_head rcu_head;
- struct list_head assigned_vifs;
- struct list_head reserved_vifs;
+ struct list_head assigned_links;
+ struct list_head reserved_links;
enum ieee80211_chanctx_replace_state replace_state;
struct ieee80211_chanctx *replace_ctx;
@@ -857,16 +845,20 @@ enum txq_info_flags {
* @def_flow: used as a fallback flow when a packet destined to @tin hashes to
* a fq_flow which is already owned by a different tin
* @def_cvars: codel vars for @def_flow
- * @schedule_order: used with ieee80211_local->active_txqs
* @frags: used to keep fragments created after dequeue
+ * @schedule_order: used with ieee80211_local->active_txqs
+ * @schedule_round: counter to prevent infinite loops on TXQ scheduling
*/
struct txq_info {
struct fq_tin tin;
struct codel_vars def_cvars;
struct codel_stats cstats;
- struct rb_node schedule_order;
+
+ u16 schedule_round;
+ struct list_head schedule_order;
struct sk_buff_head frags;
+
unsigned long flags;
/* keep last! */
@@ -894,6 +886,117 @@ struct ieee80211_if_nan {
struct idr function_inst_ids;
};
+struct ieee80211_link_data_managed {
+ u8 bssid[ETH_ALEN] __aligned(2);
+
+ u8 dtim_period;
+ enum ieee80211_smps_mode req_smps, /* requested smps mode */
+ driver_smps_mode; /* smps mode request */
+
+ ieee80211_conn_flags_t conn_flags;
+
+ s16 p2p_noa_index;
+
+ bool tdls_chan_switch_prohibited;
+
+ bool have_beacon;
+ bool tracking_signal_avg;
+ bool disable_wmm_tracking;
+ bool operating_11g_mode;
+
+ bool csa_waiting_bcn;
+ bool csa_ignored_same_chan;
+ struct timer_list chswitch_timer;
+ struct work_struct chswitch_work;
+
+ struct work_struct request_smps_work;
+ bool beacon_crc_valid;
+ u32 beacon_crc;
+ struct ewma_beacon_signal ave_beacon_signal;
+ int last_ave_beacon_signal;
+
+ /*
+ * Number of Beacon frames used in ave_beacon_signal. This can be used
+ * to avoid generating less reliable cqm events that would be based
+	 * only on a couple of received frames.
+ */
+ unsigned int count_beacon_signal;
+
+ /* Number of times beacon loss was invoked. */
+ unsigned int beacon_loss_count;
+
+ /*
+ * Last Beacon frame signal strength average (ave_beacon_signal / 16)
+ * that triggered a cqm event. 0 indicates that no event has been
+ * generated for the current association.
+ */
+ int last_cqm_event_signal;
+
+ int wmm_last_param_set;
+ int mu_edca_last_param_set;
+
+ struct cfg80211_bss *bss;
+};
+
+struct ieee80211_link_data_ap {
+ struct beacon_data __rcu *beacon;
+ struct probe_resp __rcu *probe_resp;
+ struct fils_discovery_data __rcu *fils_discovery;
+ struct unsol_bcast_probe_resp_data __rcu *unsol_bcast_probe_resp;
+
+ /* to be used after channel switch. */
+ struct cfg80211_beacon_data *next_beacon;
+};
+
+struct ieee80211_link_data {
+ struct ieee80211_sub_if_data *sdata;
+ unsigned int link_id;
+
+ struct list_head assigned_chanctx_list; /* protected by chanctx_mtx */
+ struct list_head reserved_chanctx_list; /* protected by chanctx_mtx */
+
+ /* multicast keys only */
+ struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS +
+ NUM_DEFAULT_MGMT_KEYS +
+ NUM_DEFAULT_BEACON_KEYS];
+ struct ieee80211_key __rcu *default_multicast_key;
+ struct ieee80211_key __rcu *default_mgmt_key;
+ struct ieee80211_key __rcu *default_beacon_key;
+
+ struct work_struct csa_finalize_work;
+ bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */
+
+ bool operating_11g_mode;
+
+ struct cfg80211_chan_def csa_chandef;
+
+ struct work_struct color_change_finalize_work;
+
+ /* context reservation -- protected with chanctx_mtx */
+ struct ieee80211_chanctx *reserved_chanctx;
+ struct cfg80211_chan_def reserved_chandef;
+ bool reserved_radar_required;
+ bool reserved_ready;
+
+ u8 needed_rx_chains;
+ enum ieee80211_smps_mode smps_mode;
+
+ int user_power_level; /* in dBm */
+ int ap_power_level; /* in dBm */
+
+ bool radar_required;
+ struct delayed_work dfs_cac_timer_work;
+
+ union {
+ struct ieee80211_link_data_managed mgd;
+ struct ieee80211_link_data_ap ap;
+ } u;
+
+ struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS];
+
+ struct ieee80211_bss_conf *conf;
+};
+
struct ieee80211_sub_if_data {
struct list_head list;
@@ -924,42 +1027,19 @@ struct ieee80211_sub_if_data {
/* bit field of ACM bits (BIT(802.1D tag)) */
u8 wmm_acm;
- struct ieee80211_key __rcu *keys[NUM_DEFAULT_KEYS +
- NUM_DEFAULT_MGMT_KEYS +
- NUM_DEFAULT_BEACON_KEYS];
+ struct ieee80211_key __rcu *keys[NUM_DEFAULT_KEYS];
struct ieee80211_key __rcu *default_unicast_key;
- struct ieee80211_key __rcu *default_multicast_key;
- struct ieee80211_key __rcu *default_mgmt_key;
- struct ieee80211_key __rcu *default_beacon_key;
u16 sequence_number;
+ u16 mld_mcast_seq;
__be16 control_port_protocol;
bool control_port_no_encrypt;
bool control_port_no_preauth;
bool control_port_over_nl80211;
- int encrypt_headroom;
atomic_t num_tx_queued;
- struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS];
struct mac80211_qos_map __rcu *qos_map;
- struct airtime_info airtime[IEEE80211_NUM_ACS];
-
- struct work_struct csa_finalize_work;
- bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */
- struct cfg80211_chan_def csa_chandef;
-
- struct work_struct color_change_finalize_work;
-
- struct list_head assigned_chanctx_list; /* protected by chanctx_mtx */
- struct list_head reserved_chanctx_list; /* protected by chanctx_mtx */
-
- /* context reservation -- protected with chanctx_mtx */
- struct ieee80211_chanctx *reserved_chanctx;
- struct cfg80211_chan_def reserved_chandef;
- bool reserved_radar_required;
- bool reserved_ready;
-
/* used to reconfigure hardware SM PS */
struct work_struct recalc_smps;
@@ -967,15 +1047,6 @@ struct ieee80211_sub_if_data {
struct sk_buff_head skb_queue;
struct sk_buff_head status_queue;
- u8 needed_rx_chains;
- enum ieee80211_smps_mode smps_mode;
-
- int user_power_level; /* in dBm */
- int ap_power_level; /* in dBm */
-
- bool radar_required;
- struct delayed_work dfs_cac_timer_work;
-
/*
* AP this belongs to: self in AP mode and
* corresponding AP in VLAN mode, NULL for
@@ -1007,6 +1078,13 @@ struct ieee80211_sub_if_data {
struct ieee80211_if_nan nan;
} u;
+ struct ieee80211_link_data deflink;
+ struct ieee80211_link_data __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS];
+
+ /* for ieee80211_set_active_links_async() */
+ struct work_struct activate_links_work;
+ u16 desired_active_links;
+
#ifdef CONFIG_MAC80211_DEBUGFS
struct {
struct dentry *subdir_stations;
@@ -1051,9 +1129,9 @@ sdata_assert_lock(struct ieee80211_sub_if_data *sdata)
}
static inline int
-ieee80211_chandef_get_shift(struct cfg80211_chan_def *chandef)
+ieee80211_chanwidth_get_shift(enum nl80211_chan_width width)
{
- switch (chandef->width) {
+ switch (width) {
case NL80211_CHAN_WIDTH_5:
return 2;
case NL80211_CHAN_WIDTH_10:
@@ -1064,13 +1142,19 @@ ieee80211_chandef_get_shift(struct cfg80211_chan_def *chandef)
}
static inline int
+ieee80211_chandef_get_shift(struct cfg80211_chan_def *chandef)
+{
+ return ieee80211_chanwidth_get_shift(chandef->width);
+}
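(Worked example, editorial: the shift scales bitrates on half/quarter-rate
channels. A 54 Mbps rate-table entry is stored as 540 in units of 100 kbps;
on a 5 MHz channel the shift is 2, so the effective rate is 540 >> 2 = 135,
i.e. 13.5 Mbps.)

	u16 rate = 540 >> ieee80211_chanwidth_get_shift(NL80211_CHAN_WIDTH_5);
	/* rate == 135, i.e. 13.5 Mbps on a 5 MHz (quarter-rate) channel */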
+
+static inline int
ieee80211_vif_get_shift(struct ieee80211_vif *vif)
{
struct ieee80211_chanctx_conf *chanctx_conf;
int shift = 0;
rcu_read_lock();
- chanctx_conf = rcu_dereference(vif->chanctx_conf);
+ chanctx_conf = rcu_dereference(vif->bss_conf.chanctx_conf);
if (chanctx_conf)
shift = ieee80211_chandef_get_shift(&chanctx_conf->def);
rcu_read_unlock();
@@ -1078,6 +1162,20 @@ ieee80211_vif_get_shift(struct ieee80211_vif *vif)
return shift;
}
+static inline int
+ieee80211_get_mbssid_beacon_len(struct cfg80211_mbssid_elems *elems)
+{
+ int i, len = 0;
+
+ if (!elems)
+ return 0;
+
+ for (i = 0; i < elems->cnt; i++)
+ len += elems->elem[i].len;
+
+ return len;
+}
+
enum {
IEEE80211_RX_MSG = 1,
IEEE80211_TX_STATUS_MSG = 2,
@@ -1128,6 +1226,9 @@ struct tpt_led_trigger {
* a scan complete for an aborted scan.
* @SCAN_HW_CANCELLED: Set for our scan work function when the scan is being
* cancelled.
+ * @SCAN_BEACON_WAIT: Set whenever we're passive scanning because of radar/no-IR
+ * and could send a probe request after receiving a beacon.
+ * @SCAN_BEACON_DONE: Beacon received, we can now send a probe request
*/
enum {
SCAN_SW_SCANNING,
@@ -1136,6 +1237,8 @@ enum {
SCAN_COMPLETED,
SCAN_ABORTED,
SCAN_HW_CANCELLED,
+ SCAN_BEACON_WAIT,
+ SCAN_BEACON_DONE,
};
/**
@@ -1160,44 +1263,6 @@ enum mac80211_scan_state {
SCAN_ABORT,
};
-/**
- * struct airtime_sched_info - state used for airtime scheduling and AQL
- *
- * @lock: spinlock that protects all the fields in this struct
- * @active_txqs: rbtree of currently backlogged queues, sorted by virtual time
- * @schedule_pos: the current position maintained while a driver walks the tree
- * with ieee80211_next_txq()
- * @active_list: list of struct airtime_info structs that were active within
- * the last AIRTIME_ACTIVE_DURATION (100 ms), used to compute
- * weight_sum
- * @last_weight_update: used for rate limiting walking active_list
- * @last_schedule_time: tracks the last time a transmission was scheduled; used
- * for catching up v_t if no stations are eligible for
- * transmission.
- * @v_t: global virtual time; queues with v_t < this are eligible for
- * transmission
- * @weight_sum: total sum of all active stations used for dividing airtime
- * @weight_sum_reciprocal: reciprocal of weight_sum (to avoid divisions in fast
- * path - see comment above
- * IEEE80211_RECIPROCAL_DIVISOR_64)
- * @aql_txq_limit_low: AQL limit when total outstanding airtime
- * is < IEEE80211_AQL_THRESHOLD
- * @aql_txq_limit_high: AQL limit when total outstanding airtime
- * is > IEEE80211_AQL_THRESHOLD
- */
-struct airtime_sched_info {
- spinlock_t lock;
- struct rb_root_cached active_txqs;
- struct rb_node *schedule_pos;
- struct list_head active_list;
- u64 last_weight_update;
- u64 last_schedule_activity;
- u64 v_t;
- u64 weight_sum;
- u64 weight_sum_reciprocal;
- u32 aql_txq_limit_low;
- u32 aql_txq_limit_high;
-};
DECLARE_STATIC_KEY_FALSE(aql_disable);
struct ieee80211_local {
@@ -1211,10 +1276,16 @@ struct ieee80211_local {
struct codel_params cparams;
/* protects active_txqs and txqi->schedule_order */
- struct airtime_sched_info airtime[IEEE80211_NUM_ACS];
+ spinlock_t active_txq_lock[IEEE80211_NUM_ACS];
+ struct list_head active_txqs[IEEE80211_NUM_ACS];
+ u16 schedule_round[IEEE80211_NUM_ACS];
+
u16 airtime_flags;
+ u32 aql_txq_limit_low[IEEE80211_NUM_ACS];
+ u32 aql_txq_limit_high[IEEE80211_NUM_ACS];
u32 aql_threshold;
atomic_t aql_total_pending_airtime;
+ atomic_t aql_ac_pending_airtime[IEEE80211_NUM_ACS];
const struct ieee80211_ops *ops;
@@ -1318,6 +1389,7 @@ struct ieee80211_local {
unsigned long num_sta;
struct list_head sta_list;
struct rhltable sta_hash;
+ struct rhltable link_sta_hash;
struct timer_list sta_cleanup;
int sta_generation;
@@ -1501,9 +1573,31 @@ ieee80211_get_sband(struct ieee80211_sub_if_data *sdata)
struct ieee80211_chanctx_conf *chanctx_conf;
enum nl80211_band band;
+ WARN_ON(sdata->vif.valid_links);
+
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
+
+ if (!chanctx_conf) {
+ rcu_read_unlock();
+ return NULL;
+ }
+
+ band = chanctx_conf->def.chan->band;
+ rcu_read_unlock();
+
+ return local->hw.wiphy->bands[band];
+}
+
+static inline struct ieee80211_supported_band *
+ieee80211_get_link_sband(struct ieee80211_link_data *link)
+{
+ struct ieee80211_local *local = link->sdata->local;
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ enum nl80211_band band;
+ rcu_read_lock();
+ chanctx_conf = rcu_dereference(link->conf->chanctx_conf);
if (!chanctx_conf) {
rcu_read_unlock();
return NULL;
@@ -1587,6 +1681,9 @@ struct ieee802_11_elems {
const struct ieee80211_s1g_oper_ie *s1g_oper;
const struct ieee80211_s1g_bcn_compat_ie *s1g_bcn_compat;
const struct ieee80211_aid_response_ie *aid_resp;
+ const struct ieee80211_eht_cap_elem *eht_cap;
+ const struct ieee80211_eht_operation *eht_operation;
+ const struct ieee80211_multi_link_elem *multi_link;
/* length of them, respectively */
u8 ext_capab_len;
@@ -1608,9 +1705,18 @@ struct ieee802_11_elems {
u8 bssid_index_len;
u8 tx_pwr_env_len[IEEE80211_TPE_MAX_IE_COUNT];
u8 tx_pwr_env_num;
+ u8 eht_cap_len;
/* whether a parse error occurred while retrieving these elements */
bool parse_error;
+
+ /*
+ * scratch buffer that can be used for various element parsing related
+	 * tasks, e.g. element de-fragmentation.
+ */
+ size_t scratch_len;
+ u8 *scratch_pos;
+ u8 scratch[];
};
static inline struct ieee80211_local *hw_to_local(
@@ -1631,131 +1737,6 @@ static inline bool txq_has_queue(struct ieee80211_txq *txq)
return !(skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets);
}
-static inline struct airtime_info *to_airtime_info(struct ieee80211_txq *txq)
-{
- struct ieee80211_sub_if_data *sdata;
- struct sta_info *sta;
-
- if (txq->sta) {
- sta = container_of(txq->sta, struct sta_info, sta);
- return &sta->airtime[txq->ac];
- }
-
- sdata = vif_to_sdata(txq->vif);
- return &sdata->airtime[txq->ac];
-}
-
-/* To avoid divisions in the fast path, we keep pre-computed reciprocals for
- * airtime weight calculations. There are two different weights to keep track
- * of: The per-station weight and the sum of weights per phy.
- *
- * For the per-station weights (kept in airtime_info below), we use 32-bit
- * reciprocals with a devisor of 2^19. This lets us keep the multiplications and
- * divisions for the station weights as 32-bit operations at the cost of a bit
- * of rounding error for high weights; but the choice of divisor keeps rounding
- * errors <10% for weights <2^15, assuming no more than 8ms of airtime is
- * reported at a time.
- *
- * For the per-phy sum of weights the values can get higher, so we use 64-bit
- * operations for those with a 32-bit divisor, which should avoid any
- * significant rounding errors.
- */
-#define IEEE80211_RECIPROCAL_DIVISOR_64 0x100000000ULL
-#define IEEE80211_RECIPROCAL_SHIFT_64 32
-#define IEEE80211_RECIPROCAL_DIVISOR_32 0x80000U
-#define IEEE80211_RECIPROCAL_SHIFT_32 19
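(Editorial sketch of the scheme being removed above, with illustrative
numbers, not from the patch: dividing by a station weight becomes a
multiply and a shift.)

	/* weight = 256: reciprocal = 0x80000 / 256 = 2048 */
	u32 weight_reciprocal = IEEE80211_RECIPROCAL_DIVISOR_32 / 256;
	/* scale 8000 usec of reported airtime by 1/weight */
	u32 share = ((u64)8000 * weight_reciprocal) >>
		    IEEE80211_RECIPROCAL_SHIFT_32;
	/* share == 31, matching the direct division 8000 / 256 */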
-
-static inline void airtime_weight_set(struct airtime_info *air_info, u16 weight)
-{
- if (air_info->weight == weight)
- return;
-
- air_info->weight = weight;
- if (weight) {
- air_info->weight_reciprocal =
- IEEE80211_RECIPROCAL_DIVISOR_32 / weight;
- } else {
- air_info->weight_reciprocal = 0;
- }
-}
-
-static inline void airtime_weight_sum_set(struct airtime_sched_info *air_sched,
- int weight_sum)
-{
- if (air_sched->weight_sum == weight_sum)
- return;
-
- air_sched->weight_sum = weight_sum;
- if (air_sched->weight_sum) {
- air_sched->weight_sum_reciprocal = IEEE80211_RECIPROCAL_DIVISOR_64;
- do_div(air_sched->weight_sum_reciprocal, air_sched->weight_sum);
- } else {
- air_sched->weight_sum_reciprocal = 0;
- }
-}
-
-/* A problem when trying to enforce airtime fairness is that we want to divide
- * the airtime between the currently *active* stations. However, basing this on
- * the instantaneous queue state of stations doesn't work, as queues tend to
- * oscillate very quickly between empty and occupied, leading to the scheduler
- * thinking only a single station is active when deciding whether to allow
- * transmission (and thus not throttling correctly).
- *
- * To fix this we use a timer-based notion of activity: a station is considered
- * active if it has been scheduled within the last 100 ms; we keep a separate
- * list of all the stations considered active in this manner, and lazily update
- * the total weight of active stations from this list (filtering the stations in
- * the list by their 'last active' time).
- *
- * We add one additional safeguard against stations that manage to get
- * scheduled every 100 ms but don't transmit a lot of data, and thus don't use
- * up any airtime. Such stations would be able to get priority for an extended
- * period of time if they do start transmitting at full capacity again, and so
- * we add an explicit maximum for how far behind a station is allowed to fall in
- * the virtual airtime domain. This limit is set to a relatively high value of
- * 20 ms because the main mechanism for catching up idle stations is the active
- * state as described above; i.e., the hard limit should only be hit in
- * pathological cases.
- */
-#define AIRTIME_ACTIVE_DURATION (100 * NSEC_PER_MSEC)
-#define AIRTIME_MAX_BEHIND 20000 /* 20 ms */
-
-static inline bool airtime_is_active(struct airtime_info *air_info, u64 now)
-{
- return air_info->last_scheduled >= now - AIRTIME_ACTIVE_DURATION;
-}
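(Editorial sketch of the lazy active-list filtering described in the
comment above; the helper name is hypothetical, and the removed code also
recomputed weight_sum while walking the list.)

static void airtime_prune_active_list(struct airtime_sched_info *air_sched,
				      u64 now)
{
	struct airtime_info *air_info, *tmp;

	list_for_each_entry_safe(air_info, tmp,
				 &air_sched->active_list, list) {
		if (!airtime_is_active(air_info, now))
			list_del_init(&air_info->list);
	}
}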
-
-static inline void airtime_set_active(struct airtime_sched_info *air_sched,
- struct airtime_info *air_info, u64 now)
-{
- air_info->last_scheduled = now;
- air_sched->last_schedule_activity = now;
- list_move_tail(&air_info->list, &air_sched->active_list);
-}
-
-static inline bool airtime_catchup_v_t(struct airtime_sched_info *air_sched,
- u64 v_t, u64 now)
-{
- air_sched->v_t = v_t;
- return true;
-}
-
-static inline void init_airtime_info(struct airtime_info *air_info,
- struct airtime_sched_info *air_sched)
-{
- atomic_set(&air_info->aql_tx_pending, 0);
- air_info->aql_limit_low = air_sched->aql_txq_limit_low;
- air_info->aql_limit_high = air_sched->aql_txq_limit_high;
- airtime_weight_set(air_info, IEEE80211_DEFAULT_AIRTIME_WEIGHT);
- INIT_LIST_HEAD(&air_info->list);
-}
-
-static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr)
-{
- return ether_addr_equal(raddr, addr) ||
- is_broadcast_ether_addr(raddr);
-}
-
static inline bool
ieee80211_have_rx_timestamp(struct ieee80211_rx_status *status)
{
@@ -1789,7 +1770,12 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
int ieee80211_hw_config(struct ieee80211_local *local, u32 changed);
void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx);
void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
- u32 changed);
+ u64 changed);
+void ieee80211_vif_cfg_change_notify(struct ieee80211_sub_if_data *sdata,
+ u64 changed);
+void ieee80211_link_info_change_notify(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
+ u64 changed);
void ieee80211_configure_filter(struct ieee80211_local *local);
u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata);
@@ -1802,6 +1788,9 @@ void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata);
void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata);
void ieee80211_clear_fast_rx(struct sta_info *sta);
+bool ieee80211_is_our_addr(struct ieee80211_sub_if_data *sdata,
+ const u8 *addr, int *out_link_id);
+
/* STA code */
void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata);
int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
@@ -1831,7 +1820,10 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata);
void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata);
void ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata);
void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata,
- u8 *bssid, u8 reason, bool tx);
+ u8 reason, bool tx);
+void ieee80211_mgd_setup_link(struct ieee80211_link_data *link);
+void ieee80211_mgd_stop_link(struct ieee80211_link_data *link);
+void ieee80211_mgd_set_link_qos_params(struct ieee80211_link_data *link);
/* IBSS code */
void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local);
@@ -1961,6 +1953,17 @@ static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata)
return test_bit(SDATA_STATE_RUNNING, &sdata->state);
}
+/* link handling */
+void ieee80211_link_setup(struct ieee80211_link_data *link);
+void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
+ int link_id,
+ struct ieee80211_link_data *link,
+ struct ieee80211_bss_conf *link_conf);
+void ieee80211_link_stop(struct ieee80211_link_data *link);
+int ieee80211_vif_set_links(struct ieee80211_sub_if_data *sdata,
+ u16 new_links);
+void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata);
+
/* tx handling */
void ieee80211_clear_tx_pending(struct ieee80211_local *local);
void ieee80211_tx_pending(struct tasklet_struct *t);
@@ -1981,7 +1984,6 @@ struct sk_buff *
ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u32 info_flags);
void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
- struct ieee80211_supported_band *sband,
int retry_count, int shift, bool send_to_cooked,
struct ieee80211_tx_status *status);
@@ -1992,17 +1994,9 @@ void ieee80211_clear_fast_xmit(struct sta_info *sta);
int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
const u8 *buf, size_t len,
const u8 *dest, __be16 proto, bool unencrypted,
- u64 *cookie);
+ int link_id, u64 *cookie);
int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev,
const u8 *buf, size_t len);
-void ieee80211_resort_txq(struct ieee80211_hw *hw,
- struct ieee80211_txq *txq);
-void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
- struct ieee80211_txq *txq,
- bool purge);
-void ieee80211_update_airtime_weight(struct ieee80211_local *local,
- struct airtime_sched_info *air_sched,
- u64 now, bool force);
/* HT */
void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
@@ -2010,15 +2004,13 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
const struct ieee80211_ht_cap *ht_cap_ie,
- struct sta_info *sta);
+ struct link_sta_info *link_sta);
void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
const u8 *da, u16 tid,
u16 initiator, u16 reason_code);
int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
enum ieee80211_smps_mode smps, const u8 *da,
const u8 *bssid);
-void ieee80211_request_smps_ap_work(struct work_struct *work);
-void ieee80211_request_smps_mgd_work(struct work_struct *work);
bool ieee80211_smps_is_restrictive(enum ieee80211_smps_mode smps_mode_old,
enum ieee80211_smps_mode smps_mode_new);
@@ -2066,27 +2058,31 @@ void
ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
const struct ieee80211_vht_cap *vht_cap_ie,
- struct sta_info *sta);
-enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta);
-enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta);
-void ieee80211_sta_set_rx_nss(struct sta_info *sta);
+ struct link_sta_info *link_sta);
+enum ieee80211_sta_rx_bandwidth
+ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta);
+enum ieee80211_sta_rx_bandwidth
+ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta);
+void ieee80211_sta_set_rx_nss(struct link_sta_info *link_sta);
enum ieee80211_sta_rx_bandwidth
ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width);
-enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta);
+enum nl80211_chan_width
+ieee80211_sta_cap_chan_bw(struct link_sta_info *link_sta);
void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
struct ieee80211_mgmt *mgmt);
u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta, u8 opmode,
- enum nl80211_band band);
+ struct link_sta_info *sta,
+ u8 opmode, enum nl80211_band band);
void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta, u8 opmode,
- enum nl80211_band band);
+ struct link_sta_info *sta,
+ u8 opmode, enum nl80211_band band);
void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata,
struct ieee80211_sta_vht_cap *vht_cap);
void ieee80211_get_vht_mask_from_cap(__le16 vht_cap,
u16 vht_mask[NL80211_VHT_NSS_MAX]);
enum nl80211_chan_width
-ieee80211_sta_rx_bw_to_chan_width(struct sta_info *sta);
+ieee80211_sta_rx_bw_to_chan_width(struct link_sta_info *sta);
/* HE */
void
@@ -2094,7 +2090,7 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
const u8 *he_cap_ie, u8 he_cap_len,
const struct ieee80211_he_6ghz_capa *he_6ghz_capa,
- struct sta_info *sta);
+ struct link_sta_info *link_sta);
void
ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif,
const struct ieee80211_he_spr *he_spr_ie_elem);
@@ -2121,12 +2117,9 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
* @elems: parsed 802.11 elements received with the frame
* @current_band: indicates the current band
* @vht_cap_info: VHT capabilities of the transmitter
- * @sta_flags: contains information about own capabilities and restrictions
- * to decide which channel switch announcements can be accepted. Only the
- * following subset of &enum ieee80211_sta_flags are evaluated:
- * %IEEE80211_STA_DISABLE_HT, %IEEE80211_STA_DISABLE_VHT,
- * %IEEE80211_STA_DISABLE_40MHZ, %IEEE80211_STA_DISABLE_80P80MHZ,
- * %IEEE80211_STA_DISABLE_160MHZ.
+ * @conn_flags: contains information about own capabilities and restrictions
+ * to decide which channel switch announcements can be accepted, using
+ * flags from &enum ieee80211_conn_flags.
* @bssid: the currently connected bssid (for reporting)
* @csa_ie: parsed 802.11 csa elements on count, mode, chandef and mesh ttl.
 *	All of them will be filled in only on success.
@@ -2136,7 +2129,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems,
enum nl80211_band current_band,
u32 vht_cap_info,
- u32 sta_flags, u8 *bssid,
+ ieee80211_conn_flags_t conn_flags, u8 *bssid,
struct ieee80211_csa_ie *csa_ie);
/* Suspend/resume and hw reconfiguration */
@@ -2166,13 +2159,13 @@ int ieee80211_frame_duration(enum nl80211_band band, size_t len,
void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
struct ieee80211_tx_queue_params *qparam,
int ac);
-void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
+void ieee80211_set_wmm_default(struct ieee80211_link_data *link,
bool bss_notify, bool enable_qos);
void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta, struct sk_buff *skb);
void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb, int tid,
+ struct sk_buff *skb, int tid, int link_id,
enum nl80211_band band);
/* sta_out needs to be checked for ERR_PTR() before using */
@@ -2186,49 +2179,76 @@ ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
enum nl80211_band band)
{
rcu_read_lock();
- __ieee80211_tx_skb_tid_band(sdata, skb, tid, band);
+ __ieee80211_tx_skb_tid_band(sdata, skb, tid, -1, band);
rcu_read_unlock();
}
-static inline void ieee80211_tx_skb_tid(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb, int tid)
-{
- struct ieee80211_chanctx_conf *chanctx_conf;
-
- rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
- if (WARN_ON(!chanctx_conf)) {
- rcu_read_unlock();
- kfree_skb(skb);
- return;
- }
-
- __ieee80211_tx_skb_tid_band(sdata, skb, tid,
- chanctx_conf->def.chan->band);
- rcu_read_unlock();
-}
+void ieee80211_tx_skb_tid(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb, int tid, int link_id);
static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
/* Send all internal mgmt frames on VO. Accordingly set TID to 7. */
- ieee80211_tx_skb_tid(sdata, skb, 7);
+ ieee80211_tx_skb_tid(sdata, skb, 7, -1);
+}
+
+/**
+ * struct ieee80211_elems_parse_params - element parsing parameters
+ * @start: pointer to the elements
+ * @len: length of the elements
+ * @action: %true if the elements came from an action frame
+ * @filter: bitmap of element IDs to filter out while calculating
+ * the element CRC
+ * @crc: CRC starting value
+ * @bss: the BSS to parse this as; for multi-BSSID cases this can
+ *	represent a non-transmitting BSS, in which case the data
+ *	for that non-transmitting BSS is returned
+ * @link_id: the link ID to parse elements for, if a STA profile
+ * is present in the multi-link element, or -1 to ignore
+ * @from_ap: frame is received from an AP (currently used only
+ * for EHT capabilities parsing)
+ */
+struct ieee80211_elems_parse_params {
+ const u8 *start;
+ size_t len;
+ bool action;
+ u64 filter;
+ u32 crc;
+ struct cfg80211_bss *bss;
+ int link_id;
+ bool from_ap;
+};
+
+struct ieee802_11_elems *
+ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params);
+
+static inline struct ieee802_11_elems *
+ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
+ u64 filter, u32 crc,
+ struct cfg80211_bss *bss)
+{
+ struct ieee80211_elems_parse_params params = {
+ .start = start,
+ .len = len,
+ .action = action,
+ .filter = filter,
+ .crc = crc,
+ .bss = bss,
+ .link_id = -1,
+ };
+
+ return ieee802_11_parse_elems_full(&params);
}
-struct ieee802_11_elems *ieee802_11_parse_elems_crc(const u8 *start, size_t len,
- bool action,
- u64 filter, u32 crc,
- const u8 *transmitter_bssid,
- const u8 *bss_bssid);
static inline struct ieee802_11_elems *
ieee802_11_parse_elems(const u8 *start, size_t len, bool action,
- const u8 *transmitter_bssid,
- const u8 *bss_bssid)
+ struct cfg80211_bss *bss)
{
- return ieee802_11_parse_elems_crc(start, len, action, 0, 0,
- transmitter_bssid, bss_bssid);
+ return ieee802_11_parse_elems_crc(start, len, action, 0, 0, bss);
}
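(Editorial usage sketch: a caller that needs the new link-aware parsing can
fill the parameter struct directly instead of using the wrappers above. The
ies, ies_len and link_id variables are assumed from the caller's context,
and the returned elements must be freed with kfree().)

	struct ieee80211_elems_parse_params params = {
		.start = ies,
		.len = ies_len,
		.action = false,
		.link_id = link_id,	/* or -1 to ignore per-STA profiles */
		.from_ap = true,
	};
	struct ieee802_11_elems *elems;

	elems = ieee802_11_parse_elems_full(&params);
	if (elems) {
		/* e.g. inspect elems->eht_cap, elems->multi_link, ... */
		kfree(elems);
	}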
+void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos);
extern const int ieee802_1d_to_ac[8];
@@ -2363,9 +2383,12 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems,
enum nl80211_band band, u32 *basic_rates);
int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
enum ieee80211_smps_mode smps_mode);
-void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata);
-void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata);
+void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link);
+void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata,
+ int link_id);
size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset);
u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
@@ -2380,13 +2403,14 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
const struct cfg80211_chan_def *chandef);
u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype);
-u8 *ieee80211_ie_build_he_cap(u8 *pos,
+u8 *ieee80211_ie_build_he_cap(ieee80211_conn_flags_t disable_flags, u8 *pos,
const struct ieee80211_sta_he_cap *he_cap,
u8 *end);
void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
+ enum ieee80211_smps_mode smps_mode,
struct sk_buff *skb);
u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef);
-int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
+int ieee80211_parse_bitrates(enum nl80211_chan_width width,
const struct ieee80211_supported_band *sband,
const u8 *srates, int srates_len, u32 *rates);
int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
@@ -2409,34 +2433,38 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info,
const struct ieee80211_vht_operation *oper,
const struct ieee80211_ht_operation *htop,
struct cfg80211_chan_def *chandef);
+void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation *eht_oper,
+ bool support_160, bool support_320,
+ struct cfg80211_chan_def *chandef);
bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
const struct ieee80211_he_operation *he_oper,
+ const struct ieee80211_eht_operation *eht_oper,
struct cfg80211_chan_def *chandef);
bool ieee80211_chandef_s1g_oper(const struct ieee80211_s1g_oper_ie *oper,
struct cfg80211_chan_def *chandef);
-u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c);
+ieee80211_conn_flags_t ieee80211_chandef_downgrade(struct cfg80211_chan_def *c);
int __must_check
-ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
- const struct cfg80211_chan_def *chandef,
- enum ieee80211_chanctx_mode mode);
+ieee80211_link_use_channel(struct ieee80211_link_data *link,
+ const struct cfg80211_chan_def *chandef,
+ enum ieee80211_chanctx_mode mode);
int __must_check
-ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata,
- const struct cfg80211_chan_def *chandef,
- enum ieee80211_chanctx_mode mode,
- bool radar_required);
+ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
+ const struct cfg80211_chan_def *chandef,
+ enum ieee80211_chanctx_mode mode,
+ bool radar_required);
int __must_check
-ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata);
-int ieee80211_vif_unreserve_chanctx(struct ieee80211_sub_if_data *sdata);
+ieee80211_link_use_reserved_context(struct ieee80211_link_data *link);
+int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link);
int __must_check
-ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata,
- const struct cfg80211_chan_def *chandef,
- u32 *changed);
-void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata);
-void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata);
-void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
- bool clear);
+ieee80211_link_change_bandwidth(struct ieee80211_link_data *link,
+ const struct cfg80211_chan_def *chandef,
+ u32 *changed);
+void ieee80211_link_release_channel(struct ieee80211_link_data *link);
+void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link);
+void ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link,
+ bool clear);
int ieee80211_chanctx_refcount(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx);
@@ -2453,14 +2481,6 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work);
int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
struct cfg80211_csa_settings *csa_settings);
-bool ieee80211_cs_valid(const struct ieee80211_cipher_scheme *cs);
-bool ieee80211_cs_list_valid(const struct ieee80211_cipher_scheme *cs, int n);
-const struct ieee80211_cipher_scheme *
-ieee80211_cs_get(struct ieee80211_local *local, u32 cipher,
- enum nl80211_iftype iftype);
-int ieee80211_cs_headroom(struct ieee80211_local *local,
- struct cfg80211_crypto_settings *crypto,
- enum nl80211_iftype iftype);
void ieee80211_recalc_dtim(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata);
int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
@@ -2514,4 +2534,18 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache);
void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache);
+u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata, u8 iftype);
+u8 *ieee80211_ie_build_eht_cap(u8 *pos,
+ const struct ieee80211_sta_he_cap *he_cap,
+ const struct ieee80211_sta_eht_cap *eht_cap,
+ u8 *end,
+ bool for_ap);
+
+void
+ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_supported_band *sband,
+ const u8 *he_cap_ie, u8 he_cap_len,
+ const struct ieee80211_eht_cap_elem *eht_cap_ie_elem,
+ u8 eht_cap_len,
+ struct link_sta_info *link_sta);
#endif /* IEEE80211_I_H */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 41531478437c..dd9ac1f7d2ea 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -8,7 +8,7 @@
* Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (c) 2016 Intel Deutschland GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*/
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -51,7 +51,7 @@ bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata)
int power;
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
if (!chanctx_conf) {
rcu_read_unlock();
return false;
@@ -60,11 +60,11 @@ bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata)
power = ieee80211_chandef_max_power(&chanctx_conf->def);
rcu_read_unlock();
- if (sdata->user_power_level != IEEE80211_UNSET_POWER_LEVEL)
- power = min(power, sdata->user_power_level);
+ if (sdata->deflink.user_power_level != IEEE80211_UNSET_POWER_LEVEL)
+ power = min(power, sdata->deflink.user_power_level);
- if (sdata->ap_power_level != IEEE80211_UNSET_POWER_LEVEL)
- power = min(power, sdata->ap_power_level);
+ if (sdata->deflink.ap_power_level != IEEE80211_UNSET_POWER_LEVEL)
+ power = min(power, sdata->deflink.ap_power_level);
if (power != sdata->vif.bss_conf.txpower) {
sdata->vif.bss_conf.txpower = power;
@@ -80,7 +80,8 @@ void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata,
{
if (__ieee80211_recalc_txpower(sdata) ||
(update_bss && ieee80211_sdata_running(sdata)))
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ BSS_CHANGED_TXPOWER);
}
static u32 __ieee80211_idle_off(struct ieee80211_local *local)
@@ -199,15 +200,73 @@ static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr,
return ret;
}
+static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_roc_work *roc;
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_sub_if_data *scan_sdata;
+ int ret = 0;
+
+	/* To stay as flexible as possible, only refuse the change while this
+	 * specific interface is doing off-channel work or scanning.
+	 */
+ if (netif_carrier_ok(sdata->dev))
+ return -EBUSY;
+
+ mutex_lock(&local->mtx);
+
+ /* First check no ROC work is happening on this iface */
+ list_for_each_entry(roc, &local->roc_list, list) {
+ if (roc->sdata != sdata)
+ continue;
+
+ if (roc->started) {
+ ret = -EBUSY;
+ goto unlock;
+ }
+ }
+
+ /* And if this iface is scanning */
+ if (local->scanning) {
+ scan_sdata = rcu_dereference_protected(local->scan_sdata,
+ lockdep_is_held(&local->mtx));
+ if (sdata == scan_sdata)
+ ret = -EBUSY;
+ }
+
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_STATION:
+ case NL80211_IFTYPE_P2P_CLIENT:
+ /* More interface types could be added here but changing the
+ * address while powered makes the most sense in client modes.
+ */
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ }
+
+unlock:
+ mutex_unlock(&local->mtx);
+ return ret;
+}
+
static int ieee80211_change_mac(struct net_device *dev, void *addr)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = sdata->local;
struct sockaddr *sa = addr;
bool check_dup = true;
+ bool live = false;
int ret;
- if (ieee80211_sdata_running(sdata))
- return -EBUSY;
+ if (ieee80211_sdata_running(sdata)) {
+ ret = ieee80211_can_powered_addr_change(sdata);
+ if (ret)
+ return ret;
+
+ live = true;
+ }
if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE))
@@ -217,10 +276,20 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr)
if (ret)
return ret;
+ if (live)
+ drv_remove_interface(local, sdata);
ret = eth_mac_addr(dev, sa);
- if (ret == 0)
+ if (ret == 0) {
memcpy(sdata->vif.addr, sa->sa_data, ETH_ALEN);
+ ether_addr_copy(sdata->vif.bss_conf.addr, sdata->vif.addr);
+ }
+
+	/* Regardless of the eth_mac_addr() return value, we still want to
+	 * add the interface back. This should not fail...
+ */
+ if (live)
+ WARN_ON(drv_add_interface(local, sdata));
return ret;
}
@@ -275,7 +344,7 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
* will not add another interface while any channel
* switch is active.
*/
- if (nsdata->vif.csa_active)
+ if (nsdata->vif.bss_conf.csa_active)
return -EBUSY;
/*
@@ -293,6 +362,11 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
nsdata->vif.type))
return -ENOTUNIQ;
+ /* No support for VLAN with MLO yet */
+ if (iftype == NL80211_IFTYPE_AP_VLAN &&
+ nsdata->wdev.use_4addr)
+ return -EOPNOTSUPP;
+
/*
* can only add VLANs to enabled APs
*/
@@ -378,6 +452,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
struct cfg80211_nan_func *func;
clear_bit(SDATA_STATE_RUNNING, &sdata->state);
+ synchronize_rcu(); /* flush _ieee80211_wake_txqs() */
cancel_scan = rcu_access_pointer(local->scan_sdata) == sdata;
if (cancel_scan)
@@ -386,7 +461,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
/*
* Stop TX on this interface first.
*/
- if (sdata->dev)
+ if (!local->ops->wake_tx_queue && sdata->dev)
netif_tx_stop_all_queues(sdata->dev);
ieee80211_roc_purge(local, sdata);
@@ -448,29 +523,34 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
cancel_work_sync(&local->dynamic_ps_enable_work);
cancel_work_sync(&sdata->recalc_smps);
+
sdata_lock(sdata);
+ WARN(sdata->vif.valid_links,
+ "destroying interface with valid links 0x%04x\n",
+ sdata->vif.valid_links);
+
mutex_lock(&local->mtx);
- sdata->vif.csa_active = false;
+ sdata->vif.bss_conf.csa_active = false;
if (sdata->vif.type == NL80211_IFTYPE_STATION)
- sdata->u.mgd.csa_waiting_bcn = false;
- if (sdata->csa_block_tx) {
+ sdata->deflink.u.mgd.csa_waiting_bcn = false;
+ if (sdata->deflink.csa_block_tx) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->csa_block_tx = false;
+ sdata->deflink.csa_block_tx = false;
}
mutex_unlock(&local->mtx);
sdata_unlock(sdata);
- cancel_work_sync(&sdata->csa_finalize_work);
- cancel_work_sync(&sdata->color_change_finalize_work);
+ cancel_work_sync(&sdata->deflink.csa_finalize_work);
+ cancel_work_sync(&sdata->deflink.color_change_finalize_work);
- cancel_delayed_work_sync(&sdata->dfs_cac_timer_work);
+ cancel_delayed_work_sync(&sdata->deflink.dfs_cac_timer_work);
if (sdata->wdev.cac_started) {
chandef = sdata->vif.bss_conf.chandef;
WARN_ON(local->suspended);
mutex_lock(&local->mtx);
- ieee80211_vif_release_channel(sdata);
+ ieee80211_link_release_channel(&sdata->deflink);
mutex_unlock(&local->mtx);
cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_ABORTED,
@@ -502,7 +582,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
mutex_lock(&local->mtx);
list_del(&sdata->u.vlan.list);
mutex_unlock(&local->mtx);
- RCU_INIT_POINTER(sdata->vif.chanctx_conf, NULL);
+ RCU_INIT_POINTER(sdata->vif.bss_conf.chanctx_conf, NULL);
/* see comment in the default case below */
ieee80211_free_keys(sdata, true);
/* no need to tell driver */
@@ -674,6 +754,8 @@ static int ieee80211_stop(struct net_device *dev)
ieee80211_stop_mbssid(sdata);
}
+ cancel_work_sync(&sdata->activate_links_work);
+
wiphy_lock(sdata->local->hw.wiphy);
ieee80211_do_stop(sdata, true);
wiphy_unlock(sdata->local->hw.wiphy);
@@ -719,6 +801,9 @@ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata)
if (ieee80211_vif_is_mesh(&sdata->vif))
ieee80211_mesh_teardown_sdata(sdata);
+
+ ieee80211_vif_clear_links(sdata);
+ ieee80211_link_stop(&sdata->deflink);
}
static void ieee80211_uninit(struct net_device *dev)
@@ -831,7 +916,7 @@ static int ieee80211_netdev_fill_forward_path(struct net_device_path_ctx *ctx,
}
}
- sta = sta_info_get(sdata, sdata->u.mgd.bssid);
+ sta = sta_info_get(sdata, sdata->deflink.u.mgd.bssid);
break;
default:
goto out;
@@ -1011,6 +1096,22 @@ static void ieee80211_set_default_queues(struct ieee80211_sub_if_data *sdata)
sdata->vif.cab_queue = IEEE80211_INVAL_HW_QUEUE;
}
+static void ieee80211_sdata_init(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
+{
+ sdata->local = local;
+
+ /*
+ * Initialize the default link, so we can use link_id 0 for non-MLD,
+ * and that continues to work for non-MLD-aware drivers that use just
+ * vif.bss_conf instead of vif.link_conf.
+ *
+ * Note that we never change this, so if link ID 0 isn't used in an
+ * MLD connection, we get a separate allocation for it.
+ */
+ ieee80211_link_init(sdata, -1, &sdata->deflink, &sdata->vif.bss_conf);
+}
+
int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
@@ -1030,13 +1131,12 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
return -ENOMEM;
/* set up data */
- sdata->local = local;
sdata->vif.type = NL80211_IFTYPE_MONITOR;
snprintf(sdata->name, IFNAMSIZ, "%s-monitor",
wiphy_name(local->hw.wiphy));
sdata->wdev.iftype = NL80211_IFTYPE_MONITOR;
- sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM;
+ ieee80211_sdata_init(local, sdata);
ieee80211_set_default_queues(sdata);
@@ -1060,8 +1160,8 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
mutex_unlock(&local->iflist_mtx);
mutex_lock(&local->mtx);
- ret = ieee80211_vif_use_channel(sdata, &local->monitor_chandef,
- IEEE80211_CHANCTX_EXCLUSIVE);
+ ret = ieee80211_link_use_channel(&sdata->deflink, &local->monitor_chandef,
+ IEEE80211_CHANCTX_EXCLUSIVE);
mutex_unlock(&local->mtx);
if (ret) {
mutex_lock(&local->iflist_mtx);
@@ -1105,7 +1205,7 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
synchronize_net();
mutex_lock(&local->mtx);
- ieee80211_vif_release_channel(sdata);
+ ieee80211_link_release_channel(&sdata->deflink);
mutex_unlock(&local->mtx);
drv_remove_interface(local, sdata);
@@ -1210,8 +1310,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP_VLAN:
/* no need to tell driver, but set carrier and chanctx */
- if (rtnl_dereference(sdata->bss->beacon)) {
- ieee80211_vif_vlan_copy_chanctx(sdata);
+ if (sdata->bss->active) {
+ ieee80211_link_vlan_copy_chanctx(&sdata->deflink);
netif_carrier_on(dev);
ieee80211_set_vif_encap_ops(sdata);
} else {
@@ -1283,7 +1383,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
sdata->vif.type != NL80211_IFTYPE_NAN)
changed |= ieee80211_reset_erp_info(sdata);
- ieee80211_bss_info_change_notify(sdata, changed);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ changed);
switch (sdata->vif.type) {
case NL80211_IFTYPE_STATION:
@@ -1307,12 +1408,10 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
* doesn't start up with sane defaults.
* Enable QoS for anything but station interfaces.
*/
- ieee80211_set_wmm_default(sdata, true,
+ ieee80211_set_wmm_default(&sdata->deflink, true,
sdata->vif.type != NL80211_IFTYPE_STATION);
}
- set_bit(SDATA_STATE_RUNNING, &sdata->state);
-
switch (sdata->vif.type) {
case NL80211_IFTYPE_P2P_DEVICE:
rcu_assign_pointer(local->p2p_sdata, sdata);
@@ -1371,6 +1470,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
}
+ set_bit(SDATA_STATE_RUNNING, &sdata->state);
+
return 0;
err_del_interface:
drv_remove_interface(local, sdata);
@@ -1459,14 +1560,16 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
sta = sta_info_get_bss(sdata, mgmt->sa);
if (sta)
- ieee80211_vht_handle_opmode(sdata, sta, opmode,
- band);
+ ieee80211_vht_handle_opmode(sdata,
+ &sta->deflink,
+ opmode, band);
mutex_unlock(&local->sta_mtx);
break;
}
case WLAN_VHT_ACTION_GROUPID_MGMT:
- ieee80211_process_mu_groups(sdata, mgmt);
+ ieee80211_process_mu_groups(sdata, &sdata->deflink,
+ mgmt);
break;
default:
WARN_ON(1);
@@ -1620,7 +1723,16 @@ static void ieee80211_recalc_smps_work(struct work_struct *work)
struct ieee80211_sub_if_data *sdata =
container_of(work, struct ieee80211_sub_if_data, recalc_smps);
- ieee80211_recalc_smps(sdata);
+ ieee80211_recalc_smps(sdata, &sdata->deflink);
+}
+
+static void ieee80211_activate_links_work(struct work_struct *work)
+{
+ struct ieee80211_sub_if_data *sdata =
+ container_of(work, struct ieee80211_sub_if_data,
+ activate_links_work);
+
+ ieee80211_set_active_links(&sdata->vif, sdata->desired_active_links);
}
/*
@@ -1632,8 +1744,9 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
static const u8 bssid_wildcard[ETH_ALEN] = {0xff, 0xff, 0xff,
0xff, 0xff, 0xff};
- /* clear type-dependent union */
+ /* clear type-dependent unions */
memset(&sdata->u, 0, sizeof(sdata->u));
+ memset(&sdata->deflink.u, 0, sizeof(sdata->deflink.u));
/* and set some type-dependent values */
sdata->vif.type = type;
@@ -1644,8 +1757,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
sdata->control_port_no_encrypt = false;
sdata->control_port_over_nl80211 = false;
sdata->control_port_no_preauth = false;
- sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM;
- sdata->vif.bss_conf.idle = true;
+ sdata->vif.cfg.idle = true;
sdata->vif.bss_conf.txpower = INT_MIN; /* unset */
sdata->noack_map = 0;
@@ -1660,10 +1772,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
skb_queue_head_init(&sdata->status_queue);
INIT_WORK(&sdata->work, ieee80211_iface_work);
INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work);
- INIT_WORK(&sdata->csa_finalize_work, ieee80211_csa_finalize_work);
- INIT_WORK(&sdata->color_change_finalize_work, ieee80211_color_change_finalize_work);
- INIT_LIST_HEAD(&sdata->assigned_chanctx_list);
- INIT_LIST_HEAD(&sdata->reserved_chanctx_list);
+ INIT_WORK(&sdata->activate_links_work, ieee80211_activate_links_work);
switch (type) {
case NL80211_IFTYPE_P2P_GO:
@@ -1682,7 +1791,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
sdata->vif.p2p = true;
fallthrough;
case NL80211_IFTYPE_STATION:
- sdata->vif.bss_conf.bssid = sdata->u.mgd.bssid;
+ sdata->vif.bss_conf.bssid = sdata->deflink.u.mgd.bssid;
ieee80211_sta_setup_sdata(sdata);
break;
case NL80211_IFTYPE_OCB:
@@ -1719,6 +1828,9 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
break;
}
+ /* need to do this after the switch so vif.type is correct */
+ ieee80211_link_setup(&sdata->deflink);
+
ieee80211_debugfs_add_netdev(sdata);
}
@@ -1735,6 +1847,10 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
if (!local->ops->change_interface)
return -EBUSY;
+ /* for now, don't support changing while links exist */
+ if (sdata->vif.valid_links)
+ return -EBUSY;
+
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
if (!list_empty(&sdata->u.ap.vlans))
@@ -1994,9 +2110,10 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
wdev = &sdata->wdev;
sdata->dev = NULL;
- strlcpy(sdata->name, name, IFNAMSIZ);
+ strscpy(sdata->name, name, IFNAMSIZ);
ieee80211_assign_perm_addr(local, wdev->address, type);
memcpy(sdata->vif.addr, wdev->address, ETH_ALEN);
+ ether_addr_copy(sdata->vif.bss_conf.addr, sdata->vif.addr);
} else {
int size = ALIGN(sizeof(*sdata) + local->hw.vif_data_size,
sizeof(void *));
@@ -2061,6 +2178,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
sdata = netdev_priv(ndev);
ndev->ieee80211_ptr = &sdata->wdev;
memcpy(sdata->vif.addr, ndev->dev_addr, ETH_ALEN);
+ ether_addr_copy(sdata->vif.bss_conf.addr, sdata->vif.addr);
memcpy(sdata->name, ndev->name, IFNAMSIZ);
if (txq_size) {
@@ -2073,14 +2191,13 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
/* initialise type-independent data */
sdata->wdev.wiphy = local->hw.wiphy;
- sdata->local = local;
+
+ ieee80211_sdata_init(local, sdata);
ieee80211_init_frag_cache(&sdata->frags);
INIT_LIST_HEAD(&sdata->key_list);
- INIT_DELAYED_WORK(&sdata->dfs_cac_timer_work,
- ieee80211_dfs_cac_timer_work);
INIT_DELAYED_WORK(&sdata->dec_tailroom_needed_wk,
ieee80211_delayed_tailroom_dec);
@@ -2108,15 +2225,10 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
}
}
- for (i = 0; i < IEEE80211_NUM_ACS; i++)
- init_airtime_info(&sdata->airtime[i], &local->airtime[i]);
-
ieee80211_set_default_queues(sdata);
- sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL;
- sdata->user_power_level = local->user_power_level;
-
- sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM;
+ sdata->deflink.ap_power_level = IEEE80211_UNSET_POWER_LEVEL;
+ sdata->deflink.user_power_level = local->user_power_level;
/* setup type-dependent data */
ieee80211_setup_sdata(sdata, type);
@@ -2127,6 +2239,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
sdata->u.mgd.use_4addr = params->use_4addr;
ndev->features |= local->hw.netdev_features;
+ ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
ndev->hw_features |= ndev->features &
MAC80211_SUPPORTED_FEATURES_TX;
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index f695fc80088b..e8f6c1e5eabf 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -6,7 +6,7 @@
* Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright 2018-2020 Intel Corporation
+ * Copyright 2018-2020, 2022 Intel Corporation
*/
#include <linux/if_ether.h>
@@ -177,6 +177,10 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
}
}
+ if (key->conf.link_id >= 0 && sdata->vif.active_links &&
+ !(sdata->vif.active_links & BIT(key->conf.link_id)))
+ return 0;
+
ret = drv_set_key(key->local, SET_KEY, sdata,
sta ? &sta->sta : NULL, &key->conf);
@@ -246,6 +250,10 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
sta = key->sta;
sdata = key->sdata;
+ if (key->conf.link_id >= 0 && sdata->vif.active_links &&
+ !(sdata->vif.active_links & BIT(key->conf.link_id)))
+ return;
+
if (!(key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
IEEE80211_KEY_FLAG_PUT_MIC_SPACE |
IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
@@ -344,15 +352,19 @@ static void ieee80211_pairwise_rekey(struct ieee80211_key *old,
}
}
-static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata,
+static void __ieee80211_set_default_key(struct ieee80211_link_data *link,
int idx, bool uni, bool multi)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_key *key = NULL;
assert_key_lock(sdata->local);
- if (idx >= 0 && idx < NUM_DEFAULT_KEYS)
+ if (idx >= 0 && idx < NUM_DEFAULT_KEYS) {
key = key_mtx_dereference(sdata->local, sdata->keys[idx]);
+ if (!key)
+ key = key_mtx_dereference(sdata->local, link->gtk[idx]);
+ }
if (uni) {
rcu_assign_pointer(sdata->default_unicast_key, key);
@@ -362,46 +374,48 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata,
}
if (multi)
- rcu_assign_pointer(sdata->default_multicast_key, key);
+ rcu_assign_pointer(link->default_multicast_key, key);
ieee80211_debugfs_key_update_default(sdata);
}
-void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx,
+void ieee80211_set_default_key(struct ieee80211_link_data *link, int idx,
bool uni, bool multi)
{
- mutex_lock(&sdata->local->key_mtx);
- __ieee80211_set_default_key(sdata, idx, uni, multi);
- mutex_unlock(&sdata->local->key_mtx);
+ mutex_lock(&link->sdata->local->key_mtx);
+ __ieee80211_set_default_key(link, idx, uni, multi);
+ mutex_unlock(&link->sdata->local->key_mtx);
}
static void
-__ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx)
+__ieee80211_set_default_mgmt_key(struct ieee80211_link_data *link, int idx)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_key *key = NULL;
assert_key_lock(sdata->local);
if (idx >= NUM_DEFAULT_KEYS &&
idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
- key = key_mtx_dereference(sdata->local, sdata->keys[idx]);
+ key = key_mtx_dereference(sdata->local, link->gtk[idx]);
- rcu_assign_pointer(sdata->default_mgmt_key, key);
+ rcu_assign_pointer(link->default_mgmt_key, key);
ieee80211_debugfs_key_update_default(sdata);
}
-void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
+void ieee80211_set_default_mgmt_key(struct ieee80211_link_data *link,
int idx)
{
- mutex_lock(&sdata->local->key_mtx);
- __ieee80211_set_default_mgmt_key(sdata, idx);
- mutex_unlock(&sdata->local->key_mtx);
+ mutex_lock(&link->sdata->local->key_mtx);
+ __ieee80211_set_default_mgmt_key(link, idx);
+ mutex_unlock(&link->sdata->local->key_mtx);
}
static void
-__ieee80211_set_default_beacon_key(struct ieee80211_sub_if_data *sdata, int idx)
+__ieee80211_set_default_beacon_key(struct ieee80211_link_data *link, int idx)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_key *key = NULL;
assert_key_lock(sdata->local);
@@ -409,37 +423,75 @@ __ieee80211_set_default_beacon_key(struct ieee80211_sub_if_data *sdata, int idx)
if (idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS &&
idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS +
NUM_DEFAULT_BEACON_KEYS)
- key = key_mtx_dereference(sdata->local, sdata->keys[idx]);
+ key = key_mtx_dereference(sdata->local, link->gtk[idx]);
- rcu_assign_pointer(sdata->default_beacon_key, key);
+ rcu_assign_pointer(link->default_beacon_key, key);
ieee80211_debugfs_key_update_default(sdata);
}
-void ieee80211_set_default_beacon_key(struct ieee80211_sub_if_data *sdata,
+void ieee80211_set_default_beacon_key(struct ieee80211_link_data *link,
int idx)
{
- mutex_lock(&sdata->local->key_mtx);
- __ieee80211_set_default_beacon_key(sdata, idx);
- mutex_unlock(&sdata->local->key_mtx);
+ mutex_lock(&link->sdata->local->key_mtx);
+ __ieee80211_set_default_beacon_key(link, idx);
+ mutex_unlock(&link->sdata->local->key_mtx);
}
static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta,
- bool pairwise,
- struct ieee80211_key *old,
- struct ieee80211_key *new)
+ struct ieee80211_link_data *link,
+ struct sta_info *sta,
+ bool pairwise,
+ struct ieee80211_key *old,
+ struct ieee80211_key *new)
{
+ struct link_sta_info *link_sta = sta ? &sta->deflink : NULL;
+ int link_id;
int idx;
int ret = 0;
bool defunikey, defmultikey, defmgmtkey, defbeaconkey;
+ bool is_wep;
/* caller must provide at least one old/new */
if (WARN_ON(!new && !old))
return 0;
- if (new)
- list_add_tail_rcu(&new->list, &sdata->key_list);
+ if (new) {
+ idx = new->conf.keyidx;
+ is_wep = new->conf.cipher == WLAN_CIPHER_SUITE_WEP40 ||
+ new->conf.cipher == WLAN_CIPHER_SUITE_WEP104;
+ link_id = new->conf.link_id;
+ } else {
+ idx = old->conf.keyidx;
+ is_wep = old->conf.cipher == WLAN_CIPHER_SUITE_WEP40 ||
+ old->conf.cipher == WLAN_CIPHER_SUITE_WEP104;
+ link_id = old->conf.link_id;
+ }
+
+ if (WARN(old && old->conf.link_id != link_id,
+ "old link ID %d doesn't match new link ID %d\n",
+ old->conf.link_id, link_id))
+ return -EINVAL;
+
+ if (link_id >= 0) {
+ if (!link) {
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (!link)
+ return -ENOLINK;
+ }
+
+ if (sta) {
+ link_sta = rcu_dereference_protected(sta->link[link_id],
+ lockdep_is_held(&sta->local->sta_mtx));
+ if (!link_sta)
+ return -ENOLINK;
+ }
+ } else {
+ link = &sdata->deflink;
+ }
+
+ if ((is_wep || pairwise) && idx >= NUM_DEFAULT_KEYS)
+ return -EINVAL;
WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx);
@@ -451,8 +503,6 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
}
if (old) {
- idx = old->conf.keyidx;
-
if (old->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
ieee80211_key_disable_hw_accel(old);
@@ -460,8 +510,6 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
ret = ieee80211_key_enable_hw_accel(new);
}
} else {
- /* new must be provided in case old is not */
- idx = new->conf.keyidx;
if (!new->local->wowlan)
ret = ieee80211_key_enable_hw_accel(new);
}
@@ -469,6 +517,9 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
if (ret)
return ret;
+ if (new)
+ list_add_tail_rcu(&new->list, &sdata->key_list);
+
if (sta) {
if (pairwise) {
rcu_assign_pointer(sta->ptk[idx], new);
@@ -476,7 +527,7 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
!(new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX))
_ieee80211_set_tx_key(new, true);
} else {
- rcu_assign_pointer(sta->gtk[idx], new);
+ rcu_assign_pointer(link_sta->gtk[idx], new);
}
/* Only needed for transition from no key -> key.
 * Still triggers unnecessarily when using Extended Key ID
@@ -490,35 +541,39 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
sdata->default_unicast_key);
defmultikey = old &&
old == key_mtx_dereference(sdata->local,
- sdata->default_multicast_key);
+ link->default_multicast_key);
defmgmtkey = old &&
old == key_mtx_dereference(sdata->local,
- sdata->default_mgmt_key);
+ link->default_mgmt_key);
defbeaconkey = old &&
old == key_mtx_dereference(sdata->local,
- sdata->default_beacon_key);
+ link->default_beacon_key);
if (defunikey && !new)
- __ieee80211_set_default_key(sdata, -1, true, false);
+ __ieee80211_set_default_key(link, -1, true, false);
if (defmultikey && !new)
- __ieee80211_set_default_key(sdata, -1, false, true);
+ __ieee80211_set_default_key(link, -1, false, true);
if (defmgmtkey && !new)
- __ieee80211_set_default_mgmt_key(sdata, -1);
+ __ieee80211_set_default_mgmt_key(link, -1);
if (defbeaconkey && !new)
- __ieee80211_set_default_beacon_key(sdata, -1);
+ __ieee80211_set_default_beacon_key(link, -1);
+
+ if (is_wep || pairwise)
+ rcu_assign_pointer(sdata->keys[idx], new);
+ else
+ rcu_assign_pointer(link->gtk[idx], new);
- rcu_assign_pointer(sdata->keys[idx], new);
if (defunikey && new)
- __ieee80211_set_default_key(sdata, new->conf.keyidx,
+ __ieee80211_set_default_key(link, new->conf.keyidx,
true, false);
if (defmultikey && new)
- __ieee80211_set_default_key(sdata, new->conf.keyidx,
+ __ieee80211_set_default_key(link, new->conf.keyidx,
false, true);
if (defmgmtkey && new)
- __ieee80211_set_default_mgmt_key(sdata,
+ __ieee80211_set_default_mgmt_key(link,
new->conf.keyidx);
if (defbeaconkey && new)
- __ieee80211_set_default_beacon_key(sdata,
+ __ieee80211_set_default_beacon_key(link,
new->conf.keyidx);
}
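Taken together, the hunks above change where a key is ultimately installed. A minimal consolidated sketch of the resulting placement rules, using the variables resolved earlier in ieee80211_key_replace() (illustrative only, not part of the patch):

	/* illustrative consolidation of the placement logic above */
	if (sta) {
		if (pairwise)
			rcu_assign_pointer(sta->ptk[idx], new);      /* per STA */
		else
			rcu_assign_pointer(link_sta->gtk[idx], new); /* per link STA */
	} else {
		if (is_wep || pairwise)
			rcu_assign_pointer(sdata->keys[idx], new);   /* per interface */
		else
			rcu_assign_pointer(link->gtk[idx], new);     /* per link */
	}

WEP and pairwise keys stay on the sdata so they keep working across all links; group-addressed keys move to the per-link (or per-link-STA) arrays.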
@@ -531,8 +586,7 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
struct ieee80211_key *
ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
const u8 *key_data,
- size_t seq_len, const u8 *seq,
- const struct ieee80211_cipher_scheme *cs)
+ size_t seq_len, const u8 *seq)
{
struct ieee80211_key *key;
int i, j, err;
@@ -553,6 +607,7 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
key->conf.flags = 0;
key->flags = 0;
+ key->conf.link_id = -1;
key->conf.cipher = cipher;
key->conf.keyidx = idx;
key->conf.keylen = key_len;
@@ -675,21 +730,6 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
return ERR_PTR(err);
}
break;
- default:
- if (cs) {
- if (seq_len && seq_len != cs->pn_len) {
- kfree(key);
- return ERR_PTR(-EINVAL);
- }
-
- key->conf.iv_len = cs->hdr_len;
- key->conf.icv_len = cs->mic_len;
- for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++)
- for (j = 0; j < seq_len; j++)
- key->u.gen.rx_pn[i][j] =
- seq[seq_len - j - 1];
- key->flags |= KEY_FLAG_CIPHER_SCHEME;
- }
}
memcpy(key->conf.key, key_data, key_len);
INIT_LIST_HEAD(&key->list);
@@ -796,11 +836,12 @@ static bool ieee80211_key_identical(struct ieee80211_sub_if_data *sdata,
}
int ieee80211_key_link(struct ieee80211_key *key,
- struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
struct sta_info *sta)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
static atomic_t key_color = ATOMIC_INIT(0);
- struct ieee80211_key *old_key;
+ struct ieee80211_key *old_key = NULL;
int idx = key->conf.keyidx;
bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
/*
@@ -826,9 +867,26 @@ int ieee80211_key_link(struct ieee80211_key *key,
(old_key && old_key->conf.cipher != key->conf.cipher))
goto out;
} else if (sta) {
- old_key = key_mtx_dereference(sdata->local, sta->gtk[idx]);
+ struct link_sta_info *link_sta = &sta->deflink;
+ int link_id = key->conf.link_id;
+
+ if (link_id >= 0) {
+ link_sta = rcu_dereference_protected(sta->link[link_id],
+ lockdep_is_held(&sta->local->sta_mtx));
+ if (!link_sta) {
+ ret = -ENOLINK;
+ goto out;
+ }
+ }
+
+ old_key = key_mtx_dereference(sdata->local, link_sta->gtk[idx]);
} else {
- old_key = key_mtx_dereference(sdata->local, sdata->keys[idx]);
+ if (idx < NUM_DEFAULT_KEYS)
+ old_key = key_mtx_dereference(sdata->local,
+ sdata->keys[idx]);
+ if (!old_key)
+ old_key = key_mtx_dereference(sdata->local,
+ link->gtk[idx]);
}
/* Non-pairwise keys must also not switch the cipher on rekey */
@@ -859,7 +917,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
increment_tailroom_need_count(sdata);
- ret = ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
+ ret = ieee80211_key_replace(sdata, link, sta, pairwise, old_key, key);
if (!ret) {
ieee80211_debugfs_key_add(key);
@@ -883,9 +941,9 @@ void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom)
* Replace key with nothingness if it was ever used.
*/
if (key->sdata)
- ieee80211_key_replace(key->sdata, key->sta,
- key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
- key, NULL);
+ ieee80211_key_replace(key->sdata, NULL, key->sta,
+ key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
+ key, NULL);
ieee80211_key_destroy(key, delay_tailroom);
}
@@ -1012,15 +1070,45 @@ static void ieee80211_free_keys_iface(struct ieee80211_sub_if_data *sdata,
ieee80211_debugfs_key_remove_beacon_default(sdata);
list_for_each_entry_safe(key, tmp, &sdata->key_list, list) {
- ieee80211_key_replace(key->sdata, key->sta,
- key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
- key, NULL);
+ ieee80211_key_replace(key->sdata, NULL, key->sta,
+ key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
+ key, NULL);
list_add_tail(&key->list, keys);
}
ieee80211_debugfs_key_update_default(sdata);
}
+void ieee80211_remove_link_keys(struct ieee80211_link_data *link,
+ struct list_head *keys)
+{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_key *key, *tmp;
+
+ mutex_lock(&local->key_mtx);
+ list_for_each_entry_safe(key, tmp, &sdata->key_list, list) {
+ if (key->conf.link_id != link->link_id)
+ continue;
+ ieee80211_key_replace(key->sdata, link, key->sta,
+ key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
+ key, NULL);
+ list_add_tail(&key->list, keys);
+ }
+ mutex_unlock(&local->key_mtx);
+}
+
+void ieee80211_free_key_list(struct ieee80211_local *local,
+ struct list_head *keys)
+{
+ struct ieee80211_key *key, *tmp;
+
+ mutex_lock(&local->key_mtx);
+ list_for_each_entry_safe(key, tmp, keys, list)
+ __ieee80211_key_destroy(key, false);
+ mutex_unlock(&local->key_mtx);
+}
+
void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata,
bool force_synchronize)
{
@@ -1076,13 +1164,13 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local,
int i;
mutex_lock(&local->key_mtx);
- for (i = 0; i < ARRAY_SIZE(sta->gtk); i++) {
- key = key_mtx_dereference(local, sta->gtk[i]);
+ for (i = 0; i < ARRAY_SIZE(sta->deflink.gtk); i++) {
+ key = key_mtx_dereference(local, sta->deflink.gtk[i]);
if (!key)
continue;
- ieee80211_key_replace(key->sdata, key->sta,
- key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
- key, NULL);
+ ieee80211_key_replace(key->sdata, NULL, key->sta,
+ key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
+ key, NULL);
__ieee80211_key_destroy(key, key->sdata->vif.type ==
NL80211_IFTYPE_STATION);
}
@@ -1091,9 +1179,9 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local,
key = key_mtx_dereference(local, sta->ptk[i]);
if (!key)
continue;
- ieee80211_key_replace(key->sdata, key->sta,
- key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
- key, NULL);
+ ieee80211_key_replace(key->sdata, NULL, key->sta,
+ key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
+ key, NULL);
__ieee80211_key_destroy(key, key->sdata->vif.type ==
NL80211_IFTYPE_STATION);
}
@@ -1293,14 +1381,15 @@ ieee80211_gtk_rekey_add(struct ieee80211_vif *vif,
key = ieee80211_key_alloc(keyconf->cipher, keyconf->keyidx,
keyconf->keylen, keyconf->key,
- 0, NULL, NULL);
+ 0, NULL);
if (IS_ERR(key))
return ERR_CAST(key);
if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED)
key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT;
- err = ieee80211_key_link(key, sdata, NULL);
+ /* FIXME: this function needs to get a link ID */
+ err = ieee80211_key_link(key, &sdata->deflink, NULL);
if (err)
return ERR_PTR(err);
@@ -1356,3 +1445,37 @@ void ieee80211_key_replay(struct ieee80211_key_conf *keyconf)
}
}
EXPORT_SYMBOL_GPL(ieee80211_key_replay);
+
+int ieee80211_key_switch_links(struct ieee80211_sub_if_data *sdata,
+ unsigned long del_links_mask,
+ unsigned long add_links_mask)
+{
+ struct ieee80211_key *key;
+ int ret;
+
+ list_for_each_entry(key, &sdata->key_list, list) {
+ if (key->conf.link_id < 0 ||
+ !(del_links_mask & BIT(key->conf.link_id)))
+ continue;
+
+ /* shouldn't happen for per-link keys */
+ WARN_ON(key->sta);
+
+ ieee80211_key_disable_hw_accel(key);
+ }
+
+ list_for_each_entry(key, &sdata->key_list, list) {
+ if (key->conf.link_id < 0 ||
+ !(add_links_mask & BIT(key->conf.link_id)))
+ continue;
+
+ /* shouldn't happen for per-link keys */
+ WARN_ON(key->sta);
+
+ ret = ieee80211_key_enable_hw_accel(key);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
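For context, a short sketch of how this helper is meant to be driven; the actual call site is _ieee80211_set_active_links() in link.c below, where rem/add are bitmasks of the links being deactivated/activated:

	/* keys on links leaving the active set have their hardware state
	 * torn down first, then keys on newly activated links are
	 * re-programmed
	 */
	ret = ieee80211_key_switch_links(sdata, rem, add);
	WARN_ON_ONCE(ret);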
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 1e326c89d721..f3df97df4b72 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -2,7 +2,7 @@
/*
* Copyright 2002-2004, Instant802 Networks, Inc.
* Copyright 2005, Devicescape Software, Inc.
- * Copyright (C) 2019 Intel Corporation
+ * Copyright (C) 2019, 2022 Intel Corporation
*/
#ifndef IEEE80211_KEY_H
@@ -22,6 +22,7 @@
struct ieee80211_local;
struct ieee80211_sub_if_data;
+struct ieee80211_link_data;
struct sta_info;
/**
@@ -30,12 +31,10 @@ struct sta_info;
* @KEY_FLAG_UPLOADED_TO_HARDWARE: Indicates that this key is present
* in the hardware for TX crypto hardware acceleration.
* @KEY_FLAG_TAINTED: Key is tainted and packets should be dropped.
- * @KEY_FLAG_CIPHER_SCHEME: This key is for a hardware cipher scheme
*/
enum ieee80211_internal_key_flags {
KEY_FLAG_UPLOADED_TO_HARDWARE = BIT(0),
KEY_FLAG_TAINTED = BIT(1),
- KEY_FLAG_CIPHER_SCHEME = BIT(2),
};
enum ieee80211_internal_tkip_state {
@@ -140,32 +139,40 @@ struct ieee80211_key {
struct ieee80211_key *
ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
const u8 *key_data,
- size_t seq_len, const u8 *seq,
- const struct ieee80211_cipher_scheme *cs);
+ size_t seq_len, const u8 *seq);
/*
* Insert a key into data structures (sdata, sta if necessary)
* to make it used, free old key. On failure, also free the new key.
*/
int ieee80211_key_link(struct ieee80211_key *key,
- struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
struct sta_info *sta);
int ieee80211_set_tx_key(struct ieee80211_key *key);
void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom);
void ieee80211_key_free_unused(struct ieee80211_key *key);
-void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx,
+void ieee80211_set_default_key(struct ieee80211_link_data *link, int idx,
bool uni, bool multi);
-void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
+void ieee80211_set_default_mgmt_key(struct ieee80211_link_data *link,
int idx);
-void ieee80211_set_default_beacon_key(struct ieee80211_sub_if_data *sdata,
+void ieee80211_set_default_beacon_key(struct ieee80211_link_data *link,
int idx);
+void ieee80211_remove_link_keys(struct ieee80211_link_data *link,
+ struct list_head *keys);
+void ieee80211_free_key_list(struct ieee80211_local *local,
+ struct list_head *keys);
void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata,
bool force_synchronize);
void ieee80211_free_sta_keys(struct ieee80211_local *local,
struct sta_info *sta);
void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata);
+int ieee80211_key_switch_links(struct ieee80211_sub_if_data *sdata,
+ unsigned long del_links_mask,
+ unsigned long add_links_mask);
#define key_mtx_dereference(local, ref) \
rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx)))
+#define rcu_dereference_check_key_mtx(local, ref) \
+ rcu_dereference_check(ref, lockdep_is_held(&((local)->key_mtx)))
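A minimal usage sketch for the new checked variant, assuming a hypothetical caller that may run either under rcu_read_lock() or with key_mtx held:

/* hypothetical helper, for illustration only */
static struct ieee80211_key *
get_link_gtk(struct ieee80211_local *local,
	     struct ieee80211_link_data *link, int idx)
{
	return rcu_dereference_check_key_mtx(local, link->gtk[idx]);
}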
void ieee80211_delayed_tailroom_dec(struct work_struct *wk);
diff --git a/net/mac80211/link.c b/net/mac80211/link.c
new file mode 100644
index 000000000000..e309708abae8
--- /dev/null
+++ b/net/mac80211/link.c
@@ -0,0 +1,473 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * MLO link handling
+ *
+ * Copyright (C) 2022 Intel Corporation
+ */
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <net/mac80211.h>
+#include "ieee80211_i.h"
+#include "driver-ops.h"
+#include "key.h"
+
+void ieee80211_link_setup(struct ieee80211_link_data *link)
+{
+ if (link->sdata->vif.type == NL80211_IFTYPE_STATION)
+ ieee80211_mgd_setup_link(link);
+}
+
+void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
+ int link_id,
+ struct ieee80211_link_data *link,
+ struct ieee80211_bss_conf *link_conf)
+{
+ bool deflink = link_id < 0;
+
+ if (link_id < 0)
+ link_id = 0;
+
+ rcu_assign_pointer(sdata->vif.link_conf[link_id], link_conf);
+ rcu_assign_pointer(sdata->link[link_id], link);
+
+ link->sdata = sdata;
+ link->link_id = link_id;
+ link->conf = link_conf;
+ link_conf->link_id = link_id;
+
+ INIT_WORK(&link->csa_finalize_work,
+ ieee80211_csa_finalize_work);
+ INIT_WORK(&link->color_change_finalize_work,
+ ieee80211_color_change_finalize_work);
+ INIT_LIST_HEAD(&link->assigned_chanctx_list);
+ INIT_LIST_HEAD(&link->reserved_chanctx_list);
+ INIT_DELAYED_WORK(&link->dfs_cac_timer_work,
+ ieee80211_dfs_cac_timer_work);
+
+ if (!deflink) {
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_AP:
+ ether_addr_copy(link_conf->addr,
+ sdata->wdev.links[link_id].addr);
+ link_conf->bssid = link_conf->addr;
+ WARN_ON(!(sdata->wdev.valid_links & BIT(link_id)));
+ break;
+ case NL80211_IFTYPE_STATION:
+ /* station sets the bssid in ieee80211_mgd_setup_link */
+ break;
+ default:
+ WARN_ON(1);
+ }
+ }
+}
+
+void ieee80211_link_stop(struct ieee80211_link_data *link)
+{
+ if (link->sdata->vif.type == NL80211_IFTYPE_STATION)
+ ieee80211_mgd_stop_link(link);
+
+ ieee80211_link_release_channel(link);
+}
+
+struct link_container {
+ struct ieee80211_link_data data;
+ struct ieee80211_bss_conf conf;
+};
+
+static void ieee80211_tear_down_links(struct ieee80211_sub_if_data *sdata,
+ struct link_container **links, u16 mask)
+{
+ struct ieee80211_link_data *link;
+ LIST_HEAD(keys);
+ unsigned int link_id;
+
+ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
+ if (!(mask & BIT(link_id)))
+ continue;
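+ /* note: 'data' is the first member of struct link_container, so this
+ * expression stays NULL when links[link_id] is NULL (checked below)
+ */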
+ link = &links[link_id]->data;
+ if (link_id == 0 && !link)
+ link = &sdata->deflink;
+ if (WARN_ON(!link))
+ continue;
+ ieee80211_remove_link_keys(link, &keys);
+ ieee80211_link_stop(link);
+ }
+
+ synchronize_rcu();
+
+ ieee80211_free_key_list(sdata->local, &keys);
+}
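The ordering in this teardown matters; a sketch of the same pattern for a single link, annotated, under the key_mtx rules from key.c:

	LIST_HEAD(keys);

	ieee80211_remove_link_keys(link, &keys); /* 1: unlink keys, takes key_mtx */
	ieee80211_link_stop(link);               /* 2: release the link's channel */
	synchronize_rcu();                       /* 3: no reader still sees the keys */
	ieee80211_free_key_list(local, &keys);   /* 4: destroy after the grace period */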
+
+static void ieee80211_free_links(struct ieee80211_sub_if_data *sdata,
+ struct link_container **links)
+{
+ unsigned int link_id;
+
+ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++)
+ kfree(links[link_id]);
+}
+
+static int ieee80211_check_dup_link_addrs(struct ieee80211_sub_if_data *sdata)
+{
+ unsigned int i, j;
+
+ for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) {
+ struct ieee80211_link_data *link1;
+
+ link1 = sdata_dereference(sdata->link[i], sdata);
+ if (!link1)
+ continue;
+ for (j = i + 1; j < IEEE80211_MLD_MAX_NUM_LINKS; j++) {
+ struct ieee80211_link_data *link2;
+
+ link2 = sdata_dereference(sdata->link[j], sdata);
+ if (!link2)
+ continue;
+
+ if (ether_addr_equal(link1->conf->addr,
+ link2->conf->addr))
+ return -EALREADY;
+ }
+ }
+
+ return 0;
+}
+
+static void ieee80211_set_vif_links_bitmaps(struct ieee80211_sub_if_data *sdata,
+ u16 links)
+{
+ sdata->vif.valid_links = links;
+
+ if (!links) {
+ sdata->vif.active_links = 0;
+ return;
+ }
+
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_AP:
+ /* in an AP all links are always active */
+ sdata->vif.active_links = links;
+ break;
+ case NL80211_IFTYPE_STATION:
+ if (sdata->vif.active_links)
+ break;
+ WARN_ON(hweight16(links) > 1);
+ sdata->vif.active_links = links;
+ break;
+ default:
+ WARN_ON(1);
+ }
+}
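A worked example of the bitmap rules above, with illustrative values:

	/* AP MLD, valid_links == 0x5 (links 0 and 2):
	 *	active_links == 0x5	(AP links are always active)
	 * STA MLD, valid_links == 0x3 (links 0 and 1):
	 *	active_links == 0x1	(at most one bit set until
	 *				 ieee80211_set_active_links() runs)
	 */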
+
+static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata,
+ struct link_container **to_free,
+ u16 new_links)
+{
+ u16 old_links = sdata->vif.valid_links;
+ u16 old_active = sdata->vif.active_links;
+ unsigned long add = new_links & ~old_links;
+ unsigned long rem = old_links & ~new_links;
+ unsigned int link_id;
+ int ret;
+ struct link_container *links[IEEE80211_MLD_MAX_NUM_LINKS] = {}, *link;
+ struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS];
+ struct ieee80211_link_data *old_data[IEEE80211_MLD_MAX_NUM_LINKS];
+ bool use_deflink = old_links == 0; /* set for error case */
+
+ sdata_assert_lock(sdata);
+
+ memset(to_free, 0, sizeof(links));
+
+ if (old_links == new_links)
+ return 0;
+
+ /* if there were no old links, need to clear the pointers to deflink */
+ if (!old_links)
+ rem |= BIT(0);
+
+ /* allocate new link structures first */
+ for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) {
+ link = kzalloc(sizeof(*link), GFP_KERNEL);
+ if (!link) {
+ ret = -ENOMEM;
+ goto free;
+ }
+ links[link_id] = link;
+ }
+
+ /* keep track of the old pointers for the driver */
+ BUILD_BUG_ON(sizeof(old) != sizeof(sdata->vif.link_conf));
+ memcpy(old, sdata->vif.link_conf, sizeof(old));
+ /* and for us in error cases */
+ BUILD_BUG_ON(sizeof(old_data) != sizeof(sdata->link));
+ memcpy(old_data, sdata->link, sizeof(old_data));
+
+ /* grab old links to free later */
+ for_each_set_bit(link_id, &rem, IEEE80211_MLD_MAX_NUM_LINKS) {
+ if (rcu_access_pointer(sdata->link[link_id]) != &sdata->deflink) {
+ /*
+ * we must have allocated the data through this path so
+ * we know we can free both at the same time
+ */
+ to_free[link_id] = container_of(rcu_access_pointer(sdata->link[link_id]),
+ typeof(*links[link_id]),
+ data);
+ }
+
+ RCU_INIT_POINTER(sdata->link[link_id], NULL);
+ RCU_INIT_POINTER(sdata->vif.link_conf[link_id], NULL);
+ }
+
+ /* link them into data structures */
+ for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) {
+ WARN_ON(!use_deflink &&
+ rcu_access_pointer(sdata->link[link_id]) == &sdata->deflink);
+
+ link = links[link_id];
+ ieee80211_link_init(sdata, link_id, &link->data, &link->conf);
+ ieee80211_link_setup(&link->data);
+ }
+
+ if (new_links == 0)
+ ieee80211_link_init(sdata, -1, &sdata->deflink,
+ &sdata->vif.bss_conf);
+
+ ret = ieee80211_check_dup_link_addrs(sdata);
+ if (!ret) {
+ /* for keys we will not be able to undo this */
+ ieee80211_tear_down_links(sdata, to_free, rem);
+
+ ieee80211_set_vif_links_bitmaps(sdata, new_links);
+
+ /* tell the driver */
+ ret = drv_change_vif_links(sdata->local, sdata,
+ old_links & old_active,
+ new_links & sdata->vif.active_links,
+ old);
+ }
+
+ if (ret) {
+ /* restore config */
+ memcpy(sdata->link, old_data, sizeof(old_data));
+ memcpy(sdata->vif.link_conf, old, sizeof(old));
+ ieee80211_set_vif_links_bitmaps(sdata, old_links);
+ /* and free (only) the newly allocated links */
+ memset(to_free, 0, sizeof(links));
+ goto free;
+ }
+
+ /* use deflink/bss_conf again if and only if there are no more links */
+ use_deflink = new_links == 0;
+
+ goto deinit;
+free:
+ /* if we failed during allocation, free all newly allocated links */
+ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
+ kfree(links[link_id]);
+ links[link_id] = NULL;
+ }
+deinit:
+ if (use_deflink)
+ ieee80211_link_init(sdata, -1, &sdata->deflink,
+ &sdata->vif.bss_conf);
+ return ret;
+}
+
+int ieee80211_vif_set_links(struct ieee80211_sub_if_data *sdata,
+ u16 new_links)
+{
+ struct link_container *links[IEEE80211_MLD_MAX_NUM_LINKS];
+ int ret;
+
+ ret = ieee80211_vif_update_links(sdata, links, new_links);
+ ieee80211_free_links(sdata, links);
+
+ return ret;
+}
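The cfg80211 add/del-link callbacks that drive this are not part of this hunk; a hedged sketch of the expected caller shape, with the callback name and locking details assumed:

/* assumed caller shape, for illustration */
static int ieee80211_add_intf_link(struct wiphy *wiphy,
				   struct wireless_dev *wdev,
				   unsigned int link_id)
{
	struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);

	/* wdev->valid_links is assumed to already include BIT(link_id) */
	return ieee80211_vif_set_links(sdata, wdev->valid_links);
}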
+
+void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata)
+{
+ struct link_container *links[IEEE80211_MLD_MAX_NUM_LINKS];
+
+ /*
+ * The locking here is different because when we free links
+ * in the station case we need to be able to cancel_work_sync()
+ * something that also takes the lock.
+ */
+
+ sdata_lock(sdata);
+ ieee80211_vif_update_links(sdata, links, 0);
+ sdata_unlock(sdata);
+
+ ieee80211_free_links(sdata, links);
+}
+
+static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata,
+ u16 active_links)
+{
+ struct ieee80211_bss_conf *link_confs[IEEE80211_MLD_MAX_NUM_LINKS];
+ struct ieee80211_local *local = sdata->local;
+ u16 old_active = sdata->vif.active_links;
+ unsigned long rem = old_active & ~active_links;
+ unsigned long add = active_links & ~old_active;
+ struct sta_info *sta;
+ unsigned int link_id;
+ int ret, i;
+
+ if (!ieee80211_sdata_running(sdata))
+ return -ENETDOWN;
+
+ if (sdata->vif.type != NL80211_IFTYPE_STATION)
+ return -EINVAL;
+
+ /* cannot activate links that don't exist */
+ if (active_links & ~sdata->vif.valid_links)
+ return -EINVAL;
+
+ /* nothing to do */
+ if (old_active == active_links)
+ return 0;
+
+ for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++)
+ link_confs[i] = sdata_dereference(sdata->vif.link_conf[i],
+ sdata);
+
+ if (add) {
+ sdata->vif.active_links |= active_links;
+ ret = drv_change_vif_links(local, sdata,
+ old_active,
+ sdata->vif.active_links,
+ link_confs);
+ if (ret) {
+ sdata->vif.active_links = old_active;
+ return ret;
+ }
+ }
+
+ for_each_set_bit(link_id, &rem, IEEE80211_MLD_MAX_NUM_LINKS) {
+ struct ieee80211_link_data *link;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+
+ /* FIXME: kill TDLS connections on the link */
+
+ ieee80211_link_release_channel(link);
+ }
+
+ list_for_each_entry(sta, &local->sta_list, list) {
+ if (sdata != sta->sdata)
+ continue;
+ ret = drv_change_sta_links(local, sdata, &sta->sta,
+ old_active,
+ old_active | active_links);
+ WARN_ON_ONCE(ret);
+ }
+
+ ret = ieee80211_key_switch_links(sdata, rem, add);
+ WARN_ON_ONCE(ret);
+
+ list_for_each_entry(sta, &local->sta_list, list) {
+ if (sdata != sta->sdata)
+ continue;
+ ret = drv_change_sta_links(local, sdata, &sta->sta,
+ old_active | active_links,
+ active_links);
+ WARN_ON_ONCE(ret);
+ }
+
+ for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) {
+ struct ieee80211_link_data *link;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+
+ ret = ieee80211_link_use_channel(link, &link->conf->chandef,
+ IEEE80211_CHANCTX_SHARED);
+ WARN_ON_ONCE(ret);
+
+ ieee80211_link_info_change_notify(sdata, link,
+ BSS_CHANGED_ERP_CTS_PROT |
+ BSS_CHANGED_ERP_PREAMBLE |
+ BSS_CHANGED_ERP_SLOT |
+ BSS_CHANGED_HT |
+ BSS_CHANGED_BASIC_RATES |
+ BSS_CHANGED_BSSID |
+ BSS_CHANGED_CQM |
+ BSS_CHANGED_QOS |
+ BSS_CHANGED_TXPOWER |
+ BSS_CHANGED_BANDWIDTH |
+ BSS_CHANGED_TWT |
+ BSS_CHANGED_HE_OBSS_PD |
+ BSS_CHANGED_HE_BSS_COLOR);
+ ieee80211_mgd_set_link_qos_params(link);
+ }
+
+ old_active = sdata->vif.active_links;
+ sdata->vif.active_links = active_links;
+
+ if (rem) {
+ ret = drv_change_vif_links(local, sdata, old_active,
+ active_links, link_confs);
+ WARN_ON_ONCE(ret);
+ }
+
+ return 0;
+}
+
+int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_local *local = sdata->local;
+ u16 old_active;
+ int ret;
+
+ sdata_lock(sdata);
+ mutex_lock(&local->sta_mtx);
+ mutex_lock(&local->mtx);
+ mutex_lock(&local->key_mtx);
+ old_active = sdata->vif.active_links;
+ if (old_active & active_links) {
+ /*
+ * if there's at least one link that stays active across
+ * the change then switch to it (to those) first, and
+ * then enable the additional links
+ */
+ ret = _ieee80211_set_active_links(sdata,
+ old_active & active_links);
+ if (!ret)
+ ret = _ieee80211_set_active_links(sdata, active_links);
+ } else {
+ /* otherwise switch directly */
+ ret = _ieee80211_set_active_links(sdata, active_links);
+ }
+ mutex_unlock(&local->key_mtx);
+ mutex_unlock(&local->mtx);
+ mutex_unlock(&local->sta_mtx);
+ sdata_unlock(sdata);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ieee80211_set_active_links);
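A hypothetical driver-side usage sketch; the helper name is illustrative, and the two-step transition (keeping the surviving subset active first) happens internally as shown above:

static int example_switch_to_link(struct ieee80211_vif *vif,
				  unsigned int new_link_id)
{
	if (!(vif->valid_links & BIT(new_link_id)))
		return -EINVAL;

	return ieee80211_set_active_links(vif, BIT(new_link_id));
}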
+
+void ieee80211_set_active_links_async(struct ieee80211_vif *vif,
+ u16 active_links)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+
+ if (!ieee80211_sdata_running(sdata))
+ return;
+
+ if (sdata->vif.type != NL80211_IFTYPE_STATION)
+ return;
+
+ /* cannot activate links that don't exist */
+ if (active_links & ~sdata->vif.valid_links)
+ return;
+
+ /* nothing to do */
+ if (sdata->vif.active_links == active_links)
+ return;
+
+ sdata->desired_active_links = active_links;
+ schedule_work(&sdata->activate_links_work);
+}
+EXPORT_SYMBOL_GPL(ieee80211_set_active_links_async);
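The work handler behind schedule_work() is not in this hunk; presumably it is little more than the following (field names taken from the code above, handler name assumed):

static void ieee80211_activate_links_work(struct work_struct *work)
{
	struct ieee80211_sub_if_data *sdata =
		container_of(work, struct ieee80211_sub_if_data,
			     activate_links_work);

	ieee80211_set_active_links(&sdata->vif,
				   sdata->desired_active_links);
}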
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 5311c3cd3050..02b5abc7326b 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*/
#include <net/mac80211.h>
@@ -147,7 +147,7 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
rcu_read_lock();
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- if (!rcu_access_pointer(sdata->vif.chanctx_conf))
+ if (!rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf))
continue;
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
continue;
@@ -175,7 +175,8 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
changed |= ieee80211_hw_conf_chan(local);
else
changed &= ~(IEEE80211_CONF_CHANGE_CHANNEL |
- IEEE80211_CONF_CHANGE_POWER);
+ IEEE80211_CONF_CHANGE_POWER |
+ IEEE80211_CONF_CHANGE_SMPS);
if (changed && local->open_count) {
ret = drv_config(local, changed);
@@ -199,15 +200,94 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
return ret;
}
+#define BSS_CHANGED_VIF_CFG_FLAGS (BSS_CHANGED_ASSOC |\
+ BSS_CHANGED_IDLE |\
+ BSS_CHANGED_PS |\
+ BSS_CHANGED_IBSS |\
+ BSS_CHANGED_ARP_FILTER |\
+ BSS_CHANGED_SSID)
+
void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
- u32 changed)
+ u64 changed)
+{
+ struct ieee80211_local *local = sdata->local;
+
+ might_sleep();
+
+ if (!changed || sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ return;
+
+ if (WARN_ON_ONCE(changed & (BSS_CHANGED_BEACON |
+ BSS_CHANGED_BEACON_ENABLED) &&
+ sdata->vif.type != NL80211_IFTYPE_AP &&
+ sdata->vif.type != NL80211_IFTYPE_ADHOC &&
+ sdata->vif.type != NL80211_IFTYPE_MESH_POINT &&
+ sdata->vif.type != NL80211_IFTYPE_OCB))
+ return;
+
+ if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE ||
+ sdata->vif.type == NL80211_IFTYPE_NAN ||
+ (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+ !sdata->vif.bss_conf.mu_mimo_owner &&
+ !(changed & BSS_CHANGED_TXPOWER))))
+ return;
+
+ if (!check_sdata_in_driver(sdata))
+ return;
+
+ if (changed & BSS_CHANGED_VIF_CFG_FLAGS) {
+ u64 ch = changed & BSS_CHANGED_VIF_CFG_FLAGS;
+
+ trace_drv_vif_cfg_changed(local, sdata, changed);
+ if (local->ops->vif_cfg_changed)
+ local->ops->vif_cfg_changed(&local->hw, &sdata->vif, ch);
+ }
+
+ if (changed & ~BSS_CHANGED_VIF_CFG_FLAGS) {
+ u64 ch = changed & ~BSS_CHANGED_VIF_CFG_FLAGS;
+
+ /* FIXME: should be for each link */
+ trace_drv_link_info_changed(local, sdata, &sdata->vif.bss_conf,
+ changed);
+ if (local->ops->link_info_changed)
+ local->ops->link_info_changed(&local->hw, &sdata->vif,
+ &sdata->vif.bss_conf, ch);
+ }
+
+ if (local->ops->bss_info_changed)
+ local->ops->bss_info_changed(&local->hw, &sdata->vif,
+ &sdata->vif.bss_conf, changed);
+ trace_drv_return_void(local);
+}
+
+void ieee80211_vif_cfg_change_notify(struct ieee80211_sub_if_data *sdata,
+ u64 changed)
+{
+ struct ieee80211_local *local = sdata->local;
+
+ WARN_ON_ONCE(changed & ~BSS_CHANGED_VIF_CFG_FLAGS);
+
+ if (!changed || sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ return;
+
+ drv_vif_cfg_changed(local, sdata, changed);
+}
+
+void ieee80211_link_info_change_notify(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
+ u64 changed)
{
struct ieee80211_local *local = sdata->local;
+ WARN_ON_ONCE(changed & BSS_CHANGED_VIF_CFG_FLAGS);
+
if (!changed || sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
return;
- drv_bss_info_changed(local, sdata, &sdata->vif.bss_conf, changed);
+ if (!check_sdata_in_driver(sdata))
+ return;
+
+ drv_link_info_changed(local, sdata, link->conf, link->link_id, changed);
}
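With bss_info_changed() split as above, a driver implements the new pair instead; a hedged driver-side sketch (the driver function names are hypothetical, the op signatures follow the calls above):

static void example_vif_cfg_changed(struct ieee80211_hw *hw,
				    struct ieee80211_vif *vif,
				    u64 changed)
{
	if (changed & BSS_CHANGED_ASSOC) {
		/* push vif-global association state to the device */
	}
}

static void example_link_info_changed(struct ieee80211_hw *hw,
				      struct ieee80211_vif *vif,
				      struct ieee80211_bss_conf *info,
				      u64 changed)
{
	if (changed & BSS_CHANGED_ERP_CTS_PROT) {
		/* per-link ERP update; the link is identified by
		 * info->link_id
		 */
	}
}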
u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata)
@@ -284,11 +364,11 @@ static void ieee80211_restart_work(struct work_struct *work)
* Then we can have a race...
*/
cancel_work_sync(&sdata->u.mgd.csa_connection_drop_work);
- if (sdata->vif.csa_active) {
+ if (sdata->vif.bss_conf.csa_active) {
sdata_lock(sdata);
ieee80211_sta_connection_lost(sdata,
- sdata->u.mgd.associated->bssid,
- WLAN_REASON_UNSPECIFIED, false);
+ WLAN_REASON_UNSPECIFIED,
+ false);
sdata_unlock(sdata);
}
}
@@ -349,7 +429,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
struct wireless_dev *wdev = ndev->ieee80211_ptr;
struct in_device *idev;
struct ieee80211_sub_if_data *sdata;
- struct ieee80211_bss_conf *bss_conf;
+ struct ieee80211_vif_cfg *vif_cfg;
struct ieee80211_if_managed *ifmgd;
int c = 0;
@@ -361,7 +441,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
return NOTIFY_DONE;
sdata = IEEE80211_DEV_TO_SUB_IF(ndev);
- bss_conf = &sdata->vif.bss_conf;
+ vif_cfg = &sdata->vif.cfg;
/* ARP filtering is only supported in managed mode */
if (sdata->vif.type != NL80211_IFTYPE_STATION)
@@ -374,21 +454,20 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
ifmgd = &sdata->u.mgd;
sdata_lock(sdata);
- /* Copy the addresses to the bss_conf list */
+ /* Copy the addresses to the vif config list */
ifa = rtnl_dereference(idev->ifa_list);
while (ifa) {
if (c < IEEE80211_BSS_ARP_ADDR_LIST_LEN)
- bss_conf->arp_addr_list[c] = ifa->ifa_address;
+ vif_cfg->arp_addr_list[c] = ifa->ifa_address;
ifa = rtnl_dereference(ifa->ifa_next);
c++;
}
- bss_conf->arp_addr_cnt = c;
+ vif_cfg->arp_addr_cnt = c;
/* Configure driver only if associated (which also implies it is up) */
if (ifmgd->associated)
- ieee80211_bss_info_change_notify(sdata,
- BSS_CHANGED_ARP_FILTER);
+ ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_ARP_FILTER);
sdata_unlock(sdata);
@@ -557,6 +636,10 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
if (WARN_ON(ops->sta_state && (ops->sta_add || ops->sta_remove)))
return NULL;
+ if (WARN_ON(!!ops->link_info_changed != !!ops->vif_cfg_changed ||
+ (ops->link_info_changed && ops->bss_info_changed)))
+ return NULL;
+
/* check all or no channel context operations exist */
i = !!ops->add_chanctx + !!ops->remove_chanctx +
!!ops->change_chanctx + !!ops->assign_vif_chanctx +
@@ -616,6 +699,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS);
wiphy_ext_feature_set(wiphy,
NL80211_EXT_FEATURE_SCAN_FREQ_KHZ);
+ wiphy_ext_feature_set(wiphy,
+ NL80211_EXT_FEATURE_POWERED_ADDR_CHANGE);
if (!ops->hw_scan) {
wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
@@ -707,14 +792,12 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
spin_lock_init(&local->queue_stop_reason_lock);
for (i = 0; i < IEEE80211_NUM_ACS; i++) {
- struct airtime_sched_info *air_sched = &local->airtime[i];
-
- air_sched->active_txqs = RB_ROOT_CACHED;
- INIT_LIST_HEAD(&air_sched->active_list);
- spin_lock_init(&air_sched->lock);
- air_sched->aql_txq_limit_low = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L;
- air_sched->aql_txq_limit_high =
+ INIT_LIST_HEAD(&local->active_txqs[i]);
+ spin_lock_init(&local->active_txq_lock[i]);
+ local->aql_txq_limit_low[i] = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L;
+ local->aql_txq_limit_high[i] =
IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H;
+ atomic_set(&local->aql_ac_pending_airtime[i], 0);
}
local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
@@ -778,7 +861,7 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
{
bool have_wep = !fips_enabled; /* FIPS does not permit the use of RC4 */
bool have_mfp = ieee80211_hw_check(&local->hw, MFP_CAPABLE);
- int n_suites = 0, r = 0, w = 0;
+ int r = 0, w = 0;
u32 *suites;
static const u32 cipher_suites[] = {
/* keep WEP first, it may be removed below */
@@ -824,10 +907,9 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
continue;
suites[w++] = suite;
}
- } else if (!local->hw.cipher_schemes) {
- /* If the driver doesn't have cipher schemes, there's nothing
- * else to do other than assign the (software supported and
- * perhaps offloaded) cipher suites.
+ } else {
+ /* assign the (software supported and perhaps offloaded)
+ * cipher suites
*/
local->hw.wiphy->cipher_suites = cipher_suites;
local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites);
@@ -842,58 +924,6 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
/* not dynamically allocated, so just return */
return 0;
- } else {
- const struct ieee80211_cipher_scheme *cs;
-
- cs = local->hw.cipher_schemes;
-
- /* Driver specifies cipher schemes only (but not cipher suites
- * including the schemes)
- *
- * We start counting ciphers defined by schemes, TKIP, CCMP,
- * CCMP-256, GCMP, and GCMP-256
- */
- n_suites = local->hw.n_cipher_schemes + 5;
-
- /* check if we have WEP40 and WEP104 */
- if (have_wep)
- n_suites += 2;
-
- /* check if we have AES_CMAC, BIP-CMAC-256, BIP-GMAC-128,
- * BIP-GMAC-256
- */
- if (have_mfp)
- n_suites += 4;
-
- suites = kmalloc_array(n_suites, sizeof(u32), GFP_KERNEL);
- if (!suites)
- return -ENOMEM;
-
- suites[w++] = WLAN_CIPHER_SUITE_CCMP;
- suites[w++] = WLAN_CIPHER_SUITE_CCMP_256;
- suites[w++] = WLAN_CIPHER_SUITE_TKIP;
- suites[w++] = WLAN_CIPHER_SUITE_GCMP;
- suites[w++] = WLAN_CIPHER_SUITE_GCMP_256;
-
- if (have_wep) {
- suites[w++] = WLAN_CIPHER_SUITE_WEP40;
- suites[w++] = WLAN_CIPHER_SUITE_WEP104;
- }
-
- if (have_mfp) {
- suites[w++] = WLAN_CIPHER_SUITE_AES_CMAC;
- suites[w++] = WLAN_CIPHER_SUITE_BIP_CMAC_256;
- suites[w++] = WLAN_CIPHER_SUITE_BIP_GMAC_128;
- suites[w++] = WLAN_CIPHER_SUITE_BIP_GMAC_256;
- }
-
- for (r = 0; r < local->hw.n_cipher_schemes; r++) {
- suites[w++] = cs[r].cipher;
- if (WARN_ON(cs[r].pn_len > IEEE80211_MAX_PN_LEN)) {
- kfree(suites);
- return -EINVAL;
- }
- }
}
local->hw.wiphy->cipher_suites = suites;
@@ -909,7 +939,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
int result, i;
enum nl80211_band band;
int channels, max_bitrates;
- bool supp_ht, supp_vht, supp_he;
+ bool supp_ht, supp_vht, supp_he, supp_eht;
struct cfg80211_chan_def dflt_chandef = {};
if (ieee80211_hw_check(hw, QUEUE_CONTROL) &&
@@ -932,6 +962,52 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
(!local->ops->start_nan || !local->ops->stop_nan)))
return -EINVAL;
+ if (hw->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO) {
+ /*
+ * For drivers capable of doing MLO, assume modern driver
+ * or firmware facilities, so software doesn't have to do
+ * as much, e.g. monitoring beacons would be hard if we
+ * might not even know which link is active at which time.
+ */
+ if (WARN_ON(!local->use_chanctx))
+ return -EINVAL;
+
+ if (WARN_ON(!local->ops->link_info_changed))
+ return -EINVAL;
+
+ if (WARN_ON(!ieee80211_hw_check(hw, HAS_RATE_CONTROL)))
+ return -EINVAL;
+
+ if (WARN_ON(!ieee80211_hw_check(hw, AMPDU_AGGREGATION)))
+ return -EINVAL;
+
+ if (WARN_ON(ieee80211_hw_check(hw, HOST_BROADCAST_PS_BUFFERING)))
+ return -EINVAL;
+
+ if (WARN_ON(ieee80211_hw_check(hw, SUPPORTS_PS) &&
+ (!ieee80211_hw_check(hw, SUPPORTS_DYNAMIC_PS) ||
+ ieee80211_hw_check(hw, PS_NULLFUNC_STACK))))
+ return -EINVAL;
+
+ if (WARN_ON(!ieee80211_hw_check(hw, MFP_CAPABLE)))
+ return -EINVAL;
+
+ if (WARN_ON(!ieee80211_hw_check(hw, CONNECTION_MONITOR)))
+ return -EINVAL;
+
+ if (WARN_ON(ieee80211_hw_check(hw, NEED_DTIM_BEFORE_ASSOC)))
+ return -EINVAL;
+
+ if (WARN_ON(ieee80211_hw_check(hw, TIMING_BEACON_ONLY)))
+ return -EINVAL;
+
+ if (WARN_ON(!ieee80211_hw_check(hw, AP_LINK_PS)))
+ return -EINVAL;
+
+ if (WARN_ON(ieee80211_hw_check(hw, DEAUTH_NEED_MGD_TX_PREP)))
+ return -EINVAL;
+ }
+
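An illustrative checklist of what an MLO-capable driver has to advertise to pass the checks above (the flag names are the real mac80211/cfg80211 ones, the setup context is hypothetical):

	hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_MLO;
	ieee80211_hw_set(hw, HAS_RATE_CONTROL);
	ieee80211_hw_set(hw, AMPDU_AGGREGATION);
	ieee80211_hw_set(hw, MFP_CAPABLE);
	ieee80211_hw_set(hw, CONNECTION_MONITOR);
	ieee80211_hw_set(hw, AP_LINK_PS);
	/* plus channel-context ops (use_chanctx) and the
	 * vif_cfg_changed()/link_info_changed() pair instead of
	 * bss_info_changed()
	 */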
#ifdef CONFIG_PM
if (hw->wiphy->wowlan && (!local->ops->suspend || !local->ops->resume))
return -EINVAL;
@@ -978,6 +1054,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
supp_ht = false;
supp_vht = false;
supp_he = false;
+ supp_eht = false;
for (band = 0; band < NUM_NL80211_BANDS; band++) {
struct ieee80211_supported_band *sband;
@@ -1021,6 +1098,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
iftd = &sband->iftype_data[i];
supp_he = supp_he || iftd->he_cap.has_he;
+ supp_eht = supp_eht || iftd->eht_cap.has_eht;
}
/* HT, VHT, HE require QoS, thus >= 4 queues */
@@ -1028,6 +1106,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
(supp_ht || supp_vht || supp_he)))
return -EINVAL;
+ /* EHT requires HE support */
+ if (WARN_ON(supp_eht && !supp_he))
+ return -EINVAL;
+
if (!sband->ht_cap.ht_supported)
continue;
@@ -1138,6 +1220,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
3 + sizeof(struct ieee80211_he_cap_elem) +
sizeof(struct ieee80211_he_mcs_nss_supp) +
IEEE80211_HE_PPE_THRES_MAX_LEN;
+
+ if (supp_eht)
+ local->scan_ies_len +=
+ 3 + sizeof(struct ieee80211_eht_cap_elem) +
+ sizeof(struct ieee80211_eht_mcs_nss_supp) +
+ IEEE80211_EHT_PPE_THRES_MAX_LEN;
}
if (!local->ops->hw_scan) {
@@ -1156,12 +1244,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
if (local->hw.wiphy->max_scan_ie_len)
local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len;
- if (WARN_ON(!ieee80211_cs_list_valid(local->hw.cipher_schemes,
- local->hw.n_cipher_schemes))) {
- result = -EINVAL;
- goto fail_workqueue;
- }
-
result = ieee80211_init_cipher_suites(local);
if (result < 0)
goto fail_workqueue;
@@ -1357,8 +1439,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
ieee80211_led_exit(local);
destroy_workqueue(local->workqueue);
fail_workqueue:
- if (local->wiphy_ciphers_allocated)
+ if (local->wiphy_ciphers_allocated) {
kfree(local->hw.wiphy->cipher_suites);
+ local->wiphy_ciphers_allocated = false;
+ }
kfree(local->int_scan_req);
return result;
}
@@ -1426,8 +1510,10 @@ void ieee80211_free_hw(struct ieee80211_hw *hw)
mutex_destroy(&local->iflist_mtx);
mutex_destroy(&local->mtx);
- if (local->wiphy_ciphers_allocated)
+ if (local->wiphy_ciphers_allocated) {
kfree(local->hw.wiphy->cipher_suites);
+ local->wiphy_ciphers_allocated = false;
+ }
idr_for_each(&local->ack_status_frames,
ieee80211_free_ack_frame, NULL);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 15ac08d111ea..5a99b8f6e465 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2008, 2009 open80211s Ltd.
- * Copyright (C) 2018 - 2020 Intel Corporation
+ * Copyright (C) 2018 - 2022 Intel Corporation
* Authors: Luis Carlos Cobo <luisca@cozybit.com>
* Javier Cardona <javier@cozybit.com>
*/
@@ -104,7 +104,8 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info,
ie->vht_operation, ie->ht_operation,
&sta_chan_def);
- ieee80211_chandef_he_6ghz_oper(sdata, ie->he_operation, &sta_chan_def);
+ ieee80211_chandef_he_6ghz_oper(sdata, ie->he_operation, NULL,
+ &sta_chan_def);
if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef,
&sta_chan_def))
@@ -398,7 +399,7 @@ static int mesh_add_ds_params_ie(struct ieee80211_sub_if_data *sdata,
return -ENOMEM;
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
if (WARN_ON(!chanctx_conf)) {
rcu_read_unlock();
return -EINVAL;
@@ -454,7 +455,7 @@ int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata,
u8 *pos;
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
if (WARN_ON(!chanctx_conf)) {
rcu_read_unlock();
return -EINVAL;
@@ -526,7 +527,7 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata,
u8 *pos;
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
if (WARN_ON(!chanctx_conf)) {
rcu_read_unlock();
return -EINVAL;
@@ -580,7 +581,7 @@ int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata,
return -ENOMEM;
pos = skb_put(skb, ie_len);
- ieee80211_ie_build_he_cap(pos, he_cap, pos + ie_len);
+ ieee80211_ie_build_he_cap(0, pos, he_cap, pos + ie_len);
return 0;
}
@@ -633,7 +634,7 @@ int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata,
if (!iftd)
return 0;
- ieee80211_ie_build_he_6ghz_cap(sdata, skb);
+ ieee80211_ie_build_he_6ghz_cap(sdata, sdata->deflink.smps_mode, skb);
return 0;
}
@@ -819,7 +820,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh);
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
band = chanctx_conf->def.chan->band;
rcu_read_unlock();
@@ -852,7 +853,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
bcn = kzalloc(sizeof(*bcn) + head_len + tail_len, GFP_KERNEL);
/* need an skb for IE builders to operate on */
- skb = dev_alloc_skb(max(head_len, tail_len));
+ skb = __dev_alloc_skb(max(head_len, tail_len), GFP_KERNEL);
if (!bcn || !skb)
goto out_free;
@@ -992,8 +993,7 @@ ieee80211_mesh_rebuild_beacon(struct ieee80211_sub_if_data *sdata)
struct beacon_data *old_bcn;
int ret;
- old_bcn = rcu_dereference_protected(sdata->u.mesh.beacon,
- lockdep_is_held(&sdata->wdev.mtx));
+ old_bcn = sdata_dereference(sdata->u.mesh.beacon, sdata);
ret = ieee80211_mesh_build_beacon(&sdata->u.mesh);
if (ret)
/* just reuse old beacon */
@@ -1056,7 +1056,7 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
}
ieee80211_recalc_dtim(local, sdata);
- ieee80211_bss_info_change_notify(sdata, changed);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed);
netif_carrier_on(sdata->dev);
return 0;
@@ -1080,11 +1080,11 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
sdata->vif.bss_conf.enable_beacon = false;
sdata->beacon_rate_set = false;
clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ BSS_CHANGED_BEACON_ENABLED);
/* remove beacon */
- bcn = rcu_dereference_protected(ifmsh->beacon,
- lockdep_is_held(&sdata->wdev.mtx));
+ bcn = sdata_dereference(ifmsh->beacon, sdata);
RCU_INIT_POINTER(ifmsh->beacon, NULL);
kfree_rcu(bcn, rcu_head);
@@ -1129,7 +1129,8 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
struct ieee80211_supported_band *sband;
int err;
- u32 sta_flags, vht_cap_info = 0;
+ ieee80211_conn_flags_t conn_flags = 0;
+ u32 vht_cap_info = 0;
sdata_assert_lock(sdata);
@@ -1137,16 +1138,15 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
if (!sband)
return false;
- sta_flags = 0;
switch (sdata->vif.bss_conf.chandef.width) {
case NL80211_CHAN_WIDTH_20_NOHT:
- sta_flags |= IEEE80211_STA_DISABLE_HT;
+ conn_flags |= IEEE80211_CONN_DISABLE_HT;
fallthrough;
case NL80211_CHAN_WIDTH_20:
- sta_flags |= IEEE80211_STA_DISABLE_40MHZ;
+ conn_flags |= IEEE80211_CONN_DISABLE_40MHZ;
fallthrough;
case NL80211_CHAN_WIDTH_40:
- sta_flags |= IEEE80211_STA_DISABLE_VHT;
+ conn_flags |= IEEE80211_CONN_DISABLE_VHT;
break;
default:
break;
@@ -1159,7 +1159,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
memset(&params, 0, sizeof(params));
err = ieee80211_parse_ch_switch_ie(sdata, elems, sband->band,
vht_cap_info,
- sta_flags, sdata->vif.addr,
+ conn_flags, sdata->vif.addr,
&csa_ie);
if (err < 0)
return false;
@@ -1256,8 +1256,7 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata,
if (baselen > len)
return;
- elems = ieee802_11_parse_elems(pos, len - baselen, false, mgmt->bssid,
- NULL);
+ elems = ieee802_11_parse_elems(pos, len - baselen, false, NULL);
if (!elems)
return;
@@ -1326,7 +1325,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
elems = ieee802_11_parse_elems(mgmt->u.probe_resp.variable,
len - baselen,
- false, mgmt->bssid, NULL);
+ false, NULL);
if (!elems)
return;
@@ -1356,7 +1355,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
rx_status);
if (ifmsh->csa_role != IEEE80211_MESH_CSA_ROLE_INIT &&
- !sdata->vif.csa_active)
+ !sdata->vif.bss_conf.csa_active)
ieee80211_mesh_process_chnswitch(sdata, elems, true);
}
@@ -1379,8 +1378,7 @@ int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata)
ifmsh->chsw_ttl = 0;
/* Remove the CSA and MCSP elements from the beacon */
- tmp_csa_settings = rcu_dereference_protected(ifmsh->csa,
- lockdep_is_held(&sdata->wdev.mtx));
+ tmp_csa_settings = sdata_dereference(ifmsh->csa, sdata);
RCU_INIT_POINTER(ifmsh->csa, NULL);
if (tmp_csa_settings)
kfree_rcu(tmp_csa_settings, rcu_head);
@@ -1469,8 +1467,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata,
pos = mgmt->u.action.u.chan_switch.variable;
baselen = offsetof(struct ieee80211_mgmt,
u.action.u.chan_switch.variable);
- elems = ieee802_11_parse_elems(pos, len - baselen, true,
- mgmt->bssid, NULL);
+ elems = ieee802_11_parse_elems(pos, len - baselen, true, NULL);
if (!elems)
return;
@@ -1487,7 +1484,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata,
ifmsh->pre_value = pre_value;
- if (!sdata->vif.csa_active &&
+ if (!sdata->vif.bss_conf.csa_active &&
!ieee80211_mesh_process_chnswitch(sdata, elems, false)) {
mcsa_dbg(sdata, "Failed to process CSA action frame");
goto free;
@@ -1580,7 +1577,7 @@ static void mesh_bss_info_changed(struct ieee80211_sub_if_data *sdata)
if (ieee80211_mesh_rebuild_beacon(sdata))
return;
- ieee80211_bss_info_change_notify(sdata, changed);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed);
}
void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 44a6fdb6efbd..9b1ce7c3925a 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2008, 2009 open80211s Ltd.
- * Copyright (C) 2019, 2021 Intel Corporation
+ * Copyright (C) 2019, 2021-2022 Intel Corporation
* Author: Luis Carlos Cobo <luisca@cozybit.com>
*/
@@ -247,13 +247,13 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata,
return -EAGAIN;
skb = dev_alloc_skb(local->tx_headroom +
- sdata->encrypt_headroom +
+ IEEE80211_ENCRYPT_HEADROOM +
IEEE80211_ENCRYPT_TAILROOM +
hdr_len +
2 + 15 /* PERR IE */);
if (!skb)
return -1;
- skb_reserve(skb, local->tx_headroom + sdata->encrypt_headroom);
+ skb_reserve(skb, local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM);
mgmt = skb_put_zero(skb, hdr_len);
mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
IEEE80211_STYPE_ACTION);
@@ -310,7 +310,12 @@ void ieee80211s_update_metric(struct ieee80211_local *local,
LINK_FAIL_THRESH)
mesh_plink_broken(sta);
- sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo);
+ /* use rate info set by the driver directly if present */
+ if (st->n_rates)
+ rinfo = sta->deflink.tx_stats.last_rate_info;
+ else
+ sta_set_rate_info_tx(sta, &sta->deflink.tx_stats.last_rate, &rinfo);
+
ewma_mesh_tx_rate_avg_add(&sta->mesh->tx_rate_avg,
cfg80211_calculate_bitrate(&rinfo));
}
@@ -927,7 +932,7 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt;
elems = ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable,
- len - baselen, false, mgmt->bssid, NULL);
+ len - baselen, false, NULL);
if (!elems)
return;
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index acc1c299f1ae..69d5e1ec6ede 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -710,7 +710,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath)
void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
- kfree_skb(skb);
+ ieee80211_free_txskb(&sdata->local->hw, skb);
sdata->u.mesh.mshstats.dropped_frames_no_route++;
}
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index a829470dd59e..ddfe5102b9a4 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2008, 2009 open80211s Ltd.
- * Copyright (C) 2019, 2021 Intel Corporation
+ * Copyright (C) 2019, 2021-2022 Intel Corporation
* Author: Luis Carlos Cobo <luisca@cozybit.com>
*/
#include <linux/gfp.h>
@@ -61,8 +61,8 @@ static bool rssi_threshold_check(struct ieee80211_sub_if_data *sdata,
s32 rssi_threshold = sdata->u.mesh.mshcfg.rssi_threshold;
return rssi_threshold == 0 ||
(sta &&
- (s8)-ewma_signal_read(&sta->rx_stats_avg.signal) >
- rssi_threshold);
+ (s8)-ewma_signal_read(&sta->deflink.rx_stats_avg.signal) >
+ rssi_threshold);
}
/**
@@ -125,7 +125,7 @@ static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata)
continue;
short_slot = false;
- if (erp_rates & sta->sta.supp_rates[sband->band])
+ if (erp_rates & sta->sta.deflink.supp_rates[sband->band])
short_slot = true;
else
break;
@@ -175,10 +175,10 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata)
sta->mesh->plink_state != NL80211_PLINK_ESTAB)
continue;
- if (sta->sta.bandwidth > IEEE80211_STA_RX_BW_20)
+ if (sta->sta.deflink.bandwidth > IEEE80211_STA_RX_BW_20)
continue;
- if (!sta->sta.ht_cap.ht_supported) {
+ if (!sta->sta.deflink.ht_cap.ht_supported) {
mpl_dbg(sdata, "nonHT sta (%pM) is present\n",
sta->sta.addr);
non_ht_sta = true;
@@ -415,7 +415,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
u32 rates, basic_rates = 0, changed = 0;
- enum ieee80211_sta_rx_bandwidth bw = sta->sta.bandwidth;
+ enum ieee80211_sta_rx_bandwidth bw = sta->sta.deflink.bandwidth;
sband = ieee80211_get_sband(sdata);
if (!sband)
@@ -425,7 +425,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
&basic_rates);
spin_lock_bh(&sta->mesh->plink_lock);
- sta->rx_stats.last_rx = jiffies;
+ sta->deflink.rx_stats.last_rx = jiffies;
/* rates and capabilities don't change during peering */
if (sta->mesh->plink_state == NL80211_PLINK_ESTAB &&
@@ -433,38 +433,40 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
goto out;
sta->mesh->processed_beacon = true;
- if (sta->sta.supp_rates[sband->band] != rates)
+ if (sta->sta.deflink.supp_rates[sband->band] != rates)
changed |= IEEE80211_RC_SUPP_RATES_CHANGED;
- sta->sta.supp_rates[sband->band] = rates;
+ sta->sta.deflink.supp_rates[sband->band] = rates;
if (ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
- elems->ht_cap_elem, sta))
+ elems->ht_cap_elem,
+ &sta->deflink))
changed |= IEEE80211_RC_BW_CHANGED;
ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
- elems->vht_cap_elem, sta);
+ elems->vht_cap_elem,
+ &sta->deflink);
ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, elems->he_cap,
elems->he_cap_len,
elems->he_6ghz_capa,
- sta);
+ &sta->deflink);
- if (bw != sta->sta.bandwidth)
+ if (bw != sta->sta.deflink.bandwidth)
changed |= IEEE80211_RC_BW_CHANGED;
/* HT peer is operating 20MHz-only */
if (elems->ht_operation &&
!(elems->ht_operation->ht_param &
IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)) {
- if (sta->sta.bandwidth != IEEE80211_STA_RX_BW_20)
+ if (sta->sta.deflink.bandwidth != IEEE80211_STA_RX_BW_20)
changed |= IEEE80211_RC_BW_CHANGED;
- sta->sta.bandwidth = IEEE80211_STA_RX_BW_20;
+ sta->sta.deflink.bandwidth = IEEE80211_STA_RX_BW_20;
}
if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
rate_control_rate_init(sta);
else
- rate_control_rate_update(local, sband, sta, changed);
+ rate_control_rate_update(local, sband, sta, 0, changed);
out:
spin_unlock_bh(&sta->mesh->plink_lock);
}
@@ -475,8 +477,7 @@ static int mesh_allocate_aid(struct ieee80211_sub_if_data *sdata)
unsigned long *aid_map;
int aid;
- aid_map = kcalloc(BITS_TO_LONGS(IEEE80211_MAX_AID + 1),
- sizeof(*aid_map), GFP_KERNEL);
+ aid_map = bitmap_zalloc(IEEE80211_MAX_AID + 1, GFP_KERNEL);
if (!aid_map)
return -ENOMEM;
@@ -489,7 +490,7 @@ static int mesh_allocate_aid(struct ieee80211_sub_if_data *sdata)
rcu_read_unlock();
aid = find_first_zero_bit(aid_map, IEEE80211_MAX_AID + 1);
- kfree(aid_map);
+ bitmap_free(aid_map);
if (aid > IEEE80211_MAX_AID)
return -ENOBUFS;
@@ -1228,8 +1229,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
if (baselen > len)
return;
}
- elems = ieee802_11_parse_elems(baseaddr, len - baselen, true,
- mgmt->bssid, NULL);
+ elems = ieee802_11_parse_elems(baseaddr, len - baselen, true, NULL);
mesh_process_plink_frame(sdata, mgmt, elems, rx_status);
kfree(elems);
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 1eeabdf10052..d8484cd870de 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -8,7 +8,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2021 Intel Corporation
+ * Copyright (C) 2018 - 2022 Intel Corporation
*/
#include <linux/delay.h>
@@ -37,6 +37,7 @@
#define IEEE80211_AUTH_TIMEOUT_SAE (HZ * 2)
#define IEEE80211_AUTH_MAX_TRIES 3
#define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5)
+#define IEEE80211_AUTH_WAIT_SAE_RETRY (HZ * 2)
#define IEEE80211_ASSOC_TIMEOUT (HZ / 5)
#define IEEE80211_ASSOC_TIMEOUT_LONG (HZ / 2)
#define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10)
@@ -141,21 +142,24 @@ static int ecw2cw(int ecw)
return (1 << ecw) - 1;
}
-static u32
+static ieee80211_conn_flags_t
ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
+ ieee80211_conn_flags_t conn_flags,
struct ieee80211_supported_band *sband,
struct ieee80211_channel *channel,
u32 vht_cap_info,
const struct ieee80211_ht_operation *ht_oper,
const struct ieee80211_vht_operation *vht_oper,
const struct ieee80211_he_operation *he_oper,
+ const struct ieee80211_eht_operation *eht_oper,
const struct ieee80211_s1g_oper_ie *s1g_oper,
struct cfg80211_chan_def *chandef, bool tracking)
{
- struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct cfg80211_chan_def vht_chandef;
struct ieee80211_sta_ht_cap sta_ht_cap;
- u32 ht_cfreq, ret;
+ ieee80211_conn_flags_t ret;
+ u32 ht_cfreq;
memset(chandef, 0, sizeof(struct cfg80211_chan_def));
chandef->chan = channel;
@@ -164,12 +168,14 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
chandef->freq1_offset = channel->freq_offset;
if (channel->band == NL80211_BAND_6GHZ) {
- if (!ieee80211_chandef_he_6ghz_oper(sdata, he_oper, chandef)) {
+ if (!ieee80211_chandef_he_6ghz_oper(sdata, he_oper, eht_oper,
+ chandef)) {
mlme_dbg(sdata,
- "bad 6 GHz operation, disabling HT/VHT/HE\n");
- ret = IEEE80211_STA_DISABLE_HT |
- IEEE80211_STA_DISABLE_VHT |
- IEEE80211_STA_DISABLE_HE;
+ "bad 6 GHz operation, disabling HT/VHT/HE/EHT\n");
+ ret = IEEE80211_CONN_DISABLE_HT |
+ IEEE80211_CONN_DISABLE_VHT |
+ IEEE80211_CONN_DISABLE_HE |
+ IEEE80211_CONN_DISABLE_EHT;
} else {
ret = 0;
}
@@ -182,10 +188,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
chandef->width = ieee80211_s1g_channel_width(channel);
}
- ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_40MHZ |
- IEEE80211_STA_DISABLE_VHT |
- IEEE80211_STA_DISABLE_80P80MHZ |
- IEEE80211_STA_DISABLE_160MHZ;
+ ret = IEEE80211_CONN_DISABLE_HT | IEEE80211_CONN_DISABLE_40MHZ |
+ IEEE80211_CONN_DISABLE_VHT |
+ IEEE80211_CONN_DISABLE_80P80MHZ |
+ IEEE80211_CONN_DISABLE_160MHZ;
goto out;
}
@@ -194,9 +200,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
if (!ht_oper || !sta_ht_cap.ht_supported) {
mlme_dbg(sdata, "HT operation missing / HT not supported\n");
- ret = IEEE80211_STA_DISABLE_HT |
- IEEE80211_STA_DISABLE_VHT |
- IEEE80211_STA_DISABLE_HE;
+ ret = IEEE80211_CONN_DISABLE_HT |
+ IEEE80211_CONN_DISABLE_VHT |
+ IEEE80211_CONN_DISABLE_HE |
+ IEEE80211_CONN_DISABLE_EHT;
goto out;
}
@@ -217,9 +224,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
"Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n",
channel->center_freq, ht_cfreq,
ht_oper->primary_chan, channel->band);
- ret = IEEE80211_STA_DISABLE_HT |
- IEEE80211_STA_DISABLE_VHT |
- IEEE80211_STA_DISABLE_HE;
+ ret = IEEE80211_CONN_DISABLE_HT |
+ IEEE80211_CONN_DISABLE_VHT |
+ IEEE80211_CONN_DISABLE_HE |
+ IEEE80211_CONN_DISABLE_EHT;
goto out;
}
@@ -229,20 +237,21 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
} else {
mlme_dbg(sdata, "40 MHz not supported\n");
/* 40 MHz (and 80 MHz) must be supported for VHT */
- ret = IEEE80211_STA_DISABLE_VHT;
+ ret = IEEE80211_CONN_DISABLE_VHT;
/* also mark 40 MHz disabled */
- ret |= IEEE80211_STA_DISABLE_40MHZ;
+ ret |= IEEE80211_CONN_DISABLE_40MHZ;
goto out;
}
if (!vht_oper || !sband->vht_cap.vht_supported) {
mlme_dbg(sdata, "VHT operation missing / VHT not supported\n");
- ret = IEEE80211_STA_DISABLE_VHT;
+ ret = IEEE80211_CONN_DISABLE_VHT;
goto out;
}
vht_chandef = *chandef;
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE) && he_oper &&
+ if (!(conn_flags & IEEE80211_CONN_DISABLE_HE) &&
+ he_oper &&
(le32_to_cpu(he_oper->he_oper_params) &
IEEE80211_HE_OPERATION_VHT_OPER_INFO)) {
struct ieee80211_vht_operation he_oper_vht_cap;
@@ -257,28 +266,28 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
if (!ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info,
&he_oper_vht_cap, ht_oper,
&vht_chandef)) {
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
+ if (!(conn_flags & IEEE80211_CONN_DISABLE_HE))
sdata_info(sdata,
"HE AP VHT information is invalid, disabling HE\n");
- ret = IEEE80211_STA_DISABLE_HE;
+ ret = IEEE80211_CONN_DISABLE_HE | IEEE80211_CONN_DISABLE_EHT;
goto out;
}
} else if (!ieee80211_chandef_vht_oper(&sdata->local->hw,
vht_cap_info,
vht_oper, ht_oper,
&vht_chandef)) {
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
+ if (!(conn_flags & IEEE80211_CONN_DISABLE_VHT))
sdata_info(sdata,
"AP VHT information is invalid, disabling VHT\n");
- ret = IEEE80211_STA_DISABLE_VHT;
+ ret = IEEE80211_CONN_DISABLE_VHT;
goto out;
}
if (!cfg80211_chandef_valid(&vht_chandef)) {
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
+ if (!(conn_flags & IEEE80211_CONN_DISABLE_VHT))
sdata_info(sdata,
"AP VHT information is invalid, disabling VHT\n");
- ret = IEEE80211_STA_DISABLE_VHT;
+ ret = IEEE80211_CONN_DISABLE_VHT;
goto out;
}
@@ -288,15 +297,47 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
}
if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) {
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
+ if (!(conn_flags & IEEE80211_CONN_DISABLE_VHT))
sdata_info(sdata,
"AP VHT information doesn't match HT, disabling VHT\n");
- ret = IEEE80211_STA_DISABLE_VHT;
+ ret = IEEE80211_CONN_DISABLE_VHT;
goto out;
}
*chandef = vht_chandef;
+ /*
+ * Handle the case where the EHT Operation element carries EHT operation
+ * information, i.e. when the channel width it announces differs from the
+ * width reported in HT/VHT/HE.
+ */
+ if (eht_oper && (eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) {
+ struct cfg80211_chan_def eht_chandef = *chandef;
+
+ ieee80211_chandef_eht_oper(eht_oper,
+ eht_chandef.width ==
+ NL80211_CHAN_WIDTH_160,
+ false, &eht_chandef);
+
+ if (!cfg80211_chandef_valid(&eht_chandef)) {
+ if (!(conn_flags & IEEE80211_CONN_DISABLE_EHT))
+ sdata_info(sdata,
+ "AP EHT information is invalid, disabling EHT\n");
+ ret = IEEE80211_CONN_DISABLE_EHT;
+ goto out;
+ }
+
+ if (!cfg80211_chandef_compatible(chandef, &eht_chandef)) {
+ if (!(conn_flags & IEEE80211_CONN_DISABLE_EHT))
+ sdata_info(sdata,
+ "AP EHT information is incompatible, disabling EHT\n");
+ ret = IEEE80211_CONN_DISABLE_EHT;
+ goto out;
+ }
+
+ *chandef = eht_chandef;
+ }
+
ret = 0;
out:
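
The new EHT block follows the same adopt-only-if-sane pattern already used for the VHT path above: derive a candidate chandef from the operation element, reject it if cfg80211 says it is invalid or incompatible with what HT/VHT/HE produced, and only then overwrite the working chandef. Roughly, with hypothetical reduced types (chandef_valid()/chandef_compatible() model the cfg80211 checks):

        #include <stdbool.h>

        struct chandef { int center_freq1; int width; };

        static bool chandef_valid(const struct chandef *c)
        {
                return c->width > 0;
        }

        static bool chandef_compatible(const struct chandef *a,
                                       const struct chandef *b)
        {
                /* "same primary" here; the real check is much richer */
                return a->center_freq1 == b->center_freq1;
        }

        /* returns 0 on success, nonzero "disable EHT" indication otherwise */
        static int maybe_adopt_eht(struct chandef *cur, struct chandef cand)
        {
                if (!chandef_valid(&cand) || !chandef_compatible(cur, &cand))
                        return 1; /* keep *cur untouched, fall back to HE */
                *cur = cand;      /* candidate survived both checks, adopt it */
                return 0;
        }

        int main(void)
        {
                struct chandef cur = { 5210, 80 }, cand = { 5250, 320 };

                return maybe_adopt_eht(&cur, cand); /* 1: incompatible here */
        }
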
@@ -319,11 +360,11 @@ out:
* less common and wouldn't completely prevent using the AP.
*/
if (tracking &&
- cfg80211_chandef_identical(chandef, &sdata->vif.bss_conf.chandef))
+ cfg80211_chandef_identical(chandef, &link->conf->chandef))
return ret;
/* don't print the message below for VHT mismatch if VHT is disabled */
- if (ret & IEEE80211_STA_DISABLE_VHT)
+ if (ret & IEEE80211_CONN_DISABLE_VHT)
vht_chandef = *chandef;
/*
@@ -338,9 +379,10 @@ out:
tracking ? 0 :
IEEE80211_CHAN_DISABLED)) {
if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) {
- ret = IEEE80211_STA_DISABLE_HT |
- IEEE80211_STA_DISABLE_VHT |
- IEEE80211_STA_DISABLE_HE;
+ ret = IEEE80211_CONN_DISABLE_HT |
+ IEEE80211_CONN_DISABLE_VHT |
+ IEEE80211_CONN_DISABLE_HE |
+ IEEE80211_CONN_DISABLE_EHT;
break;
}
@@ -349,7 +391,11 @@ out:
if (!he_oper || !cfg80211_chandef_usable(sdata->wdev.wiphy, chandef,
IEEE80211_CHAN_NO_HE))
- ret |= IEEE80211_STA_DISABLE_HE;
+ ret |= IEEE80211_CONN_DISABLE_HE | IEEE80211_CONN_DISABLE_EHT;
+
+ if (!eht_oper || !cfg80211_chandef_usable(sdata->wdev.wiphy, chandef,
+ IEEE80211_CHAN_NO_EHT))
+ ret |= IEEE80211_CONN_DISABLE_EHT;
if (chandef->width != vht_chandef.width && !tracking)
sdata_info(sdata,
@@ -359,61 +405,69 @@ out:
return ret;
}
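
Most of the churn in this hunk is the IEEE80211_STA_DISABLE_* -> IEEE80211_CONN_DISABLE_* rename: the flags move out of ifmgd->flags into a dedicated ieee80211_conn_flags_t so that sparse can flag accidental mixing with the remaining station flags. The typedef follows the kernel's usual __bitwise idiom, which a userspace model can mimic like this (conn_flags_t and the CONN_DISABLE_* names are illustrative; the kernel additionally needs __force casts under sparse):

        #include <stdio.h>

        #ifdef __CHECKER__
        #define __bitwise __attribute__((bitwise))
        #else
        #define __bitwise
        #endif

        /* under sparse this is a distinct type, not interchangeable with u32 */
        typedef unsigned int __bitwise conn_flags_t;

        #define CONN_DISABLE_HT   ((conn_flags_t)(1 << 0))
        #define CONN_DISABLE_VHT  ((conn_flags_t)(1 << 1))
        #define CONN_DISABLE_HE   ((conn_flags_t)(1 << 2))
        #define CONN_DISABLE_EHT  ((conn_flags_t)(1 << 3))

        int main(void)
        {
                conn_flags_t flags = CONN_DISABLE_HE | CONN_DISABLE_EHT;

                if (flags & CONN_DISABLE_EHT)
                        puts("EHT disabled for this connection");
                return 0;
        }
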
-static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta,
+static int ieee80211_config_bw(struct ieee80211_link_data *link,
const struct ieee80211_ht_cap *ht_cap,
const struct ieee80211_vht_cap *vht_cap,
const struct ieee80211_ht_operation *ht_oper,
const struct ieee80211_vht_operation *vht_oper,
const struct ieee80211_he_operation *he_oper,
+ const struct ieee80211_eht_operation *eht_oper,
const struct ieee80211_s1g_oper_ie *s1g_oper,
const u8 *bssid, u32 *changed)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- struct ieee80211_channel *chan = sdata->vif.bss_conf.chandef.chan;
+ struct ieee80211_channel *chan = link->conf->chandef.chan;
struct ieee80211_supported_band *sband =
local->hw.wiphy->bands[chan->band];
struct cfg80211_chan_def chandef;
u16 ht_opmode;
- u32 flags;
+ ieee80211_conn_flags_t flags;
u32 vht_cap_info = 0;
int ret;
/* if HT was/is disabled, don't track any bandwidth changes */
- if (ifmgd->flags & IEEE80211_STA_DISABLE_HT || !ht_oper)
+ if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT || !ht_oper)
return 0;
/* don't check VHT if we associated as non-VHT station */
- if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT)
+ if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)
vht_oper = NULL;
/* don't check HE if we associated as non-HE station */
- if (ifmgd->flags & IEEE80211_STA_DISABLE_HE ||
+ if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE ||
!ieee80211_get_he_iftype_cap(sband,
- ieee80211_vif_type_p2p(&sdata->vif)))
-
+ ieee80211_vif_type_p2p(&sdata->vif))) {
he_oper = NULL;
+ eht_oper = NULL;
+ }
- if (WARN_ON_ONCE(!sta))
- return -EINVAL;
+ /* don't check EHT if we associated as non-EHT station */
+ if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT ||
+ !ieee80211_get_eht_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif)))
+ eht_oper = NULL;
/*
* if bss configuration changed store the new one -
* this may be applicable even if channel is identical
*/
ht_opmode = le16_to_cpu(ht_oper->operation_mode);
- if (sdata->vif.bss_conf.ht_operation_mode != ht_opmode) {
+ if (link->conf->ht_operation_mode != ht_opmode) {
*changed |= BSS_CHANGED_HT;
- sdata->vif.bss_conf.ht_operation_mode = ht_opmode;
+ link->conf->ht_operation_mode = ht_opmode;
}
if (vht_cap)
vht_cap_info = le32_to_cpu(vht_cap->vht_cap_info);
/* calculate new channel (type) based on HT/VHT/HE operation IEs */
- flags = ieee80211_determine_chantype(sdata, sband, chan, vht_cap_info,
- ht_oper, vht_oper, he_oper,
+ flags = ieee80211_determine_chantype(sdata, link,
+ link->u.mgd.conn_flags,
+ sband, chan, vht_cap_info,
+ ht_oper, vht_oper,
+ he_oper, eht_oper,
s1g_oper, &chandef, true);
/*
@@ -423,46 +477,48 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
* reasons) then switching to a 40 MHz channel now won't do us
* any good -- we couldn't use it with the AP.
*/
- if (ifmgd->flags & IEEE80211_STA_DISABLE_80P80MHZ &&
+ if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_80P80MHZ &&
chandef.width == NL80211_CHAN_WIDTH_80P80)
flags |= ieee80211_chandef_downgrade(&chandef);
- if (ifmgd->flags & IEEE80211_STA_DISABLE_160MHZ &&
+ if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_160MHZ &&
chandef.width == NL80211_CHAN_WIDTH_160)
flags |= ieee80211_chandef_downgrade(&chandef);
- if (ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ &&
+ if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_40MHZ &&
chandef.width > NL80211_CHAN_WIDTH_20)
flags |= ieee80211_chandef_downgrade(&chandef);
- if (cfg80211_chandef_identical(&chandef, &sdata->vif.bss_conf.chandef))
+ if (cfg80211_chandef_identical(&chandef, &link->conf->chandef))
return 0;
- sdata_info(sdata,
- "AP %pM changed bandwidth, new config is %d.%03d MHz, "
- "width %d (%d.%03d/%d MHz)\n",
- ifmgd->bssid, chandef.chan->center_freq,
- chandef.chan->freq_offset, chandef.width,
- chandef.center_freq1, chandef.freq1_offset,
- chandef.center_freq2);
-
- if (flags != (ifmgd->flags & (IEEE80211_STA_DISABLE_HT |
- IEEE80211_STA_DISABLE_VHT |
- IEEE80211_STA_DISABLE_HE |
- IEEE80211_STA_DISABLE_40MHZ |
- IEEE80211_STA_DISABLE_80P80MHZ |
- IEEE80211_STA_DISABLE_160MHZ)) ||
+ link_info(link,
+ "AP %pM changed bandwidth, new config is %d.%03d MHz, width %d (%d.%03d/%d MHz)\n",
+ link->u.mgd.bssid, chandef.chan->center_freq,
+ chandef.chan->freq_offset, chandef.width,
+ chandef.center_freq1, chandef.freq1_offset,
+ chandef.center_freq2);
+
+ if (flags != (link->u.mgd.conn_flags &
+ (IEEE80211_CONN_DISABLE_HT |
+ IEEE80211_CONN_DISABLE_VHT |
+ IEEE80211_CONN_DISABLE_HE |
+ IEEE80211_CONN_DISABLE_EHT |
+ IEEE80211_CONN_DISABLE_40MHZ |
+ IEEE80211_CONN_DISABLE_80P80MHZ |
+ IEEE80211_CONN_DISABLE_160MHZ |
+ IEEE80211_CONN_DISABLE_320MHZ)) ||
!cfg80211_chandef_valid(&chandef)) {
sdata_info(sdata,
"AP %pM changed caps/bw in a way we can't support (0x%x/0x%x) - disconnect\n",
- ifmgd->bssid, flags, ifmgd->flags);
+ link->u.mgd.bssid, flags, ifmgd->flags);
return -EINVAL;
}
- ret = ieee80211_vif_change_bandwidth(sdata, &chandef, changed);
+ ret = ieee80211_link_change_bandwidth(link, &chandef, changed);
if (ret) {
sdata_info(sdata,
"AP %pM changed bandwidth to incompatible one - disconnect\n",
- ifmgd->bssid);
+ link->u.mgd.bssid);
return ret;
}
@@ -475,7 +531,8 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u8 ap_ht_param,
struct ieee80211_supported_band *sband,
struct ieee80211_channel *channel,
- enum ieee80211_smps_mode smps)
+ enum ieee80211_smps_mode smps,
+ ieee80211_conn_flags_t conn_flags)
{
u8 *pos;
u32 flags = channel->flags;
@@ -510,7 +567,7 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata,
* capable of 40 MHz -- some broken APs will never fall
* back to trying to transmit in 20 MHz.
*/
- if (sdata->u.mgd.flags & IEEE80211_STA_DISABLE_40MHZ) {
+ if (conn_flags & IEEE80211_CONN_DISABLE_40MHZ) {
cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
cap &= ~IEEE80211_HT_CAP_SGI_40;
}
@@ -543,18 +600,20 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata,
/* This function determines vht capability flags for the association
* and builds the IE.
- * Note - the function may set the owner of the MU-MIMO capability
+ * Note - the function returns true if this interface becomes the MU-MIMO owner
*/
-static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
+static bool ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb,
struct ieee80211_supported_band *sband,
- struct ieee80211_vht_cap *ap_vht_cap)
+ struct ieee80211_vht_cap *ap_vht_cap,
+ ieee80211_conn_flags_t conn_flags)
{
struct ieee80211_local *local = sdata->local;
u8 *pos;
u32 cap;
struct ieee80211_sta_vht_cap vht_cap;
u32 mask, ap_bf_sts, our_bf_sts;
+ bool mu_mimo_owner = false;
BUILD_BUG_ON(sizeof(vht_cap) != sizeof(sband->vht_cap));
@@ -564,7 +623,7 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
/* determine capability flags */
cap = vht_cap.cap;
- if (sdata->u.mgd.flags & IEEE80211_STA_DISABLE_80P80MHZ) {
+ if (conn_flags & IEEE80211_CONN_DISABLE_80P80MHZ) {
u32 bw = cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
cap &= ~IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
@@ -573,7 +632,7 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
cap |= IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ;
}
- if (sdata->u.mgd.flags & IEEE80211_STA_DISABLE_160MHZ) {
+ if (conn_flags & IEEE80211_CONN_DISABLE_160MHZ) {
cap &= ~IEEE80211_VHT_CAP_SHORT_GI_160;
cap &= ~IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
}
@@ -602,7 +661,7 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
struct ieee80211_sub_if_data *other;
list_for_each_entry_rcu(other, &local->interfaces, list) {
- if (other->vif.mu_mimo_owner) {
+ if (other->vif.bss_conf.mu_mimo_owner) {
disable_mu_mimo = true;
break;
}
@@ -610,7 +669,7 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
if (disable_mu_mimo)
cap &= ~IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE;
else
- sdata->vif.mu_mimo_owner = true;
+ mu_mimo_owner = true;
}
mask = IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK;
@@ -626,6 +685,8 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
/* reserve and fill IE */
pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2);
ieee80211_ie_build_vht_cap(pos, &vht_cap, cap);
+
+ return mu_mimo_owner;
}
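
ieee80211_add_vht_ie() used to write sdata->vif.mu_mimo_owner itself; returning the decision instead lets the caller store it on whichever link is being associated, or drop it entirely for per-STA profiles that have no link. The shape of that refactor, reduced to a sketch with hypothetical names:

        #include <stdbool.h>

        struct link_conf { bool mu_mimo_owner; };

        /* the builder only reports the decision ... */
        static bool build_vht_ie(void)
        {
                bool mu_mimo_owner = false;
                /* ... capability negotiation would set this ... */
                return mu_mimo_owner;
        }

        /* ... and only the caller, which knows which link is involved, stores it */
        static void assoc_link(struct link_conf *conf)
        {
                bool owner = build_vht_ie();

                if (conf) /* NULL for MLO per-STA profiles */
                        conf->mu_mimo_owner = owner;
        }

        int main(void)
        {
                struct link_conf c = { 0 };

                assoc_link(&c);
                return c.mu_mimo_owner;
        }
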
/* This function determines HE capability flags for the association
@@ -633,78 +694,78 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
*/
static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb,
- struct ieee80211_supported_band *sband)
+ struct ieee80211_supported_band *sband,
+ enum ieee80211_smps_mode smps_mode,
+ ieee80211_conn_flags_t conn_flags)
{
- u8 *pos;
- const struct ieee80211_sta_he_cap *he_cap = NULL;
- struct ieee80211_chanctx_conf *chanctx_conf;
+ u8 *pos, *pre_he_pos;
+ const struct ieee80211_sta_he_cap *he_cap;
u8 he_cap_size;
- bool reg_cap = false;
-
- rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
- if (!WARN_ON_ONCE(!chanctx_conf))
- reg_cap = cfg80211_chandef_usable(sdata->wdev.wiphy,
- &chanctx_conf->def,
- IEEE80211_CHAN_NO_HE);
-
- rcu_read_unlock();
he_cap = ieee80211_get_he_iftype_cap(sband,
ieee80211_vif_type_p2p(&sdata->vif));
- if (!he_cap || !reg_cap)
+ if (WARN_ON(!he_cap))
return;
+ /* get a max size estimate */
he_cap_size =
2 + 1 + sizeof(he_cap->he_cap_elem) +
ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem) +
ieee80211_he_ppe_size(he_cap->ppe_thres[0],
he_cap->he_cap_elem.phy_cap_info);
pos = skb_put(skb, he_cap_size);
- ieee80211_ie_build_he_cap(pos, he_cap, pos + he_cap_size);
+ pre_he_pos = pos;
+ pos = ieee80211_ie_build_he_cap(conn_flags,
+ pos, he_cap, pos + he_cap_size);
+ /* trim excess if any */
+ skb_trim(skb, skb->len - (pre_he_pos + he_cap_size - pos));
- ieee80211_ie_build_he_6ghz_cap(sdata, skb);
+ ieee80211_ie_build_he_6ghz_cap(sdata, smps_mode, skb);
}
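
The rewritten HE path reserves a worst-case element size with skb_put(), lets ieee80211_ie_build_he_cap() report how far it actually wrote, and hands the surplus back with skb_trim(). That avoids computing the exact, conn_flags-dependent size twice. The same over-reserve-then-trim pattern on a plain buffer (build_elem() is a hypothetical builder):

        #include <stdio.h>
        #include <string.h>

        /* hypothetical builder: writes a variable-size element, returns its end */
        static unsigned char *build_elem(unsigned char *pos, int actual_len)
        {
                memset(pos, 0xab, actual_len);
                return pos + actual_len;
        }

        int main(void)
        {
                unsigned char frame[128];
                size_t len = 0;
                size_t max = 32;                 /* worst case, like he_cap_size */
                unsigned char *pre = frame + len;
                unsigned char *end;

                len += max;                      /* skb_put(skb, he_cap_size) */
                end = build_elem(pre, 21);       /* used less than the estimate */
                len -= (pre + max) - end;        /* skb_trim(): return the excess */

                printf("reserved %zu, used %zu\n", max, len);
                return 0;
        }
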
-static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
+static void ieee80211_add_eht_ie(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb,
+ struct ieee80211_supported_band *sband)
{
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data;
- struct sk_buff *skb;
- struct ieee80211_mgmt *mgmt;
- u8 *pos, qos_info, *ie_start;
- size_t offset = 0, noffset;
- int i, count, rates_len, supp_rates_len, shift;
- u16 capab;
- struct ieee80211_supported_band *sband;
- struct ieee80211_chanctx_conf *chanctx_conf;
- struct ieee80211_channel *chan;
- u32 rates = 0;
- __le16 listen_int;
- struct element *ext_capa = NULL;
- enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
- const struct ieee80211_sband_iftype_data *iftd;
- struct ieee80211_prep_tx_info info = {};
-
- /* we know it's writable, cast away the const */
- if (assoc_data->ie_len)
- ext_capa = (void *)cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY,
- assoc_data->ie,
- assoc_data->ie_len);
+ u8 *pos;
+ const struct ieee80211_sta_he_cap *he_cap;
+ const struct ieee80211_sta_eht_cap *eht_cap;
+ u8 eht_cap_size;
- sdata_assert_lock(sdata);
+ he_cap = ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif));
+ eht_cap = ieee80211_get_eht_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif));
- rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
- if (WARN_ON(!chanctx_conf)) {
- rcu_read_unlock();
+ /*
+ * EHT capabilities element is only added if the HE capabilities element
+ * was added, so assume that 'he_cap' is valid and don't check it.
+ */
+ if (WARN_ON(!he_cap || !eht_cap))
return;
- }
- chan = chanctx_conf->def.chan;
- rcu_read_unlock();
- sband = local->hw.wiphy->bands[chan->band];
- shift = ieee80211_vif_get_shift(&sdata->vif);
+
+ eht_cap_size =
+ 2 + 1 + sizeof(eht_cap->eht_cap_elem) +
+ ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem,
+ &eht_cap->eht_cap_elem,
+ false) +
+ ieee80211_eht_ppe_size(eht_cap->eht_ppe_thres[0],
+ eht_cap->eht_cap_elem.phy_cap_info);
+ pos = skb_put(skb, eht_cap_size);
+ ieee80211_ie_build_eht_cap(pos, he_cap, eht_cap, pos + eht_cap_size,
+ false);
+}
+
+static void ieee80211_assoc_add_rates(struct sk_buff *skb,
+ enum nl80211_chan_width width,
+ struct ieee80211_supported_band *sband,
+ struct ieee80211_mgd_assoc_data *assoc_data)
+{
+ unsigned int shift = ieee80211_chanwidth_get_shift(width);
+ unsigned int rates_len, supp_rates_len;
+ u32 rates = 0;
+ int i, count;
+ u8 *pos;
if (assoc_data->supp_rates_len) {
/*
@@ -713,7 +774,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
* in the association request (e.g. D-Link DAP 1353 in
* b-only mode)...
*/
- rates_len = ieee80211_parse_bitrates(&chanctx_conf->def, sband,
+ rates_len = ieee80211_parse_bitrates(width, sband,
assoc_data->supp_rates,
assoc_data->supp_rates_len,
&rates);
@@ -723,91 +784,11 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
* before association, we send information element(s) with
* all rates that we support.
*/
- rates_len = 0;
- for (i = 0; i < sband->n_bitrates; i++) {
+ rates_len = sband->n_bitrates;
+ for (i = 0; i < sband->n_bitrates; i++)
rates |= BIT(i);
- rates_len++;
- }
}
- iftd = ieee80211_get_sband_iftype_data(sband, iftype);
-
- skb = alloc_skb(local->hw.extra_tx_headroom +
- sizeof(*mgmt) + /* bit too much but doesn't matter */
- 2 + assoc_data->ssid_len + /* SSID */
- 4 + rates_len + /* (extended) rates */
- 4 + /* power capability */
- 2 + 2 * sband->n_channels + /* supported channels */
- 2 + sizeof(struct ieee80211_ht_cap) + /* HT */
- 2 + sizeof(struct ieee80211_vht_cap) + /* VHT */
- 2 + 1 + sizeof(struct ieee80211_he_cap_elem) + /* HE */
- sizeof(struct ieee80211_he_mcs_nss_supp) +
- IEEE80211_HE_PPE_THRES_MAX_LEN +
- 2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) +
- assoc_data->ie_len + /* extra IEs */
- (assoc_data->fils_kek_len ? 16 /* AES-SIV */ : 0) +
- 9 + /* WMM */
- (iftd ? iftd->vendor_elems.len : 0),
- GFP_KERNEL);
- if (!skb)
- return;
-
- skb_reserve(skb, local->hw.extra_tx_headroom);
-
- capab = WLAN_CAPABILITY_ESS;
-
- if (sband->band == NL80211_BAND_2GHZ) {
- capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME;
- capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
- }
-
- if (assoc_data->capability & WLAN_CAPABILITY_PRIVACY)
- capab |= WLAN_CAPABILITY_PRIVACY;
-
- if ((assoc_data->capability & WLAN_CAPABILITY_SPECTRUM_MGMT) &&
- ieee80211_hw_check(&local->hw, SPECTRUM_MGMT))
- capab |= WLAN_CAPABILITY_SPECTRUM_MGMT;
-
- if (ifmgd->flags & IEEE80211_STA_ENABLE_RRM)
- capab |= WLAN_CAPABILITY_RADIO_MEASURE;
-
- mgmt = skb_put_zero(skb, 24);
- memcpy(mgmt->da, assoc_data->bss->bssid, ETH_ALEN);
- memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
- memcpy(mgmt->bssid, assoc_data->bss->bssid, ETH_ALEN);
-
- listen_int = cpu_to_le16(sband->band == NL80211_BAND_S1GHZ ?
- ieee80211_encode_usf(local->hw.conf.listen_interval) :
- local->hw.conf.listen_interval);
- if (!is_zero_ether_addr(assoc_data->prev_bssid)) {
- skb_put(skb, 10);
- mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
- IEEE80211_STYPE_REASSOC_REQ);
- mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab);
- mgmt->u.reassoc_req.listen_interval = listen_int;
- memcpy(mgmt->u.reassoc_req.current_ap, assoc_data->prev_bssid,
- ETH_ALEN);
- info.subtype = IEEE80211_STYPE_REASSOC_REQ;
- } else {
- skb_put(skb, 4);
- mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
- IEEE80211_STYPE_ASSOC_REQ);
- mgmt->u.assoc_req.capab_info = cpu_to_le16(capab);
- mgmt->u.assoc_req.listen_interval = listen_int;
- info.subtype = IEEE80211_STYPE_ASSOC_REQ;
- }
-
- /* SSID */
- pos = skb_put(skb, 2 + assoc_data->ssid_len);
- ie_start = pos;
- *pos++ = WLAN_EID_SSID;
- *pos++ = assoc_data->ssid_len;
- memcpy(pos, assoc_data->ssid, assoc_data->ssid_len);
-
- if (sband->band == NL80211_BAND_S1GHZ)
- goto skip_rates;
-
- /* add all rates which were marked to be used above */
supp_rates_len = rates_len;
if (supp_rates_len > 8)
supp_rates_len = 8;
@@ -821,7 +802,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
if (BIT(i) & rates) {
int rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
5 * (1 << shift));
- *pos++ = (u8) rate;
+ *pos++ = (u8)rate;
if (++count == 8)
break;
}
@@ -835,22 +816,219 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
for (i++; i < sband->n_bitrates; i++) {
if (BIT(i) & rates) {
int rate;
+
rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
5 * (1 << shift));
- *pos++ = (u8) rate;
+ *pos++ = (u8)rate;
}
}
}
+}
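
ieee80211_assoc_add_rates() keeps the long-standing 802.11 encoding rule: at most eight rates go into the Supported Rates element, and any remainder moves to Extended Supported Rates. A self-contained sketch of that split (emit_rates() is illustrative; rate values are in 500 kbps units):

        #include <stdio.h>

        #define WLAN_EID_SUPP_RATES     1
        #define WLAN_EID_EXT_SUPP_RATES 50

        /* emit rates[0..n) as Supported Rates (max 8) + Extended Supported Rates */
        static size_t emit_rates(unsigned char *buf,
                                 const unsigned char *rates, size_t n)
        {
                unsigned char *pos = buf;
                size_t first = n > 8 ? 8 : n;
                size_t i;

                *pos++ = WLAN_EID_SUPP_RATES;
                *pos++ = (unsigned char)first;
                for (i = 0; i < first; i++)
                        *pos++ = rates[i];

                if (n > 8) {
                        *pos++ = WLAN_EID_EXT_SUPP_RATES;
                        *pos++ = (unsigned char)(n - 8);
                        for (; i < n; i++)
                                *pos++ = rates[i];
                }
                return pos - buf;
        }

        int main(void)
        {
                /* 1/2/5.5/11 Mbps CCK + 6..54 Mbps OFDM */
                unsigned char rates[12] = { 2, 4, 11, 22, 12, 18,
                                            24, 36, 48, 72, 96, 108 };
                unsigned char buf[32];

                printf("emitted %zu bytes\n", emit_rates(buf, rates, 12));
                return 0;
        }
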
+
+static size_t ieee80211_add_before_ht_elems(struct sk_buff *skb,
+ const u8 *elems,
+ size_t elems_len,
+ size_t offset)
+{
+ size_t noffset;
+
+ static const u8 before_ht[] = {
+ WLAN_EID_SSID,
+ WLAN_EID_SUPP_RATES,
+ WLAN_EID_EXT_SUPP_RATES,
+ WLAN_EID_PWR_CAPABILITY,
+ WLAN_EID_SUPPORTED_CHANNELS,
+ WLAN_EID_RSN,
+ WLAN_EID_QOS_CAPA,
+ WLAN_EID_RRM_ENABLED_CAPABILITIES,
+ WLAN_EID_MOBILITY_DOMAIN,
+ WLAN_EID_FAST_BSS_TRANSITION, /* reassoc only */
+ WLAN_EID_RIC_DATA, /* reassoc only */
+ WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
+ };
+ static const u8 after_ric[] = {
+ WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
+ WLAN_EID_HT_CAPABILITY,
+ WLAN_EID_BSS_COEX_2040,
+ /* luckily this is almost always there */
+ WLAN_EID_EXT_CAPABILITY,
+ WLAN_EID_QOS_TRAFFIC_CAPA,
+ WLAN_EID_TIM_BCAST_REQ,
+ WLAN_EID_INTERWORKING,
+ /* 60 GHz (Multi-band, DMG, MMS) can't happen */
+ WLAN_EID_VHT_CAPABILITY,
+ WLAN_EID_OPMODE_NOTIF,
+ };
+
+ if (!elems_len)
+ return offset;
+
+ noffset = ieee80211_ie_split_ric(elems, elems_len,
+ before_ht,
+ ARRAY_SIZE(before_ht),
+ after_ric,
+ ARRAY_SIZE(after_ric),
+ offset);
+ skb_put_data(skb, elems + offset, noffset - offset);
+
+ return noffset;
+}
+
+static size_t ieee80211_add_before_vht_elems(struct sk_buff *skb,
+ const u8 *elems,
+ size_t elems_len,
+ size_t offset)
+{
+ static const u8 before_vht[] = {
+ /*
+ * no need to list the ones split off before HT
+ * or generated here
+ */
+ WLAN_EID_BSS_COEX_2040,
+ WLAN_EID_EXT_CAPABILITY,
+ WLAN_EID_QOS_TRAFFIC_CAPA,
+ WLAN_EID_TIM_BCAST_REQ,
+ WLAN_EID_INTERWORKING,
+ /* 60 GHz (Multi-band, DMG, MMS) can't happen */
+ };
+ size_t noffset;
+
+ if (!elems_len)
+ return offset;
+
+ /* RIC already taken care of in ieee80211_add_before_ht_elems() */
+ noffset = ieee80211_ie_split(elems, elems_len,
+ before_vht, ARRAY_SIZE(before_vht),
+ offset);
+ skb_put_data(skb, elems + offset, noffset - offset);
+
+ return noffset;
+}
+
+static size_t ieee80211_add_before_he_elems(struct sk_buff *skb,
+ const u8 *elems,
+ size_t elems_len,
+ size_t offset)
+{
+ static const u8 before_he[] = {
+ /*
+ * no need to list the ones split off before VHT
+ * or generated here
+ */
+ WLAN_EID_OPMODE_NOTIF,
+ WLAN_EID_EXTENSION, WLAN_EID_EXT_FUTURE_CHAN_GUIDANCE,
+ /* 11ai elements */
+ WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_SESSION,
+ WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_PUBLIC_KEY,
+ WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_KEY_CONFIRM,
+ WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_HLP_CONTAINER,
+ WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_IP_ADDR_ASSIGN,
+ /* TODO: add 11ah/11aj/11ak elements */
+ };
+ size_t noffset;
+
+ if (!elems_len)
+ return offset;
+
+ /* RIC already taken care of in ieee80211_add_before_ht_elems() */
+ noffset = ieee80211_ie_split(elems, elems_len,
+ before_he, ARRAY_SIZE(before_he),
+ offset);
+ skb_put_data(skb, elems + offset, noffset - offset);
+
+ return noffset;
+}
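
These three helpers factor the old inline "copy the caller's extra elements that must precede HT/VHT/HE" blocks out of ieee80211_send_assoc() so the MLO code can reuse them per link. Each one copies from the running offset up to the split point that ieee80211_ie_split() computes and returns the new offset. A simplified userspace model (split_before() assumes, like the real caller, that the extra elements are already in mandated order):

        #include <stdbool.h>
        #include <stddef.h>
        #include <string.h>

        /* does this element ID belong before the anchor element? */
        static bool id_before(unsigned char id, const unsigned char *set, size_t n)
        {
                size_t i;

                for (i = 0; i < n; i++)
                        if (set[i] == id)
                                return true;
                return false;
        }

        /*
         * Walk TLV elements from offset; copy those whose IDs are in before[]
         * into out[], stop at the first one that is not. Returns the new
         * offset, mirroring what ieee80211_ie_split() computes for the caller.
         */
        static size_t split_before(unsigned char *out, size_t *out_len,
                                   const unsigned char *elems, size_t len,
                                   const unsigned char *before, size_t n_before,
                                   size_t offset)
        {
                size_t pos = offset;

                while (pos + 2 <= len && id_before(elems[pos], before, n_before)) {
                        size_t elen = 2 + elems[pos + 1];

                        if (pos + elen > len)
                                break;
                        memcpy(out + *out_len, elems + pos, elen);
                        *out_len += elen;
                        pos += elen;
                }
                return pos;
        }

        int main(void)
        {
                static const unsigned char before_ht[] = { 0 /* SSID */,
                                                           1 /* rates */ };
                unsigned char elems[] = { 0, 2, 'h', 'i',  /* SSID */
                                          1, 1, 2,         /* rates */
                                          45, 2, 0xaa, 0xbb /* HT cap */ };
                unsigned char out[32];
                size_t out_len = 0;
                size_t off = split_before(out, &out_len, elems, sizeof(elems),
                                          before_ht, sizeof(before_ht), 0);

                return off == 7 && out_len == 7 ? 0 : 1;
        }
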
+
+#define PRESENT_ELEMS_MAX 8
+#define PRESENT_ELEM_EXT_OFFS 0x100
+
+static void ieee80211_assoc_add_ml_elem(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb, u16 capab,
+ const struct element *ext_capa,
+ const u16 *present_elems);
+
+static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb, u16 *capab,
+ const struct element *ext_capa,
+ const u8 *extra_elems,
+ size_t extra_elems_len,
+ unsigned int link_id,
+ struct ieee80211_link_data *link,
+ u16 *present_elems)
+{
+ enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data;
+ struct cfg80211_bss *cbss = assoc_data->link[link_id].bss;
+ struct ieee80211_channel *chan = cbss->channel;
+ const struct ieee80211_sband_iftype_data *iftd;
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_supported_band *sband;
+ enum nl80211_chan_width width = NL80211_CHAN_WIDTH_20;
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ enum ieee80211_smps_mode smps_mode;
+ u16 orig_capab = *capab;
+ size_t offset = 0;
+ int present_elems_len = 0;
+ u8 *pos;
+ int i;
+
+#define ADD_PRESENT_ELEM(id) do { \
+ /* leave room for a terminating entry - 0 == SSID is the sentinel */ \
+ if (!WARN_ON(present_elems_len >= PRESENT_ELEMS_MAX - 1)) \
+ present_elems[present_elems_len++] = (id); \
+} while (0)
+#define ADD_PRESENT_EXT_ELEM(id) ADD_PRESENT_ELEM(PRESENT_ELEM_EXT_OFFS | (id))
+
+ if (link)
+ smps_mode = link->smps_mode;
+ else if (sdata->u.mgd.powersave)
+ smps_mode = IEEE80211_SMPS_DYNAMIC;
+ else
+ smps_mode = IEEE80211_SMPS_OFF;
+
+ if (link) {
+ /*
+ * 5/10 MHz scenarios are only viable without MLO, in which
+ * case this pointer should be used ... All of this is a bit
+ * unclear though, not sure this even works at all.
+ */
+ rcu_read_lock();
+ chanctx_conf = rcu_dereference(link->conf->chanctx_conf);
+ if (chanctx_conf)
+ width = chanctx_conf->def.width;
+ rcu_read_unlock();
+ }
+
+ sband = local->hw.wiphy->bands[chan->band];
+ iftd = ieee80211_get_sband_iftype_data(sband, iftype);
+
+ if (sband->band == NL80211_BAND_2GHZ) {
+ *capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME;
+ *capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
+ }
+
+ if ((cbss->capability & WLAN_CAPABILITY_SPECTRUM_MGMT) &&
+ ieee80211_hw_check(&local->hw, SPECTRUM_MGMT))
+ *capab |= WLAN_CAPABILITY_SPECTRUM_MGMT;
+
+ if (sband->band != NL80211_BAND_S1GHZ)
+ ieee80211_assoc_add_rates(skb, width, sband, assoc_data);
+
+ if (*capab & WLAN_CAPABILITY_SPECTRUM_MGMT ||
+ *capab & WLAN_CAPABILITY_RADIO_MEASURE) {
+ struct cfg80211_chan_def chandef = {
+ .width = width,
+ .chan = chan,
+ };
-skip_rates:
- if (capab & WLAN_CAPABILITY_SPECTRUM_MGMT ||
- capab & WLAN_CAPABILITY_RADIO_MEASURE) {
pos = skb_put(skb, 4);
*pos++ = WLAN_EID_PWR_CAPABILITY;
*pos++ = 2;
*pos++ = 0; /* min tx power */
/* max tx power */
- *pos++ = ieee80211_chandef_max_power(&chanctx_conf->def);
+ *pos++ = ieee80211_chandef_max_power(&chandef);
+ ADD_PRESENT_ELEM(WLAN_EID_PWR_CAPABILITY);
}
/*
@@ -858,7 +1036,7 @@ skip_rates:
* support for extended channel switching, but we've always done that;
* (for now?) apply this restriction only on the (new) 6 GHz band.
*/
- if (capab & WLAN_CAPABILITY_SPECTRUM_MGMT &&
+ if (*capab & WLAN_CAPABILITY_SPECTRUM_MGMT &&
(sband->band != NL80211_BAND_6GHZ ||
!ext_capa || ext_capa->datalen < 1 ||
!(ext_capa->data[0] & WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING))) {
@@ -867,136 +1045,427 @@ skip_rates:
*pos++ = WLAN_EID_SUPPORTED_CHANNELS;
*pos++ = 2 * sband->n_channels;
for (i = 0; i < sband->n_channels; i++) {
- *pos++ = ieee80211_frequency_to_channel(
- sband->channels[i].center_freq);
+ int cf = sband->channels[i].center_freq;
+
+ *pos++ = ieee80211_frequency_to_channel(cf);
*pos++ = 1; /* one channel in the subband */
}
+ ADD_PRESENT_ELEM(WLAN_EID_SUPPORTED_CHANNELS);
}
- /* Set MBSSID support for HE AP if needed */
- if (ieee80211_hw_check(&local->hw, SUPPORTS_ONLY_HE_MULTI_BSSID) &&
- !(ifmgd->flags & IEEE80211_STA_DISABLE_HE) && assoc_data->ie_len &&
- ext_capa && ext_capa->datalen >= 3)
- ext_capa->data[2] |= WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT;
-
/* if present, add any custom IEs that go before HT */
- if (assoc_data->ie_len) {
- static const u8 before_ht[] = {
- WLAN_EID_SSID,
- WLAN_EID_SUPP_RATES,
- WLAN_EID_EXT_SUPP_RATES,
- WLAN_EID_PWR_CAPABILITY,
- WLAN_EID_SUPPORTED_CHANNELS,
- WLAN_EID_RSN,
- WLAN_EID_QOS_CAPA,
- WLAN_EID_RRM_ENABLED_CAPABILITIES,
- WLAN_EID_MOBILITY_DOMAIN,
- WLAN_EID_FAST_BSS_TRANSITION, /* reassoc only */
- WLAN_EID_RIC_DATA, /* reassoc only */
- WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
- };
- static const u8 after_ric[] = {
- WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
- WLAN_EID_HT_CAPABILITY,
- WLAN_EID_BSS_COEX_2040,
- /* luckily this is almost always there */
- WLAN_EID_EXT_CAPABILITY,
- WLAN_EID_QOS_TRAFFIC_CAPA,
- WLAN_EID_TIM_BCAST_REQ,
- WLAN_EID_INTERWORKING,
- /* 60 GHz (Multi-band, DMG, MMS) can't happen */
- WLAN_EID_VHT_CAPABILITY,
- WLAN_EID_OPMODE_NOTIF,
- };
+ offset = ieee80211_add_before_ht_elems(skb, extra_elems,
+ extra_elems_len,
+ offset);
- noffset = ieee80211_ie_split_ric(assoc_data->ie,
- assoc_data->ie_len,
- before_ht,
- ARRAY_SIZE(before_ht),
- after_ric,
- ARRAY_SIZE(after_ric),
- offset);
- skb_put_data(skb, assoc_data->ie + offset, noffset - offset);
- offset = noffset;
+ if (sband->band != NL80211_BAND_6GHZ &&
+ !(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_HT)) {
+ ieee80211_add_ht_ie(sdata, skb,
+ assoc_data->link[link_id].ap_ht_param,
+ sband, chan, smps_mode,
+ assoc_data->link[link_id].conn_flags);
+ ADD_PRESENT_ELEM(WLAN_EID_HT_CAPABILITY);
}
- if (WARN_ON_ONCE((ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
- !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)))
- ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+ /* if present, add any custom IEs that go before VHT */
+ offset = ieee80211_add_before_vht_elems(skb, extra_elems,
+ extra_elems_len,
+ offset);
if (sband->band != NL80211_BAND_6GHZ &&
- !(ifmgd->flags & IEEE80211_STA_DISABLE_HT))
- ieee80211_add_ht_ie(sdata, skb, assoc_data->ap_ht_param,
- sband, chan, sdata->smps_mode);
-
- /* if present, add any custom IEs that go before VHT */
- if (assoc_data->ie_len) {
- static const u8 before_vht[] = {
- /*
- * no need to list the ones split off before HT
- * or generated here
- */
- WLAN_EID_BSS_COEX_2040,
- WLAN_EID_EXT_CAPABILITY,
- WLAN_EID_QOS_TRAFFIC_CAPA,
- WLAN_EID_TIM_BCAST_REQ,
- WLAN_EID_INTERWORKING,
- /* 60 GHz (Multi-band, DMG, MMS) can't happen */
- };
+ !(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_VHT)) {
+ bool mu_mimo_owner =
+ ieee80211_add_vht_ie(sdata, skb, sband,
+ &assoc_data->link[link_id].ap_vht_cap,
+ assoc_data->link[link_id].conn_flags);
- /* RIC already taken above, so no need to handle here anymore */
- noffset = ieee80211_ie_split(assoc_data->ie, assoc_data->ie_len,
- before_vht, ARRAY_SIZE(before_vht),
- offset);
- skb_put_data(skb, assoc_data->ie + offset, noffset - offset);
- offset = noffset;
+ if (link)
+ link->conf->mu_mimo_owner = mu_mimo_owner;
+ ADD_PRESENT_ELEM(WLAN_EID_VHT_CAPABILITY);
}
+ /*
+ * If AP doesn't support HT, mark HE and EHT as disabled.
+ * If on the 5GHz band, make sure it supports VHT.
+ */
+ if (assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_HT ||
+ (sband->band == NL80211_BAND_5GHZ &&
+ assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_VHT))
+ assoc_data->link[link_id].conn_flags |=
+ IEEE80211_CONN_DISABLE_HE |
+ IEEE80211_CONN_DISABLE_EHT;
+
/* if present, add any custom IEs that go before HE */
- if (assoc_data->ie_len) {
- static const u8 before_he[] = {
- /*
- * no need to list the ones split off before VHT
- * or generated here
- */
- WLAN_EID_OPMODE_NOTIF,
- WLAN_EID_EXTENSION, WLAN_EID_EXT_FUTURE_CHAN_GUIDANCE,
- /* 11ai elements */
- WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_SESSION,
- WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_PUBLIC_KEY,
- WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_KEY_CONFIRM,
- WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_HLP_CONTAINER,
- WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_IP_ADDR_ASSIGN,
- /* TODO: add 11ah/11aj/11ak elements */
- };
+ offset = ieee80211_add_before_he_elems(skb, extra_elems,
+ extra_elems_len,
+ offset);
- /* RIC already taken above, so no need to handle here anymore */
- noffset = ieee80211_ie_split(assoc_data->ie, assoc_data->ie_len,
- before_he, ARRAY_SIZE(before_he),
- offset);
- pos = skb_put(skb, noffset - offset);
- memcpy(pos, assoc_data->ie + offset, noffset - offset);
- offset = noffset;
+ if (!(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_HE)) {
+ ieee80211_add_he_ie(sdata, skb, sband, smps_mode,
+ assoc_data->link[link_id].conn_flags);
+ ADD_PRESENT_EXT_ELEM(WLAN_EID_EXT_HE_CAPABILITY);
}
- if (sband->band != NL80211_BAND_6GHZ &&
- !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
- ieee80211_add_vht_ie(sdata, skb, sband,
- &assoc_data->ap_vht_cap);
-
/*
- * If AP doesn't support HT, mark HE as disabled.
- * If on the 5GHz band, make sure it supports VHT.
+ * careful - need to know about all the present elems before
+ * calling ieee80211_assoc_add_ml_elem(), so add this one if
+ * we're going to put it after the ML element
*/
- if (ifmgd->flags & IEEE80211_STA_DISABLE_HT ||
- (sband->band == NL80211_BAND_5GHZ &&
- ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
- ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
+ if (!(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_EHT))
+ ADD_PRESENT_EXT_ELEM(WLAN_EID_EXT_EHT_CAPABILITY);
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
- ieee80211_add_he_ie(sdata, skb, sband);
+ if (link_id == assoc_data->assoc_link_id)
+ ieee80211_assoc_add_ml_elem(sdata, skb, orig_capab, ext_capa,
+ present_elems);
- /* if present, add any custom non-vendor IEs that go after HE */
+ /* crash if somebody gets it wrong */
+ present_elems = NULL;
+
+ if (!(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_EHT))
+ ieee80211_add_eht_ie(sdata, skb, sband);
+
+ if (sband->band == NL80211_BAND_S1GHZ) {
+ ieee80211_add_aid_request_ie(sdata, skb);
+ ieee80211_add_s1g_capab_ie(sdata, &sband->s1g_cap, skb);
+ }
+
+ if (iftd && iftd->vendor_elems.data && iftd->vendor_elems.len)
+ skb_put_data(skb, iftd->vendor_elems.data, iftd->vendor_elems.len);
+
+ if (link)
+ link->u.mgd.conn_flags = assoc_data->link[link_id].conn_flags;
+
+ return offset;
+}
+
+static void ieee80211_add_non_inheritance_elem(struct sk_buff *skb,
+ const u16 *outer,
+ const u16 *inner)
+{
+ unsigned int skb_len = skb->len;
+ bool added = false;
+ int i, j;
+ u8 *len, *list_len = NULL;
+
+ skb_put_u8(skb, WLAN_EID_EXTENSION);
+ len = skb_put(skb, 1);
+ skb_put_u8(skb, WLAN_EID_EXT_NON_INHERITANCE);
+
+ for (i = 0; i < PRESENT_ELEMS_MAX && outer[i]; i++) {
+ u16 elem = outer[i];
+ bool have_inner = false;
+ bool at_extension = false;
+
+ /* should at least be sorted in the sense of normal -> ext */
+ WARN_ON(at_extension && elem < PRESENT_ELEM_EXT_OFFS);
+
+ /* switch to extension list */
+ if (!at_extension && elem >= PRESENT_ELEM_EXT_OFFS) {
+ at_extension = true;
+ if (!list_len)
+ skb_put_u8(skb, 0);
+ list_len = NULL;
+ }
+
+ for (j = 0; j < PRESENT_ELEMS_MAX && inner[j]; j++) {
+ if (elem == inner[j]) {
+ have_inner = true;
+ break;
+ }
+ }
+
+ if (have_inner)
+ continue;
+
+ if (!list_len) {
+ list_len = skb_put(skb, 1);
+ *list_len = 0;
+ }
+ *list_len += 1;
+ skb_put_u8(skb, (u8)elem);
+ }
+
+ if (!added)
+ skb_trim(skb, skb_len);
+ else
+ *len = skb->len - skb_len - 2;
+}
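
One thing to flag in ieee80211_add_non_inheritance_elem() as merged here: `added` is initialized to false and never set, so the trailing skb_trim() always removes the element again, and `at_extension` is re-initialized on every loop iteration, defeating both the WARN_ON() ordering check and the extension-list switch. This was corrected in a later mainline patch; a userspace model of the intended behaviour (at_extension hoisted, `added` set when an ID is emitted) looks like:

        #include <stdbool.h>
        #include <stdio.h>

        #define PRESENT_ELEMS_MAX     8
        #define PRESENT_ELEM_EXT_OFFS 0x100

        /* userspace model: build the Non-Inheritance ID lists into buf[] */
        static size_t build_non_inheritance(unsigned char *buf,
                                            const unsigned short *outer,
                                            const unsigned short *inner)
        {
                bool at_extension = false; /* hoisted out of the loop (the fix) */
                bool added = false;
                unsigned char *pos = buf;
                unsigned char *list_len = NULL;
                int i, j;

                for (i = 0; i < PRESENT_ELEMS_MAX && outer[i]; i++) {
                        unsigned short elem = outer[i];
                        bool have_inner = false;

                        /* switch to the extension-ID list exactly once */
                        if (!at_extension && elem >= PRESENT_ELEM_EXT_OFFS) {
                                at_extension = true;
                                if (!list_len)
                                        *pos++ = 0; /* empty normal-ID list */
                                list_len = NULL;
                        }
                        for (j = 0; j < PRESENT_ELEMS_MAX && inner[j]; j++)
                                if (elem == inner[j]) {
                                        have_inner = true;
                                        break;
                                }
                        if (have_inner)
                                continue;
                        if (!list_len) {
                                list_len = pos++;
                                *list_len = 0;
                        }
                        *list_len += 1;
                        *pos++ = (unsigned char)elem;
                        added = true; /* the assignment missing above */
                }
                return added ? (size_t)(pos - buf) : 0; /* 0 == drop element */
        }

        int main(void)
        {
                unsigned short outer[PRESENT_ELEMS_MAX] = { 33, 36,
                                                            0x100 | 35, 0 };
                unsigned short inner[PRESENT_ELEMS_MAX] = { 36, 0 };
                unsigned char buf[16];
                size_t n = build_non_inheritance(buf, outer, inner);

                printf("%zu payload bytes\n", n); /* expect 4: [1,33][1,35] */
                return 0;
        }
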
+
+static void ieee80211_assoc_add_ml_elem(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb, u16 capab,
+ const struct element *ext_capa,
+ const u16 *outer_present_elems)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data;
+ struct ieee80211_multi_link_elem *ml_elem;
+ struct ieee80211_mle_basic_common_info *common;
+ const struct wiphy_iftype_ext_capab *ift_ext_capa;
+ __le16 eml_capa = 0, mld_capa_ops = 0;
+ unsigned int link_id;
+ u8 *ml_elem_len;
+ void *capab_pos;
+
+ if (!sdata->vif.valid_links)
+ return;
+
+ ift_ext_capa = cfg80211_get_iftype_ext_capa(local->hw.wiphy,
+ ieee80211_vif_type_p2p(&sdata->vif));
+ if (ift_ext_capa) {
+ eml_capa = cpu_to_le16(ift_ext_capa->eml_capabilities);
+ mld_capa_ops = cpu_to_le16(ift_ext_capa->mld_capa_and_ops);
+ }
+
+ skb_put_u8(skb, WLAN_EID_EXTENSION);
+ ml_elem_len = skb_put(skb, 1);
+ skb_put_u8(skb, WLAN_EID_EXT_EHT_MULTI_LINK);
+ ml_elem = skb_put(skb, sizeof(*ml_elem));
+ ml_elem->control =
+ cpu_to_le16(IEEE80211_ML_CONTROL_TYPE_BASIC |
+ IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP);
+ common = skb_put(skb, sizeof(*common));
+ common->len = sizeof(*common) +
+ 2; /* MLD capa/ops */
+ memcpy(common->mld_mac_addr, sdata->vif.addr, ETH_ALEN);
+
+ /* add EML_CAPA only if needed, see Draft P802.11be_D2.1, 35.3.17 */
+ if (eml_capa &
+ cpu_to_le16((IEEE80211_EML_CAP_EMLSR_SUPP |
+ IEEE80211_EML_CAP_EMLMR_SUPPORT))) {
+ common->len += 2; /* EML capabilities */
+ ml_elem->control |=
+ cpu_to_le16(IEEE80211_MLC_BASIC_PRES_EML_CAPA);
+ skb_put_data(skb, &eml_capa, sizeof(eml_capa));
+ }
+ /* need indication from userspace to support this */
+ mld_capa_ops &= ~cpu_to_le16(IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP);
+ skb_put_data(skb, &mld_capa_ops, sizeof(mld_capa_ops));
+
+ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
+ u16 link_present_elems[PRESENT_ELEMS_MAX] = {};
+ const u8 *extra_elems;
+ size_t extra_elems_len;
+ size_t extra_used;
+ u8 *subelem_len = NULL;
+ __le16 ctrl;
+
+ if (!assoc_data->link[link_id].bss ||
+ link_id == assoc_data->assoc_link_id)
+ continue;
+
+ extra_elems = assoc_data->link[link_id].elems;
+ extra_elems_len = assoc_data->link[link_id].elems_len;
+
+ skb_put_u8(skb, IEEE80211_MLE_SUBELEM_PER_STA_PROFILE);
+ subelem_len = skb_put(skb, 1);
+
+ ctrl = cpu_to_le16(link_id |
+ IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE |
+ IEEE80211_MLE_STA_CONTROL_STA_MAC_ADDR_PRESENT);
+ skb_put_data(skb, &ctrl, sizeof(ctrl));
+ skb_put_u8(skb, 1 + ETH_ALEN); /* STA Info Length */
+ skb_put_data(skb, assoc_data->link[link_id].addr,
+ ETH_ALEN);
+ /*
+ * Now add the contents of the (re)association request,
+ * but the "listen interval" and "current AP address"
+ * (if applicable) are skipped. So we only have
+ * the capability field (remember the position and fill
+ * later), followed by the elements added below by
+ * calling ieee80211_assoc_link_elems().
+ */
+ capab_pos = skb_put(skb, 2);
+
+ extra_used = ieee80211_assoc_link_elems(sdata, skb, &capab,
+ ext_capa,
+ extra_elems,
+ extra_elems_len,
+ link_id, NULL,
+ link_present_elems);
+ if (extra_elems)
+ skb_put_data(skb, extra_elems + extra_used,
+ extra_elems_len - extra_used);
+
+ put_unaligned_le16(capab, capab_pos);
+
+ ieee80211_add_non_inheritance_elem(skb, outer_present_elems,
+ link_present_elems);
+
+ ieee80211_fragment_element(skb, subelem_len);
+ }
+
+ ieee80211_fragment_element(skb, ml_elem_len);
+}
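
Each non-association link becomes a Per-STA Profile subelement: control, STA Info with the link MAC address, a capability field patched in afterwards via put_unaligned_le16(), the per-link elements, and a Non-Inheritance element. Both the subelement and the outer Multi-Link element can exceed the 255-byte element limit, hence the two ieee80211_fragment_element() calls. The fragmentation performed is the standard 802.11 scheme, modelled here on a flat buffer (fragment_element() below is a simplified stand-in, not the kernel helper):

        #include <stddef.h>
        #include <stdio.h>
        #include <string.h>

        #define WLAN_EID_FRAGMENT 242

        /*
         * len_byte points at the reserved one-byte length of the element
         * whose body runs to buf + *len; split bodies > 255 bytes into
         * 255-byte fragments, each introduced by a 2-byte fragment header.
         */
        static size_t fragment_element(unsigned char *buf, size_t *len,
                                       unsigned char *len_byte)
        {
                size_t body = (buf + *len) - (len_byte + 1);

                while (body > 255) {
                        unsigned char *next = len_byte + 1 + 255;
                        size_t tail = (buf + *len) - next;

                        memmove(next + 2, next, tail); /* open a 2-byte gap */
                        *len += 2;
                        *len_byte = 255;
                        next[0] = WLAN_EID_FRAGMENT;
                        len_byte = next + 1;
                        body -= 255;
                }
                *len_byte = (unsigned char)body;
                return *len;
        }

        int main(void)
        {
                static unsigned char buf[600];
                size_t len = 2 + 300;      /* [EID][len][300-byte body] */

                buf[0] = 255;              /* hypothetical element ID */
                fragment_element(buf, &len, &buf[1]);
                printf("final length %zu\n", len); /* 304: one fragment added */
                return 0;
        }
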
+
+static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data;
+ struct ieee80211_link_data *link;
+ struct sk_buff *skb;
+ struct ieee80211_mgmt *mgmt;
+ u8 *pos, qos_info, *ie_start;
+ size_t offset, noffset;
+ u16 capab = WLAN_CAPABILITY_ESS, link_capab;
+ __le16 listen_int;
+ struct element *ext_capa = NULL;
+ enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
+ struct ieee80211_prep_tx_info info = {};
+ unsigned int link_id, n_links = 0;
+ u16 present_elems[PRESENT_ELEMS_MAX] = {};
+ void *capab_pos;
+ size_t size;
+ int ret;
+
+ /* we know it's writable, cast away the const */
+ if (assoc_data->ie_len)
+ ext_capa = (void *)cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY,
+ assoc_data->ie,
+ assoc_data->ie_len);
+
+ sdata_assert_lock(sdata);
+
+ size = local->hw.extra_tx_headroom +
+ sizeof(*mgmt) + /* bit too much but doesn't matter */
+ 2 + assoc_data->ssid_len + /* SSID */
+ assoc_data->ie_len + /* extra IEs */
+ (assoc_data->fils_kek_len ? 16 /* AES-SIV */ : 0) +
+ 9; /* WMM */
+
+ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
+ struct cfg80211_bss *cbss = assoc_data->link[link_id].bss;
+ const struct ieee80211_sband_iftype_data *iftd;
+ struct ieee80211_supported_band *sband;
+
+ if (!cbss)
+ continue;
+
+ sband = local->hw.wiphy->bands[cbss->channel->band];
+
+ n_links++;
+ /* add STA profile elements length */
+ size += assoc_data->link[link_id].elems_len;
+ /* and supported rates length */
+ size += 4 + sband->n_bitrates;
+ /* supported channels */
+ size += 2 + 2 * sband->n_channels;
+
+ iftd = ieee80211_get_sband_iftype_data(sband, iftype);
+ if (iftd)
+ size += iftd->vendor_elems.len;
+
+ /* power capability */
+ size += 4;
+
+ /* HT, VHT, HE, EHT */
+ size += 2 + sizeof(struct ieee80211_ht_cap);
+ size += 2 + sizeof(struct ieee80211_vht_cap);
+ size += 2 + 1 + sizeof(struct ieee80211_he_cap_elem) +
+ sizeof(struct ieee80211_he_mcs_nss_supp) +
+ IEEE80211_HE_PPE_THRES_MAX_LEN;
+
+ if (sband->band == NL80211_BAND_6GHZ)
+ size += 2 + 1 + sizeof(struct ieee80211_he_6ghz_capa);
+
+ size += 2 + 1 + sizeof(struct ieee80211_eht_cap_elem) +
+ sizeof(struct ieee80211_eht_mcs_nss_supp) +
+ IEEE80211_EHT_PPE_THRES_MAX_LEN;
+
+ /* non-inheritance element */
+ size += 2 + 2 + PRESENT_ELEMS_MAX;
+
+ /* should be the same across all BSSes */
+ if (cbss->capability & WLAN_CAPABILITY_PRIVACY)
+ capab |= WLAN_CAPABILITY_PRIVACY;
+ }
+
+ if (sdata->vif.valid_links) {
+ /* consider the multi-link element with STA profile */
+ size += sizeof(struct ieee80211_multi_link_elem);
+ /* max common info field in basic multi-link element */
+ size += sizeof(struct ieee80211_mle_basic_common_info) +
+ 2 + /* capa & op */
+ 2; /* EML capa */
+
+ /*
+ * The capability elements were already considered above;
+ * note this over-estimates a bit because there's no
+ * STA profile for the assoc link.
+ */
+ size += (n_links - 1) *
+ (1 + 1 + /* subelement ID/length */
+ 2 + /* STA control */
+ 1 + ETH_ALEN + 2 /* STA Info field */);
+ }
+
+ link = sdata_dereference(sdata->link[assoc_data->assoc_link_id], sdata);
+ if (WARN_ON(!link))
+ return -EINVAL;
+
+ if (WARN_ON(!assoc_data->link[assoc_data->assoc_link_id].bss))
+ return -EINVAL;
+
+ skb = alloc_skb(size, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ skb_reserve(skb, local->hw.extra_tx_headroom);
+
+ if (ifmgd->flags & IEEE80211_STA_ENABLE_RRM)
+ capab |= WLAN_CAPABILITY_RADIO_MEASURE;
+
+ /* Set MBSSID support for HE AP if needed */
+ if (ieee80211_hw_check(&local->hw, SUPPORTS_ONLY_HE_MULTI_BSSID) &&
+ !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) &&
+ ext_capa && ext_capa->datalen >= 3)
+ ext_capa->data[2] |= WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT;
+
+ mgmt = skb_put_zero(skb, 24);
+ memcpy(mgmt->da, sdata->vif.cfg.ap_addr, ETH_ALEN);
+ memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
+ memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
+
+ listen_int = cpu_to_le16(assoc_data->s1g ?
+ ieee80211_encode_usf(local->hw.conf.listen_interval) :
+ local->hw.conf.listen_interval);
+ if (!is_zero_ether_addr(assoc_data->prev_ap_addr)) {
+ skb_put(skb, 10);
+ mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+ IEEE80211_STYPE_REASSOC_REQ);
+ capab_pos = &mgmt->u.reassoc_req.capab_info;
+ mgmt->u.reassoc_req.listen_interval = listen_int;
+ memcpy(mgmt->u.reassoc_req.current_ap,
+ assoc_data->prev_ap_addr, ETH_ALEN);
+ info.subtype = IEEE80211_STYPE_REASSOC_REQ;
+ } else {
+ skb_put(skb, 4);
+ mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+ IEEE80211_STYPE_ASSOC_REQ);
+ capab_pos = &mgmt->u.assoc_req.capab_info;
+ mgmt->u.assoc_req.listen_interval = listen_int;
+ info.subtype = IEEE80211_STYPE_ASSOC_REQ;
+ }
+
+ /* SSID */
+ pos = skb_put(skb, 2 + assoc_data->ssid_len);
+ ie_start = pos;
+ *pos++ = WLAN_EID_SSID;
+ *pos++ = assoc_data->ssid_len;
+ memcpy(pos, assoc_data->ssid, assoc_data->ssid_len);
+
+ /* add the elements for the assoc (main) link */
+ link_capab = capab;
+ offset = ieee80211_assoc_link_elems(sdata, skb, &link_capab,
+ ext_capa,
+ assoc_data->ie,
+ assoc_data->ie_len,
+ assoc_data->assoc_link_id, link,
+ present_elems);
+ put_unaligned_le16(link_capab, capab_pos);
+
+ /* if present, add any custom non-vendor IEs */
if (assoc_data->ie_len) {
noffset = ieee80211_ie_split_vendor(assoc_data->ie,
assoc_data->ie_len,
@@ -1017,29 +1486,28 @@ skip_rates:
pos = ieee80211_add_wmm_info_ie(skb_put(skb, 9), qos_info);
}
- if (sband->band == NL80211_BAND_S1GHZ) {
- ieee80211_add_aid_request_ie(sdata, skb);
- ieee80211_add_s1g_capab_ie(sdata, &sband->s1g_cap, skb);
- }
-
- if (iftd && iftd->vendor_elems.data && iftd->vendor_elems.len)
- skb_put_data(skb, iftd->vendor_elems.data, iftd->vendor_elems.len);
-
/* add any remaining custom (i.e. vendor specific here) IEs */
if (assoc_data->ie_len) {
noffset = assoc_data->ie_len;
skb_put_data(skb, assoc_data->ie + offset, noffset - offset);
}
- if (assoc_data->fils_kek_len &&
- fils_encrypt_assoc_req(skb, assoc_data) < 0) {
- dev_kfree_skb(skb);
- return;
+ if (assoc_data->fils_kek_len) {
+ ret = fils_encrypt_assoc_req(skb, assoc_data);
+ if (ret < 0) {
+ dev_kfree_skb(skb);
+ return ret;
+ }
}
pos = skb_tail_pointer(skb);
kfree(ifmgd->assoc_req_ies);
ifmgd->assoc_req_ies = kmemdup(ie_start, pos - ie_start, GFP_ATOMIC);
+ if (!ifmgd->assoc_req_ies) {
+ dev_kfree_skb(skb);
+ return -ENOMEM;
+ }
+
ifmgd->assoc_req_ies_len = pos - ie_start;
drv_mgd_prepare_tx(local, sdata, &info);
@@ -1049,6 +1517,8 @@ skip_rates:
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS |
IEEE80211_TX_INTFL_MLME_CONN_TX;
ieee80211_tx_skb(sdata, skb);
+
+ return 0;
}
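
ieee80211_send_assoc() now does an explicit two-pass build: a sizing loop over all candidate links that over-estimates the frame, one allocation, then the fill pass; it also returns int so FILS-encryption and allocation failures propagate instead of being silently dropped. Over-estimating in pass one is harmless; under-estimating would overrun the skb tailroom, which is why every element emitted in pass two must be accounted for above. The pattern in miniature (the frame_* helpers are hypothetical):

        #include <stdlib.h>
        #include <string.h>

        struct frame {
                unsigned char *data;
                size_t len, size;
        };

        static int frame_alloc(struct frame *f, size_t size)
        {
                f->data = malloc(size);
                f->len = 0;
                f->size = size;
                return f->data ? 0 : -1;
        }

        static unsigned char *frame_put(struct frame *f, size_t n)
        {
                unsigned char *pos = f->data + f->len;

                if (f->len + n > f->size)
                        return NULL; /* the sizing pass under-estimated: a bug */
                f->len += n;
                return pos;
        }

        int main(void)
        {
                struct frame f;
                /* pass one: worst-case sum, like the per-link loop above */
                size_t size = 24 /* header */ + 2 + 32 /* SSID */ + 512 /* IEs */;

                if (frame_alloc(&f, size))
                        return -1;
                memset(frame_put(&f, 24), 0, 24); /* management header */
                free(f.data);
                return 0;
        }
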
void ieee80211_send_pspoll(struct ieee80211_local *local,
@@ -1076,8 +1546,9 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
struct ieee80211_hdr_3addr *nullfunc;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif,
- !ieee80211_hw_check(&local->hw, DOESNT_SUPPORT_QOS_NDP));
+ skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, -1,
+ !ieee80211_hw_check(&local->hw,
+ DOESNT_SUPPORT_QOS_NDP));
if (!skb)
return;
@@ -1117,20 +1588,22 @@ void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC |
IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS);
nullfunc->frame_control = fc;
- memcpy(nullfunc->addr1, sdata->u.mgd.bssid, ETH_ALEN);
+ memcpy(nullfunc->addr1, sdata->deflink.u.mgd.bssid, ETH_ALEN);
memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN);
- memcpy(nullfunc->addr3, sdata->u.mgd.bssid, ETH_ALEN);
+ memcpy(nullfunc->addr3, sdata->deflink.u.mgd.bssid, ETH_ALEN);
memcpy(nullfunc->addr4, sdata->vif.addr, ETH_ALEN);
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
+ IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_USE_MINRATE;
ieee80211_tx_skb(sdata, skb);
}
/* spectrum management related things */
static void ieee80211_chswitch_work(struct work_struct *work)
{
- struct ieee80211_sub_if_data *sdata =
- container_of(work, struct ieee80211_sub_if_data, u.mgd.chswitch_work);
+ struct ieee80211_link_data *link =
+ container_of(work, struct ieee80211_link_data, u.mgd.chswitch_work);
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
int ret;
@@ -1145,7 +1618,7 @@ static void ieee80211_chswitch_work(struct work_struct *work)
if (!ifmgd->associated)
goto out;
- if (!sdata->vif.csa_active)
+ if (!link->conf->csa_active)
goto out;
/*
@@ -1155,16 +1628,16 @@ static void ieee80211_chswitch_work(struct work_struct *work)
* completed successfully
*/
- if (sdata->reserved_chanctx) {
+ if (link->reserved_chanctx) {
/*
* with multi-vif csa driver may call ieee80211_csa_finish()
* many times while waiting for other interfaces to use their
* reservations
*/
- if (sdata->reserved_ready)
+ if (link->reserved_ready)
goto out;
- ret = ieee80211_vif_use_reserved_context(sdata);
+ ret = ieee80211_link_use_reserved_context(link);
if (ret) {
sdata_info(sdata,
"failed to use reserved channel context, disconnecting (err=%d)\n",
@@ -1177,8 +1650,8 @@ static void ieee80211_chswitch_work(struct work_struct *work)
goto out;
}
- if (!cfg80211_chandef_identical(&sdata->vif.bss_conf.chandef,
- &sdata->csa_chandef)) {
+ if (!cfg80211_chandef_identical(&link->conf->chandef,
+ &link->csa_chandef)) {
sdata_info(sdata,
"failed to finalize channel switch, disconnecting\n");
ieee80211_queue_work(&sdata->local->hw,
@@ -1186,7 +1659,7 @@ static void ieee80211_chswitch_work(struct work_struct *work)
goto out;
}
- ifmgd->csa_waiting_bcn = true;
+ link->u.mgd.csa_waiting_bcn = true;
ieee80211_sta_reset_beacon_monitor(sdata);
ieee80211_sta_reset_conn_monitor(sdata);
@@ -1197,29 +1670,30 @@ out:
sdata_unlock(sdata);
}
-static void ieee80211_chswitch_post_beacon(struct ieee80211_sub_if_data *sdata)
+static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
int ret;
sdata_assert_lock(sdata);
- WARN_ON(!sdata->vif.csa_active);
+ WARN_ON(!link->conf->csa_active);
- if (sdata->csa_block_tx) {
+ if (link->csa_block_tx) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->csa_block_tx = false;
+ link->csa_block_tx = false;
}
- sdata->vif.csa_active = false;
- ifmgd->csa_waiting_bcn = false;
+ link->conf->csa_active = false;
+ link->u.mgd.csa_waiting_bcn = false;
/*
* If the CSA IE is still present on the beacon after the switch,
* we need to consider it as a new CSA (possibly to self).
*/
- ifmgd->beacon_crc_valid = false;
+ link->u.mgd.beacon_crc_valid = false;
ret = drv_post_channel_switch(sdata);
if (ret) {
@@ -1230,7 +1704,7 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_sub_if_data *sdata)
return;
}
- cfg80211_ch_switch_notify(sdata->dev, &sdata->reserved_chandef);
+ cfg80211_ch_switch_notify(sdata->dev, &link->reserved_chandef, 0);
}
void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success)
@@ -1238,6 +1712,9 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success)
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ if (WARN_ON(sdata->vif.valid_links))
+ success = false;
+
trace_api_chswitch_done(sdata, success);
if (!success) {
sdata_info(sdata,
@@ -1245,22 +1722,25 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success)
ieee80211_queue_work(&sdata->local->hw,
&ifmgd->csa_connection_drop_work);
} else {
- ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work);
+ ieee80211_queue_work(&sdata->local->hw,
+ &sdata->deflink.u.mgd.chswitch_work);
}
}
EXPORT_SYMBOL(ieee80211_chswitch_done);
static void ieee80211_chswitch_timer(struct timer_list *t)
{
- struct ieee80211_sub_if_data *sdata =
- from_timer(sdata, t, u.mgd.chswitch_timer);
+ struct ieee80211_link_data *link =
+ from_timer(link, t, u.mgd.chswitch_timer);
- ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.chswitch_work);
+ ieee80211_queue_work(&link->sdata->local->hw,
+ &link->u.mgd.chswitch_work);
}
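
The timer and work conversions in this block are mechanical because from_timer() is just container_of() over the timer_list member: moving chswitch_timer from sdata->u.mgd into the per-link struct only changes the containing type the callback recovers. A userspace model of the idiom (struct timer_list reduced to a stub; __typeof__ is GNU C):

        #include <stddef.h>
        #include <stdio.h>

        struct timer_list { int dummy; };

        #define container_of(ptr, type, member) \
                ((type *)((char *)(ptr) - offsetof(type, member)))
        /* from_timer(var, timer, field) specializes container_of() for timers */
        #define from_timer(var, timer, field) \
                container_of(timer, __typeof__(*var), field)

        struct link_data {
                int link_id;
                struct timer_list chswitch_timer;
        };

        static void chswitch_timer_cb(struct timer_list *t)
        {
                struct link_data *link = from_timer(link, t, chswitch_timer);

                printf("timer fired for link %d\n", link->link_id);
        }

        int main(void)
        {
                struct link_data link = { .link_id = 1 };

                chswitch_timer_cb(&link.chswitch_timer);
                return 0;
        }
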
static void
-ieee80211_sta_abort_chanswitch(struct ieee80211_sub_if_data *sdata)
+ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
if (!local->ops->abort_channel_switch)
@@ -1269,15 +1749,15 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_sub_if_data *sdata)
mutex_lock(&local->mtx);
mutex_lock(&local->chanctx_mtx);
- ieee80211_vif_unreserve_chanctx(sdata);
+ ieee80211_link_unreserve_chanctx(link);
mutex_unlock(&local->chanctx_mtx);
- if (sdata->csa_block_tx)
+ if (link->csa_block_tx)
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->csa_block_tx = false;
- sdata->vif.csa_active = false;
+ link->csa_block_tx = false;
+ link->conf->csa_active = false;
mutex_unlock(&local->mtx);
@@ -1285,14 +1765,15 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_sub_if_data *sdata)
}
static void
-ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
+ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
u64 timestamp, u32 device_timestamp,
struct ieee802_11_elems *elems,
bool beacon)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- struct cfg80211_bss *cbss = ifmgd->associated;
+ struct cfg80211_bss *cbss = link->u.mgd.bss;
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *chanctx;
enum nl80211_band current_band;
@@ -1313,8 +1794,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
bss = (void *)cbss->priv;
res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band,
bss->vht_cap_info,
- ifmgd->flags,
- ifmgd->associated->bssid, &csa_ie);
+ link->u.mgd.conn_flags,
+ link->u.mgd.bssid, &csa_ie);
if (!res) {
ch_switch.timestamp = timestamp;
@@ -1328,22 +1809,23 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
if (res < 0)
goto lock_and_drop_connection;
- if (beacon && sdata->vif.csa_active && !ifmgd->csa_waiting_bcn) {
+ if (beacon && link->conf->csa_active &&
+ !link->u.mgd.csa_waiting_bcn) {
if (res)
- ieee80211_sta_abort_chanswitch(sdata);
+ ieee80211_sta_abort_chanswitch(link);
else
drv_channel_switch_rx_beacon(sdata, &ch_switch);
return;
- } else if (sdata->vif.csa_active || res) {
+ } else if (link->conf->csa_active || res) {
/* disregard subsequent announcements if already processing */
return;
}
- if (sdata->vif.bss_conf.chandef.chan->band !=
+ if (link->conf->chandef.chan->band !=
csa_ie.chandef.chan->band) {
sdata_info(sdata,
"AP %pM switches to different band (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n",
- ifmgd->associated->bssid,
+ link->u.mgd.bssid,
csa_ie.chandef.chan->center_freq,
csa_ie.chandef.width, csa_ie.chandef.center_freq1,
csa_ie.chandef.center_freq2);
@@ -1356,7 +1838,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
"AP %pM switches to unsupported channel "
"(%d.%03d MHz, width:%d, CF1/2: %d.%03d/%d MHz), "
"disconnecting\n",
- ifmgd->associated->bssid,
+ link->u.mgd.bssid,
csa_ie.chandef.chan->center_freq,
csa_ie.chandef.chan->freq_offset,
csa_ie.chandef.width, csa_ie.chandef.center_freq1,
@@ -1366,14 +1848,14 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
}
if (cfg80211_chandef_identical(&csa_ie.chandef,
- &sdata->vif.bss_conf.chandef) &&
+ &link->conf->chandef) &&
(!csa_ie.mode || !beacon)) {
- if (ifmgd->csa_ignored_same_chan)
+ if (link->u.mgd.csa_ignored_same_chan)
return;
sdata_info(sdata,
"AP %pM tries to chanswitch to same channel, ignore\n",
- ifmgd->associated->bssid);
- ifmgd->csa_ignored_same_chan = true;
+ link->u.mgd.bssid);
+ link->u.mgd.csa_ignored_same_chan = true;
return;
}
@@ -1387,7 +1869,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
mutex_lock(&local->mtx);
mutex_lock(&local->chanctx_mtx);
- conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+ conf = rcu_dereference_protected(link->conf->chanctx_conf,
lockdep_is_held(&local->chanctx_mtx));
if (!conf) {
sdata_info(sdata,
@@ -1410,8 +1892,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
goto drop_connection;
}
- res = ieee80211_vif_reserve_chanctx(sdata, &csa_ie.chandef,
- chanctx->mode, false);
+ res = ieee80211_link_reserve_chanctx(link, &csa_ie.chandef,
+ chanctx->mode, false);
if (res) {
sdata_info(sdata,
"failed to reserve channel context for channel switch, disconnecting (err=%d)\n",
@@ -1420,18 +1902,18 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
}
mutex_unlock(&local->chanctx_mtx);
- sdata->vif.csa_active = true;
- sdata->csa_chandef = csa_ie.chandef;
- sdata->csa_block_tx = csa_ie.mode;
- ifmgd->csa_ignored_same_chan = false;
- ifmgd->beacon_crc_valid = false;
+ link->conf->csa_active = true;
+ link->csa_chandef = csa_ie.chandef;
+ link->csa_block_tx = csa_ie.mode;
+ link->u.mgd.csa_ignored_same_chan = false;
+ link->u.mgd.beacon_crc_valid = false;
- if (sdata->csa_block_tx)
+ if (link->csa_block_tx)
ieee80211_stop_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
mutex_unlock(&local->mtx);
- cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chandef,
+ cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chandef, 0,
csa_ie.count, csa_ie.mode);
if (local->ops->channel_switch) {
@@ -1442,9 +1924,9 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
/* channel switch handled in software */
if (csa_ie.count <= 1)
- ieee80211_queue_work(&local->hw, &ifmgd->chswitch_work);
+ ieee80211_queue_work(&local->hw, &link->u.mgd.chswitch_work);
else
- mod_timer(&ifmgd->chswitch_timer,
+ mod_timer(&link->u.mgd.chswitch_timer,
TU_TO_EXP_TIME((csa_ie.count - 1) *
cbss->beacon_interval));
return;
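
The deferral above is easy to make concrete: when csa_ie.count is greater than 1, the switch work is delayed by (count - 1) beacon intervals, and beacon intervals are counted in TUs (1 TU = 1024 microseconds). A minimal userspace sketch of that arithmetic, assuming a count of 5 and the common 100 TU beacon interval (not kernel code; TU_TO_EXP_TIME() additionally converts the result to jiffies):

#include <stdio.h>
#include <stdint.h>

/* 1 TU = 1024 us; mirrors the arithmetic fed to TU_TO_EXP_TIME() */
static uint64_t csa_delay_usec(unsigned int count, unsigned int beacon_int_tu)
{
	return (uint64_t)(count - 1) * beacon_int_tu * 1024;
}

int main(void)
{
	/* assumed values: count = 5, beacon interval = 100 TU */
	printf("switch work runs in ~%llu us\n",
	       (unsigned long long)csa_delay_usec(5, 100)); /* 409600 */
	return 0;
}
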
@@ -1459,8 +1941,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
* send a deauthentication frame. Those two fields will be
* reset when the disconnection worker runs.
*/
- sdata->vif.csa_active = true;
- sdata->csa_block_tx = csa_ie.mode;
+ link->conf->csa_active = true;
+ link->csa_block_tx = csa_ie.mode;
ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work);
mutex_unlock(&local->chanctx_mtx);
@@ -1553,13 +2035,14 @@ static void ieee80211_find_cisco_dtpc(struct ieee80211_sub_if_data *sdata,
*pwr_level = (__s8)cisco_dtpc_ie[4];
}
-static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
+static u32 ieee80211_handle_pwr_constr(struct ieee80211_link_data *link,
struct ieee80211_channel *channel,
struct ieee80211_mgmt *mgmt,
const u8 *country_ie, u8 country_ie_len,
const u8 *pwr_constr_ie,
const u8 *cisco_dtpc_ie)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
bool has_80211h_pwr = false, has_cisco_pwr = false;
int chan_pwr = 0, pwr_reduction_80211h = 0;
int pwr_level_cisco, pwr_level_80211h;
@@ -1595,25 +2078,25 @@ static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
(!has_cisco_pwr || pwr_level_80211h <= pwr_level_cisco)) {
new_ap_level = pwr_level_80211h;
- if (sdata->ap_power_level == new_ap_level)
+ if (link->ap_power_level == new_ap_level)
return 0;
sdata_dbg(sdata,
"Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n",
pwr_level_80211h, chan_pwr, pwr_reduction_80211h,
- sdata->u.mgd.bssid);
+ link->u.mgd.bssid);
} else { /* has_cisco_pwr is always true here. */
new_ap_level = pwr_level_cisco;
- if (sdata->ap_power_level == new_ap_level)
+ if (link->ap_power_level == new_ap_level)
return 0;
sdata_dbg(sdata,
"Limiting TX power to %d dBm as advertised by %pM\n",
- pwr_level_cisco, sdata->u.mgd.bssid);
+ pwr_level_cisco, link->u.mgd.bssid);
}
- sdata->ap_power_level = new_ap_level;
+ link->ap_power_level = new_ap_level;
if (__ieee80211_recalc_txpower(sdata))
return BSS_CHANGED_TXPOWER;
return 0;
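
The selection logic above reduces to a small decision rule: prefer the 802.11h constraint when present, unless Cisco DTPC advertises a stricter level. A toy userspace model of just that rule, with made-up dBm values (the kernel code additionally skips the update when the level is unchanged):

#include <stdbool.h>
#include <stdio.h>

/* mirrors the branch structure above, where the else-branch is only
 * reachable with has_cisco == true */
static int pick_ap_power_level(bool has_80211h, int lvl_80211h,
			       bool has_cisco, int lvl_cisco)
{
	if (has_80211h && (!has_cisco || lvl_80211h <= lvl_cisco))
		return lvl_80211h;
	return lvl_cisco;
}

int main(void)
{
	/* both present: 17 dBm (802.11h) wins over 20 dBm (DTPC) */
	printf("%d dBm\n", pick_ap_power_level(true, 17, true, 20));
	return 0;
}
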
@@ -1665,6 +2148,7 @@ static void ieee80211_change_ps(struct ieee80211_local *local)
static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata)
{
+ struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *mgd = &sdata->u.mgd;
struct sta_info *sta = NULL;
bool authorized = false;
@@ -1681,11 +2165,12 @@ static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata)
if (mgd->flags & IEEE80211_STA_CONNECTION_POLL)
return false;
- if (!mgd->have_beacon)
+ if (!(local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO) &&
+ !sdata->deflink.u.mgd.have_beacon)
return false;
rcu_read_lock();
- sta = sta_info_get(sdata, mgd->bssid);
+ sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr);
if (sta)
authorized = test_sta_flag(sta, WLAN_STA_AUTHORIZED);
rcu_read_unlock();
@@ -1700,7 +2185,8 @@ void ieee80211_recalc_ps(struct ieee80211_local *local)
int count = 0;
int timeout;
- if (!ieee80211_hw_check(&local->hw, SUPPORTS_PS)) {
+ if (!ieee80211_hw_check(&local->hw, SUPPORTS_PS) ||
+ ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) {
local->ps_sdata = NULL;
return;
}
@@ -1723,7 +2209,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local)
}
if (count == 1 && ieee80211_powersave_allowed(found)) {
- u8 dtimper = found->u.mgd.dtim_period;
+ u8 dtimper = found->deflink.u.mgd.dtim_period;
timeout = local->dynamic_ps_forced_timeout;
if (timeout < 0)
@@ -1747,9 +2233,9 @@ void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata)
{
bool ps_allowed = ieee80211_powersave_allowed(sdata);
- if (sdata->vif.bss_conf.ps != ps_allowed) {
- sdata->vif.bss_conf.ps = ps_allowed;
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_PS);
+ if (sdata->vif.cfg.ps != ps_allowed) {
+ sdata->vif.cfg.ps = ps_allowed;
+ ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_PS);
}
}
@@ -1849,14 +2335,15 @@ void ieee80211_dynamic_ps_timer(struct timer_list *t)
void ieee80211_dfs_cac_timer_work(struct work_struct *work)
{
struct delayed_work *delayed_work = to_delayed_work(work);
- struct ieee80211_sub_if_data *sdata =
- container_of(delayed_work, struct ieee80211_sub_if_data,
+ struct ieee80211_link_data *link =
+ container_of(delayed_work, struct ieee80211_link_data,
dfs_cac_timer_work);
- struct cfg80211_chan_def chandef = sdata->vif.bss_conf.chandef;
+ struct cfg80211_chan_def chandef = link->conf->chandef;
+ struct ieee80211_sub_if_data *sdata = link->sdata;
mutex_lock(&sdata->local->mtx);
if (sdata->wdev.cac_started) {
- ieee80211_vif_release_channel(sdata);
+ ieee80211_link_release_channel(link);
cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_FINISHED,
GFP_KERNEL);
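
Moving dfs_cac_timer_work into struct ieee80211_link_data changes what container_of() recovers in the work handler: the link rather than the sdata, with the sdata then read back through link->sdata. A self-contained model of the pattern, using stand-in struct names instead of the real mac80211 types:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work { int pending; };

struct link_data {
	int link_id;
	struct work dfs_cac_timer_work;
};

static void timer_fn(struct work *w)
{
	/* recover the enclosing link from the embedded work item */
	struct link_data *link =
		container_of(w, struct link_data, dfs_cac_timer_work);

	printf("CAC finished on link %d\n", link->link_id);
}

int main(void)
{
	struct link_data l = { .link_id = 1 };

	timer_fn(&l.dfs_cac_timer_work);
	return 0;
}
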
@@ -1894,10 +2381,11 @@ __ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata)
switch (tx_tspec->action) {
case TX_TSPEC_ACTION_STOP_DOWNGRADE:
/* take the original parameters */
- if (drv_conf_tx(local, sdata, ac, &sdata->tx_conf[ac]))
- sdata_err(sdata,
- "failed to set TX queue parameters for queue %d\n",
- ac);
+ if (drv_conf_tx(local, &sdata->deflink, ac,
+ &sdata->deflink.tx_conf[ac]))
+ link_err(&sdata->deflink,
+ "failed to set TX queue parameters for queue %d\n",
+ ac);
tx_tspec->action = TX_TSPEC_ACTION_NONE;
tx_tspec->downgraded = false;
ret = true;
@@ -1923,11 +2411,11 @@ __ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata)
*/
if (non_acm_ac >= IEEE80211_NUM_ACS)
non_acm_ac = IEEE80211_AC_BK;
- if (drv_conf_tx(local, sdata, ac,
- &sdata->tx_conf[non_acm_ac]))
- sdata_err(sdata,
- "failed to set TX queue parameters for queue %d\n",
- ac);
+ if (drv_conf_tx(local, &sdata->deflink, ac,
+ &sdata->deflink.tx_conf[non_acm_ac]))
+ link_err(&sdata->deflink,
+ "failed to set TX queue parameters for queue %d\n",
+ ac);
tx_tspec->action = TX_TSPEC_ACTION_NONE;
ret = true;
schedule_delayed_work(&ifmgd->tx_tspec_wk,
@@ -1945,7 +2433,8 @@ __ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata)
void ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata)
{
if (__ieee80211_sta_handle_tspec_ac_params(sdata))
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_QOS);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ BSS_CHANGED_QOS);
}
static void ieee80211_sta_handle_tspec_ac_params_wk(struct work_struct *work)
@@ -1957,13 +2446,37 @@ static void ieee80211_sta_handle_tspec_ac_params_wk(struct work_struct *work)
ieee80211_sta_handle_tspec_ac_params(sdata);
}
+void ieee80211_mgd_set_link_qos_params(struct ieee80211_link_data *link)
+{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ struct ieee80211_tx_queue_params *params = link->tx_conf;
+ u8 ac;
+
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ mlme_dbg(sdata,
+ "WMM AC=%d acm=%d aifs=%d cWmin=%d cWmax=%d txop=%d uapsd=%d, downgraded=%d\n",
+ ac, params[ac].acm,
+ params[ac].aifs, params[ac].cw_min, params[ac].cw_max,
+ params[ac].txop, params[ac].uapsd,
+ ifmgd->tx_tspec[ac].downgraded);
+ if (!ifmgd->tx_tspec[ac].downgraded &&
+ drv_conf_tx(local, link, ac, &params[ac]))
+ link_err(link,
+ "failed to set TX queue parameters for AC %d\n",
+ ac);
+ }
+}
+
/* MLME */
static bool
ieee80211_sta_wmm_params(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
const u8 *wmm_param, size_t wmm_param_len,
const struct ieee80211_mu_edca_param_set *mu_edca)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_tx_queue_params params[IEEE80211_NUM_ACS];
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
size_t left;
@@ -1992,11 +2505,11 @@ ieee80211_sta_wmm_params(struct ieee80211_local *local,
* the driver about it.
*/
mu_edca_count = mu_edca ? mu_edca->mu_qos_info & 0x0f : -1;
- if (count == ifmgd->wmm_last_param_set &&
- mu_edca_count == ifmgd->mu_edca_last_param_set)
+ if (count == link->u.mgd.wmm_last_param_set &&
+ mu_edca_count == link->u.mgd.mu_edca_last_param_set)
return false;
- ifmgd->wmm_last_param_set = count;
- ifmgd->mu_edca_last_param_set = mu_edca_count;
+ link->u.mgd.wmm_last_param_set = count;
+ link->u.mgd.mu_edca_last_param_set = mu_edca_count;
pos = wmm_param + 8;
left = wmm_param_len - 8;
@@ -2087,23 +2600,13 @@ ieee80211_sta_wmm_params(struct ieee80211_local *local,
}
}
- for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- mlme_dbg(sdata,
- "WMM AC=%d acm=%d aifs=%d cWmin=%d cWmax=%d txop=%d uapsd=%d, downgraded=%d\n",
- ac, params[ac].acm,
- params[ac].aifs, params[ac].cw_min, params[ac].cw_max,
- params[ac].txop, params[ac].uapsd,
- ifmgd->tx_tspec[ac].downgraded);
- sdata->tx_conf[ac] = params[ac];
- if (!ifmgd->tx_tspec[ac].downgraded &&
- drv_conf_tx(local, sdata, ac, &params[ac]))
- sdata_err(sdata,
- "failed to set TX queue parameters for AC %d\n",
- ac);
- }
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+ link->tx_conf[ac] = params[ac];
+
+ ieee80211_mgd_set_link_qos_params(link);
/* enable WMM or activate new settings */
- sdata->vif.bss_conf.qos = true;
+ link->conf->qos = true;
return true;
}
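
The count comparison earlier in this function is driven by a 4-bit parameter-set count, which is why association seeds the stored copies with -1: no value in 0..15 can match, so the first beacon always programs the queues. A standalone sketch of that bookkeeping (the 0x0f mask over the QoS-info octet is the same one used above):

#include <stdio.h>

static int stored_count = -1;		/* as seeded after association */

static int wmm_params_changed(unsigned char qos_info)
{
	int count = qos_info & 0x0f;	/* 4-bit parameter set count */

	if (count == stored_count)
		return 0;		/* nothing to do */
	stored_count = count;
	return 1;			/* reprogram the queues */
}

int main(void)
{
	printf("%d %d %d\n",
	       wmm_params_changed(0x03),	/* 1: first update */
	       wmm_params_changed(0x03),	/* 0: same count */
	       wmm_params_changed(0x04));	/* 1: AP bumped it */
	return 0;
}
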
@@ -2122,17 +2625,17 @@ static void ieee80211_stop_poll(struct ieee80211_sub_if_data *sdata)
mutex_unlock(&sdata->local->mtx);
}
-static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
+static u32 ieee80211_handle_bss_capability(struct ieee80211_link_data *link,
u16 capab, bool erp_valid, u8 erp)
{
- struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
+ struct ieee80211_bss_conf *bss_conf = link->conf;
struct ieee80211_supported_band *sband;
u32 changed = 0;
bool use_protection;
bool use_short_preamble;
bool use_short_slot;
- sband = ieee80211_get_sband(sdata);
+ sband = ieee80211_get_link_sband(link);
if (!sband)
return changed;
@@ -2167,27 +2670,28 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
return changed;
}
-static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_bss *cbss,
- u32 bss_info_changed)
+static u32 ieee80211_link_set_associated(struct ieee80211_link_data *link,
+ struct cfg80211_bss *cbss)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ struct ieee80211_bss_conf *bss_conf = link->conf;
struct ieee80211_bss *bss = (void *)cbss->priv;
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
+ u32 changed = BSS_CHANGED_QOS;
- bss_info_changed |= BSS_CHANGED_ASSOC;
- bss_info_changed |= ieee80211_handle_bss_capability(sdata,
- bss_conf->assoc_capability, bss->has_erp_value, bss->erp_value);
+ /* not really used in MLO */
+ sdata->u.mgd.beacon_timeout =
+ usecs_to_jiffies(ieee80211_tu_to_usec(beacon_loss_count *
+ bss_conf->beacon_int));
- sdata->u.mgd.beacon_timeout = usecs_to_jiffies(ieee80211_tu_to_usec(
- beacon_loss_count * bss_conf->beacon_int));
+ changed |= ieee80211_handle_bss_capability(link,
+ bss_conf->assoc_capability,
+ bss->has_erp_value,
+ bss->erp_value);
- sdata->u.mgd.associated = cbss;
- memcpy(sdata->u.mgd.bssid, cbss->bssid, ETH_ALEN);
+ ieee80211_check_rate_mask(link);
- ieee80211_check_rate_mask(sdata);
-
- sdata->u.mgd.flags |= IEEE80211_STA_RESET_SIGNAL_AVE;
+ link->u.mgd.bss = cbss;
+ memcpy(link->u.mgd.bssid, cbss->bssid, ETH_ALEN);
if (sdata->vif.p2p ||
sdata->vif.driver_flags & IEEE80211_VIF_GET_NOA_UPDATE) {
@@ -2204,52 +2708,106 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
(u8 *) &bss_conf->p2p_noa_attr,
sizeof(bss_conf->p2p_noa_attr));
if (ret >= 2) {
- sdata->u.mgd.p2p_noa_index =
+ link->u.mgd.p2p_noa_index =
bss_conf->p2p_noa_attr.index;
- bss_info_changed |= BSS_CHANGED_P2P_PS;
+ changed |= BSS_CHANGED_P2P_PS;
}
}
rcu_read_unlock();
}
- /* just to be sure */
- ieee80211_stop_poll(sdata);
-
- ieee80211_led_assoc(local, 1);
-
- if (sdata->u.mgd.have_beacon) {
+ if (link->u.mgd.have_beacon) {
/*
* If the AP is buggy we may get here with no DTIM period
* known, so assume it's 1 which is the only safe assumption
* in that case, although if the TIM IE is broken powersave
* probably just won't work at all.
*/
- bss_conf->dtim_period = sdata->u.mgd.dtim_period ?: 1;
+ bss_conf->dtim_period = link->u.mgd.dtim_period ?: 1;
bss_conf->beacon_rate = bss->beacon_rate;
- bss_info_changed |= BSS_CHANGED_BEACON_INFO;
+ changed |= BSS_CHANGED_BEACON_INFO;
} else {
bss_conf->beacon_rate = NULL;
bss_conf->dtim_period = 0;
}
- bss_conf->assoc = 1;
-
/* Tell the driver to monitor connection quality (if supported) */
if (sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI &&
bss_conf->cqm_rssi_thold)
- bss_info_changed |= BSS_CHANGED_CQM;
+ changed |= BSS_CHANGED_CQM;
+
+ return changed;
+}
+
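
For scale, the beacon_timeout set above is beacon_loss_count missed beacons of beacon_int TUs each, converted to microseconds and then rounded up to whole jiffies. A standalone example with assumed numbers (7 is the usual beacon_loss_count module-parameter default; HZ and the 100 TU beacon interval are illustrative):

#include <stdio.h>
#include <stdint.h>

#define HZ 250	/* assumed jiffy rate for the example */

int main(void)
{
	unsigned int beacon_loss_count = 7;	/* module param default */
	unsigned int beacon_int = 100;		/* TUs; 1 TU = 1024 us */
	uint64_t usec = (uint64_t)beacon_loss_count * beacon_int * 1024;
	uint64_t jiffies = (usec * HZ + 999999) / 1000000; /* round up */

	printf("timeout: %llu us (~%llu jiffies)\n",
	       (unsigned long long)usec, (unsigned long long)jiffies);
	return 0;
}
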
+static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mgd_assoc_data *assoc_data,
+ u64 changed[IEEE80211_MLD_MAX_NUM_LINKS])
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg;
+ u64 vif_changed = BSS_CHANGED_ASSOC;
+ unsigned int link_id;
+
+ sdata->u.mgd.associated = true;
+
+ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
+ struct cfg80211_bss *cbss = assoc_data->link[link_id].bss;
+ struct ieee80211_link_data *link;
+
+ if (!cbss)
+ continue;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (WARN_ON(!link))
+ return;
+
+ changed[link_id] |= ieee80211_link_set_associated(link, cbss);
+ }
+
+ /* just to be sure */
+ ieee80211_stop_poll(sdata);
+
+ ieee80211_led_assoc(local, 1);
+
+ vif_cfg->assoc = 1;
/* Enable ARP filtering */
- if (bss_conf->arp_addr_cnt)
- bss_info_changed |= BSS_CHANGED_ARP_FILTER;
+ if (vif_cfg->arp_addr_cnt)
+ vif_changed |= BSS_CHANGED_ARP_FILTER;
+
+ if (sdata->vif.valid_links) {
+ for (link_id = 0;
+ link_id < IEEE80211_MLD_MAX_NUM_LINKS;
+ link_id++) {
+ struct ieee80211_link_data *link;
+ struct cfg80211_bss *cbss = assoc_data->link[link_id].bss;
+
+ if (!cbss)
+ continue;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (WARN_ON(!link))
+ return;
+
+ ieee80211_link_info_change_notify(sdata, link,
+ changed[link_id]);
- ieee80211_bss_info_change_notify(sdata, bss_info_changed);
+ ieee80211_recalc_smps(sdata, link);
+ }
+
+ ieee80211_vif_cfg_change_notify(sdata, vif_changed);
+ } else {
+ ieee80211_bss_info_change_notify(sdata,
+ vif_changed | changed[0]);
+ }
mutex_lock(&local->iflist_mtx);
ieee80211_recalc_ps(local);
mutex_unlock(&local->iflist_mtx);
- ieee80211_recalc_smps(sdata);
+ /* leave this here to not change ordering in non-MLO cases */
+ if (!sdata->vif.valid_links)
+ ieee80211_recalc_smps(sdata, &sdata->deflink);
ieee80211_recalc_ps_vif(sdata);
netif_carrier_on(sdata->dev);
@@ -2261,7 +2819,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_local *local = sdata->local;
- struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
+ unsigned int link_id;
u32 changed = 0;
struct ieee80211_prep_tx_info info = {
.subtype = stype,
@@ -2277,7 +2835,11 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
ieee80211_stop_poll(sdata);
- ifmgd->associated = NULL;
+ ifmgd->associated = false;
+
+ /* other links will be destroyed */
+ sdata->deflink.u.mgd.bss = NULL;
+
netif_carrier_off(sdata->dev);
/*
@@ -2315,13 +2877,13 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
* driver requested so.
*/
if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP) &&
- !ifmgd->have_beacon) {
+ !sdata->deflink.u.mgd.have_beacon) {
drv_mgd_prepare_tx(sdata->local, sdata, &info);
}
- ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid,
- ifmgd->bssid, stype, reason,
- tx, frame_buf);
+ ieee80211_send_deauth_disassoc(sdata, sdata->vif.cfg.ap_addr,
+ sdata->vif.cfg.ap_addr, stype,
+ reason, tx, frame_buf);
}
/* flush out frame - make sure the deauth was actually sent */
@@ -2330,22 +2892,24 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
drv_mgd_complete_tx(sdata->local, sdata, &info);
- /* clear bssid only after building the needed mgmt frames */
- eth_zero_addr(ifmgd->bssid);
+ /* clear AP addr only after building the needed mgmt frames */
+ eth_zero_addr(sdata->deflink.u.mgd.bssid);
+ eth_zero_addr(sdata->vif.cfg.ap_addr);
- sdata->vif.bss_conf.ssid_len = 0;
+ sdata->vif.cfg.ssid_len = 0;
/* remove AP and TDLS peers */
sta_info_flush(sdata);
/* finally reset all BSS / config parameters */
- changed |= ieee80211_reset_erp_info(sdata);
+ if (!sdata->vif.valid_links)
+ changed |= ieee80211_reset_erp_info(sdata);
ieee80211_led_assoc(local, 0);
changed |= BSS_CHANGED_ASSOC;
- sdata->vif.bss_conf.assoc = false;
+ sdata->vif.cfg.assoc = false;
- ifmgd->p2p_noa_index = -1;
+ sdata->deflink.u.mgd.p2p_noa_index = -1;
memset(&sdata->vif.bss_conf.p2p_noa_attr, 0,
sizeof(sdata->vif.bss_conf.p2p_noa_attr));
@@ -2355,54 +2919,72 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
memset(&ifmgd->vht_capa, 0, sizeof(ifmgd->vht_capa));
memset(&ifmgd->vht_capa_mask, 0, sizeof(ifmgd->vht_capa_mask));
- /* reset MU-MIMO ownership and group data */
+ /*
+ * reset MU-MIMO ownership and group data in the default link;
+ * the other links, if any were used, are destroyed
+ */
memset(sdata->vif.bss_conf.mu_group.membership, 0,
sizeof(sdata->vif.bss_conf.mu_group.membership));
memset(sdata->vif.bss_conf.mu_group.position, 0,
sizeof(sdata->vif.bss_conf.mu_group.position));
- changed |= BSS_CHANGED_MU_GROUPS;
- sdata->vif.mu_mimo_owner = false;
+ if (!sdata->vif.valid_links)
+ changed |= BSS_CHANGED_MU_GROUPS;
+ sdata->vif.bss_conf.mu_mimo_owner = false;
- sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL;
+ sdata->deflink.ap_power_level = IEEE80211_UNSET_POWER_LEVEL;
del_timer_sync(&local->dynamic_ps_timer);
cancel_work_sync(&local->dynamic_ps_enable_work);
/* Disable ARP filtering */
- if (sdata->vif.bss_conf.arp_addr_cnt)
+ if (sdata->vif.cfg.arp_addr_cnt)
changed |= BSS_CHANGED_ARP_FILTER;
sdata->vif.bss_conf.qos = false;
- changed |= BSS_CHANGED_QOS;
-
- /* The BSSID (not really interesting) and HT changed */
- changed |= BSS_CHANGED_BSSID | BSS_CHANGED_HT;
- ieee80211_bss_info_change_notify(sdata, changed);
+ if (!sdata->vif.valid_links) {
+ changed |= BSS_CHANGED_QOS;
+ /* The BSSID (not really interesting) and HT changed */
+ changed |= BSS_CHANGED_BSSID | BSS_CHANGED_HT;
+ ieee80211_bss_info_change_notify(sdata, changed);
+ } else {
+ ieee80211_vif_cfg_change_notify(sdata, changed);
+ }
/* disassociated - set to defaults now */
- ieee80211_set_wmm_default(sdata, false, false);
+ ieee80211_set_wmm_default(&sdata->deflink, false, false);
del_timer_sync(&sdata->u.mgd.conn_mon_timer);
del_timer_sync(&sdata->u.mgd.bcn_mon_timer);
del_timer_sync(&sdata->u.mgd.timer);
- del_timer_sync(&sdata->u.mgd.chswitch_timer);
+ del_timer_sync(&sdata->deflink.u.mgd.chswitch_timer);
sdata->vif.bss_conf.dtim_period = 0;
sdata->vif.bss_conf.beacon_rate = NULL;
- ifmgd->have_beacon = false;
+ sdata->deflink.u.mgd.have_beacon = false;
+ sdata->deflink.u.mgd.tracking_signal_avg = false;
+ sdata->deflink.u.mgd.disable_wmm_tracking = false;
ifmgd->flags = 0;
+ sdata->deflink.u.mgd.conn_flags = 0;
mutex_lock(&local->mtx);
- ieee80211_vif_release_channel(sdata);
- sdata->vif.csa_active = false;
- ifmgd->csa_waiting_bcn = false;
- ifmgd->csa_ignored_same_chan = false;
- if (sdata->csa_block_tx) {
+ for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
+ struct ieee80211_link_data *link;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (!link)
+ continue;
+ ieee80211_link_release_channel(link);
+ }
+
+ sdata->vif.bss_conf.csa_active = false;
+ sdata->deflink.u.mgd.csa_waiting_bcn = false;
+ sdata->deflink.u.mgd.csa_ignored_same_chan = false;
+ if (sdata->deflink.csa_block_tx) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->csa_block_tx = false;
+ sdata->deflink.csa_block_tx = false;
}
mutex_unlock(&local->mtx);
@@ -2410,11 +2992,12 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
memset(ifmgd->tx_tspec, 0, sizeof(ifmgd->tx_tspec));
cancel_delayed_work_sync(&ifmgd->tx_tspec_wk);
- sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM;
+ sdata->vif.bss_conf.pwr_reduction = 0;
+ sdata->vif.bss_conf.tx_pwr_env_num = 0;
+ memset(sdata->vif.bss_conf.tx_pwr_env, 0,
+ sizeof(sdata->vif.bss_conf.tx_pwr_env));
- bss_conf->pwr_reduction = 0;
- bss_conf->tx_pwr_env_num = 0;
- memset(bss_conf->tx_pwr_env, 0, sizeof(bss_conf->tx_pwr_env));
+ ieee80211_vif_set_links(sdata, 0);
}
static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata)
@@ -2524,11 +3107,13 @@ static void ieee80211_mlme_send_probe_req(struct ieee80211_sub_if_data *sdata,
static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- const struct element *ssid;
- u8 *dst = ifmgd->associated->bssid;
+ u8 *dst = sdata->vif.cfg.ap_addr;
u8 unicast_limit = max(1, max_probe_tries - 3);
struct sta_info *sta;
+ if (WARN_ON(sdata->vif.valid_links))
+ return;
+
/*
* Try sending broadcast probe requests for the last three
* probe requests after the first ones failed since some
@@ -2558,19 +3143,10 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
ifmgd->nullfunc_failed = false;
ieee80211_send_nullfunc(sdata->local, sdata, false);
} else {
- int ssid_len;
-
- rcu_read_lock();
- ssid = ieee80211_bss_get_elem(ifmgd->associated, WLAN_EID_SSID);
- if (WARN_ON_ONCE(ssid == NULL))
- ssid_len = 0;
- else
- ssid_len = ssid->datalen;
-
ieee80211_mlme_send_probe_req(sdata, sdata->vif.addr, dst,
- ssid->data, ssid_len,
- ifmgd->associated->channel);
- rcu_read_unlock();
+ sdata->vif.cfg.ssid,
+ sdata->vif.cfg.ssid_len,
+ sdata->deflink.u.mgd.bss->channel);
}
ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms);
@@ -2583,6 +3159,9 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
bool already = false;
+ if (WARN_ON(sdata->vif.valid_links))
+ return;
+
if (!ieee80211_sdata_running(sdata))
return;
@@ -2654,17 +3233,18 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw,
const struct element *ssid;
int ssid_len;
- if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
+ if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION ||
+ sdata->vif.valid_links))
return NULL;
sdata_assert_lock(sdata);
if (ifmgd->associated)
- cbss = ifmgd->associated;
+ cbss = sdata->deflink.u.mgd.bss;
else if (ifmgd->auth_data)
cbss = ifmgd->auth_data->bss;
- else if (ifmgd->assoc_data)
- cbss = ifmgd->assoc_data->bss;
+ else if (ifmgd->assoc_data && ifmgd->assoc_data->link[0].bss)
+ cbss = ifmgd->assoc_data->link[0].bss;
else
return NULL;
@@ -2718,14 +3298,30 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
return;
}
- tx = !sdata->csa_block_tx;
+ /* in MLO assume we have a link where we can TX the frame */
+ tx = sdata->vif.valid_links || !sdata->deflink.csa_block_tx;
if (!ifmgd->driver_disconnect) {
+ unsigned int link_id;
+
/*
* AP is probably out of range (or not reachable for another
- * reason) so remove the bss struct for that AP.
+ * reason) so remove the bss structs for that AP. In the case
+ * of multi-link, it's not clear that all of them really are
+ * out of range, but if they weren't, the driver would likely
+ * have switched down to a single active link anyway?
*/
- cfg80211_unlink_bss(local->hw.wiphy, ifmgd->associated);
+ for (link_id = 0;
+ link_id < ARRAY_SIZE(sdata->link);
+ link_id++) {
+ struct ieee80211_link_data *link;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (!link)
+ continue;
+ cfg80211_unlink_bss(local->hw.wiphy, link->u.mgd.bss);
+ link->u.mgd.bss = NULL;
+ }
}
ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
@@ -2734,12 +3330,13 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
tx, frame_buf);
mutex_lock(&local->mtx);
- sdata->vif.csa_active = false;
- ifmgd->csa_waiting_bcn = false;
- if (sdata->csa_block_tx) {
+ /* the other links will be destroyed */
+ sdata->vif.bss_conf.csa_active = false;
+ sdata->deflink.u.mgd.csa_waiting_bcn = false;
+ if (sdata->deflink.csa_block_tx) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
- sdata->csa_block_tx = false;
+ sdata->deflink.csa_block_tx = false;
}
mutex_unlock(&local->mtx);
@@ -2758,21 +3355,20 @@ static void ieee80211_beacon_connection_loss_work(struct work_struct *work)
u.mgd.beacon_connection_loss_work);
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- if (ifmgd->associated)
- ifmgd->beacon_loss_count++;
-
if (ifmgd->connection_loss) {
sdata_info(sdata, "Connection to AP %pM lost\n",
- ifmgd->bssid);
+ sdata->vif.cfg.ap_addr);
__ieee80211_disconnect(sdata);
ifmgd->connection_loss = false;
} else if (ifmgd->driver_disconnect) {
sdata_info(sdata,
"Driver requested disconnection from AP %pM\n",
- ifmgd->bssid);
+ sdata->vif.cfg.ap_addr);
__ieee80211_disconnect(sdata);
ifmgd->driver_disconnect = false;
} else {
+ if (ifmgd->associated)
+ sdata->deflink.u.mgd.beacon_loss_count++;
ieee80211_mgd_probe_ap(sdata, true);
}
}
@@ -2840,13 +3436,18 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata,
* which is not relevant anymore.
*/
del_timer_sync(&sdata->u.mgd.timer);
- sta_info_destroy_addr(sdata, auth_data->bss->bssid);
+ sta_info_destroy_addr(sdata, auth_data->ap_addr);
- eth_zero_addr(sdata->u.mgd.bssid);
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID);
+ /* other links are destroyed */
+ sdata->deflink.u.mgd.conn_flags = 0;
+ eth_zero_addr(sdata->deflink.u.mgd.bssid);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ BSS_CHANGED_BSSID);
sdata->u.mgd.flags = 0;
+
mutex_lock(&sdata->local->mtx);
- ieee80211_vif_release_channel(sdata);
+ ieee80211_link_release_channel(&sdata->deflink);
+ ieee80211_vif_set_links(sdata, 0);
mutex_unlock(&sdata->local->mtx);
}
@@ -2855,33 +3456,58 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata,
sdata->u.mgd.auth_data = NULL;
}
+enum assoc_status {
+ ASSOC_SUCCESS,
+ ASSOC_REJECTED,
+ ASSOC_TIMEOUT,
+ ASSOC_ABANDON,
+};
+
static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata,
- bool assoc, bool abandon)
+ enum assoc_status status)
{
struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data;
sdata_assert_lock(sdata);
- if (!assoc) {
+ if (status != ASSOC_SUCCESS) {
/*
* we are not associated yet, the only timer that could be
* running is the timeout for the association response, which
* is not relevant anymore.
*/
del_timer_sync(&sdata->u.mgd.timer);
- sta_info_destroy_addr(sdata, assoc_data->bss->bssid);
+ sta_info_destroy_addr(sdata, assoc_data->ap_addr);
- eth_zero_addr(sdata->u.mgd.bssid);
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID);
+ sdata->deflink.u.mgd.conn_flags = 0;
+ eth_zero_addr(sdata->deflink.u.mgd.bssid);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ BSS_CHANGED_BSSID);
sdata->u.mgd.flags = 0;
- sdata->vif.mu_mimo_owner = false;
+ sdata->vif.bss_conf.mu_mimo_owner = false;
+
+ if (status != ASSOC_REJECTED) {
+ struct cfg80211_assoc_failure data = {
+ .timeout = status == ASSOC_TIMEOUT,
+ };
+ int i;
+
+ BUILD_BUG_ON(ARRAY_SIZE(data.bss) !=
+ ARRAY_SIZE(assoc_data->link));
+
+ for (i = 0; i < ARRAY_SIZE(data.bss); i++)
+ data.bss[i] = assoc_data->link[i].bss;
+
+ if (sdata->vif.valid_links)
+ data.ap_mld_addr = assoc_data->ap_addr;
+
+ cfg80211_assoc_failure(sdata->dev, &data);
+ }
mutex_lock(&sdata->local->mtx);
- ieee80211_vif_release_channel(sdata);
+ ieee80211_link_release_channel(&sdata->deflink);
+ ieee80211_vif_set_links(sdata, 0);
mutex_unlock(&sdata->local->mtx);
-
- if (abandon)
- cfg80211_abandon_assoc(sdata->dev, assoc_data->bss);
}
kfree(assoc_data);
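
The BUILD_BUG_ON() above encodes a cross-subsystem invariant at compile time: cfg80211's per-link failure array and mac80211's per-link assoc data must never diverge in size. A standalone model of the idiom, with stand-in struct names and an arbitrary size (the real kernel macro is defined differently but fails the build the same way):

#include <stdio.h>

#define MAX_LINKS 15	/* stand-in for the real per-MLD link limit */

struct cfg_failure { void *bss[MAX_LINKS]; };
struct assoc_data  { void *link[MAX_LINKS]; };

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
/* negative array size if cond is true -> compile error */
#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

int main(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(((struct cfg_failure *)0)->bss) !=
		     ARRAY_SIZE(((struct assoc_data *)0)->link));
	puts("sizes match");
	return 0;
}
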
@@ -2913,15 +3539,15 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
ieee80211_send_auth(sdata, 3, auth_data->algorithm, 0,
(void *)challenge,
challenge->datalen + sizeof(*challenge),
- auth_data->bss->bssid, auth_data->bss->bssid,
+ auth_data->ap_addr, auth_data->ap_addr,
auth_data->key, auth_data->key_len,
auth_data->key_idx, tx_flags);
}
-static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata,
- const u8 *bssid)
+static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ const u8 *ap_addr = ifmgd->auth_data->ap_addr;
struct sta_info *sta;
bool result = true;
@@ -2933,14 +3559,14 @@ static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata,
/* move station state to auth */
mutex_lock(&sdata->local->sta_mtx);
- sta = sta_info_get(sdata, bssid);
+ sta = sta_info_get(sdata, ap_addr);
if (!sta) {
- WARN_ONCE(1, "%s: STA %pM not found", sdata->name, bssid);
+ WARN_ONCE(1, "%s: STA %pM not found", sdata->name, ap_addr);
result = false;
goto out;
}
if (sta_info_move_state(sta, IEEE80211_STA_AUTH)) {
- sdata_info(sdata, "failed moving %pM to auth\n", bssid);
+ sdata_info(sdata, "failed moving %pM to auth\n", ap_addr);
result = false;
goto out;
}
@@ -2954,7 +3580,6 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt, size_t len)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- u8 bssid[ETH_ALEN];
u16 auth_alg, auth_transaction, status_code;
struct ieee80211_event event = {
.type = MLME_EVENT,
@@ -2972,9 +3597,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
if (!ifmgd->auth_data || ifmgd->auth_data->done)
return;
- memcpy(bssid, ifmgd->auth_data->bss->bssid, ETH_ALEN);
-
- if (!ether_addr_equal(bssid, mgmt->bssid))
+ if (!ether_addr_equal(ifmgd->auth_data->ap_addr, mgmt->bssid))
return;
auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg);
@@ -3001,8 +3624,15 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
(status_code == WLAN_STATUS_ANTI_CLOG_REQUIRED ||
(auth_transaction == 1 &&
(status_code == WLAN_STATUS_SAE_HASH_TO_ELEMENT ||
- status_code == WLAN_STATUS_SAE_PK))))
+ status_code == WLAN_STATUS_SAE_PK)))) {
+ /* waiting for userspace now */
+ ifmgd->auth_data->waiting = true;
+ ifmgd->auth_data->timeout =
+ jiffies + IEEE80211_AUTH_WAIT_SAE_RETRY;
+ ifmgd->auth_data->timeout_started = true;
+ run_again(sdata, ifmgd->auth_data->timeout);
goto notify_driver;
+ }
sdata_info(sdata, "%pM denied authentication (status %d)\n",
mgmt->sa, status_code);
@@ -3041,7 +3671,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
if (ifmgd->auth_data->algorithm != WLAN_AUTH_SAE ||
(auth_transaction == 2 &&
ifmgd->auth_data->expected_transaction == 2)) {
- if (!ieee80211_mark_sta_auth(sdata, bssid))
+ if (!ieee80211_mark_sta_auth(sdata))
return; /* ignore frame -- wait for timeout */
} else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE &&
auth_transaction == 2) {
@@ -3128,11 +3758,9 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
}
if (ifmgd->associated &&
- ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) {
- const u8 *bssid = ifmgd->associated->bssid;
-
+ ether_addr_equal(mgmt->bssid, sdata->vif.cfg.ap_addr)) {
sdata_info(sdata, "deauthenticated from %pM (Reason: %u=%s)\n",
- bssid, reason_code,
+ sdata->vif.cfg.ap_addr, reason_code,
ieee80211_get_reason_code_string(reason_code));
ieee80211_set_disassoc(sdata, 0, 0, false, NULL);
@@ -3143,15 +3771,13 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
}
if (ifmgd->assoc_data &&
- ether_addr_equal(mgmt->bssid, ifmgd->assoc_data->bss->bssid)) {
- const u8 *bssid = ifmgd->assoc_data->bss->bssid;
-
+ ether_addr_equal(mgmt->bssid, ifmgd->assoc_data->ap_addr)) {
sdata_info(sdata,
"deauthenticated from %pM while associating (Reason: %u=%s)\n",
- bssid, reason_code,
+ ifmgd->assoc_data->ap_addr, reason_code,
ieee80211_get_reason_code_string(reason_code));
- ieee80211_destroy_assoc_data(sdata, false, true);
+ ieee80211_destroy_assoc_data(sdata, ASSOC_ABANDON);
cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
return;
@@ -3171,7 +3797,7 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
return;
if (!ifmgd->associated ||
- !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid))
+ !ether_addr_equal(mgmt->bssid, sdata->vif.cfg.ap_addr))
return;
reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code);
@@ -3182,7 +3808,7 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
}
sdata_info(sdata, "disassociated from %pM (Reason: %u=%s)\n",
- mgmt->sa, reason_code,
+ sdata->vif.cfg.ap_addr, reason_code,
ieee80211_get_reason_code_string(reason_code));
ieee80211_set_disassoc(sdata, 0, 0, false, NULL);
@@ -3242,7 +3868,7 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
}
}
-static bool ieee80211_twt_req_supported(const struct sta_info *sta,
+static bool ieee80211_twt_req_supported(const struct link_sta_info *link_sta,
const struct ieee802_11_elems *elems)
{
if (elems->ext_capab_len < 10)
@@ -3251,18 +3877,18 @@ static bool ieee80211_twt_req_supported(const struct sta_info *sta,
if (!(elems->ext_capab[9] & WLAN_EXT_CAPA10_TWT_RESPONDER_SUPPORT))
return false;
- return sta->sta.he_cap.he_cap_elem.mac_cap_info[0] &
+ return link_sta->pub->he_cap.he_cap_elem.mac_cap_info[0] &
IEEE80211_HE_MAC_CAP0_TWT_RES;
}
-static int ieee80211_recalc_twt_req(struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta,
+static int ieee80211_recalc_twt_req(struct ieee80211_link_data *link,
+ struct link_sta_info *link_sta,
struct ieee802_11_elems *elems)
{
- bool twt = ieee80211_twt_req_supported(sta, elems);
+ bool twt = ieee80211_twt_req_supported(link_sta, elems);
- if (sdata->vif.bss_conf.twt_requester != twt) {
- sdata->vif.bss_conf.twt_requester = twt;
+ if (link->conf->twt_requester != twt) {
+ link->conf->twt_requester = twt;
return BSS_CHANGED_TWT;
}
return 0;
@@ -3271,72 +3897,53 @@ static int ieee80211_recalc_twt_req(struct ieee80211_sub_if_data *sdata,
static bool ieee80211_twt_bcast_support(struct ieee80211_sub_if_data *sdata,
struct ieee80211_bss_conf *bss_conf,
struct ieee80211_supported_band *sband,
- struct sta_info *sta)
+ struct link_sta_info *link_sta)
{
const struct ieee80211_sta_he_cap *own_he_cap =
ieee80211_get_he_iftype_cap(sband,
ieee80211_vif_type_p2p(&sdata->vif));
return bss_conf->he_support &&
- (sta->sta.he_cap.he_cap_elem.mac_cap_info[2] &
+ (link_sta->pub->he_cap.he_cap_elem.mac_cap_info[2] &
IEEE80211_HE_MAC_CAP2_BCAST_TWT) &&
own_he_cap &&
(own_he_cap->he_cap_elem.mac_cap_info[2] &
IEEE80211_HE_MAC_CAP2_BCAST_TWT);
}
-static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_bss *cbss,
- struct ieee80211_mgmt *mgmt, size_t len,
- struct ieee802_11_elems *elems)
+static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
+ struct link_sta_info *link_sta,
+ struct cfg80211_bss *cbss,
+ struct ieee80211_mgmt *mgmt,
+ const u8 *elem_start,
+ unsigned int elem_len,
+ u64 *changed)
{
- struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data;
+ struct ieee80211_bss_conf *bss_conf = link->conf;
struct ieee80211_local *local = sdata->local;
- struct ieee80211_supported_band *sband;
- struct sta_info *sta;
- u16 capab_info, aid;
- struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
- const struct cfg80211_bss_ies *bss_ies = NULL;
- struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data;
+ struct ieee80211_elems_parse_params parse_params = {
+ .start = elem_start,
+ .len = elem_len,
+ .bss = cbss,
+ .link_id = link == &sdata->deflink ? -1 : link->link_id,
+ .from_ap = true,
+ };
bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ;
bool is_s1g = cbss->channel->band == NL80211_BAND_S1GHZ;
- u32 changed = 0;
- u8 *pos;
- int err;
+ const struct cfg80211_bss_ies *bss_ies = NULL;
+ struct ieee80211_supported_band *sband;
+ struct ieee802_11_elems *elems;
+ u16 capab_info;
bool ret;
- /* AssocResp and ReassocResp have identical structure */
-
- pos = mgmt->u.assoc_resp.variable;
- aid = le16_to_cpu(mgmt->u.assoc_resp.aid);
- if (is_s1g) {
- pos = (u8 *) mgmt->u.s1g_assoc_resp.variable;
- aid = 0; /* TODO */
- }
- capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info);
- elems = ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false,
- mgmt->bssid, assoc_data->bss->bssid);
-
+ elems = ieee802_11_parse_elems_full(&parse_params);
if (!elems)
return false;
- if (elems->aid_resp)
- aid = le16_to_cpu(elems->aid_resp->aid);
-
- /*
- * The 5 MSB of the AID field are reserved
- * (802.11-2016 9.4.1.8 AID field)
- */
- aid &= 0x7ff;
-
- ifmgd->broken_ap = false;
-
- if (aid == 0 || aid > IEEE80211_MAX_AID) {
- sdata_info(sdata, "invalid AID value %d (out of range), turn off PS\n",
- aid);
- aid = 0;
- ifmgd->broken_ap = true;
- }
+ /* FIXME: use from STA profile element after parsing that */
+ capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info);
if (!is_s1g && !elems->supp_rates) {
sdata_info(sdata, "no SuppRates element in AssocResp\n");
@@ -3344,8 +3951,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
goto out;
}
- sdata->vif.bss_conf.aid = aid;
- ifmgd->tdls_chan_switch_prohibited =
+ link->u.mgd.tdls_chan_switch_prohibited =
elems->ext_capab && elems->ext_capab_len >= 5 &&
(elems->ext_capab[4] & WLAN_EXT_CAPA5_TDLS_CH_SW_PROHIBITED);
@@ -3358,9 +3964,9 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
*/
if (!is_6ghz &&
((assoc_data->wmm && !elems->wmm_param) ||
- (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
+ (!(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT) &&
(!elems->ht_cap_elem || !elems->ht_operation)) ||
- (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
+ (!(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT) &&
(!elems->vht_cap_elem || !elems->vht_operation)))) {
const struct cfg80211_bss_ies *ies;
struct ieee802_11_elems *bss_elems;
@@ -3376,9 +3982,9 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
goto out;
}
- bss_elems = ieee802_11_parse_elems(bss_ies->data, bss_ies->len,
- false, mgmt->bssid,
- assoc_data->bss->bssid);
+ parse_params.start = bss_ies->data;
+ parse_params.len = bss_ies->len;
+ bss_elems = ieee802_11_parse_elems_full(&parse_params);
if (!bss_elems) {
ret = false;
goto out;
@@ -3396,25 +4002,25 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
* have to include the IEs in the (re)association response.
*/
if (!elems->ht_cap_elem && bss_elems->ht_cap_elem &&
- !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
+ !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT)) {
elems->ht_cap_elem = bss_elems->ht_cap_elem;
sdata_info(sdata,
"AP bug: HT capability missing from AssocResp\n");
}
if (!elems->ht_operation && bss_elems->ht_operation &&
- !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
+ !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT)) {
elems->ht_operation = bss_elems->ht_operation;
sdata_info(sdata,
"AP bug: HT operation missing from AssocResp\n");
}
if (!elems->vht_cap_elem && bss_elems->vht_cap_elem &&
- !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) {
+ !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)) {
elems->vht_cap_elem = bss_elems->vht_cap_elem;
sdata_info(sdata,
"AP bug: VHT capa missing from AssocResp\n");
}
if (!elems->vht_operation && bss_elems->vht_operation &&
- !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) {
+ !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)) {
elems->vht_operation = bss_elems->vht_operation;
sdata_info(sdata,
"AP bug: VHT operation missing from AssocResp\n");
@@ -3427,7 +4033,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
* We previously checked these in the beacon/probe response, so
* they should be present here. This is just a safety net.
*/
- if (!is_6ghz && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
+ if (!is_6ghz && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT) &&
(!elems->wmm_param || !elems->ht_cap_elem || !elems->ht_operation)) {
sdata_info(sdata,
"HT AP is missing WMM params or HT capability/operation\n");
@@ -3435,7 +4041,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
goto out;
}
- if (!is_6ghz && !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
+ if (!is_6ghz && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT) &&
(!elems->vht_cap_elem || !elems->vht_operation)) {
sdata_info(sdata,
"VHT AP is missing VHT capability/operation\n");
@@ -3443,7 +4049,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
goto out;
}
- if (is_6ghz && !(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
+ if (is_6ghz && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) &&
!elems->he_6ghz_capa) {
sdata_info(sdata,
"HE 6 GHz AP is missing HE 6 GHz band capability\n");
@@ -3451,28 +4057,14 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
goto out;
}
- mutex_lock(&sdata->local->sta_mtx);
- /*
- * station info was already allocated and inserted before
- * the association and should be available to us
- */
- sta = sta_info_get(sdata, cbss->bssid);
- if (WARN_ON(!sta)) {
- mutex_unlock(&sdata->local->sta_mtx);
- ret = false;
- goto out;
- }
-
- sband = ieee80211_get_sband(sdata);
- if (!sband) {
- mutex_unlock(&sdata->local->sta_mtx);
+ if (WARN_ON(!link->conf->chandef.chan)) {
ret = false;
goto out;
}
+ sband = local->hw.wiphy->bands[link->conf->chandef.chan->band];
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
+ if (!(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) &&
(!elems->he_cap || !elems->he_operation)) {
- mutex_unlock(&sdata->local->sta_mtx);
sdata_info(sdata,
"HE AP is missing HE capability/operation\n");
ret = false;
@@ -3480,23 +4072,25 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
}
/* Set up internal HT/VHT capabilities */
- if (elems->ht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT))
+ if (elems->ht_cap_elem && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT))
ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
- elems->ht_cap_elem, sta);
+ elems->ht_cap_elem,
+ link_sta);
- if (elems->vht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
+ if (elems->vht_cap_elem && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT))
ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
- elems->vht_cap_elem, sta);
+ elems->vht_cap_elem,
+ link_sta);
- if (elems->he_operation && !(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
+ if (elems->he_operation && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) &&
elems->he_cap) {
ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
elems->he_cap,
elems->he_cap_len,
elems->he_6ghz_capa,
- sta);
+ link_sta);
- bss_conf->he_support = sta->sta.he_cap.has_he;
+ bss_conf->he_support = link_sta->pub->he_cap.has_he;
if (elems->rsnx && elems->rsnx_len &&
(elems->rsnx[0] & WLAN_RSNX_CAPA_PROTECTED_TWT) &&
wiphy_ext_feature_isset(local->hw.wiphy,
@@ -3505,15 +4099,30 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
else
bss_conf->twt_protected = false;
- changed |= ieee80211_recalc_twt_req(sdata, sta, elems);
+ *changed |= ieee80211_recalc_twt_req(link, link_sta, elems);
+
+ if (elems->eht_operation && elems->eht_cap &&
+ !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT)) {
+ ieee80211_eht_cap_ie_to_sta_eht_cap(sdata, sband,
+ elems->he_cap,
+ elems->he_cap_len,
+ elems->eht_cap,
+ elems->eht_cap_len,
+ link_sta);
+
+ bss_conf->eht_support = link_sta->pub->eht_cap.has_eht;
+ } else {
+ bss_conf->eht_support = false;
+ }
} else {
bss_conf->he_support = false;
bss_conf->twt_requester = false;
bss_conf->twt_protected = false;
+ bss_conf->eht_support = false;
}
bss_conf->twt_broadcast =
- ieee80211_twt_bcast_support(sdata, bss_conf, sband, sta);
+ ieee80211_twt_bcast_support(sdata, bss_conf, sband, link_sta);
if (bss_conf->he_support) {
bss_conf->he_bss_color.color =
@@ -3527,14 +4136,14 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED);
if (bss_conf->he_bss_color.enabled)
- changed |= BSS_CHANGED_HE_BSS_COLOR;
+ *changed |= BSS_CHANGED_HE_BSS_COLOR;
bss_conf->htc_trig_based_pkt_ext =
le32_get_bits(elems->he_operation->he_oper_params,
- IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK);
+ IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK);
bss_conf->frame_time_rts_th =
le32_get_bits(elems->he_operation->he_oper_params,
- IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK);
+ IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK);
bss_conf->uora_exists = !!elems->uora_element;
if (elems->uora_element)
@@ -3572,65 +4181,34 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
nss = *elems->opmode_notif & IEEE80211_OPMODE_NOTIF_RX_NSS_MASK;
nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT;
nss += 1;
- sta->sta.rx_nss = nss;
- }
-
- rate_control_rate_init(sta);
-
- if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) {
- set_sta_flag(sta, WLAN_STA_MFP);
- sta->sta.mfp = true;
- } else {
- sta->sta.mfp = false;
+ link_sta->pub->rx_nss = nss;
}
- sta->sta.wme = (elems->wmm_param || elems->s1g_capab) &&
- local->hw.queues >= IEEE80211_NUM_ACS;
-
- err = sta_info_move_state(sta, IEEE80211_STA_ASSOC);
- if (!err && !(ifmgd->flags & IEEE80211_STA_CONTROL_PORT))
- err = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED);
- if (err) {
- sdata_info(sdata,
- "failed to move station %pM to desired state\n",
- sta->sta.addr);
- WARN_ON(__sta_info_destroy(sta));
- mutex_unlock(&sdata->local->sta_mtx);
- ret = false;
- goto out;
- }
-
- if (sdata->wdev.use_4addr)
- drv_sta_set_4addr(local, sdata, &sta->sta, true);
-
- mutex_unlock(&sdata->local->sta_mtx);
-
/*
* Always handle WMM once after association regardless
* of the first value the AP uses. Setting -1 here has
* that effect because the AP's value is an unsigned
* 4-bit value.
*/
- ifmgd->wmm_last_param_set = -1;
- ifmgd->mu_edca_last_param_set = -1;
+ link->u.mgd.wmm_last_param_set = -1;
+ link->u.mgd.mu_edca_last_param_set = -1;
- if (ifmgd->flags & IEEE80211_STA_DISABLE_WMM) {
- ieee80211_set_wmm_default(sdata, false, false);
- } else if (!ieee80211_sta_wmm_params(local, sdata, elems->wmm_param,
+ if (link->u.mgd.disable_wmm_tracking) {
+ ieee80211_set_wmm_default(link, false, false);
+ } else if (!ieee80211_sta_wmm_params(local, link, elems->wmm_param,
elems->wmm_param_len,
elems->mu_edca_param_set)) {
/* still enable QoS since we might have HT/VHT */
- ieee80211_set_wmm_default(sdata, false, true);
- /* set the disable-WMM flag in this case to disable
+ ieee80211_set_wmm_default(link, false, true);
+ /* disable WMM tracking in this case to disable
* tracking WMM parameter changes in the beacon if
* the parameters weren't actually valid. Doing so
* avoids changing parameters very strangely when
* the AP is going back and forth between valid and
* invalid parameters.
*/
- ifmgd->flags |= IEEE80211_STA_DISABLE_WMM;
+ link->u.mgd.disable_wmm_tracking = true;
}
- changed |= BSS_CHANGED_QOS;
if (elems->max_idle_period_ie) {
bss_conf->max_idle_period =
@@ -3638,7 +4216,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
bss_conf->protected_keep_alive =
!!(elems->max_idle_period_ie->idle_options &
WLAN_IDLE_OPTIONS_PROTECTED_KEEP_ALIVE);
- changed |= BSS_CHANGED_KEEP_ALIVE;
+ *changed |= BSS_CHANGED_KEEP_ALIVE;
} else {
bss_conf->max_idle_period = 0;
bss_conf->protected_keep_alive = false;
@@ -3647,7 +4225,767 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
/* set assoc capability (AID was already set earlier),
* ieee80211_set_associated() will tell the driver */
bss_conf->assoc_capability = capab_info;
- ieee80211_set_associated(sdata, cbss, changed);
+
+ ret = true;
+out:
+ kfree(elems);
+ kfree(bss_ies);
+ return ret;
+}
+
+static int ieee80211_mgd_setup_link_sta(struct ieee80211_link_data *link,
+ struct sta_info *sta,
+ struct link_sta_info *link_sta,
+ struct cfg80211_bss *cbss)
+{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_bss *bss = (void *)cbss->priv;
+ u32 rates = 0, basic_rates = 0;
+ bool have_higher_than_11mbit = false;
+ int min_rate = INT_MAX, min_rate_index = -1;
+ /* this is clearly wrong for MLO but we'll just remove it later */
+ int shift = ieee80211_vif_get_shift(&sdata->vif);
+ struct ieee80211_supported_band *sband;
+
+ memcpy(link_sta->addr, cbss->bssid, ETH_ALEN);
+ memcpy(link_sta->pub->addr, cbss->bssid, ETH_ALEN);
+
+ /* TODO: S1G Basic Rate Set is expressed elsewhere */
+ if (cbss->channel->band == NL80211_BAND_S1GHZ) {
+ ieee80211_s1g_sta_rate_init(sta);
+ return 0;
+ }
+
+ sband = local->hw.wiphy->bands[cbss->channel->band];
+
+ ieee80211_get_rates(sband, bss->supp_rates, bss->supp_rates_len,
+ &rates, &basic_rates, &have_higher_than_11mbit,
+ &min_rate, &min_rate_index, shift);
+
+ /*
+ * This used to be a workaround for basic rates missing
+ * in the association response frame. Now that we no
+ * longer use the basic rates from there, it probably
+ * doesn't happen any more, but keep the workaround so that,
+ * in case some *other* APs are buggy in different ways,
+ * we can still connect -- with a warning.
+ * Only allow this workaround if the AP provided at least
+ * one rate.
+ */
+ if (min_rate_index < 0) {
+ link_info(link, "No legacy rates in association response\n");
+ return -EINVAL;
+ } else if (!basic_rates) {
+ link_info(link, "No basic rates, using min rate instead\n");
+ basic_rates = BIT(min_rate_index);
+ }
+
+ if (rates)
+ link_sta->pub->supp_rates[cbss->channel->band] = rates;
+ else
+ link_info(link, "No rates found, keeping mandatory only\n");
+
+ link->conf->basic_rates = basic_rates;
+
+ /* cf. IEEE 802.11 9.2.12 */
+ link->operating_11g_mode = sband->band == NL80211_BAND_2GHZ &&
+ have_higher_than_11mbit;
+
+ return 0;
+}
+
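
The fallback at the end of ieee80211_mgd_setup_link_sta() can be read as: no advertised rates at all is fatal, but an empty basic-rate set is repaired by promoting the AP's lowest advertised rate. A toy model with invented rate values (units are the 100 kbps steps mac80211 uses for legacy bitrates):

#include <stdio.h>
#include <limits.h>

#define BIT(n) (1U << (n))

int main(void)
{
	/* supported rates in 100 kbps units: 6/12/24 Mbit/s */
	int rates[] = { 60, 120, 240 };
	unsigned int basic_rates = 0;	/* AP marked none as basic */
	int min_rate = INT_MAX, min_rate_index = -1, i;

	for (i = 0; i < 3; i++) {
		if (rates[i] < min_rate) {
			min_rate = rates[i];
			min_rate_index = i;
		}
	}

	/* the repair step: promote the lowest rate to "basic" */
	if (!basic_rates && min_rate_index >= 0)
		basic_rates = BIT(min_rate_index);

	printf("basic_rates bitmap: 0x%x\n", basic_rates);
	return 0;
}
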
+static u8 ieee80211_max_rx_chains(struct ieee80211_link_data *link,
+ struct cfg80211_bss *cbss)
+{
+ struct ieee80211_he_mcs_nss_supp *he_mcs_nss_supp;
+ const struct element *ht_cap_elem, *vht_cap_elem;
+ const struct cfg80211_bss_ies *ies;
+ const struct ieee80211_ht_cap *ht_cap;
+ const struct ieee80211_vht_cap *vht_cap;
+ const struct ieee80211_he_cap_elem *he_cap;
+ const struct element *he_cap_elem;
+ u16 mcs_80_map, mcs_160_map;
+ int i, mcs_nss_size;
+ bool support_160;
+ u8 chains = 1;
+
+ if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT)
+ return chains;
+
+ ht_cap_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_HT_CAPABILITY);
+ if (ht_cap_elem && ht_cap_elem->datalen >= sizeof(*ht_cap)) {
+ ht_cap = (void *)ht_cap_elem->data;
+ chains = ieee80211_mcs_to_chains(&ht_cap->mcs);
+ /*
+ * TODO: use "Tx Maximum Number Spatial Streams Supported" and
+ * "Tx Unequal Modulation Supported" fields.
+ */
+ }
+
+ if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)
+ return chains;
+
+ vht_cap_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_VHT_CAPABILITY);
+ if (vht_cap_elem && vht_cap_elem->datalen >= sizeof(*vht_cap)) {
+ u8 nss;
+ u16 tx_mcs_map;
+
+ vht_cap = (void *)vht_cap_elem->data;
+ tx_mcs_map = le16_to_cpu(vht_cap->supp_mcs.tx_mcs_map);
+ for (nss = 8; nss > 0; nss--) {
+ if (((tx_mcs_map >> (2 * (nss - 1))) & 3) !=
+ IEEE80211_VHT_MCS_NOT_SUPPORTED)
+ break;
+ }
+ /* TODO: use "Tx Highest Supported Long GI Data Rate" field? */
+ chains = max(chains, nss);
+ }
+
+ if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE)
+ return chains;
+
+ ies = rcu_dereference(cbss->ies);
+ he_cap_elem = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY,
+ ies->data, ies->len);
+
+ if (!he_cap_elem || he_cap_elem->datalen < sizeof(*he_cap))
+ return chains;
+
+ /* skip one byte ext_tag_id */
+ he_cap = (void *)(he_cap_elem->data + 1);
+ mcs_nss_size = ieee80211_he_mcs_nss_size(he_cap);
+
+ /* invalid HE IE */
+ if (he_cap_elem->datalen < 1 + mcs_nss_size + sizeof(*he_cap))
+ return chains;
+
+ /* mcs_nss is right after he_cap info */
+ he_mcs_nss_supp = (void *)(he_cap + 1);
+
+ mcs_80_map = le16_to_cpu(he_mcs_nss_supp->tx_mcs_80);
+
+ for (i = 7; i >= 0; i--) {
+ u8 mcs_80 = mcs_80_map >> (2 * i) & 3;
+
+ if (mcs_80 != IEEE80211_VHT_MCS_NOT_SUPPORTED) {
+ chains = max_t(u8, chains, i + 1);
+ break;
+ }
+ }
+
+ support_160 = he_cap->phy_cap_info[0] &
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
+
+ if (!support_160)
+ return chains;
+
+ mcs_160_map = le16_to_cpu(he_mcs_nss_supp->tx_mcs_160);
+ for (i = 7; i >= 0; i--) {
+ u8 mcs_160 = mcs_160_map >> (2 * i) & 3;
+
+ if (mcs_160 != IEEE80211_VHT_MCS_NOT_SUPPORTED) {
+ chains = max_t(u8, chains, i + 1);
+ break;
+ }
+ }
+
+ return chains;
+}
+
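
ieee80211_max_rx_chains() scans several 16-bit MCS maps that pack one 2-bit support value per NSS (NSS 1 in the lowest bits), where 3 (IEEE80211_VHT_MCS_NOT_SUPPORTED) means no support. A standalone model of the downward scan, with an invented example map:

#include <stdio.h>
#include <stdint.h>

/* returns the highest NSS whose 2-bit field is not "unsupported" */
static int max_nss(uint16_t mcs_map)
{
	int nss;

	for (nss = 8; nss > 0; nss--) {
		if (((mcs_map >> (2 * (nss - 1))) & 3) != 3)
			return nss;
	}
	return 0;
}

int main(void)
{
	/* 0xfffa: NSS 1 and 2 support MCS 0-9, the rest unsupported */
	printf("max NSS: %d\n", max_nss(0xfffa));	/* prints 2 */
	return 0;
}
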
+static bool
+ieee80211_verify_peer_he_mcs_support(struct ieee80211_sub_if_data *sdata,
+ const struct cfg80211_bss_ies *ies,
+ const struct ieee80211_he_operation *he_op)
+{
+ const struct element *he_cap_elem;
+ const struct ieee80211_he_cap_elem *he_cap;
+ struct ieee80211_he_mcs_nss_supp *he_mcs_nss_supp;
+ u16 mcs_80_map_tx, mcs_80_map_rx;
+ u16 ap_min_req_set;
+ int mcs_nss_size;
+ int nss;
+
+ he_cap_elem = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY,
+ ies->data, ies->len);
+
+ if (!he_cap_elem)
+ return false;
+
+ /* invalid HE IE */
+ if (he_cap_elem->datalen < 1 + sizeof(*he_cap)) {
+ sdata_info(sdata,
+ "Invalid HE elem, Disable HE\n");
+ return false;
+ }
+
+ /* skip one byte ext_tag_id */
+ he_cap = (void *)(he_cap_elem->data + 1);
+ mcs_nss_size = ieee80211_he_mcs_nss_size(he_cap);
+
+ /* invalid HE IE */
+ if (he_cap_elem->datalen < 1 + sizeof(*he_cap) + mcs_nss_size) {
+ sdata_info(sdata,
+ "Invalid HE elem with nss size, Disable HE\n");
+ return false;
+ }
+
+ /* mcs_nss is right after he_cap info */
+ he_mcs_nss_supp = (void *)(he_cap + 1);
+
+ mcs_80_map_tx = le16_to_cpu(he_mcs_nss_supp->tx_mcs_80);
+ mcs_80_map_rx = le16_to_cpu(he_mcs_nss_supp->rx_mcs_80);
+
+ /* P802.11-REVme/D0.3
+ * 27.1.1 Introduction to the HE PHY
+ * ...
+ * An HE STA shall support the following features:
+ * ...
+ * Single spatial stream HE-MCSs 0 to 7 (transmit and receive) in all
+ * supported channel widths for HE SU PPDUs
+ */
+ if ((mcs_80_map_tx & 0x3) == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+ (mcs_80_map_rx & 0x3) == IEEE80211_HE_MCS_NOT_SUPPORTED) {
+ sdata_info(sdata,
+ "Missing mandatory rates for 1 Nss, rx 0x%x, tx 0x%x, disable HE\n",
+ mcs_80_map_tx, mcs_80_map_rx);
+ return false;
+ }
+
+ if (!he_op)
+ return true;
+
+ ap_min_req_set = le16_to_cpu(he_op->he_mcs_nss_set);
+
+ /*
+ * Apparently iPhone 13 (at least iOS version 15.3.1) sets this to all
+ * zeroes, which is nonsense, and completely inconsistent with itself
+ * (it doesn't have 8 streams). Accept the settings in this case anyway.
+ */
+ if (!ap_min_req_set)
+ return true;
+
+ /* make sure the AP is consistent with itself
+ *
+ * P802.11-REVme/D0.3
+ * 26.17.1 Basic HE BSS operation
+ *
+ * A STA that is operating in an HE BSS shall be able to receive and
+ * transmit at each of the <HE-MCS, NSS> tuple values indicated by the
+ * Basic HE-MCS And NSS Set field of the HE Operation parameter of the
+ * MLME-START.request primitive and shall be able to receive at each of
+ * the <HE-MCS, NSS> tuple values indicated by the Supported HE-MCS and
+ * NSS Set field in the HE Capabilities parameter of the MLME-START.request
+ * primitive
+ */
+ for (nss = 8; nss > 0; nss--) {
+ u8 ap_op_val = (ap_min_req_set >> (2 * (nss - 1))) & 3;
+ u8 ap_rx_val;
+ u8 ap_tx_val;
+
+ if (ap_op_val == IEEE80211_HE_MCS_NOT_SUPPORTED)
+ continue;
+
+ ap_rx_val = (mcs_80_map_rx >> (2 * (nss - 1))) & 3;
+ ap_tx_val = (mcs_80_map_tx >> (2 * (nss - 1))) & 3;
+
+ if (ap_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+ ap_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+ ap_rx_val < ap_op_val || ap_tx_val < ap_op_val) {
+ sdata_info(sdata,
+ "Invalid rates for %d Nss, rx %d, tx %d oper %d, disable HE\n",
+ nss, ap_rx_val, ap_tx_val, ap_op_val);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool
+ieee80211_verify_sta_he_mcs_support(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_supported_band *sband,
+ const struct ieee80211_he_operation *he_op)
+{
+ const struct ieee80211_sta_he_cap *sta_he_cap =
+ ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif));
+ u16 ap_min_req_set;
+ int i;
+
+ if (!sta_he_cap || !he_op)
+ return false;
+
+ ap_min_req_set = le16_to_cpu(he_op->he_mcs_nss_set);
+
+ /*
+ * Apparently iPhone 13 (at least iOS version 15.3.1) sets this to all
+ * zeroes, which is nonsense, and completely inconsistent with itself
+ * (it doesn't have 8 streams). Accept the settings in this case anyway.
+ */
+ if (!ap_min_req_set)
+ return true;
+
+ /* Need to go over for 80MHz, 160MHz and for 80+80 */
+ for (i = 0; i < 3; i++) {
+ const struct ieee80211_he_mcs_nss_supp *sta_mcs_nss_supp =
+ &sta_he_cap->he_mcs_nss_supp;
+ u16 sta_mcs_map_rx =
+ le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i]);
+ u16 sta_mcs_map_tx =
+ le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i + 1]);
+ u8 nss;
+ bool verified = true;
+
+ /*
+ * For each band there is a maximum of 8 spatial streams
+ * possible. Each of the sta_mcs_map_* is a 16-bit map built
+ * of 2 bits per NSS (1-8), with the values defined in enum
+ * ieee80211_he_mcs_support. Need to make sure STA TX and RX
+ * capabilities aren't less than the AP's minimum requirements
+ * for this HE BSS per SS.
+ * It is enough to find one such bandwidth that meets the reqs.
+ */
+ for (nss = 8; nss > 0; nss--) {
+ u8 sta_rx_val = (sta_mcs_map_rx >> (2 * (nss - 1))) & 3;
+ u8 sta_tx_val = (sta_mcs_map_tx >> (2 * (nss - 1))) & 3;
+ u8 ap_val = (ap_min_req_set >> (2 * (nss - 1))) & 3;
+
+ if (ap_val == IEEE80211_HE_MCS_NOT_SUPPORTED)
+ continue;
+
+ /*
+ * Make sure the HE AP doesn't require MCSs that aren't
+ * supported by the client as required by spec
+ *
+ * P802.11-REVme/D0.3
+ * 26.17.1 Basic HE BSS operation
+ *
+ * An HE STA shall not attempt to join (MLME-JOIN.request primitive)
+ * a BSS, unless it supports (i.e., is able to both transmit and
+ * receive using) all of the <HE-MCS, NSS> tuples in the basic
+ * HE-MCS and NSS set.
+ */
+ if (sta_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+ sta_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+ (ap_val > sta_rx_val) || (ap_val > sta_tx_val)) {
+ verified = false;
+ break;
+ }
+ }
+
+ if (verified)
+ return true;
+ }
+
+ /* If here, STA doesn't meet AP's HE min requirements */
+ return false;
+}
+
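Both verification helpers above hinge on the same per-NSS comparison: a larger 2-bit field value means a wider supported MCS range, so a side fails when its field is below the required one. A hedged standalone sketch of the 26.17.1 join rule, with hypothetical names rather than the mac80211 API:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HE_MCS_NOT_SUPPORTED 3

/* True if the STA supports, on both RX and TX, at least the MCS range
 * the AP's Basic HE-MCS And NSS Set requires for every mandatory NSS.
 */
static bool sta_meets_basic_he_mcs_set(uint16_t ap_min_req_set,
				       uint16_t sta_rx_map,
				       uint16_t sta_tx_map)
{
	int nss;

	for (nss = 8; nss >= 1; nss--) {
		uint8_t ap = (ap_min_req_set >> (2 * (nss - 1))) & 3;
		uint8_t rx = (sta_rx_map >> (2 * (nss - 1))) & 3;
		uint8_t tx = (sta_tx_map >> (2 * (nss - 1))) & 3;

		if (ap == HE_MCS_NOT_SUPPORTED)
			continue;	/* NSS not required by the AP */

		if (rx == HE_MCS_NOT_SUPPORTED ||
		    tx == HE_MCS_NOT_SUPPORTED ||
		    rx < ap || tx < ap)
			return false;
	}
	return true;
}

int main(void)
{
	/* AP requires MCS 0-7 on NSS 1-2 (0xfff0); STA is 2x2 with
	 * MCS 0-11 on both streams (0xfffa) -> joinable
	 */
	printf("%s\n", sta_meets_basic_he_mcs_set(0xfff0, 0xfffa, 0xfffa) ?
	       "can join" : "cannot join");
	return 0;
}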
+static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
+ struct cfg80211_bss *cbss,
+ ieee80211_conn_flags_t *conn_flags)
+{
+ struct ieee80211_local *local = sdata->local;
+ const struct ieee80211_ht_cap *ht_cap = NULL;
+ const struct ieee80211_ht_operation *ht_oper = NULL;
+ const struct ieee80211_vht_operation *vht_oper = NULL;
+ const struct ieee80211_he_operation *he_oper = NULL;
+ const struct ieee80211_eht_operation *eht_oper = NULL;
+ const struct ieee80211_s1g_oper_ie *s1g_oper = NULL;
+ struct ieee80211_supported_band *sband;
+ struct cfg80211_chan_def chandef;
+ bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ;
+ bool is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ;
+ struct ieee80211_bss *bss = (void *)cbss->priv;
+ struct ieee80211_elems_parse_params parse_params = {
+ .bss = cbss,
+ .link_id = -1,
+ .from_ap = true,
+ };
+ struct ieee802_11_elems *elems;
+ const struct cfg80211_bss_ies *ies;
+ int ret;
+ u32 i;
+ bool have_80mhz;
+
+ rcu_read_lock();
+
+ ies = rcu_dereference(cbss->ies);
+ parse_params.start = ies->data;
+ parse_params.len = ies->len;
+ elems = ieee802_11_parse_elems_full(&parse_params);
+ if (!elems) {
+ rcu_read_unlock();
+ return -ENOMEM;
+ }
+
+ sband = local->hw.wiphy->bands[cbss->channel->band];
+
+ *conn_flags &= ~(IEEE80211_CONN_DISABLE_40MHZ |
+ IEEE80211_CONN_DISABLE_80P80MHZ |
+ IEEE80211_CONN_DISABLE_160MHZ);
+
+ /* disable HT/VHT/HE if we don't support them */
+ if (!sband->ht_cap.ht_supported && !is_6ghz) {
+ mlme_dbg(sdata, "HT not supported, disabling HT/VHT/HE/EHT\n");
+ *conn_flags |= IEEE80211_CONN_DISABLE_HT;
+ *conn_flags |= IEEE80211_CONN_DISABLE_VHT;
+ *conn_flags |= IEEE80211_CONN_DISABLE_HE;
+ *conn_flags |= IEEE80211_CONN_DISABLE_EHT;
+ }
+
+ if (!sband->vht_cap.vht_supported && is_5ghz) {
+ mlme_dbg(sdata, "VHT not supported, disabling VHT/HE/EHT\n");
+ *conn_flags |= IEEE80211_CONN_DISABLE_VHT;
+ *conn_flags |= IEEE80211_CONN_DISABLE_HE;
+ *conn_flags |= IEEE80211_CONN_DISABLE_EHT;
+ }
+
+ if (!ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif))) {
+ mlme_dbg(sdata, "HE not supported, disabling HE and EHT\n");
+ *conn_flags |= IEEE80211_CONN_DISABLE_HE;
+ *conn_flags |= IEEE80211_CONN_DISABLE_EHT;
+ }
+
+ if (!ieee80211_get_eht_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif))) {
+ mlme_dbg(sdata, "EHT not supported, disabling EHT\n");
+ *conn_flags |= IEEE80211_CONN_DISABLE_EHT;
+ }
+
+ if (!(*conn_flags & IEEE80211_CONN_DISABLE_HT) && !is_6ghz) {
+ ht_oper = elems->ht_operation;
+ ht_cap = elems->ht_cap_elem;
+
+ if (!ht_cap) {
+ *conn_flags |= IEEE80211_CONN_DISABLE_HT;
+ ht_oper = NULL;
+ }
+ }
+
+ if (!(*conn_flags & IEEE80211_CONN_DISABLE_VHT) && !is_6ghz) {
+ vht_oper = elems->vht_operation;
+ if (vht_oper && !ht_oper) {
+ vht_oper = NULL;
+ sdata_info(sdata,
+ "AP advertised VHT without HT, disabling HT/VHT/HE\n");
+ *conn_flags |= IEEE80211_CONN_DISABLE_HT;
+ *conn_flags |= IEEE80211_CONN_DISABLE_VHT;
+ *conn_flags |= IEEE80211_CONN_DISABLE_HE;
+ *conn_flags |= IEEE80211_CONN_DISABLE_EHT;
+ }
+
+ if (!elems->vht_cap_elem) {
+ *conn_flags |= IEEE80211_CONN_DISABLE_VHT;
+ vht_oper = NULL;
+ }
+ }
+
+ if (!(*conn_flags & IEEE80211_CONN_DISABLE_HE)) {
+ he_oper = elems->he_operation;
+
+ if (link && is_6ghz) {
+ struct ieee80211_bss_conf *bss_conf;
+ u8 j = 0;
+
+ bss_conf = link->conf;
+
+ if (elems->pwr_constr_elem)
+ bss_conf->pwr_reduction = *elems->pwr_constr_elem;
+
+ BUILD_BUG_ON(ARRAY_SIZE(bss_conf->tx_pwr_env) !=
+ ARRAY_SIZE(elems->tx_pwr_env));
+
+ for (i = 0; i < elems->tx_pwr_env_num; i++) {
+ if (elems->tx_pwr_env_len[i] >
+ sizeof(bss_conf->tx_pwr_env[j]))
+ continue;
+
+ bss_conf->tx_pwr_env_num++;
+ memcpy(&bss_conf->tx_pwr_env[j], elems->tx_pwr_env[i],
+ elems->tx_pwr_env_len[i]);
+ j++;
+ }
+ }
+
+ if (!ieee80211_verify_peer_he_mcs_support(sdata, ies, he_oper) ||
+ !ieee80211_verify_sta_he_mcs_support(sdata, sband, he_oper))
+ *conn_flags |= IEEE80211_CONN_DISABLE_HE |
+ IEEE80211_CONN_DISABLE_EHT;
+ }
+
+ /*
+ * EHT requires HE to be supported as well. Specifically for 6 GHz
+ * channels, the operation channel information can only be deduced from
+ * both the 6 GHz operation information (from the HE operation IE) and
+ * EHT operation.
+ */
+ if (!(*conn_flags &
+ (IEEE80211_CONN_DISABLE_HE |
+ IEEE80211_CONN_DISABLE_EHT)) &&
+ he_oper) {
+ const struct cfg80211_bss_ies *cbss_ies;
+ const u8 *eht_oper_ie;
+
+ cbss_ies = rcu_dereference(cbss->ies);
+ eht_oper_ie = cfg80211_find_ext_ie(WLAN_EID_EXT_EHT_OPERATION,
+ cbss_ies->data, cbss_ies->len);
+ if (eht_oper_ie && eht_oper_ie[1] >=
+ 1 + sizeof(struct ieee80211_eht_operation))
+ eht_oper = (void *)(eht_oper_ie + 3);
+ else
+ eht_oper = NULL;
+ }
+
+ /* Allow VHT if at least one channel on the sband supports 80 MHz */
+ have_80mhz = false;
+ for (i = 0; i < sband->n_channels; i++) {
+ if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED |
+ IEEE80211_CHAN_NO_80MHZ))
+ continue;
+
+ have_80mhz = true;
+ break;
+ }
+
+ if (!have_80mhz) {
+ sdata_info(sdata, "80 MHz not supported, disabling VHT\n");
+ *conn_flags |= IEEE80211_CONN_DISABLE_VHT;
+ }
+
+ if (sband->band == NL80211_BAND_S1GHZ) {
+ s1g_oper = elems->s1g_oper;
+ if (!s1g_oper)
+ sdata_info(sdata,
+ "AP missing S1G operation element?\n");
+ }
+
+ *conn_flags |=
+ ieee80211_determine_chantype(sdata, link, *conn_flags,
+ sband,
+ cbss->channel,
+ bss->vht_cap_info,
+ ht_oper, vht_oper,
+ he_oper, eht_oper,
+ s1g_oper,
+ &chandef, false);
+
+ if (link)
+ link->needed_rx_chains =
+ min(ieee80211_max_rx_chains(link, cbss),
+ local->rx_chains);
+
+ rcu_read_unlock();
+ /* the element data was RCU protected so no longer valid anyway */
+ kfree(elems);
+ elems = NULL;
+
+ if (*conn_flags & IEEE80211_CONN_DISABLE_HE && is_6ghz) {
+ sdata_info(sdata, "Rejecting non-HE 6/7 GHz connection");
+ return -EINVAL;
+ }
+
+ if (!link)
+ return 0;
+
+ /* will change later if needed */
+ link->smps_mode = IEEE80211_SMPS_OFF;
+
+ mutex_lock(&local->mtx);
+ /*
+ * If this fails (possibly due to channel context sharing
+ * on incompatible channels, e.g. 80+80 and 160 sharing the
+ * same control channel) try to use a smaller bandwidth.
+ */
+ ret = ieee80211_link_use_channel(link, &chandef,
+ IEEE80211_CHANCTX_SHARED);
+
+ /* don't downgrade for 5 and 10 MHz channels, though. */
+ if (chandef.width == NL80211_CHAN_WIDTH_5 ||
+ chandef.width == NL80211_CHAN_WIDTH_10)
+ goto out;
+
+ while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) {
+ *conn_flags |=
+ ieee80211_chandef_downgrade(&chandef);
+ ret = ieee80211_link_use_channel(link, &chandef,
+ IEEE80211_CHANCTX_SHARED);
+ }
+ out:
+ mutex_unlock(&local->mtx);
+ return ret;
+}
+
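The tail of ieee80211_prep_channel() is a generic narrow-until-it-fits retry. A minimal standalone sketch of the pattern, with a stub standing in for ieee80211_link_use_channel() (assumed to return 0 on success):

#include <stdio.h>

enum width { W20_NOHT, W20, W40, W80, W160 };

/* stub: pretend the shared channel context only admits <= 40 MHz */
static int try_use_channel(enum width w)
{
	return w > W40 ? -1 : 0;
}

static int use_widest_possible(enum width w)
{
	int ret = try_use_channel(w);

	/* same shape as the loop above: downgrade one step at a time
	 * until the channel fits or we bottom out at 20 MHz no-HT
	 */
	while (ret && w != W20_NOHT) {
		w = (enum width)(w - 1);
		ret = try_use_channel(w);
	}
	return ret;
}

int main(void)
{
	printf("ret = %d\n", use_widest_possible(W160)); /* succeeds at 40 MHz */
	return 0;
}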
+static bool ieee80211_get_dtim(const struct cfg80211_bss_ies *ies,
+ u8 *dtim_count, u8 *dtim_period)
+{
+ const u8 *tim_ie = cfg80211_find_ie(WLAN_EID_TIM, ies->data, ies->len);
+ const u8 *idx_ie = cfg80211_find_ie(WLAN_EID_MULTI_BSSID_IDX, ies->data,
+ ies->len);
+ const struct ieee80211_tim_ie *tim = NULL;
+ const struct ieee80211_bssid_index *idx;
+ bool valid = tim_ie && tim_ie[1] >= 2;
+
+ if (valid)
+ tim = (void *)(tim_ie + 2);
+
+ if (dtim_count)
+ *dtim_count = valid ? tim->dtim_count : 0;
+
+ if (dtim_period)
+ *dtim_period = valid ? tim->dtim_period : 0;
+
+ /* Check if value is overridden by non-transmitted profile */
+ if (!idx_ie || idx_ie[1] < 3)
+ return valid;
+
+ idx = (void *)(idx_ie + 2);
+
+ if (dtim_count)
+ *dtim_count = idx->dtim_count;
+
+ if (dtim_period)
+ *dtim_period = idx->dtim_period;
+
+ return true;
+}
+
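ieee80211_get_dtim() relies on cfg80211_find_ie() for the element walk; underneath, that is a simple TLV scan over the beacon's IE buffer. An illustrative self-contained sketch (no bitmap handling; element ID 5 is TIM):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* TIM element body: [0] DTIM Count, [1] DTIM Period,
 * [2] Bitmap Control, [3..] Partial Virtual Bitmap
 */
static bool get_dtim(const uint8_t *ies, size_t len,
		     uint8_t *count, uint8_t *period)
{
	size_t pos = 0;

	while (pos + 2 <= len) {
		uint8_t id = ies[pos], elen = ies[pos + 1];

		if (pos + 2 + elen > len)
			return false;	/* truncated element */
		if (id == 5 && elen >= 2) {
			*count = ies[pos + 2];
			*period = ies[pos + 3];
			return true;
		}
		pos += 2 + elen;
	}
	return false;
}

int main(void)
{
	/* TIM with DTIM count 1, period 3, empty bitmap */
	const uint8_t ies[] = { 5, 4, 1, 3, 0, 0 };
	uint8_t c, p;

	if (get_dtim(ies, sizeof(ies), &c, &p))
		printf("dtim count %u period %u\n", c, p);
	return 0;
}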
+static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mgmt *mgmt,
+ struct ieee802_11_elems *elems,
+ const u8 *elem_start, unsigned int elem_len)
+{
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data;
+ struct ieee80211_local *local = sdata->local;
+ unsigned int link_id;
+ struct sta_info *sta;
+ u64 changed[IEEE80211_MLD_MAX_NUM_LINKS] = {};
+ int err;
+
+ mutex_lock(&sdata->local->sta_mtx);
+ /*
+ * station info was already allocated and inserted before
+ * the association and should be available to us
+ */
+ sta = sta_info_get(sdata, assoc_data->ap_addr);
+ if (WARN_ON(!sta))
+ goto out_err;
+
+ if (sdata->vif.valid_links) {
+ u16 valid_links = 0;
+
+ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
+ if (!assoc_data->link[link_id].bss)
+ continue;
+ valid_links |= BIT(link_id);
+
+ if (link_id != assoc_data->assoc_link_id) {
+ err = ieee80211_sta_allocate_link(sta, link_id);
+ if (err)
+ goto out_err;
+ }
+ }
+
+ ieee80211_vif_set_links(sdata, valid_links);
+ }
+
+ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
+ struct ieee80211_link_data *link;
+ struct link_sta_info *link_sta;
+
+ if (!assoc_data->link[link_id].bss)
+ continue;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (WARN_ON(!link))
+ goto out_err;
+
+ if (sdata->vif.valid_links)
+ link_info(link,
+ "local address %pM, AP link address %pM\n",
+ link->conf->addr,
+ assoc_data->link[link_id].bss->bssid);
+
+ link_sta = rcu_dereference_protected(sta->link[link_id],
+ lockdep_is_held(&local->sta_mtx));
+ if (WARN_ON(!link_sta))
+ goto out_err;
+
+ if (link_id != assoc_data->assoc_link_id) {
+ struct cfg80211_bss *cbss = assoc_data->link[link_id].bss;
+ const struct cfg80211_bss_ies *ies;
+
+ rcu_read_lock();
+ ies = rcu_dereference(cbss->ies);
+ ieee80211_get_dtim(ies,
+ &link->conf->sync_dtim_count,
+ &link->u.mgd.dtim_period);
+ link->conf->dtim_period = link->u.mgd.dtim_period ?: 1;
+ link->conf->beacon_int = cbss->beacon_interval;
+ rcu_read_unlock();
+
+ err = ieee80211_prep_channel(sdata, link, cbss,
+ &link->u.mgd.conn_flags);
+ if (err) {
+ link_info(link, "prep_channel failed\n");
+ goto out_err;
+ }
+ }
+
+ err = ieee80211_mgd_setup_link_sta(link, sta, link_sta,
+ assoc_data->link[link_id].bss);
+ if (err)
+ goto out_err;
+
+ if (!ieee80211_assoc_config_link(link, link_sta,
+ assoc_data->link[link_id].bss,
+ mgmt, elem_start, elem_len,
+ &changed[link_id]))
+ goto out_err;
+
+ if (link_id != assoc_data->assoc_link_id) {
+ err = ieee80211_sta_activate_link(sta, link_id);
+ if (err)
+ goto out_err;
+ }
+ }
+
+ rate_control_rate_init(sta);
+
+ if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) {
+ set_sta_flag(sta, WLAN_STA_MFP);
+ sta->sta.mfp = true;
+ } else {
+ sta->sta.mfp = false;
+ }
+
+ ieee80211_sta_set_max_amsdu_subframes(sta, elems->ext_capab,
+ elems->ext_capab_len);
+
+ sta->sta.wme = (elems->wmm_param || elems->s1g_capab) &&
+ local->hw.queues >= IEEE80211_NUM_ACS;
+
+ err = sta_info_move_state(sta, IEEE80211_STA_ASSOC);
+ if (!err && !(ifmgd->flags & IEEE80211_STA_CONTROL_PORT))
+ err = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED);
+ if (err) {
+ sdata_info(sdata,
+ "failed to move station %pM to desired state\n",
+ sta->sta.addr);
+ WARN_ON(__sta_info_destroy(sta));
+ goto out_err;
+ }
+
+ if (sdata->wdev.use_4addr)
+ drv_sta_set_4addr(local, sdata, &sta->sta, true);
+
+ mutex_unlock(&sdata->local->sta_mtx);
+
+ ieee80211_set_associated(sdata, assoc_data, changed);
/*
* If we're using 4-addr mode, let the AP know that we're
@@ -3663,11 +5001,11 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
ieee80211_sta_reset_beacon_monitor(sdata);
ieee80211_sta_reset_conn_monitor(sdata);
- ret = true;
- out:
- kfree(elems);
- kfree(bss_ies);
- return ret;
+ return true;
+out_err:
+ eth_zero_addr(sdata->vif.cfg.ap_addr);
+ mutex_unlock(&sdata->local->sta_mtx);
+ return false;
}
static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
@@ -3677,27 +5015,35 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data;
u16 capab_info, status_code, aid;
+ struct ieee80211_elems_parse_params parse_params = {
+ .bss = NULL,
+ .link_id = -1,
+ .from_ap = true,
+ };
struct ieee802_11_elems *elems;
- int ac, uapsd_queues = -1;
- u8 *pos;
+ int ac;
+ const u8 *elem_start;
+ unsigned int elem_len;
bool reassoc;
- struct cfg80211_bss *cbss;
struct ieee80211_event event = {
.type = MLME_EVENT,
.u.mlme.data = ASSOC_EVENT,
};
struct ieee80211_prep_tx_info info = {};
+ struct cfg80211_rx_assoc_resp resp = {
+ .uapsd_queues = -1,
+ };
+ unsigned int link_id;
sdata_assert_lock(sdata);
if (!assoc_data)
return;
- if (!ether_addr_equal(assoc_data->bss->bssid, mgmt->bssid))
+ if (!ether_addr_equal(assoc_data->ap_addr, mgmt->bssid) ||
+ !ether_addr_equal(assoc_data->ap_addr, mgmt->sa))
return;
- cbss = assoc_data->bss;
-
/*
* AssocResp and ReassocResp have identical structure, so process both
* of them in this function.
@@ -3709,12 +5055,10 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
reassoc = ieee80211_is_reassoc_resp(mgmt->frame_control);
capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info);
status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code);
- pos = mgmt->u.assoc_resp.variable;
- aid = le16_to_cpu(mgmt->u.assoc_resp.aid);
- if (cbss->channel->band == NL80211_BAND_S1GHZ) {
- pos = (u8 *) mgmt->u.s1g_assoc_resp.variable;
- aid = 0; /* TODO */
- }
+ if (assoc_data->s1g)
+ elem_start = mgmt->u.s1g_assoc_resp.variable;
+ else
+ elem_start = mgmt->u.assoc_resp.variable;
/*
* Note: this may not be perfect, AP might misbehave - if
@@ -3725,33 +5069,50 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
info.subtype = reassoc ? IEEE80211_STYPE_REASSOC_REQ :
IEEE80211_STYPE_ASSOC_REQ;
- sdata_info(sdata,
- "RX %sssocResp from %pM (capab=0x%x status=%d aid=%d)\n",
- reassoc ? "Rea" : "A", mgmt->sa,
- capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14))));
-
if (assoc_data->fils_kek_len &&
fils_decrypt_assoc_resp(sdata, (u8 *)mgmt, &len, assoc_data) < 0)
return;
- elems = ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false,
- mgmt->bssid, assoc_data->bss->bssid);
+ elem_len = len - (elem_start - (u8 *)mgmt);
+ parse_params.start = elem_start;
+ parse_params.len = elem_len;
+ elems = ieee802_11_parse_elems_full(&parse_params);
if (!elems)
goto notify_driver;
+ if (elems->aid_resp)
+ aid = le16_to_cpu(elems->aid_resp->aid);
+ else if (assoc_data->s1g)
+ aid = 0; /* TODO */
+ else
+ aid = le16_to_cpu(mgmt->u.assoc_resp.aid);
+
+ /*
+ * The 5 MSB of the AID field are reserved
+ * (802.11-2016 9.4.1.8 AID field)
+ */
+ aid &= 0x7ff;
+
+ sdata_info(sdata,
+ "RX %sssocResp from %pM (capab=0x%x status=%d aid=%d)\n",
+ reassoc ? "Rea" : "A", assoc_data->ap_addr,
+ capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14))));
+
+ ifmgd->broken_ap = false;
+
if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY &&
elems->timeout_int &&
elems->timeout_int->type == WLAN_TIMEOUT_ASSOC_COMEBACK) {
u32 tu, ms;
- cfg80211_assoc_comeback(sdata->dev, assoc_data->bss,
+ cfg80211_assoc_comeback(sdata->dev, assoc_data->ap_addr,
le32_to_cpu(elems->timeout_int->value));
tu = le32_to_cpu(elems->timeout_int->value);
ms = tu * 1024 / 1000;
sdata_info(sdata,
"%pM rejected association temporarily; comeback duration %u TU (%u ms)\n",
- mgmt->sa, tu, ms);
+ assoc_data->ap_addr, tu, ms);
assoc_data->timeout = jiffies + msecs_to_jiffies(ms);
assoc_data->timeout_started = true;
if (ms > IEEE80211_ASSOC_TIMEOUT)
@@ -3761,49 +5122,109 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
if (status_code != WLAN_STATUS_SUCCESS) {
sdata_info(sdata, "%pM denied association (code=%d)\n",
- mgmt->sa, status_code);
- ieee80211_destroy_assoc_data(sdata, false, false);
+ assoc_data->ap_addr, status_code);
event.u.mlme.status = MLME_DENIED;
event.u.mlme.reason = status_code;
drv_event_callback(sdata->local, sdata, &event);
} else {
- if (!ieee80211_assoc_success(sdata, cbss, mgmt, len, elems)) {
+ if (aid == 0 || aid > IEEE80211_MAX_AID) {
+ sdata_info(sdata,
+ "invalid AID value %d (out of range), turn off PS\n",
+ aid);
+ aid = 0;
+ ifmgd->broken_ap = true;
+ }
+
+ if (sdata->vif.valid_links) {
+ if (!elems->multi_link) {
+ sdata_info(sdata,
+ "MLO association with %pM but no multi-link element in response!\n",
+ assoc_data->ap_addr);
+ goto abandon_assoc;
+ }
+
+ if (le16_get_bits(elems->multi_link->control,
+ IEEE80211_ML_CONTROL_TYPE) !=
+ IEEE80211_ML_CONTROL_TYPE_BASIC) {
+ sdata_info(sdata,
+ "bad multi-link element (control=0x%x)\n",
+ le16_to_cpu(elems->multi_link->control));
+ goto abandon_assoc;
+ } else {
+ struct ieee80211_mle_basic_common_info *common;
+
+ common = (void *)elems->multi_link->variable;
+
+ if (memcmp(assoc_data->ap_addr,
+ common->mld_mac_addr, ETH_ALEN)) {
+ sdata_info(sdata,
+ "AP MLD MAC address mismatch: got %pM expected %pM\n",
+ common->mld_mac_addr,
+ assoc_data->ap_addr);
+ goto abandon_assoc;
+ }
+ }
+ }
+
+ sdata->vif.cfg.aid = aid;
+
+ if (!ieee80211_assoc_success(sdata, mgmt, elems,
+ elem_start, elem_len)) {
/* oops -- internal error -- send timeout for now */
- ieee80211_destroy_assoc_data(sdata, false, false);
- cfg80211_assoc_timeout(sdata->dev, cbss);
+ ieee80211_destroy_assoc_data(sdata, ASSOC_TIMEOUT);
goto notify_driver;
}
event.u.mlme.status = MLME_SUCCESS;
drv_event_callback(sdata->local, sdata, &event);
sdata_info(sdata, "associated\n");
- /*
- * destroy assoc_data afterwards, as otherwise an idle
- * recalc after assoc_data is NULL but before associated
- * is set can cause the interface to go idle
- */
- ieee80211_destroy_assoc_data(sdata, true, false);
-
- /* get uapsd queues configuration */
- uapsd_queues = 0;
- for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
- if (sdata->tx_conf[ac].uapsd)
- uapsd_queues |= ieee80211_ac_to_qos_mask[ac];
-
info.success = 1;
}
- cfg80211_rx_assoc_resp(sdata->dev, cbss, (u8 *)mgmt, len, uapsd_queues,
- ifmgd->assoc_req_ies, ifmgd->assoc_req_ies_len);
+ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
+ struct ieee80211_link_data *link;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (!link)
+ continue;
+ if (!assoc_data->link[link_id].bss)
+ continue;
+ resp.links[link_id].bss = assoc_data->link[link_id].bss;
+ resp.links[link_id].addr = link->conf->addr;
+
+ /* get uapsd queues configuration - same for all links */
+ resp.uapsd_queues = 0;
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+ if (link->tx_conf[ac].uapsd)
+ resp.uapsd_queues |= ieee80211_ac_to_qos_mask[ac];
+ }
+
+ ieee80211_destroy_assoc_data(sdata,
+ status_code == WLAN_STATUS_SUCCESS ?
+ ASSOC_SUCCESS :
+ ASSOC_REJECTED);
+
+ resp.buf = (u8 *)mgmt;
+ resp.len = len;
+ resp.req_ies = ifmgd->assoc_req_ies;
+ resp.req_ies_len = ifmgd->assoc_req_ies_len;
+ if (sdata->vif.valid_links)
+ resp.ap_mld_addr = sdata->vif.cfg.ap_addr;
+ cfg80211_rx_assoc_resp(sdata->dev, &resp);
notify_driver:
drv_mgd_complete_tx(sdata->local, sdata, &info);
kfree(elems);
+ return;
+abandon_assoc:
+ ieee80211_destroy_assoc_data(sdata, ASSOC_ABANDON);
+ goto notify_driver;
}
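The per-link U-APSD loop in the association-response path above folds each enabled access category into the QoS-info bitmask handed to cfg80211. A standalone sketch of that fold, using the WMM QoS Info U-APSD bit layout (bit 0 VO, bit 1 VI, bit 2 BK, bit 3 BE); the names are invented:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum { AC_VO, AC_VI, AC_BE, AC_BK, NUM_ACS };

static const uint8_t ac_to_qos_mask[NUM_ACS] = {
	[AC_VO] = 1 << 0,
	[AC_VI] = 1 << 1,
	[AC_BE] = 1 << 3,
	[AC_BK] = 1 << 2,
};

static uint8_t uapsd_queues_mask(const bool uapsd[NUM_ACS])
{
	uint8_t mask = 0;
	int ac;

	for (ac = 0; ac < NUM_ACS; ac++)
		if (uapsd[ac])
			mask |= ac_to_qos_mask[ac];
	return mask;
}

int main(void)
{
	const bool uapsd[NUM_ACS] = { [AC_VO] = true, [AC_VI] = true };

	printf("uapsd mask 0x%x\n", uapsd_queues_mask(uapsd)); /* 0x3 */
	return 0;
}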
-static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
+static void ieee80211_rx_bss_info(struct ieee80211_link_data *link,
struct ieee80211_mgmt *mgmt, size_t len,
struct ieee80211_rx_status *rx_status)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_bss *bss;
struct ieee80211_channel *channel;
@@ -3817,15 +5238,16 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, channel);
if (bss) {
- sdata->vif.bss_conf.beacon_rate = bss->beacon_rate;
+ link->conf->beacon_rate = bss->beacon_rate;
ieee80211_rx_bss_put(local, bss);
}
}
-static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
+static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_link_data *link,
struct sk_buff *skb)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_mgmt *mgmt = (void *)skb->data;
struct ieee80211_if_managed *ifmgd;
struct ieee80211_rx_status *rx_status = (void *) skb->cb;
@@ -3857,10 +5279,10 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
if (baselen > len)
return;
- ieee80211_rx_bss_info(sdata, mgmt, len, rx_status);
+ ieee80211_rx_bss_info(link, mgmt, len, rx_status);
if (ifmgd->associated &&
- ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid))
+ ether_addr_equal(mgmt->bssid, link->u.mgd.bssid))
ieee80211_reset_ap_probe(sdata);
}
@@ -3888,30 +5310,33 @@ static const u64 care_about_ies =
(1ULL << WLAN_EID_HT_OPERATION) |
(1ULL << WLAN_EID_EXT_CHANSWITCH_ANN);
-static void ieee80211_handle_beacon_sig(struct ieee80211_sub_if_data *sdata,
+static void ieee80211_handle_beacon_sig(struct ieee80211_link_data *link,
struct ieee80211_if_managed *ifmgd,
struct ieee80211_bss_conf *bss_conf,
struct ieee80211_local *local,
struct ieee80211_rx_status *rx_status)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+
/* Track average RSSI from the Beacon frames of the current AP */
- if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) {
- ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE;
- ewma_beacon_signal_init(&ifmgd->ave_beacon_signal);
- ifmgd->last_cqm_event_signal = 0;
- ifmgd->count_beacon_signal = 1;
- ifmgd->last_ave_beacon_signal = 0;
+ if (!link->u.mgd.tracking_signal_avg) {
+ link->u.mgd.tracking_signal_avg = true;
+ ewma_beacon_signal_init(&link->u.mgd.ave_beacon_signal);
+ link->u.mgd.last_cqm_event_signal = 0;
+ link->u.mgd.count_beacon_signal = 1;
+ link->u.mgd.last_ave_beacon_signal = 0;
} else {
- ifmgd->count_beacon_signal++;
+ link->u.mgd.count_beacon_signal++;
}
- ewma_beacon_signal_add(&ifmgd->ave_beacon_signal, -rx_status->signal);
+ ewma_beacon_signal_add(&link->u.mgd.ave_beacon_signal,
+ -rx_status->signal);
if (ifmgd->rssi_min_thold != ifmgd->rssi_max_thold &&
- ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT) {
- int sig = -ewma_beacon_signal_read(&ifmgd->ave_beacon_signal);
- int last_sig = ifmgd->last_ave_beacon_signal;
+ link->u.mgd.count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT) {
+ int sig = -ewma_beacon_signal_read(&link->u.mgd.ave_beacon_signal);
+ int last_sig = link->u.mgd.last_ave_beacon_signal;
struct ieee80211_event event = {
.type = RSSI_EVENT,
};
@@ -3922,36 +5347,36 @@ static void ieee80211_handle_beacon_sig(struct ieee80211_sub_if_data *sdata,
*/
if (sig > ifmgd->rssi_max_thold &&
(last_sig <= ifmgd->rssi_min_thold || last_sig == 0)) {
- ifmgd->last_ave_beacon_signal = sig;
+ link->u.mgd.last_ave_beacon_signal = sig;
event.u.rssi.data = RSSI_EVENT_HIGH;
drv_event_callback(local, sdata, &event);
} else if (sig < ifmgd->rssi_min_thold &&
(last_sig >= ifmgd->rssi_max_thold ||
last_sig == 0)) {
- ifmgd->last_ave_beacon_signal = sig;
+ link->u.mgd.last_ave_beacon_signal = sig;
event.u.rssi.data = RSSI_EVENT_LOW;
drv_event_callback(local, sdata, &event);
}
}
if (bss_conf->cqm_rssi_thold &&
- ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT &&
+ link->u.mgd.count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT &&
!(sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)) {
- int sig = -ewma_beacon_signal_read(&ifmgd->ave_beacon_signal);
- int last_event = ifmgd->last_cqm_event_signal;
+ int sig = -ewma_beacon_signal_read(&link->u.mgd.ave_beacon_signal);
+ int last_event = link->u.mgd.last_cqm_event_signal;
int thold = bss_conf->cqm_rssi_thold;
int hyst = bss_conf->cqm_rssi_hyst;
if (sig < thold &&
(last_event == 0 || sig < last_event - hyst)) {
- ifmgd->last_cqm_event_signal = sig;
+ link->u.mgd.last_cqm_event_signal = sig;
ieee80211_cqm_rssi_notify(
&sdata->vif,
NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW,
sig, GFP_KERNEL);
} else if (sig > thold &&
(last_event == 0 || sig > last_event + hyst)) {
- ifmgd->last_cqm_event_signal = sig;
+ link->u.mgd.last_cqm_event_signal = sig;
ieee80211_cqm_rssi_notify(
&sdata->vif,
NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH,
@@ -3960,22 +5385,22 @@ static void ieee80211_handle_beacon_sig(struct ieee80211_sub_if_data *sdata,
}
if (bss_conf->cqm_rssi_low &&
- ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT) {
- int sig = -ewma_beacon_signal_read(&ifmgd->ave_beacon_signal);
- int last_event = ifmgd->last_cqm_event_signal;
+ link->u.mgd.count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT) {
+ int sig = -ewma_beacon_signal_read(&link->u.mgd.ave_beacon_signal);
+ int last_event = link->u.mgd.last_cqm_event_signal;
int low = bss_conf->cqm_rssi_low;
int high = bss_conf->cqm_rssi_high;
if (sig < low &&
(last_event == 0 || last_event >= low)) {
- ifmgd->last_cqm_event_signal = sig;
+ link->u.mgd.last_cqm_event_signal = sig;
ieee80211_cqm_rssi_notify(
&sdata->vif,
NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW,
sig, GFP_KERNEL);
} else if (sig > high &&
(last_event == 0 || last_event <= high)) {
- ifmgd->last_cqm_event_signal = sig;
+ link->u.mgd.last_cqm_event_signal = sig;
ieee80211_cqm_rssi_notify(
&sdata->vif,
NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH,
@@ -3994,18 +5419,21 @@ static bool ieee80211_rx_our_beacon(const u8 *tx_bssid,
return ether_addr_equal(tx_bssid, bss->transmitted_bss->bssid);
}
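The beacon-signal tracking moved into link->u.mgd above is built on the kernel's generated EWMA helpers. A self-contained fixed-point EWMA in the same spirit; PREC and WEIGHT here are illustrative, not the kernel's parameters:

#include <stdio.h>

#define PREC 4		/* fixed-point fraction bits */
#define WEIGHT 8	/* each new sample contributes 1/WEIGHT */

struct ewma {
	int internal;	/* running average scaled by 2^PREC */
};

static void ewma_add(struct ewma *e, int val)
{
	if (!e->internal)
		e->internal = val << PREC;
	else
		e->internal += ((val << PREC) - e->internal) / WEIGHT;
}

static int ewma_read(const struct ewma *e)
{
	return e->internal >> PREC;
}

int main(void)
{
	struct ewma avg = { 0 };
	int signal[] = { -55, -60, -58, -70, -72 };
	unsigned int i;

	for (i = 0; i < sizeof(signal) / sizeof(signal[0]); i++) {
		/* store the negated dBm value, as the code above does
		 * with -rx_status->signal, so samples stay positive
		 */
		ewma_add(&avg, -signal[i]);
		printf("avg signal %d dBm\n", -ewma_read(&avg));
	}
	return 0;
}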
-static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
+static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
struct ieee80211_hdr *hdr, size_t len,
struct ieee80211_rx_status *rx_status)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
+ struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg;
struct ieee80211_mgmt *mgmt = (void *) hdr;
size_t baselen;
struct ieee802_11_elems *elems;
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx_conf *chanctx_conf;
struct ieee80211_channel *chan;
+ struct link_sta_info *link_sta;
struct sta_info *sta;
u32 changed = 0;
bool erp_valid;
@@ -4013,6 +5441,10 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
u32 ncrc = 0;
u8 *bssid, *variable = mgmt->u.beacon.variable;
u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN];
+ struct ieee80211_elems_parse_params parse_params = {
+ .link_id = -1,
+ .from_ap = true,
+ };
sdata_assert_lock(sdata);
@@ -4031,8 +5463,11 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
if (baselen > len)
return;
+ parse_params.start = variable;
+ parse_params.len = len - baselen;
+
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(link->conf->chanctx_conf);
if (!chanctx_conf) {
rcu_read_unlock();
return;
@@ -4047,25 +5482,25 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
rcu_read_unlock();
if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon &&
- ieee80211_rx_our_beacon(bssid, ifmgd->assoc_data->bss)) {
- elems = ieee802_11_parse_elems(variable, len - baselen, false,
- bssid,
- ifmgd->assoc_data->bss->bssid);
+ !WARN_ON(sdata->vif.valid_links) &&
+ ieee80211_rx_our_beacon(bssid, ifmgd->assoc_data->link[0].bss)) {
+ parse_params.bss = ifmgd->assoc_data->link[0].bss;
+ elems = ieee802_11_parse_elems_full(&parse_params);
if (!elems)
return;
- ieee80211_rx_bss_info(sdata, mgmt, len, rx_status);
+ ieee80211_rx_bss_info(link, mgmt, len, rx_status);
if (elems->dtim_period)
- ifmgd->dtim_period = elems->dtim_period;
- ifmgd->have_beacon = true;
+ link->u.mgd.dtim_period = elems->dtim_period;
+ link->u.mgd.have_beacon = true;
ifmgd->assoc_data->need_beacon = false;
if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
- sdata->vif.bss_conf.sync_tsf =
+ link->conf->sync_tsf =
le64_to_cpu(mgmt->u.beacon.timestamp);
- sdata->vif.bss_conf.sync_device_ts =
+ link->conf->sync_device_ts =
rx_status->device_timestamp;
- sdata->vif.bss_conf.sync_dtim_count = elems->dtim_count;
+ link->conf->sync_dtim_count = elems->dtim_count;
}
if (elems->mbssid_config_ie)
@@ -4089,12 +5524,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
}
if (!ifmgd->associated ||
- !ieee80211_rx_our_beacon(bssid, ifmgd->associated))
+ !ieee80211_rx_our_beacon(bssid, link->u.mgd.bss))
return;
- bssid = ifmgd->associated->bssid;
+ bssid = link->u.mgd.bssid;
if (!(rx_status->flag & RX_FLAG_NO_SIGNAL_VAL))
- ieee80211_handle_beacon_sig(sdata, ifmgd, bss_conf,
+ ieee80211_handle_beacon_sig(link, ifmgd, bss_conf,
local, rx_status);
if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL) {
@@ -4116,15 +5551,16 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
*/
if (!ieee80211_is_s1g_beacon(hdr->frame_control))
ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4);
- elems = ieee802_11_parse_elems_crc(variable, len - baselen,
- false, care_about_ies, ncrc,
- mgmt->bssid, bssid);
+ parse_params.bss = link->u.mgd.bss;
+ parse_params.filter = care_about_ies;
+ parse_params.crc = ncrc;
+ elems = ieee802_11_parse_elems_full(&parse_params);
if (!elems)
return;
ncrc = elems->crc;
if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) &&
- ieee80211_check_tim(elems->tim, elems->tim_len, bss_conf->aid)) {
+ ieee80211_check_tim(elems->tim, elems->tim_len, vif_cfg->aid)) {
if (local->hw.conf.dynamic_ps_timeout > 0) {
if (local->hw.conf.flags & IEEE80211_CONF_PS) {
local->hw.conf.flags &= ~IEEE80211_CONF_PS;
@@ -4157,28 +5593,28 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
IEEE80211_P2P_ATTR_ABSENCE_NOTICE,
(u8 *) &noa, sizeof(noa));
if (ret >= 2) {
- if (sdata->u.mgd.p2p_noa_index != noa.index) {
+ if (link->u.mgd.p2p_noa_index != noa.index) {
/* valid noa_attr and index changed */
- sdata->u.mgd.p2p_noa_index = noa.index;
+ link->u.mgd.p2p_noa_index = noa.index;
memcpy(&bss_conf->p2p_noa_attr, &noa, sizeof(noa));
changed |= BSS_CHANGED_P2P_PS;
/*
* make sure we update all information, the CRC
* mechanism doesn't look at P2P attributes.
*/
- ifmgd->beacon_crc_valid = false;
+ link->u.mgd.beacon_crc_valid = false;
}
- } else if (sdata->u.mgd.p2p_noa_index != -1) {
+ } else if (link->u.mgd.p2p_noa_index != -1) {
/* noa_attr not found and we had valid noa_attr before */
- sdata->u.mgd.p2p_noa_index = -1;
+ link->u.mgd.p2p_noa_index = -1;
memset(&bss_conf->p2p_noa_attr, 0, sizeof(bss_conf->p2p_noa_attr));
changed |= BSS_CHANGED_P2P_PS;
- ifmgd->beacon_crc_valid = false;
+ link->u.mgd.beacon_crc_valid = false;
}
}
- if (ifmgd->csa_waiting_bcn)
- ieee80211_chswitch_post_beacon(sdata);
+ if (link->u.mgd.csa_waiting_bcn)
+ ieee80211_chswitch_post_beacon(link);
/*
* Update beacon timing and dtim count on every beacon appearance. This
@@ -4190,27 +5626,27 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
*/
if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY) &&
!ieee80211_is_s1g_beacon(hdr->frame_control)) {
- sdata->vif.bss_conf.sync_tsf =
+ link->conf->sync_tsf =
le64_to_cpu(mgmt->u.beacon.timestamp);
- sdata->vif.bss_conf.sync_device_ts =
+ link->conf->sync_device_ts =
rx_status->device_timestamp;
- sdata->vif.bss_conf.sync_dtim_count = elems->dtim_count;
+ link->conf->sync_dtim_count = elems->dtim_count;
}
- if ((ncrc == ifmgd->beacon_crc && ifmgd->beacon_crc_valid) ||
+ if ((ncrc == link->u.mgd.beacon_crc && link->u.mgd.beacon_crc_valid) ||
ieee80211_is_s1g_short_beacon(mgmt->frame_control))
goto free;
- ifmgd->beacon_crc = ncrc;
- ifmgd->beacon_crc_valid = true;
+ link->u.mgd.beacon_crc = ncrc;
+ link->u.mgd.beacon_crc_valid = true;
- ieee80211_rx_bss_info(sdata, mgmt, len, rx_status);
+ ieee80211_rx_bss_info(link, mgmt, len, rx_status);
- ieee80211_sta_process_chanswitch(sdata, rx_status->mactime,
+ ieee80211_sta_process_chanswitch(link, rx_status->mactime,
rx_status->device_timestamp,
elems, true);
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_WMM) &&
- ieee80211_sta_wmm_params(local, sdata, elems->wmm_param,
+ if (!link->u.mgd.disable_wmm_tracking &&
+ ieee80211_sta_wmm_params(local, link, elems->wmm_param,
elems->wmm_param_len,
elems->mu_edca_param_set))
changed |= BSS_CHANGED_QOS;
@@ -4219,12 +5655,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
* If we haven't had a beacon before, tell the driver about the
* DTIM period (and beacon timing if desired) now.
*/
- if (!ifmgd->have_beacon) {
+ if (!link->u.mgd.have_beacon) {
/* a few bogus APs send dtim_period = 0 or no TIM IE */
bss_conf->dtim_period = elems->dtim_period ?: 1;
changed |= BSS_CHANGED_BEACON_INFO;
- ifmgd->have_beacon = true;
+ link->u.mgd.have_beacon = true;
mutex_lock(&local->iflist_mtx);
ieee80211_recalc_ps(local);
@@ -4241,18 +5677,29 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
}
if (!ieee80211_is_s1g_beacon(hdr->frame_control))
- changed |= ieee80211_handle_bss_capability(sdata,
+ changed |= ieee80211_handle_bss_capability(link,
le16_to_cpu(mgmt->u.beacon.capab_info),
erp_valid, erp_value);
mutex_lock(&local->sta_mtx);
- sta = sta_info_get(sdata, bssid);
+ sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr);
+ if (WARN_ON(!sta)) {
+ mutex_unlock(&local->sta_mtx);
+ goto free;
+ }
+ link_sta = rcu_dereference_protected(sta->link[link->link_id],
+ lockdep_is_held(&local->sta_mtx));
+ if (WARN_ON(!link_sta)) {
+ mutex_unlock(&local->sta_mtx);
+ goto free;
+ }
- changed |= ieee80211_recalc_twt_req(sdata, sta, elems);
+ changed |= ieee80211_recalc_twt_req(link, link_sta, elems);
- if (ieee80211_config_bw(sdata, sta, elems->ht_cap_elem,
+ if (ieee80211_config_bw(link, elems->ht_cap_elem,
elems->vht_cap_elem, elems->ht_operation,
elems->vht_operation, elems->he_operation,
+ elems->eht_operation,
elems->s1g_oper, bssid, &changed)) {
mutex_unlock(&local->sta_mtx);
sdata_info(sdata,
@@ -4269,17 +5716,18 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
}
if (sta && elems->opmode_notif)
- ieee80211_vht_handle_opmode(sdata, sta, *elems->opmode_notif,
+ ieee80211_vht_handle_opmode(sdata, link_sta,
+ *elems->opmode_notif,
rx_status->band);
mutex_unlock(&local->sta_mtx);
- changed |= ieee80211_handle_pwr_constr(sdata, chan, mgmt,
+ changed |= ieee80211_handle_pwr_constr(link, chan, mgmt,
elems->country_elem,
elems->country_elem_len,
elems->pwr_constr_elem,
elems->cisco_dtpc_elem);
- ieee80211_bss_info_change_notify(sdata, changed);
+ ieee80211_link_info_change_notify(sdata, link, changed);
free:
kfree(elems);
}
@@ -4287,6 +5735,7 @@ free:
void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
+ struct ieee80211_link_data *link = &sdata->deflink;
struct ieee80211_rx_status *rx_status;
struct ieee80211_hdr *hdr;
u16 fc;
@@ -4298,7 +5747,7 @@ void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata,
sdata_lock(sdata);
switch (fc & IEEE80211_FCTL_STYPE) {
case IEEE80211_STYPE_S1G_BEACON:
- ieee80211_rx_mgmt_beacon(sdata, hdr, skb->len, rx_status);
+ ieee80211_rx_mgmt_beacon(link, hdr, skb->len, rx_status);
break;
}
sdata_unlock(sdata);
@@ -4307,6 +5756,7 @@ void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata,
void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
+ struct ieee80211_link_data *link = &sdata->deflink;
struct ieee80211_rx_status *rx_status;
struct ieee80211_mgmt *mgmt;
u16 fc;
@@ -4318,13 +5768,20 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
sdata_lock(sdata);
+ if (rx_status->link_valid) {
+ link = sdata_dereference(sdata->link[rx_status->link_id],
+ sdata);
+ if (!link)
+ goto out;
+ }
+
switch (fc & IEEE80211_FCTL_STYPE) {
case IEEE80211_STYPE_BEACON:
- ieee80211_rx_mgmt_beacon(sdata, (void *)mgmt,
+ ieee80211_rx_mgmt_beacon(link, (void *)mgmt,
skb->len, rx_status);
break;
case IEEE80211_STYPE_PROBE_RESP:
- ieee80211_rx_mgmt_probe_resp(sdata, skb);
+ ieee80211_rx_mgmt_probe_resp(link, skb);
break;
case IEEE80211_STYPE_AUTH:
ieee80211_rx_mgmt_auth(sdata, mgmt, skb->len);
@@ -4353,10 +5810,10 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
/* CSA IE cannot be overridden, no need for BSSID */
elems = ieee802_11_parse_elems(
mgmt->u.action.u.chan_switch.variable,
- ies_len, true, mgmt->bssid, NULL);
+ ies_len, true, NULL);
if (elems && !elems->parse_error)
- ieee80211_sta_process_chanswitch(sdata,
+ ieee80211_sta_process_chanswitch(link,
rx_status->mactime,
rx_status->device_timestamp,
elems, false);
@@ -4377,14 +5834,14 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
*/
elems = ieee802_11_parse_elems(
mgmt->u.action.u.ext_chan_switch.variable,
- ies_len, true, mgmt->bssid, NULL);
+ ies_len, true, NULL);
if (elems && !elems->parse_error) {
/* for the handling code pretend it was an IE */
elems->ext_chansw_ie =
&mgmt->u.action.u.ext_chan_switch.data;
- ieee80211_sta_process_chanswitch(sdata,
+ ieee80211_sta_process_chanswitch(link,
rx_status->mactime,
rx_status->device_timestamp,
elems, false);
@@ -4394,6 +5851,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
}
break;
}
+out:
sdata_unlock(sdata);
}
@@ -4406,7 +5864,7 @@ static void ieee80211_sta_timer(struct timer_list *t)
}
void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata,
- u8 *bssid, u8 reason, bool tx)
+ u8 reason, bool tx)
{
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
@@ -4438,7 +5896,7 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata)
if (auth_data->tries > IEEE80211_AUTH_MAX_TRIES) {
sdata_info(sdata, "authentication with %pM timed out\n",
- auth_data->bss->bssid);
+ auth_data->ap_addr);
/*
* Most likely AP is not in the range so remove the
@@ -4455,7 +5913,7 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata)
drv_mgd_prepare_tx(local, sdata, &info);
sdata_info(sdata, "send auth to %pM (try %d/%d)\n",
- auth_data->bss->bssid, auth_data->tries,
+ auth_data->ap_addr, auth_data->tries,
IEEE80211_AUTH_MAX_TRIES);
auth_data->expected_transaction = 2;
@@ -4472,9 +5930,8 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata)
ieee80211_send_auth(sdata, trans, auth_data->algorithm, status,
auth_data->data, auth_data->data_len,
- auth_data->bss->bssid,
- auth_data->bss->bssid, NULL, 0, 0,
- tx_flags);
+ auth_data->ap_addr, auth_data->ap_addr,
+ NULL, 0, 0, tx_flags);
if (tx_flags == 0) {
if (auth_data->algorithm == WLAN_AUTH_SAE)
@@ -4497,27 +5954,31 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data;
struct ieee80211_local *local = sdata->local;
+ int ret;
sdata_assert_lock(sdata);
assoc_data->tries++;
if (assoc_data->tries > IEEE80211_ASSOC_MAX_TRIES) {
sdata_info(sdata, "association with %pM timed out\n",
- assoc_data->bss->bssid);
+ assoc_data->ap_addr);
/*
* Most likely AP is not in the range so remove the
* bss struct for that AP.
*/
- cfg80211_unlink_bss(local->hw.wiphy, assoc_data->bss);
+ cfg80211_unlink_bss(local->hw.wiphy,
+ assoc_data->link[assoc_data->assoc_link_id].bss);
return -ETIMEDOUT;
}
sdata_info(sdata, "associate with %pM (try %d/%d)\n",
- assoc_data->bss->bssid, assoc_data->tries,
+ assoc_data->ap_addr, assoc_data->tries,
IEEE80211_ASSOC_MAX_TRIES);
- ieee80211_send_assoc(sdata);
+ ret = ieee80211_send_assoc(sdata);
+ if (ret)
+ return ret;
if (!ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT;
@@ -4590,25 +6051,25 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
if (ifmgd->auth_data && ifmgd->auth_data->timeout_started &&
time_after(jiffies, ifmgd->auth_data->timeout)) {
- if (ifmgd->auth_data->done) {
+ if (ifmgd->auth_data->done || ifmgd->auth_data->waiting) {
/*
- * ok ... we waited for assoc but userspace didn't,
- * so let's just kill the auth data
+ * ok ... we waited for assoc or continuation but
+ * userspace didn't do it, so kill the auth data
*/
ieee80211_destroy_auth_data(sdata, false);
} else if (ieee80211_auth(sdata)) {
- u8 bssid[ETH_ALEN];
+ u8 ap_addr[ETH_ALEN];
struct ieee80211_event event = {
.type = MLME_EVENT,
.u.mlme.data = AUTH_EVENT,
.u.mlme.status = MLME_TIMEOUT,
};
- memcpy(bssid, ifmgd->auth_data->bss->bssid, ETH_ALEN);
+ memcpy(ap_addr, ifmgd->auth_data->ap_addr, ETH_ALEN);
ieee80211_destroy_auth_data(sdata, false);
- cfg80211_auth_timeout(sdata->dev, bssid);
+ cfg80211_auth_timeout(sdata->dev, ap_addr);
drv_event_callback(sdata->local, sdata, &event);
}
} else if (ifmgd->auth_data && ifmgd->auth_data->timeout_started)
@@ -4616,17 +6077,16 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started &&
time_after(jiffies, ifmgd->assoc_data->timeout)) {
- if ((ifmgd->assoc_data->need_beacon && !ifmgd->have_beacon) ||
+ if ((ifmgd->assoc_data->need_beacon &&
+ !sdata->deflink.u.mgd.have_beacon) ||
ieee80211_do_assoc(sdata)) {
- struct cfg80211_bss *bss = ifmgd->assoc_data->bss;
struct ieee80211_event event = {
.type = MLME_EVENT,
.u.mlme.data = ASSOC_EVENT,
.u.mlme.status = MLME_TIMEOUT,
};
- ieee80211_destroy_assoc_data(sdata, false, false);
- cfg80211_assoc_timeout(sdata->dev, bss);
+ ieee80211_destroy_assoc_data(sdata, ASSOC_TIMEOUT);
drv_event_callback(sdata->local, sdata, &event);
}
} else if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started)
@@ -4634,11 +6094,9 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL &&
ifmgd->associated) {
- u8 bssid[ETH_ALEN];
+ u8 *bssid = sdata->deflink.u.mgd.bssid;
int max_tries;
- memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN);
-
if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
max_tries = max_nullfunc_tries;
else
@@ -4658,7 +6116,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
mlme_dbg(sdata,
"No ack for nullfunc frame to AP %pM, disconnecting.\n",
bssid);
- ieee80211_sta_connection_lost(sdata, bssid,
+ ieee80211_sta_connection_lost(sdata,
WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
false);
}
@@ -4668,7 +6126,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
mlme_dbg(sdata,
"Failed to send nullfunc to AP %pM after %dms, disconnecting\n",
bssid, probe_wait_ms);
- ieee80211_sta_connection_lost(sdata, bssid,
+ ieee80211_sta_connection_lost(sdata,
WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false);
} else if (ifmgd->probe_send_count < max_tries) {
mlme_dbg(sdata,
@@ -4685,7 +6143,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
"No probe response from AP %pM after %dms, disconnecting.\n",
bssid, probe_wait_ms);
- ieee80211_sta_connection_lost(sdata, bssid,
+ ieee80211_sta_connection_lost(sdata,
WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false);
}
}
@@ -4697,9 +6155,12 @@ static void ieee80211_sta_bcn_mon_timer(struct timer_list *t)
{
struct ieee80211_sub_if_data *sdata =
from_timer(sdata, t, u.mgd.bcn_mon_timer);
- struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- if (sdata->vif.csa_active && !ifmgd->csa_waiting_bcn)
+ if (WARN_ON(sdata->vif.valid_links))
+ return;
+
+ if (sdata->vif.bss_conf.csa_active &&
+ !sdata->deflink.u.mgd.csa_waiting_bcn)
return;
if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)
@@ -4719,16 +6180,20 @@ static void ieee80211_sta_conn_mon_timer(struct timer_list *t)
struct sta_info *sta;
unsigned long timeout;
- if (sdata->vif.csa_active && !ifmgd->csa_waiting_bcn)
+ if (WARN_ON(sdata->vif.valid_links))
return;
- sta = sta_info_get(sdata, ifmgd->bssid);
+ if (sdata->vif.bss_conf.csa_active &&
+ !sdata->deflink.u.mgd.csa_waiting_bcn)
+ return;
+
+ sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr);
if (!sta)
return;
- timeout = sta->status_stats.last_ack;
- if (time_before(sta->status_stats.last_ack, sta->rx_stats.last_rx))
- timeout = sta->rx_stats.last_rx;
+ timeout = sta->deflink.status_stats.last_ack;
+ if (time_before(sta->deflink.status_stats.last_ack, sta->deflink.rx_stats.last_rx))
+ timeout = sta->deflink.rx_stats.last_rx;
timeout += IEEE80211_CONNECTION_IDLE_TIME;
/* If timeout is after now, then update timer to fire at
@@ -4772,21 +6237,21 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata)
sdata_lock(sdata);
if (ifmgd->auth_data || ifmgd->assoc_data) {
- const u8 *bssid = ifmgd->auth_data ?
- ifmgd->auth_data->bss->bssid :
- ifmgd->assoc_data->bss->bssid;
+ const u8 *ap_addr = ifmgd->auth_data ?
+ ifmgd->auth_data->ap_addr :
+ ifmgd->assoc_data->ap_addr;
/*
* If we are trying to authenticate / associate while suspending,
* cfg80211 won't know and won't actually abort those attempts,
* thus we need to do that ourselves.
*/
- ieee80211_send_deauth_disassoc(sdata, bssid, bssid,
+ ieee80211_send_deauth_disassoc(sdata, ap_addr, ap_addr,
IEEE80211_STYPE_DEAUTH,
WLAN_REASON_DEAUTH_LEAVING,
false, frame_buf);
if (ifmgd->assoc_data)
- ieee80211_destroy_assoc_data(sdata, false, true);
+ ieee80211_destroy_assoc_data(sdata, ASSOC_ABANDON);
if (ifmgd->auth_data)
ieee80211_destroy_auth_data(sdata, false);
cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
@@ -4818,12 +6283,13 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata)
.bssid = bssid,
};
- memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN);
+ memcpy(bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
ieee80211_mgd_deauth(sdata, &req);
}
sdata_unlock(sdata);
}
+#endif
void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
{
@@ -4839,35 +6305,52 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_RESUME;
mlme_dbg(sdata, "driver requested disconnect after resume\n");
ieee80211_sta_connection_lost(sdata,
- ifmgd->associated->bssid,
WLAN_REASON_UNSPECIFIED,
true);
sdata_unlock(sdata);
return;
}
+
+ if (sdata->flags & IEEE80211_SDATA_DISCONNECT_HW_RESTART) {
+ sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_HW_RESTART;
+ mlme_dbg(sdata, "driver requested disconnect after hardware restart\n");
+ ieee80211_sta_connection_lost(sdata,
+ WLAN_REASON_UNSPECIFIED,
+ true);
+ sdata_unlock(sdata);
+ return;
+ }
+
sdata_unlock(sdata);
}
-#endif
+
+static void ieee80211_request_smps_mgd_work(struct work_struct *work)
+{
+ struct ieee80211_link_data *link =
+ container_of(work, struct ieee80211_link_data,
+ u.mgd.request_smps_work);
+
+ sdata_lock(link->sdata);
+ __ieee80211_request_smps_mgd(link->sdata, link,
+ link->u.mgd.driver_smps_mode);
+ sdata_unlock(link->sdata);
+}
/* interface setup */
void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
{
- struct ieee80211_if_managed *ifmgd;
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- ifmgd = &sdata->u.mgd;
INIT_WORK(&ifmgd->monitor_work, ieee80211_sta_monitor_work);
- INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work);
INIT_WORK(&ifmgd->beacon_connection_loss_work,
ieee80211_beacon_connection_loss_work);
INIT_WORK(&ifmgd->csa_connection_drop_work,
ieee80211_csa_connection_drop_work);
- INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_mgd_work);
INIT_DELAYED_WORK(&ifmgd->tdls_peer_del_work,
ieee80211_tdls_peer_del_work);
timer_setup(&ifmgd->timer, ieee80211_sta_timer, 0);
timer_setup(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, 0);
timer_setup(&ifmgd->conn_mon_timer, ieee80211_sta_conn_mon_timer, 0);
- timer_setup(&ifmgd->chswitch_timer, ieee80211_chswitch_timer, 0);
INIT_DELAYED_WORK(&ifmgd->tx_tspec_wk,
ieee80211_sta_handle_tspec_ac_params_wk);
@@ -4875,393 +6358,121 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
ifmgd->powersave = sdata->wdev.ps;
ifmgd->uapsd_queues = sdata->local->hw.uapsd_queues;
ifmgd->uapsd_max_sp_len = sdata->local->hw.uapsd_max_sp_len;
- ifmgd->p2p_noa_index = -1;
-
- if (sdata->local->hw.wiphy->features & NL80211_FEATURE_DYNAMIC_SMPS)
- ifmgd->req_smps = IEEE80211_SMPS_AUTOMATIC;
- else
- ifmgd->req_smps = IEEE80211_SMPS_OFF;
-
/* Setup TDLS data */
spin_lock_init(&ifmgd->teardown_lock);
ifmgd->teardown_skb = NULL;
ifmgd->orig_teardown_skb = NULL;
}
-/* scan finished notification */
-void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local)
-{
- struct ieee80211_sub_if_data *sdata;
-
- /* Restart STA timers */
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- if (ieee80211_sdata_running(sdata))
- ieee80211_restart_sta_timer(sdata);
- }
- rcu_read_unlock();
-}
-
-static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_bss *cbss)
-{
- struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- const struct element *ht_cap_elem, *vht_cap_elem;
- const struct ieee80211_ht_cap *ht_cap;
- const struct ieee80211_vht_cap *vht_cap;
- u8 chains = 1;
-
- if (ifmgd->flags & IEEE80211_STA_DISABLE_HT)
- return chains;
-
- ht_cap_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_HT_CAPABILITY);
- if (ht_cap_elem && ht_cap_elem->datalen >= sizeof(*ht_cap)) {
- ht_cap = (void *)ht_cap_elem->data;
- chains = ieee80211_mcs_to_chains(&ht_cap->mcs);
- /*
- * TODO: use "Tx Maximum Number Spatial Streams Supported" and
- * "Tx Unequal Modulation Supported" fields.
- */
- }
-
- if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT)
- return chains;
-
- vht_cap_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_VHT_CAPABILITY);
- if (vht_cap_elem && vht_cap_elem->datalen >= sizeof(*vht_cap)) {
- u8 nss;
- u16 tx_mcs_map;
-
- vht_cap = (void *)vht_cap_elem->data;
- tx_mcs_map = le16_to_cpu(vht_cap->supp_mcs.tx_mcs_map);
- for (nss = 8; nss > 0; nss--) {
- if (((tx_mcs_map >> (2 * (nss - 1))) & 3) !=
- IEEE80211_VHT_MCS_NOT_SUPPORTED)
- break;
- }
- /* TODO: use "Tx Highest Supported Long GI Data Rate" field? */
- chains = max(chains, nss);
- }
-
- return chains;
-}
-
-static bool
-ieee80211_verify_sta_he_mcs_support(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_supported_band *sband,
- const struct ieee80211_he_operation *he_op)
+void ieee80211_mgd_setup_link(struct ieee80211_link_data *link)
{
- const struct ieee80211_sta_he_cap *sta_he_cap =
- ieee80211_get_he_iftype_cap(sband,
- ieee80211_vif_type_p2p(&sdata->vif));
- u16 ap_min_req_set;
- int i;
-
- if (!sta_he_cap || !he_op)
- return false;
-
- ap_min_req_set = le16_to_cpu(he_op->he_mcs_nss_set);
-
- /* Need to go over for 80MHz, 160MHz and for 80+80 */
- for (i = 0; i < 3; i++) {
- const struct ieee80211_he_mcs_nss_supp *sta_mcs_nss_supp =
- &sta_he_cap->he_mcs_nss_supp;
- u16 sta_mcs_map_rx =
- le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i]);
- u16 sta_mcs_map_tx =
- le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i + 1]);
- u8 nss;
- bool verified = true;
-
- /*
- * For each band there is a maximum of 8 spatial streams
- * possible. Each of the sta_mcs_map_* is a 16-bit struct built
- * of 2 bits per NSS (1-8), with the values defined in enum
- * ieee80211_he_mcs_support. Need to make sure STA TX and RX
- * capabilities aren't less than the AP's minimum requirements
- * for this HE BSS per SS.
- * It is enough to find one such band that meets the reqs.
- */
- for (nss = 8; nss > 0; nss--) {
- u8 sta_rx_val = (sta_mcs_map_rx >> (2 * (nss - 1))) & 3;
- u8 sta_tx_val = (sta_mcs_map_tx >> (2 * (nss - 1))) & 3;
- u8 ap_val = (ap_min_req_set >> (2 * (nss - 1))) & 3;
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+ struct ieee80211_local *local = sdata->local;
+ unsigned int link_id = link->link_id;
- if (ap_val == IEEE80211_HE_MCS_NOT_SUPPORTED)
- continue;
+ link->u.mgd.p2p_noa_index = -1;
+ link->u.mgd.conn_flags = 0;
+ link->conf->bssid = link->u.mgd.bssid;
- /*
- * Make sure the HE AP doesn't require MCSs that aren't
- * supported by the client
- */
- if (sta_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
- sta_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
- (ap_val > sta_rx_val) || (ap_val > sta_tx_val)) {
- verified = false;
- break;
- }
- }
+ INIT_WORK(&link->u.mgd.request_smps_work,
+ ieee80211_request_smps_mgd_work);
+ if (local->hw.wiphy->features & NL80211_FEATURE_DYNAMIC_SMPS)
+ link->u.mgd.req_smps = IEEE80211_SMPS_AUTOMATIC;
+ else
+ link->u.mgd.req_smps = IEEE80211_SMPS_OFF;
- if (verified)
- return true;
- }
+ INIT_WORK(&link->u.mgd.chswitch_work, ieee80211_chswitch_work);
+ timer_setup(&link->u.mgd.chswitch_timer, ieee80211_chswitch_timer, 0);
- /* If here, STA doesn't meet AP's HE min requirements */
- return false;
+ if (sdata->u.mgd.assoc_data)
+ ether_addr_copy(link->conf->addr,
+ sdata->u.mgd.assoc_data->link[link_id].addr);
+ else if (!is_valid_ether_addr(link->conf->addr))
+ eth_random_addr(link->conf->addr);
}
-static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_bss *cbss)
+/* scan finished notification */
+void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local)
{
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- const struct ieee80211_ht_cap *ht_cap = NULL;
- const struct ieee80211_ht_operation *ht_oper = NULL;
- const struct ieee80211_vht_operation *vht_oper = NULL;
- const struct ieee80211_he_operation *he_oper = NULL;
- const struct ieee80211_s1g_oper_ie *s1g_oper = NULL;
- struct ieee80211_supported_band *sband;
- struct cfg80211_chan_def chandef;
- bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ;
- bool is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ;
- struct ieee80211_bss *bss = (void *)cbss->priv;
- struct ieee802_11_elems *elems;
- const struct cfg80211_bss_ies *ies;
- int ret;
- u32 i;
- bool have_80mhz;
+ struct ieee80211_sub_if_data *sdata;
+ /* Restart STA timers */
rcu_read_lock();
-
- ies = rcu_dereference(cbss->ies);
- elems = ieee802_11_parse_elems(ies->data, ies->len, false,
- NULL, NULL);
- if (!elems) {
- rcu_read_unlock();
- return -ENOMEM;
- }
-
- sband = local->hw.wiphy->bands[cbss->channel->band];
-
- ifmgd->flags &= ~(IEEE80211_STA_DISABLE_40MHZ |
- IEEE80211_STA_DISABLE_80P80MHZ |
- IEEE80211_STA_DISABLE_160MHZ);
-
- /* disable HT/VHT/HE if we don't support them */
- if (!sband->ht_cap.ht_supported && !is_6ghz) {
- mlme_dbg(sdata, "HT not supported, disabling HT/VHT/HE\n");
- ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
- ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
- ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
- }
-
- if (!sband->vht_cap.vht_supported && is_5ghz) {
- mlme_dbg(sdata, "VHT not supported, disabling VHT/HE\n");
- ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
- ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
- }
-
- if (!ieee80211_get_he_iftype_cap(sband,
- ieee80211_vif_type_p2p(&sdata->vif))) {
- mlme_dbg(sdata, "HE not supported, disabling it\n");
- ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
- }
-
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) && !is_6ghz) {
- ht_oper = elems->ht_operation;
- ht_cap = elems->ht_cap_elem;
-
- if (!ht_cap) {
- ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
- ht_oper = NULL;
- }
- }
-
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && !is_6ghz) {
- vht_oper = elems->vht_operation;
- if (vht_oper && !ht_oper) {
- vht_oper = NULL;
- sdata_info(sdata,
- "AP advertised VHT without HT, disabling HT/VHT/HE\n");
- ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
- ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
- ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
- }
-
- if (!elems->vht_cap_elem) {
- sdata_info(sdata,
- "bad VHT capabilities, disabling VHT\n");
- ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
- vht_oper = NULL;
- }
- }
-
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE)) {
- he_oper = elems->he_operation;
-
- if (is_6ghz) {
- struct ieee80211_bss_conf *bss_conf;
- u8 i, j = 0;
-
- bss_conf = &sdata->vif.bss_conf;
-
- if (elems->pwr_constr_elem)
- bss_conf->pwr_reduction = *elems->pwr_constr_elem;
-
- BUILD_BUG_ON(ARRAY_SIZE(bss_conf->tx_pwr_env) !=
- ARRAY_SIZE(elems->tx_pwr_env));
-
- for (i = 0; i < elems->tx_pwr_env_num; i++) {
- if (elems->tx_pwr_env_len[i] >
- sizeof(bss_conf->tx_pwr_env[j]))
- continue;
-
- bss_conf->tx_pwr_env_num++;
- memcpy(&bss_conf->tx_pwr_env[j], elems->tx_pwr_env[i],
- elems->tx_pwr_env_len[i]);
- j++;
- }
- }
-
- if (!ieee80211_verify_sta_he_mcs_support(sdata, sband, he_oper))
- ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
- }
-
- /* Allow VHT if at least one channel on the sband supports 80 MHz */
- have_80mhz = false;
- for (i = 0; i < sband->n_channels; i++) {
- if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED |
- IEEE80211_CHAN_NO_80MHZ))
- continue;
-
- have_80mhz = true;
- break;
- }
-
- if (!have_80mhz) {
- sdata_info(sdata, "80 MHz not supported, disabling VHT\n");
- ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
- }
-
- if (sband->band == NL80211_BAND_S1GHZ) {
- s1g_oper = elems->s1g_oper;
- if (!s1g_oper)
- sdata_info(sdata,
- "AP missing S1G operation element?\n");
+ list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ if (ieee80211_sdata_running(sdata))
+ ieee80211_restart_sta_timer(sdata);
}
-
- ifmgd->flags |= ieee80211_determine_chantype(sdata, sband,
- cbss->channel,
- bss->vht_cap_info,
- ht_oper, vht_oper, he_oper,
- s1g_oper,
- &chandef, false);
-
- sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss),
- local->rx_chains);
-
rcu_read_unlock();
- /* the element data was RCU protected so no longer valid anyway */
- kfree(elems);
- elems = NULL;
-
- if (ifmgd->flags & IEEE80211_STA_DISABLE_HE && is_6ghz) {
- sdata_info(sdata, "Rejecting non-HE 6/7 GHz connection");
- return -EINVAL;
- }
-
- /* will change later if needed */
- sdata->smps_mode = IEEE80211_SMPS_OFF;
-
- mutex_lock(&local->mtx);
- /*
- * If this fails (possibly due to channel context sharing
- * on incompatible channels, e.g. 80+80 and 160 sharing the
- * same control channel) try to use a smaller bandwidth.
- */
- ret = ieee80211_vif_use_channel(sdata, &chandef,
- IEEE80211_CHANCTX_SHARED);
-
- /* don't downgrade for 5 and 10 MHz channels, though. */
- if (chandef.width == NL80211_CHAN_WIDTH_5 ||
- chandef.width == NL80211_CHAN_WIDTH_10)
- goto out;
-
- while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) {
- ifmgd->flags |= ieee80211_chandef_downgrade(&chandef);
- ret = ieee80211_vif_use_channel(sdata, &chandef,
- IEEE80211_CHANCTX_SHARED);
- }
- out:
- mutex_unlock(&local->mtx);
- return ret;
-}
-
-static bool ieee80211_get_dtim(const struct cfg80211_bss_ies *ies,
- u8 *dtim_count, u8 *dtim_period)
-{
- const u8 *tim_ie = cfg80211_find_ie(WLAN_EID_TIM, ies->data, ies->len);
- const u8 *idx_ie = cfg80211_find_ie(WLAN_EID_MULTI_BSSID_IDX, ies->data,
- ies->len);
- const struct ieee80211_tim_ie *tim = NULL;
- const struct ieee80211_bssid_index *idx;
- bool valid = tim_ie && tim_ie[1] >= 2;
-
- if (valid)
- tim = (void *)(tim_ie + 2);
-
- if (dtim_count)
- *dtim_count = valid ? tim->dtim_count : 0;
-
- if (dtim_period)
- *dtim_period = valid ? tim->dtim_period : 0;
-
- /* Check if value is overridden by non-transmitted profile */
- if (!idx_ie || idx_ie[1] < 3)
- return valid;
-
- idx = (void *)(idx_ie + 2);
-
- if (dtim_count)
- *dtim_count = idx->dtim_count;
-
- if (dtim_period)
- *dtim_period = idx->dtim_period;
-
- return true;
}
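The helper removed here consumes only the first two octets of the TIM element body (DTIM count, then DTIM period), optionally overridden by the Multiple BSSID Index element. A reduced sketch of the TIM part, under the same IEEE 802.11 layout assumptions:

	/* ie points at the element header: ie[0] = WLAN_EID_TIM, ie[1] = length.
	 * The body begins with dtim_count and dtim_period (802.11-2020 9.4.2.5).
	 */
	static bool tim_get_dtim(const u8 *ie, u8 *count, u8 *period)
	{
		if (ie[1] < 2)
			return false;
		*count = ie[2];
		*period = ie[3];
		return true;
	}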
static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_bss *cbss, bool assoc,
+ struct cfg80211_bss *cbss, s8 link_id,
+ const u8 *ap_mld_addr, bool assoc,
bool override)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_bss *bss = (void *)cbss->priv;
struct sta_info *new_sta = NULL;
- struct ieee80211_supported_band *sband;
+ struct ieee80211_link_data *link;
bool have_sta = false;
+ bool mlo;
int err;
- sband = local->hw.wiphy->bands[cbss->channel->band];
+ if (link_id >= 0) {
+ mlo = true;
+ if (WARN_ON(!ap_mld_addr))
+ return -EINVAL;
+ err = ieee80211_vif_set_links(sdata, BIT(link_id));
+ } else {
+ if (WARN_ON(ap_mld_addr))
+ return -EINVAL;
+ ap_mld_addr = cbss->bssid;
+ err = ieee80211_vif_set_links(sdata, 0);
+ link_id = 0;
+ mlo = false;
+ }
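The branch above establishes the calling convention for the rest of the series: link_id < 0 means a legacy (non-MLO) connection where the AP address is simply the BSSID and pseudo-link 0 is used, while link_id >= 0 requires the caller to also supply the AP MLD address. A standalone sketch of that normalization (struct and names hypothetical):

	struct conn_target {
		const u8 *ap_addr;	/* MLD address for MLO, else the BSSID */
		unsigned int link_id;	/* 0 for non-MLO */
		bool mlo;
	};

	static int conn_target_init(struct conn_target *t, const u8 *bssid,
				    s8 link_id, const u8 *ap_mld_addr)
	{
		if (link_id >= 0) {
			if (!ap_mld_addr)
				return -EINVAL;
			t->ap_addr = ap_mld_addr;
			t->link_id = link_id;
			t->mlo = true;
		} else {
			if (ap_mld_addr)
				return -EINVAL;
			t->ap_addr = bssid;
			t->link_id = 0;
			t->mlo = false;
		}
		return 0;
	}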
- if (WARN_ON(!ifmgd->auth_data && !ifmgd->assoc_data))
- return -EINVAL;
+ if (err)
+ return err;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (WARN_ON(!link)) {
+ err = -ENOLINK;
+ goto out_err;
+ }
+
+ if (WARN_ON(!ifmgd->auth_data && !ifmgd->assoc_data)) {
+ err = -EINVAL;
+ goto out_err;
+ }
/* If a reconfig is happening, bail out */
- if (local->in_reconfig)
- return -EBUSY;
+ if (local->in_reconfig) {
+ err = -EBUSY;
+ goto out_err;
+ }
if (assoc) {
rcu_read_lock();
- have_sta = sta_info_get(sdata, cbss->bssid);
+ have_sta = sta_info_get(sdata, ap_mld_addr);
rcu_read_unlock();
}
if (!have_sta) {
- new_sta = sta_info_alloc(sdata, cbss->bssid, GFP_KERNEL);
- if (!new_sta)
- return -ENOMEM;
+ if (mlo)
+ new_sta = sta_info_alloc_with_link(sdata, ap_mld_addr,
+ link_id, cbss->bssid,
+ GFP_KERNEL);
+ else
+ new_sta = sta_info_alloc(sdata, ap_mld_addr, GFP_KERNEL);
+
+ if (!new_sta) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ new_sta->sta.mlo = mlo;
}
/*
@@ -5278,99 +6489,62 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
* it might need the new channel for that.
*/
if (new_sta) {
- u32 rates = 0, basic_rates = 0;
- bool have_higher_than_11mbit = false;
- int min_rate = INT_MAX, min_rate_index = -1;
const struct cfg80211_bss_ies *ies;
- int shift = ieee80211_vif_get_shift(&sdata->vif);
+ struct link_sta_info *link_sta;
- /* TODO: S1G Basic Rate Set is expressed elsewhere */
- if (cbss->channel->band == NL80211_BAND_S1GHZ) {
- ieee80211_s1g_sta_rate_init(new_sta);
- goto skip_rates;
+ rcu_read_lock();
+ link_sta = rcu_dereference(new_sta->link[link_id]);
+ if (WARN_ON(!link_sta)) {
+ rcu_read_unlock();
+ sta_info_free(local, new_sta);
+ err = -EINVAL;
+ goto out_err;
}
- ieee80211_get_rates(sband, bss->supp_rates,
- bss->supp_rates_len,
- &rates, &basic_rates,
- &have_higher_than_11mbit,
- &min_rate, &min_rate_index,
- shift);
-
- /*
- * This used to be a workaround for basic rates missing
- * in the association response frame. Now that we no
- * longer use the basic rates from there, it probably
- * doesn't happen any more, but keep the workaround so
- * in case some *other* APs are buggy in different ways
- * we can connect -- with a warning.
- * Allow this workaround only in case the AP provided at least
- * one rate.
- */
- if (min_rate_index < 0) {
- sdata_info(sdata,
- "No legacy rates in association response\n");
-
+ err = ieee80211_mgd_setup_link_sta(link, new_sta,
+ link_sta, cbss);
+ if (err) {
+ rcu_read_unlock();
sta_info_free(local, new_sta);
- return -EINVAL;
- } else if (!basic_rates) {
- sdata_info(sdata,
- "No basic rates, using min rate instead\n");
- basic_rates = BIT(min_rate_index);
+ goto out_err;
}
- if (rates)
- new_sta->sta.supp_rates[cbss->channel->band] = rates;
- else
- sdata_info(sdata,
- "No rates found, keeping mandatory only\n");
-
- sdata->vif.bss_conf.basic_rates = basic_rates;
-
- /* cf. IEEE 802.11 9.2.12 */
- if (cbss->channel->band == NL80211_BAND_2GHZ &&
- have_higher_than_11mbit)
- sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE;
- else
- sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE;
-
-skip_rates:
- memcpy(ifmgd->bssid, cbss->bssid, ETH_ALEN);
+ memcpy(link->u.mgd.bssid, cbss->bssid, ETH_ALEN);
/* set timing information */
- sdata->vif.bss_conf.beacon_int = cbss->beacon_interval;
- rcu_read_lock();
+ link->conf->beacon_int = cbss->beacon_interval;
ies = rcu_dereference(cbss->beacon_ies);
if (ies) {
- sdata->vif.bss_conf.sync_tsf = ies->tsf;
- sdata->vif.bss_conf.sync_device_ts =
+ link->conf->sync_tsf = ies->tsf;
+ link->conf->sync_device_ts =
bss->device_ts_beacon;
ieee80211_get_dtim(ies,
- &sdata->vif.bss_conf.sync_dtim_count,
+ &link->conf->sync_dtim_count,
NULL);
} else if (!ieee80211_hw_check(&sdata->local->hw,
TIMING_BEACON_ONLY)) {
ies = rcu_dereference(cbss->proberesp_ies);
/* must be non-NULL since beacon IEs were NULL */
- sdata->vif.bss_conf.sync_tsf = ies->tsf;
- sdata->vif.bss_conf.sync_device_ts =
+ link->conf->sync_tsf = ies->tsf;
+ link->conf->sync_device_ts =
bss->device_ts_presp;
- sdata->vif.bss_conf.sync_dtim_count = 0;
+ link->conf->sync_dtim_count = 0;
} else {
- sdata->vif.bss_conf.sync_tsf = 0;
- sdata->vif.bss_conf.sync_device_ts = 0;
- sdata->vif.bss_conf.sync_dtim_count = 0;
+ link->conf->sync_tsf = 0;
+ link->conf->sync_device_ts = 0;
+ link->conf->sync_dtim_count = 0;
}
rcu_read_unlock();
}
if (new_sta || override) {
- err = ieee80211_prep_channel(sdata, cbss);
+ err = ieee80211_prep_channel(sdata, link, cbss,
+ &link->u.mgd.conn_flags);
if (err) {
if (new_sta)
sta_info_free(local, new_sta);
- return -EINVAL;
+ goto out_err;
}
}
@@ -5379,9 +6553,10 @@ skip_rates:
* tell driver about BSSID, basic rates and timing
* this was set up above, before setting the channel
*/
- ieee80211_bss_info_change_notify(sdata,
- BSS_CHANGED_BSSID | BSS_CHANGED_BASIC_RATES |
- BSS_CHANGED_BEACON_INT);
+ ieee80211_link_info_change_notify(sdata, link,
+ BSS_CHANGED_BSSID |
+ BSS_CHANGED_BASIC_RATES |
+ BSS_CHANGED_BEACON_INT);
if (assoc)
sta_info_pre_move_state(new_sta, IEEE80211_STA_AUTH);
@@ -5392,16 +6567,21 @@ skip_rates:
sdata_info(sdata,
"failed to insert STA entry for the AP (error %d)\n",
err);
- return err;
+ goto out_err;
}
} else
- WARN_ON_ONCE(!ether_addr_equal(ifmgd->bssid, cbss->bssid));
+ WARN_ON_ONCE(!ether_addr_equal(link->u.mgd.bssid, cbss->bssid));
/* Cancel scan to ensure that nothing interferes with connection */
if (local->scanning)
ieee80211_scan_cancel(local);
return 0;
+
+out_err:
+ ieee80211_link_release_channel(&sdata->deflink);
+ ieee80211_vif_set_links(sdata, 0);
+ return err;
}
/* config hooks */
@@ -5456,6 +6636,9 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
if (!auth_data)
return -ENOMEM;
+ memcpy(auth_data->ap_addr,
+ req->ap_mld_addr ?: req->bss->bssid,
+ ETH_ALEN);
auth_data->bss = req->bss;
if (req->auth_data_len >= 4) {
@@ -5511,14 +6694,14 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
*/
if (cont_auth && req->auth_type == NL80211_AUTHTYPE_SAE &&
auth_data->peer_confirmed && auth_data->sae_trans == 2)
- ieee80211_mark_sta_auth(sdata, req->bss->bssid);
+ ieee80211_mark_sta_auth(sdata);
if (ifmgd->associated) {
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
sdata_info(sdata,
"disconnect from AP %pM for new auth to %pM\n",
- ifmgd->associated->bssid, req->bss->bssid);
+ sdata->vif.cfg.ap_addr, auth_data->ap_addr);
ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
WLAN_REASON_UNSPECIFIED,
false, frame_buf);
@@ -5529,15 +6712,19 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
false);
}
- sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid);
+ sdata_info(sdata, "authenticate with %pM\n", auth_data->ap_addr);
- err = ieee80211_prep_connection(sdata, req->bss, cont_auth, false);
+ /* needed for transmitting the auth frame(s) properly */
+ memcpy(sdata->vif.cfg.ap_addr, auth_data->ap_addr, ETH_ALEN);
+
+ err = ieee80211_prep_connection(sdata, req->bss, req->link_id,
+ req->ap_mld_addr, cont_auth, false);
if (err)
goto err_clear;
err = ieee80211_auth(sdata);
if (err) {
- sta_info_destroy_addr(sdata, req->bss->bssid);
+ sta_info_destroy_addr(sdata, auth_data->ap_addr);
goto err_clear;
}
@@ -5546,38 +6733,192 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
return 0;
err_clear:
- eth_zero_addr(ifmgd->bssid);
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID);
+ if (!sdata->vif.valid_links) {
+ eth_zero_addr(sdata->deflink.u.mgd.bssid);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ BSS_CHANGED_BSSID);
+ mutex_lock(&sdata->local->mtx);
+ ieee80211_link_release_channel(&sdata->deflink);
+ mutex_unlock(&sdata->local->mtx);
+ }
ifmgd->auth_data = NULL;
- mutex_lock(&sdata->local->mtx);
- ieee80211_vif_release_channel(sdata);
- mutex_unlock(&sdata->local->mtx);
kfree(auth_data);
return err;
}
+static ieee80211_conn_flags_t
+ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mgd_assoc_data *assoc_data,
+ struct cfg80211_assoc_request *req,
+ ieee80211_conn_flags_t conn_flags,
+ unsigned int link_id)
+{
+ struct ieee80211_local *local = sdata->local;
+ const struct cfg80211_bss_ies *beacon_ies;
+ struct ieee80211_supported_band *sband;
+ const struct element *ht_elem, *vht_elem;
+ struct ieee80211_link_data *link;
+ struct cfg80211_bss *cbss;
+ struct ieee80211_bss *bss;
+ bool is_5ghz, is_6ghz;
+
+ cbss = assoc_data->link[link_id].bss;
+ if (WARN_ON(!cbss))
+ return 0;
+
+ bss = (void *)cbss->priv;
+
+ sband = local->hw.wiphy->bands[cbss->channel->band];
+ if (WARN_ON(!sband))
+ return 0;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (WARN_ON(!link))
+ return 0;
+
+ is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ;
+ is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ;
+
+ /* for MLO connections assume advertising all rates is OK */
+ if (!req->ap_mld_addr) {
+ assoc_data->supp_rates = bss->supp_rates;
+ assoc_data->supp_rates_len = bss->supp_rates_len;
+ }
+
+ /* copy and link elems for the STA profile */
+ if (req->links[link_id].elems_len) {
+ memcpy(assoc_data->ie_pos, req->links[link_id].elems,
+ req->links[link_id].elems_len);
+ assoc_data->link[link_id].elems = assoc_data->ie_pos;
+ assoc_data->link[link_id].elems_len = req->links[link_id].elems_len;
+ assoc_data->ie_pos += req->links[link_id].elems_len;
+ }
+
+ rcu_read_lock();
+ ht_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_HT_OPERATION);
+ if (ht_elem && ht_elem->datalen >= sizeof(struct ieee80211_ht_operation))
+ assoc_data->link[link_id].ap_ht_param =
+ ((struct ieee80211_ht_operation *)(ht_elem->data))->ht_param;
+ else if (!is_6ghz)
+ conn_flags |= IEEE80211_CONN_DISABLE_HT;
+ vht_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_VHT_CAPABILITY);
+ if (vht_elem && vht_elem->datalen >= sizeof(struct ieee80211_vht_cap)) {
+ memcpy(&assoc_data->link[link_id].ap_vht_cap, vht_elem->data,
+ sizeof(struct ieee80211_vht_cap));
+ } else if (is_5ghz) {
+ link_info(link,
+ "VHT capa missing/short, disabling VHT/HE/EHT\n");
+ conn_flags |= IEEE80211_CONN_DISABLE_VHT |
+ IEEE80211_CONN_DISABLE_HE |
+ IEEE80211_CONN_DISABLE_EHT;
+ }
+ rcu_read_unlock();
+
+ link->u.mgd.beacon_crc_valid = false;
+ link->u.mgd.dtim_period = 0;
+ link->u.mgd.have_beacon = false;
+
+ /* override HT/VHT configuration only if the AP and we support it */
+ if (!(conn_flags & IEEE80211_CONN_DISABLE_HT)) {
+ struct ieee80211_sta_ht_cap sta_ht_cap;
+
+ memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap));
+ ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
+ }
+
+ rcu_read_lock();
+ beacon_ies = rcu_dereference(cbss->beacon_ies);
+ if (beacon_ies) {
+ const struct element *elem;
+ u8 dtim_count = 0;
+
+ ieee80211_get_dtim(beacon_ies, &dtim_count,
+ &link->u.mgd.dtim_period);
+
+ sdata->deflink.u.mgd.have_beacon = true;
+
+ if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
+ link->conf->sync_tsf = beacon_ies->tsf;
+ link->conf->sync_device_ts = bss->device_ts_beacon;
+ link->conf->sync_dtim_count = dtim_count;
+ }
+
+ elem = cfg80211_find_ext_elem(WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION,
+ beacon_ies->data, beacon_ies->len);
+ if (elem && elem->datalen >= 3)
+ link->conf->profile_periodicity = elem->data[2];
+ else
+ link->conf->profile_periodicity = 0;
+
+ elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY,
+ beacon_ies->data, beacon_ies->len);
+ if (elem && elem->datalen >= 11 &&
+ (elem->data[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
+ link->conf->ema_ap = true;
+ else
+ link->conf->ema_ap = false;
+ }
+ rcu_read_unlock();
+
+ if (bss->corrupt_data) {
+ char *corrupt_type = "data";
+
+ if (bss->corrupt_data & IEEE80211_BSS_CORRUPT_BEACON) {
+ if (bss->corrupt_data & IEEE80211_BSS_CORRUPT_PROBE_RESP)
+ corrupt_type = "beacon and probe response";
+ else
+ corrupt_type = "beacon";
+ } else if (bss->corrupt_data & IEEE80211_BSS_CORRUPT_PROBE_RESP) {
+ corrupt_type = "probe response";
+ }
+		sdata_info(sdata, "associating with AP %pM with corrupt %s\n",
+ cbss->bssid, corrupt_type);
+ }
+
+ if (link->u.mgd.req_smps == IEEE80211_SMPS_AUTOMATIC) {
+ if (sdata->u.mgd.powersave)
+ link->smps_mode = IEEE80211_SMPS_DYNAMIC;
+ else
+ link->smps_mode = IEEE80211_SMPS_OFF;
+ } else {
+ link->smps_mode = link->u.mgd.req_smps;
+ }
+
+ return conn_flags;
+}
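Note that ieee80211_setup_assoc_link() returns additional IEEE80211_CONN_DISABLE_* bits rather than mutating link state directly, so the caller can accumulate them per link with a plain OR. A minimal illustration of that pattern (flag values hypothetical):

	typedef unsigned int conn_flags_t;	/* stand-in for ieee80211_conn_flags_t */

	#define CONN_DISABLE_HT		0x1	/* placeholder bit values */
	#define CONN_DISABLE_VHT	0x2
	#define CONN_DISABLE_HE		0x4

	/* Pure function: report what must be disabled; the caller ORs it in. */
	static conn_flags_t link_disable_flags(bool ht_ok, bool vht_ok)
	{
		conn_flags_t flags = 0;

		if (!ht_ok)
			flags |= CONN_DISABLE_HT | CONN_DISABLE_VHT | CONN_DISABLE_HE;
		else if (!vht_ok)
			flags |= CONN_DISABLE_VHT | CONN_DISABLE_HE;
		return flags;
	}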
+
int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
struct cfg80211_assoc_request *req)
{
- bool is_6ghz = req->bss->channel->band == NL80211_BAND_6GHZ;
- bool is_5ghz = req->bss->channel->band == NL80211_BAND_5GHZ;
+ unsigned int assoc_link_id = req->link_id < 0 ? 0 : req->link_id;
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- struct ieee80211_bss *bss = (void *)req->bss->priv;
struct ieee80211_mgd_assoc_data *assoc_data;
- const struct cfg80211_bss_ies *beacon_ies;
- struct ieee80211_supported_band *sband;
- struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
- const struct element *ssid_elem, *ht_elem, *vht_elem;
+ const struct element *ssid_elem;
+ struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg;
+ ieee80211_conn_flags_t conn_flags = 0;
+ struct ieee80211_link_data *link;
+ struct cfg80211_bss *cbss;
+ struct ieee80211_bss *bss;
+ bool override;
int i, err;
- bool override = false;
+ size_t size = sizeof(*assoc_data) + req->ie_len;
+
+ for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++)
+ size += req->links[i].elems_len;
+
+ /* FIXME: no support for 4-addr MLO yet */
+ if (sdata->u.mgd.use_4addr && req->link_id >= 0)
+ return -EOPNOTSUPP;
- assoc_data = kzalloc(sizeof(*assoc_data) + req->ie_len, GFP_KERNEL);
+ assoc_data = kzalloc(size, GFP_KERNEL);
if (!assoc_data)
return -ENOMEM;
+ cbss = req->link_id < 0 ? req->bss : req->links[req->link_id].bss;
+
rcu_read_lock();
- ssid_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_SSID);
+ ssid_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_SSID);
if (!ssid_elem || ssid_elem->datalen > sizeof(assoc_data->ssid)) {
rcu_read_unlock();
kfree(assoc_data);
@@ -5585,16 +6926,37 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
}
memcpy(assoc_data->ssid, ssid_elem->data, ssid_elem->datalen);
assoc_data->ssid_len = ssid_elem->datalen;
- memcpy(bss_conf->ssid, assoc_data->ssid, assoc_data->ssid_len);
- bss_conf->ssid_len = assoc_data->ssid_len;
+ memcpy(vif_cfg->ssid, assoc_data->ssid, assoc_data->ssid_len);
+ vif_cfg->ssid_len = assoc_data->ssid_len;
rcu_read_unlock();
+ if (req->ap_mld_addr) {
+ for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) {
+ if (!req->links[i].bss)
+ continue;
+ link = sdata_dereference(sdata->link[i], sdata);
+ if (link)
+ ether_addr_copy(assoc_data->link[i].addr,
+ link->conf->addr);
+ else
+ eth_random_addr(assoc_data->link[i].addr);
+ }
+ } else {
+ memcpy(assoc_data->link[0].addr, sdata->vif.addr, ETH_ALEN);
+ }
+
+ assoc_data->s1g = cbss->channel->band == NL80211_BAND_S1GHZ;
+
+ memcpy(assoc_data->ap_addr,
+ req->ap_mld_addr ?: req->bss->bssid,
+ ETH_ALEN);
+
if (ifmgd->associated) {
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
sdata_info(sdata,
"disconnect from AP %pM for new assoc to %pM\n",
- ifmgd->associated->bssid, req->bss->bssid);
+ sdata->vif.cfg.ap_addr, assoc_data->ap_addr);
ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
WLAN_REASON_UNSPECIFIED,
false, frame_buf);
@@ -5619,14 +6981,14 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
bool match;
/* keep sta info, bssid if matching */
- match = ether_addr_equal(ifmgd->bssid, req->bss->bssid);
+ match = ether_addr_equal(ifmgd->auth_data->ap_addr,
+ assoc_data->ap_addr);
ieee80211_destroy_auth_data(sdata, match);
}
/* prepare assoc data */
- ifmgd->beacon_crc_valid = false;
-
+ bss = (void *)cbss->priv;
assoc_data->wmm = bss->wmm_used &&
(local->hw.queues >= IEEE80211_NUM_ACS);
@@ -5641,25 +7003,47 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
if (req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP40 ||
req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_TKIP ||
req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP104) {
- ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
- ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
- ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
+ conn_flags |= IEEE80211_CONN_DISABLE_HT;
+ conn_flags |= IEEE80211_CONN_DISABLE_VHT;
+ conn_flags |= IEEE80211_CONN_DISABLE_HE;
+ conn_flags |= IEEE80211_CONN_DISABLE_EHT;
netdev_info(sdata->dev,
"disabling HT/VHT/HE due to WEP/TKIP use\n");
}
}
- sband = local->hw.wiphy->bands[req->bss->channel->band];
-
- /* also disable HT/VHT/HE if the AP doesn't use WMM */
+ /* also disable HT/VHT/HE/EHT if the AP doesn't use WMM */
if (!bss->wmm_used) {
- ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
- ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
- ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
+ conn_flags |= IEEE80211_CONN_DISABLE_HT;
+ conn_flags |= IEEE80211_CONN_DISABLE_VHT;
+ conn_flags |= IEEE80211_CONN_DISABLE_HE;
+ conn_flags |= IEEE80211_CONN_DISABLE_EHT;
netdev_info(sdata->dev,
"disabling HT/VHT/HE as WMM/QoS is not supported by the AP\n");
}
+ if (req->flags & ASSOC_REQ_DISABLE_HT) {
+ mlme_dbg(sdata, "HT disabled by flag, disabling HT/VHT/HE\n");
+ conn_flags |= IEEE80211_CONN_DISABLE_HT;
+ conn_flags |= IEEE80211_CONN_DISABLE_VHT;
+ conn_flags |= IEEE80211_CONN_DISABLE_HE;
+ conn_flags |= IEEE80211_CONN_DISABLE_EHT;
+ }
+
+ if (req->flags & ASSOC_REQ_DISABLE_VHT) {
+ mlme_dbg(sdata, "VHT disabled by flag, disabling VHT\n");
+ conn_flags |= IEEE80211_CONN_DISABLE_VHT;
+ }
+
+ if (req->flags & ASSOC_REQ_DISABLE_HE) {
+ mlme_dbg(sdata, "HE disabled by flag, disabling HE/EHT\n");
+ conn_flags |= IEEE80211_CONN_DISABLE_HE;
+ conn_flags |= IEEE80211_CONN_DISABLE_EHT;
+ }
+
+ if (req->flags & ASSOC_REQ_DISABLE_EHT)
+ conn_flags |= IEEE80211_CONN_DISABLE_EHT;
+
memcpy(&ifmgd->ht_capa, &req->ht_capa, sizeof(ifmgd->ht_capa));
memcpy(&ifmgd->ht_capa_mask, &req->ht_capa_mask,
sizeof(ifmgd->ht_capa_mask));
@@ -5675,6 +7059,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
if (req->ie && req->ie_len) {
memcpy(assoc_data->ie, req->ie, req->ie_len);
assoc_data->ie_len = req->ie_len;
+ assoc_data->ie_pos = assoc_data->ie + assoc_data->ie_len;
+ } else {
+ assoc_data->ie_pos = assoc_data->ie;
}
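assoc_data is now allocated once with room for the request IEs plus every per-link element blob (see the size computation at the top of ieee80211_mgd_assoc()), and ie_pos acts as an append cursor into that trailing buffer. A compact sketch of the same pattern, names hypothetical:

	#include <string.h>

	struct blob_buf {
		unsigned char *pos;	/* append cursor, starts at data */
		unsigned char data[];	/* sized for the sum of all parts */
	};

	/* Append one part and return where it landed, as ie_pos is used above. */
	static unsigned char *blob_append(struct blob_buf *b,
					  const unsigned char *src, size_t len)
	{
		unsigned char *at = b->pos;

		memcpy(at, src, len);
		b->pos += len;
		return at;
	}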
if (req->fils_kek) {
@@ -5692,28 +7079,39 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
memcpy(assoc_data->fils_nonces, req->fils_nonces,
2 * FILS_NONCE_LEN);
- assoc_data->bss = req->bss;
- assoc_data->capability = req->bss->capability;
- assoc_data->supp_rates = bss->supp_rates;
- assoc_data->supp_rates_len = bss->supp_rates_len;
+ /* default timeout */
+ assoc_data->timeout = jiffies;
+ assoc_data->timeout_started = true;
- rcu_read_lock();
- ht_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_HT_OPERATION);
- if (ht_elem && ht_elem->datalen >= sizeof(struct ieee80211_ht_operation))
- assoc_data->ap_ht_param =
- ((struct ieee80211_ht_operation *)(ht_elem->data))->ht_param;
- else if (!is_6ghz)
- ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
- vht_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_VHT_CAPABILITY);
- if (vht_elem && vht_elem->datalen >= sizeof(struct ieee80211_vht_cap)) {
- memcpy(&assoc_data->ap_vht_cap, vht_elem->data,
- sizeof(struct ieee80211_vht_cap));
- } else if (is_5ghz) {
- sdata_info(sdata, "VHT capa missing/short, disabling VHT/HE\n");
- ifmgd->flags |= IEEE80211_STA_DISABLE_VHT |
- IEEE80211_STA_DISABLE_HE;
+ assoc_data->assoc_link_id = assoc_link_id;
+
+ if (req->ap_mld_addr) {
+ for (i = 0; i < ARRAY_SIZE(assoc_data->link); i++) {
+ assoc_data->link[i].conn_flags = conn_flags;
+ assoc_data->link[i].bss = req->links[i].bss;
+ }
+
+ /* if there was no authentication, set up the link */
+ err = ieee80211_vif_set_links(sdata, BIT(assoc_link_id));
+ if (err)
+ goto err_clear;
+ } else {
+ assoc_data->link[0].conn_flags = conn_flags;
+ assoc_data->link[0].bss = cbss;
}
- rcu_read_unlock();
+
+ link = sdata_dereference(sdata->link[assoc_link_id], sdata);
+ if (WARN_ON(!link)) {
+ err = -EINVAL;
+ goto err_clear;
+ }
+
+ /* keep old conn_flags from ieee80211_prep_channel() from auth */
+ conn_flags |= link->u.mgd.conn_flags;
+ conn_flags |= ieee80211_setup_assoc_link(sdata, assoc_data, req,
+ conn_flags, assoc_link_id);
+ override = link->u.mgd.conn_flags != conn_flags;
+ link->u.mgd.conn_flags |= conn_flags;
if (WARN((sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD) &&
ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK),
@@ -5730,7 +7128,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
}
if (req->prev_bssid)
- memcpy(assoc_data->prev_bssid, req->prev_bssid, ETH_ALEN);
+ memcpy(assoc_data->prev_ap_addr, req->prev_bssid, ETH_ALEN);
if (req->use_mfp) {
ifmgd->mfp = IEEE80211_MFP_REQUIRED;
@@ -5755,138 +7153,60 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
sdata->control_port_over_nl80211 =
req->crypto.control_port_over_nl80211;
sdata->control_port_no_preauth = req->crypto.control_port_no_preauth;
- sdata->encrypt_headroom = ieee80211_cs_headroom(local, &req->crypto,
- sdata->vif.type);
/* kick off associate process */
-
ifmgd->assoc_data = assoc_data;
- ifmgd->dtim_period = 0;
- ifmgd->have_beacon = false;
- /* override HT/VHT configuration only if the AP and we support it */
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
- struct ieee80211_sta_ht_cap sta_ht_cap;
-
- if (req->flags & ASSOC_REQ_DISABLE_HT)
- override = true;
-
- memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap));
- ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
-
- /* check for 40 MHz disable override */
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ) &&
- sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 &&
- !(sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40))
- override = true;
-
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
- req->flags & ASSOC_REQ_DISABLE_VHT)
- override = true;
- }
-
- if (req->flags & ASSOC_REQ_DISABLE_HT) {
- mlme_dbg(sdata, "HT disabled by flag, disabling HT/VHT/HE\n");
- ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
- ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
- ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
- }
-
- if (req->flags & ASSOC_REQ_DISABLE_VHT) {
- mlme_dbg(sdata, "VHT disabled by flag, disabling VHT\n");
- ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+ for (i = 0; i < ARRAY_SIZE(assoc_data->link); i++) {
+ if (!assoc_data->link[i].bss)
+ continue;
+ if (i == assoc_data->assoc_link_id)
+ continue;
+ /* only calculate the flags, hence link == NULL */
+ err = ieee80211_prep_channel(sdata, NULL, assoc_data->link[i].bss,
+ &assoc_data->link[i].conn_flags);
+ if (err)
+ goto err_clear;
}
- if (req->flags & ASSOC_REQ_DISABLE_HE) {
- mlme_dbg(sdata, "HE disabled by flag, disabling VHT\n");
- ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
- }
+ /* needed for transmitting the assoc frames properly */
+ memcpy(sdata->vif.cfg.ap_addr, assoc_data->ap_addr, ETH_ALEN);
- err = ieee80211_prep_connection(sdata, req->bss, true, override);
+ err = ieee80211_prep_connection(sdata, cbss, req->link_id,
+ req->ap_mld_addr, true, override);
if (err)
goto err_clear;
- if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) {
- if (ifmgd->powersave)
- sdata->smps_mode = IEEE80211_SMPS_DYNAMIC;
- else
- sdata->smps_mode = IEEE80211_SMPS_OFF;
- } else {
- sdata->smps_mode = ifmgd->req_smps;
- }
-
- rcu_read_lock();
- beacon_ies = rcu_dereference(req->bss->beacon_ies);
-
- if (ieee80211_hw_check(&sdata->local->hw, NEED_DTIM_BEFORE_ASSOC) &&
- !beacon_ies) {
- /*
- * Wait up to one beacon interval ...
- * should this be more if we miss one?
- */
- sdata_info(sdata, "waiting for beacon from %pM\n",
- ifmgd->bssid);
- assoc_data->timeout = TU_TO_EXP_TIME(req->bss->beacon_interval);
- assoc_data->timeout_started = true;
- assoc_data->need_beacon = true;
- } else if (beacon_ies) {
- const struct element *elem;
- u8 dtim_count = 0;
+ assoc_data->link[assoc_data->assoc_link_id].conn_flags =
+ link->u.mgd.conn_flags;
- ieee80211_get_dtim(beacon_ies, &dtim_count,
- &ifmgd->dtim_period);
+ if (ieee80211_hw_check(&sdata->local->hw, NEED_DTIM_BEFORE_ASSOC)) {
+ const struct cfg80211_bss_ies *beacon_ies;
- ifmgd->have_beacon = true;
- assoc_data->timeout = jiffies;
- assoc_data->timeout_started = true;
+ rcu_read_lock();
+ beacon_ies = rcu_dereference(req->bss->beacon_ies);
- if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
- sdata->vif.bss_conf.sync_tsf = beacon_ies->tsf;
- sdata->vif.bss_conf.sync_device_ts =
- bss->device_ts_beacon;
- sdata->vif.bss_conf.sync_dtim_count = dtim_count;
+		if (!beacon_ies) {
+ /*
+ * Wait up to one beacon interval ...
+ * should this be more if we miss one?
+ */
+ sdata_info(sdata, "waiting for beacon from %pM\n",
+ link->u.mgd.bssid);
+ assoc_data->timeout = TU_TO_EXP_TIME(req->bss->beacon_interval);
+ assoc_data->timeout_started = true;
+ assoc_data->need_beacon = true;
}
-
- elem = cfg80211_find_ext_elem(WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION,
- beacon_ies->data, beacon_ies->len);
- if (elem && elem->datalen >= 3)
- sdata->vif.bss_conf.profile_periodicity = elem->data[2];
- else
- sdata->vif.bss_conf.profile_periodicity = 0;
-
- elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY,
- beacon_ies->data, beacon_ies->len);
- if (elem && elem->datalen >= 11 &&
- (elem->data[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
- sdata->vif.bss_conf.ema_ap = true;
- else
- sdata->vif.bss_conf.ema_ap = false;
- } else {
- assoc_data->timeout = jiffies;
- assoc_data->timeout_started = true;
+ rcu_read_unlock();
}
- rcu_read_unlock();
run_again(sdata, assoc_data->timeout);
- if (bss->corrupt_data) {
- char *corrupt_type = "data";
- if (bss->corrupt_data & IEEE80211_BSS_CORRUPT_BEACON) {
- if (bss->corrupt_data &
- IEEE80211_BSS_CORRUPT_PROBE_RESP)
- corrupt_type = "beacon and probe response";
- else
- corrupt_type = "beacon";
- } else if (bss->corrupt_data & IEEE80211_BSS_CORRUPT_PROBE_RESP)
- corrupt_type = "probe response";
- sdata_info(sdata, "associating with AP with corrupt %s\n",
- corrupt_type);
- }
-
return 0;
err_clear:
- eth_zero_addr(ifmgd->bssid);
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID);
+ eth_zero_addr(sdata->deflink.u.mgd.bssid);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ BSS_CHANGED_BSSID);
ifmgd->assoc_data = NULL;
err_free:
kfree(assoc_data);
@@ -5904,7 +7224,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
};
if (ifmgd->auth_data &&
- ether_addr_equal(ifmgd->auth_data->bss->bssid, req->bssid)) {
+ ether_addr_equal(ifmgd->auth_data->ap_addr, req->bssid)) {
sdata_info(sdata,
"aborting authentication with %pM by local choice (Reason: %u=%s)\n",
req->bssid, req->reason_code,
@@ -5924,7 +7244,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
}
if (ifmgd->assoc_data &&
- ether_addr_equal(ifmgd->assoc_data->bss->bssid, req->bssid)) {
+ ether_addr_equal(ifmgd->assoc_data->ap_addr, req->bssid)) {
sdata_info(sdata,
"aborting association with %pM by local choice (Reason: %u=%s)\n",
req->bssid, req->reason_code,
@@ -5935,7 +7255,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
IEEE80211_STYPE_DEAUTH,
req->reason_code, tx,
frame_buf);
- ieee80211_destroy_assoc_data(sdata, false, true);
+ ieee80211_destroy_assoc_data(sdata, ASSOC_ABANDON);
ieee80211_report_disconnect(sdata, frame_buf,
sizeof(frame_buf), true,
req->reason_code, false);
@@ -5943,7 +7263,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
}
if (ifmgd->associated &&
- ether_addr_equal(ifmgd->associated->bssid, req->bssid)) {
+ ether_addr_equal(sdata->vif.cfg.ap_addr, req->bssid)) {
sdata_info(sdata,
"deauthenticating from %pM by local choice (Reason: %u=%s)\n",
req->bssid, req->reason_code,
@@ -5964,24 +7284,17 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
struct cfg80211_disassoc_request *req)
{
- struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- u8 bssid[ETH_ALEN];
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
- /*
- * cfg80211 should catch this ... but it's racy since
- * we can receive a disassoc frame, process it, hand it
- * to cfg80211 while that's in a locked section already
- * trying to tell us that the user wants to disconnect.
- */
- if (ifmgd->associated != req->bss)
- return -ENOLINK;
+ if (!sdata->u.mgd.associated ||
+ memcmp(sdata->vif.cfg.ap_addr, req->ap_addr, ETH_ALEN))
+ return -ENOTCONN;
sdata_info(sdata,
"disassociating from %pM by local choice (Reason: %u=%s)\n",
- req->bss->bssid, req->reason_code, ieee80211_get_reason_code_string(req->reason_code));
+ req->ap_addr, req->reason_code,
+ ieee80211_get_reason_code_string(req->reason_code));
- memcpy(bssid, req->bss->bssid, ETH_ALEN);
ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DISASSOC,
req->reason_code, !req->local_state_change,
frame_buf);
@@ -5992,6 +7305,12 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
return 0;
}
+void ieee80211_mgd_stop_link(struct ieee80211_link_data *link)
+{
+ cancel_work_sync(&link->u.mgd.request_smps_work);
+ cancel_work_sync(&link->u.mgd.chswitch_work);
+}
+
void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -6003,17 +7322,12 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata)
*/
cancel_work_sync(&ifmgd->monitor_work);
cancel_work_sync(&ifmgd->beacon_connection_loss_work);
- cancel_work_sync(&ifmgd->request_smps_work);
cancel_work_sync(&ifmgd->csa_connection_drop_work);
- cancel_work_sync(&ifmgd->chswitch_work);
cancel_delayed_work_sync(&ifmgd->tdls_peer_del_work);
sdata_lock(sdata);
- if (ifmgd->assoc_data) {
- struct cfg80211_bss *bss = ifmgd->assoc_data->bss;
- ieee80211_destroy_assoc_data(sdata, false, false);
- cfg80211_assoc_timeout(sdata->dev, bss);
- }
+ if (ifmgd->assoc_data)
+ ieee80211_destroy_assoc_data(sdata, ASSOC_TIMEOUT);
if (ifmgd->auth_data)
ieee80211_destroy_auth_data(sdata, false);
spin_lock_bh(&ifmgd->teardown_lock);
@@ -6052,3 +7366,43 @@ void ieee80211_cqm_beacon_loss_notify(struct ieee80211_vif *vif, gfp_t gfp)
cfg80211_cqm_beacon_loss_notify(sdata->dev, gfp);
}
EXPORT_SYMBOL(ieee80211_cqm_beacon_loss_notify);
+
+static void _ieee80211_enable_rssi_reports(struct ieee80211_sub_if_data *sdata,
+ int rssi_min_thold,
+ int rssi_max_thold)
+{
+ trace_api_enable_rssi_reports(sdata, rssi_min_thold, rssi_max_thold);
+
+ if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
+ return;
+
+ /*
+ * Scale up threshold values before storing it, as the RSSI averaging
+ * algorithm uses a scaled up value as well. Change this scaling
+ * factor if the RSSI averaging algorithm changes.
+ */
+	sdata->u.mgd.rssi_min_thold = rssi_min_thold * 16;
+	sdata->u.mgd.rssi_max_thold = rssi_max_thold * 16;
+}
+
+void ieee80211_enable_rssi_reports(struct ieee80211_vif *vif,
+ int rssi_min_thold,
+ int rssi_max_thold)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+
+ WARN_ON(rssi_min_thold == rssi_max_thold ||
+ rssi_min_thold > rssi_max_thold);
+
+ _ieee80211_enable_rssi_reports(sdata, rssi_min_thold,
+ rssi_max_thold);
+}
+EXPORT_SYMBOL(ieee80211_enable_rssi_reports);
+
+void ieee80211_disable_rssi_reports(struct ieee80211_vif *vif)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+
+ _ieee80211_enable_rssi_reports(sdata, 0, 0);
+}
+EXPORT_SYMBOL(ieee80211_disable_rssi_reports);
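These two exports form the driver-facing API for RSSI range reporting on station interfaces; thresholds are dBm values, stored pre-scaled by 16 to match the averaging code as the comment above notes. A usage sketch from a hypothetical driver:

	/* Hypothetical driver snippet: report when the average RSSI leaves
	 * the [-80, -60] dBm window; disable tracking on teardown.
	 */
	static void mydrv_track_rssi(struct ieee80211_vif *vif, bool enable)
	{
		if (enable)
			ieee80211_enable_rssi_reports(vif, -80, -60);
		else
			ieee80211_disable_rssi_reports(vif);
	}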
diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c
index 7c1a735b9eee..a57dcbe99a0d 100644
--- a/net/mac80211/ocb.c
+++ b/net/mac80211/ocb.c
@@ -4,6 +4,7 @@
*
* Copyright: (c) 2014 Czech Technical University in Prague
* (c) 2014 Volkswagen Group Research
+ * Copyright (C) 2022 Intel Corporation
* Author: Rostislav Lisovy <rostislav.lisovy@fel.cvut.cz>
* Funded by: Volkswagen Group Research
*/
@@ -59,7 +60,7 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata,
ocb_dbg(sdata, "Adding new OCB station %pM\n", addr);
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
if (WARN_ON_ONCE(!chanctx_conf)) {
rcu_read_unlock();
return;
@@ -74,7 +75,7 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata,
/* Add only mandatory rates for now */
sband = local->hw.wiphy->bands[band];
- sta->sta.supp_rates[band] =
+ sta->sta.deflink.supp_rates[band] =
ieee80211_mandatory_rates(sband, scan_width);
spin_lock(&ifocb->incomplete_lock);
@@ -180,13 +181,13 @@ int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata,
if (ifocb->joined == true)
return -EINVAL;
- sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE;
- sdata->smps_mode = IEEE80211_SMPS_OFF;
- sdata->needed_rx_chains = sdata->local->rx_chains;
+ sdata->deflink.operating_11g_mode = true;
+ sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
+ sdata->deflink.needed_rx_chains = sdata->local->rx_chains;
mutex_lock(&sdata->local->mtx);
- err = ieee80211_vif_use_channel(sdata, &setup->chandef,
- IEEE80211_CHANCTX_SHARED);
+ err = ieee80211_link_use_channel(&sdata->deflink, &setup->chandef,
+ IEEE80211_CHANCTX_SHARED);
mutex_unlock(&sdata->local->mtx);
if (err)
return err;
@@ -228,7 +229,7 @@ int ieee80211_ocb_leave(struct ieee80211_sub_if_data *sdata)
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_OCB);
mutex_lock(&sdata->local->mtx);
- ieee80211_vif_release_channel(sdata);
+ ieee80211_link_release_channel(&sdata->deflink);
mutex_unlock(&sdata->local->mtx);
skb_queue_purge(&sdata->skb_queue);
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 853c9a369d72..d78c82d6b696 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -8,7 +8,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2019 Intel Corporation
+ * Copyright (C) 2019, 2022 Intel Corporation
*/
#include <linux/export.h>
#include <net/mac80211.h>
@@ -118,8 +118,9 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local)
set_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED,
&sdata->state);
sdata->vif.bss_conf.enable_beacon = false;
- ieee80211_bss_info_change_notify(
- sdata, BSS_CHANGED_BEACON_ENABLED);
+ ieee80211_link_info_change_notify(
+ sdata, &sdata->deflink,
+ BSS_CHANGED_BEACON_ENABLED);
}
if (sdata->vif.type == NL80211_IFTYPE_STATION &&
@@ -155,8 +156,9 @@ void ieee80211_offchannel_return(struct ieee80211_local *local)
if (test_and_clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED,
&sdata->state)) {
sdata->vif.bss_conf.enable_beacon = true;
- ieee80211_bss_info_change_notify(
- sdata, BSS_CHANGED_BEACON_ENABLED);
+ ieee80211_link_info_change_notify(
+ sdata, &sdata->deflink,
+ BSS_CHANGED_BEACON_ENABLED);
}
}
mutex_unlock(&local->iflist_mtx);
@@ -767,9 +769,11 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
struct ieee80211_local *local = sdata->local;
struct sk_buff *skb;
- struct sta_info *sta;
+ struct sta_info *sta = NULL;
const struct ieee80211_mgmt *mgmt = (void *)params->buf;
bool need_offchan = false;
+ bool mlo_sta = false;
+ int link_id = -1;
u32 flags;
int ret;
u8 *data;
@@ -785,7 +789,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
switch (sdata->vif.type) {
case NL80211_IFTYPE_ADHOC:
- if (!sdata->vif.bss_conf.ibss_joined)
+ if (!sdata->vif.cfg.ibss_joined)
need_offchan = true;
#ifdef CONFIG_MAC80211_MESH
fallthrough;
@@ -800,18 +804,32 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
case NL80211_IFTYPE_P2P_GO:
if (sdata->vif.type != NL80211_IFTYPE_ADHOC &&
!ieee80211_vif_is_mesh(&sdata->vif) &&
- !rcu_access_pointer(sdata->bss->beacon))
+ !sdata->bss->active)
need_offchan = true;
+
+ rcu_read_lock();
+ sta = sta_info_get_bss(sdata, mgmt->da);
+ mlo_sta = sta && sta->sta.mlo;
+
if (!ieee80211_is_action(mgmt->frame_control) ||
mgmt->u.action.category == WLAN_CATEGORY_PUBLIC ||
mgmt->u.action.category == WLAN_CATEGORY_SELF_PROTECTED ||
- mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT)
+ mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) {
+ rcu_read_unlock();
break;
- rcu_read_lock();
- sta = sta_info_get_bss(sdata, mgmt->da);
- rcu_read_unlock();
- if (!sta)
+ }
+
+ if (!sta) {
+ rcu_read_unlock();
+ return -ENOLINK;
+ }
+ if (params->link_id >= 0 &&
+ !(sta->sta.valid_links & BIT(params->link_id))) {
+ rcu_read_unlock();
return -ENOLINK;
+ }
+ link_id = params->link_id;
+ rcu_read_unlock();
break;
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_P2P_CLIENT:
@@ -819,8 +837,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
if (!sdata->u.mgd.associated ||
(params->offchan && params->wait &&
local->ops->remain_on_channel &&
- memcmp(sdata->u.mgd.associated->bssid,
- mgmt->bssid, ETH_ALEN)))
+ memcmp(sdata->vif.cfg.ap_addr, mgmt->bssid, ETH_ALEN)))
need_offchan = true;
sdata_unlock(sdata);
break;
@@ -841,20 +858,41 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
mutex_lock(&local->mtx);
/* Check if the operating channel is the requested channel */
- if (!need_offchan) {
- struct ieee80211_chanctx_conf *chanctx_conf;
+ if (!params->chan && mlo_sta) {
+ need_offchan = false;
+ } else if (!need_offchan) {
+ struct ieee80211_chanctx_conf *chanctx_conf = NULL;
+ int i;
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ /* Check all the links first */
+ for (i = 0; i < ARRAY_SIZE(sdata->vif.link_conf); i++) {
+ struct ieee80211_bss_conf *conf;
+
+ conf = rcu_dereference(sdata->vif.link_conf[i]);
+ if (!conf)
+ continue;
+
+ chanctx_conf = rcu_dereference(conf->chanctx_conf);
+ if (!chanctx_conf)
+ continue;
+
+ if (mlo_sta && params->chan == chanctx_conf->def.chan &&
+ ether_addr_equal(sdata->vif.addr, mgmt->sa)) {
+ link_id = i;
+ break;
+ }
+
+ if (ether_addr_equal(conf->addr, mgmt->sa))
+ break;
+
+ chanctx_conf = NULL;
+ }
if (chanctx_conf) {
need_offchan = params->chan &&
(params->chan !=
chanctx_conf->def.chan);
- } else if (!params->chan) {
- ret = -EINVAL;
- rcu_read_unlock();
- goto out_unlock;
} else {
need_offchan = true;
}
@@ -876,7 +914,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
data = skb_put_data(skb, params->buf, params->len);
/* Update CSA counters */
- if (sdata->vif.csa_active &&
+ if (sdata->vif.bss_conf.csa_active &&
(sdata->vif.type == NL80211_IFTYPE_AP ||
sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
sdata->vif.type == NL80211_IFTYPE_ADHOC) &&
@@ -887,7 +925,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
rcu_read_lock();
if (sdata->vif.type == NL80211_IFTYPE_AP)
- beacon = rcu_dereference(sdata->u.ap.beacon);
+ beacon = rcu_dereference(sdata->deflink.u.ap.beacon);
else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
beacon = rcu_dereference(sdata->u.ibss.presp);
else if (ieee80211_vif_is_mesh(&sdata->vif))
@@ -924,7 +962,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
}
if (!need_offchan) {
- ieee80211_tx_skb(sdata, skb);
+ ieee80211_tx_skb_tid(sdata, skb, 7, link_id);
ret = 0;
goto out_unlock;
}
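The management-TX rework above now scans every link's bss_conf for a channel context rather than reading the single per-vif pointer. A reduced sketch of that lookup (must run under rcu_read_lock(); helper name hypothetical):

	/* Return the link id owning a chanctx on 'chan', or -1 if the frame
	 * needs to go off-channel; mirrors the loop in ieee80211_mgmt_tx().
	 */
	static int find_link_for_chan(struct ieee80211_vif *vif,
				      struct ieee80211_channel *chan)
	{
		int i;

		for (i = 0; i < ARRAY_SIZE(vif->link_conf); i++) {
			struct ieee80211_bss_conf *conf;
			struct ieee80211_chanctx_conf *ctx;

			conf = rcu_dereference(vif->link_conf[i]);
			if (!conf)
				continue;
			ctx = rcu_dereference(conf->chanctx_conf);
			if (ctx && ctx->def.chan == chan)
				return i;
		}
		return -1;
	}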
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 8c6416129d5b..d5ea5f5bcf3a 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -4,6 +4,7 @@
* Copyright 2005-2006, Devicescape Software, Inc.
* Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
* Copyright 2017 Intel Deutschland GmbH
+ * Copyright (C) 2022 Intel Corporation
*/
#include <linux/kernel.h>
@@ -36,14 +37,14 @@ void rate_control_rate_init(struct sta_info *sta)
struct ieee80211_supported_band *sband;
struct ieee80211_chanctx_conf *chanctx_conf;
- ieee80211_sta_set_rx_nss(sta);
+ ieee80211_sta_set_rx_nss(&sta->deflink);
if (!ref)
return;
rcu_read_lock();
- chanctx_conf = rcu_dereference(sta->sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(sta->sdata->vif.bss_conf.chanctx_conf);
if (WARN_ON(!chanctx_conf)) {
rcu_read_unlock();
return;
@@ -67,16 +68,18 @@ void rate_control_rate_init(struct sta_info *sta)
}
void rate_control_tx_status(struct ieee80211_local *local,
- struct ieee80211_supported_band *sband,
struct ieee80211_tx_status *st)
{
struct rate_control_ref *ref = local->rate_ctrl;
struct sta_info *sta = container_of(st->sta, struct sta_info, sta);
void *priv_sta = sta->rate_ctrl_priv;
+ struct ieee80211_supported_band *sband;
if (!ref || !test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
return;
+ sband = local->hw.wiphy->bands[st->info->band];
+
spin_lock_bh(&sta->rate_ctrl_lock);
if (ref->ops->tx_status_ext)
ref->ops->tx_status_ext(ref->priv, sband, priv_sta, st);
@@ -89,18 +92,21 @@ void rate_control_tx_status(struct ieee80211_local *local,
}
void rate_control_rate_update(struct ieee80211_local *local,
- struct ieee80211_supported_band *sband,
- struct sta_info *sta, u32 changed)
+ struct ieee80211_supported_band *sband,
+ struct sta_info *sta, unsigned int link_id,
+ u32 changed)
{
struct rate_control_ref *ref = local->rate_ctrl;
struct ieee80211_sta *ista = &sta->sta;
void *priv_sta = sta->rate_ctrl_priv;
struct ieee80211_chanctx_conf *chanctx_conf;
+ WARN_ON(link_id != 0);
+
if (ref && ref->ops->rate_update) {
rcu_read_lock();
- chanctx_conf = rcu_dereference(sta->sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(sta->sdata->vif.bss_conf.chanctx_conf);
if (WARN_ON(!chanctx_conf)) {
rcu_read_unlock();
return;
@@ -112,6 +118,7 @@ void rate_control_rate_update(struct ieee80211_local *local,
spin_unlock_bh(&sta->rate_ctrl_lock);
rcu_read_unlock();
}
+
drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
}
@@ -263,17 +270,18 @@ static void rate_control_free(struct ieee80211_local *local,
kfree(ctrl_ref);
}
-void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata)
+void ieee80211_check_rate_mask(struct ieee80211_link_data *link)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
- u32 user_mask, basic_rates = sdata->vif.bss_conf.basic_rates;
+ u32 user_mask, basic_rates = link->conf->basic_rates;
enum nl80211_band band;
- if (WARN_ON(!sdata->vif.bss_conf.chandef.chan))
+ if (WARN_ON(!link->conf->chandef.chan))
return;
- band = sdata->vif.bss_conf.chandef.chan->band;
+ band = link->conf->chandef.chan->band;
if (band == NL80211_BAND_S1GHZ) {
/* TODO */
return;
@@ -371,7 +379,7 @@ static void __rate_control_send_low(struct ieee80211_hw *hw,
WARN_ONCE(i == sband->n_bitrates,
"no supported rates for sta %pM (0x%x, band %d) in rate_mask 0x%x with flags 0x%x\n",
sta ? sta->addr : NULL,
- sta ? sta->supp_rates[sband->band] : -1,
+ sta ? sta->deflink.supp_rates[sband->band] : -1,
sband->band,
rate_mask, rate_flags);
@@ -781,11 +789,11 @@ static bool rate_control_cap_mask(struct ieee80211_sub_if_data *sdata,
u16 sta_vht_mask[NL80211_VHT_NSS_MAX];
/* Filter out rates that the STA does not support */
- *mask &= sta->supp_rates[sband->band];
+ *mask &= sta->deflink.supp_rates[sband->band];
for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++)
- mcs_mask[i] &= sta->ht_cap.mcs.rx_mask[i];
+ mcs_mask[i] &= sta->deflink.ht_cap.mcs.rx_mask[i];
- sta_vht_cap = sta->vht_cap.vht_mcs.rx_mcs_map;
+ sta_vht_cap = sta->deflink.vht_cap.vht_mcs.rx_mcs_map;
ieee80211_get_vht_mask_from_cap(sta_vht_cap, sta_vht_mask);
for (i = 0; i < NL80211_VHT_NSS_MAX; i++)
vht_mask[i] &= sta_vht_mask[i];
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 79b44d3db171..d6190f10fe7c 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -3,6 +3,7 @@
* Copyright 2002-2005, Instant802 Networks, Inc.
* Copyright 2005, Devicescape Software, Inc.
* Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
+ * Copyright (C) 2022 Intel Corporation
*/
#ifndef IEEE80211_RATE_H
@@ -26,13 +27,14 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
struct ieee80211_tx_rate_control *txrc);
void rate_control_tx_status(struct ieee80211_local *local,
- struct ieee80211_supported_band *sband,
struct ieee80211_tx_status *st);
void rate_control_rate_init(struct sta_info *sta);
void rate_control_rate_update(struct ieee80211_local *local,
- struct ieee80211_supported_band *sband,
- struct sta_info *sta, u32 changed);
+ struct ieee80211_supported_band *sband,
+ struct sta_info *sta,
+ unsigned int link_id,
+ u32 changed);
static inline void *rate_control_alloc_sta(struct rate_control_ref *ref,
struct sta_info *sta, gfp_t gfp)
@@ -83,7 +85,7 @@ static inline void rate_control_add_debugfs(struct ieee80211_local *local)
#endif
}
-void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata);
+void ieee80211_check_rate_mask(struct ieee80211_link_data *link);
/* Get a reference to the rate control algorithm. If `name' is NULL, get the
* first available algorithm. */
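rate_control_rate_update() now carries a link_id, though the WARN_ON(link_id != 0) in rate.c shows that only the default link is handled until rate control grows per-link state. A sketch of the updated call-site shape:

	/* Notify rate control after a bandwidth change on the default link;
	 * 'changed' uses enum ieee80211_rate_control_changed flags.
	 */
	static void notify_bw_change(struct ieee80211_local *local,
				     struct ieee80211_supported_band *sband,
				     struct sta_info *sta)
	{
		rate_control_rate_update(local, sband, sta, /* link_id */ 0,
					 IEEE80211_RC_BW_CHANGED);
	}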
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 9c3b7fc377c1..3d91b98db099 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2010-2013 Felix Fietkau <nbd@openwrt.org>
- * Copyright (C) 2019-2020 Intel Corporation
+ * Copyright (C) 2019-2022 Intel Corporation
*/
#include <linux/netdevice.h>
#include <linux/types.h>
@@ -10,6 +10,7 @@
#include <linux/random.h>
#include <linux/moduleparam.h>
#include <linux/ieee80211.h>
+#include <linux/minmax.h>
#include <net/mac80211.h>
#include "rate.h"
#include "sta_info.h"
@@ -333,6 +334,17 @@ minstrel_ht_get_group_idx(struct ieee80211_tx_rate *rate)
!!(rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH));
}
+/*
+ * Look up an MCS group index based on new cfg80211 rate_info.
+ */
+static int
+minstrel_ht_ri_get_group_idx(struct rate_info *rate)
+{
+ return GROUP_IDX((rate->mcs / 8) + 1,
+ !!(rate->flags & RATE_INFO_FLAGS_SHORT_GI),
+			 (rate->bw == RATE_INFO_BW_40));
+}
+
static int
minstrel_vht_get_group_idx(struct ieee80211_tx_rate *rate)
{
@@ -342,6 +354,18 @@ minstrel_vht_get_group_idx(struct ieee80211_tx_rate *rate)
2*!!(rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH));
}
+/*
+ * Look up an MCS group index based on new cfg80211 rate_info.
+ */
+static int
+minstrel_vht_ri_get_group_idx(struct rate_info *rate)
+{
+ return VHT_GROUP_IDX(rate->nss,
+ !!(rate->flags & RATE_INFO_FLAGS_SHORT_GI),
+			     (rate->bw == RATE_INFO_BW_40) +
+			     2 * (rate->bw == RATE_INFO_BW_80));
+}
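A short worked example of the two lookups above, assuming the GROUP_IDX()/VHT_GROUP_IDX() macros from this file: an HT rate_info with mcs = 12, short GI, 40 MHz maps to GROUP_IDX(12 / 8 + 1, 1, 1) = GROUP_IDX(2, 1, 1) with per-group rate index 12 % 8 = 4; a VHT rate_info with nss = 2, mcs = 7, 80 MHz maps to VHT_GROUP_IDX(2, 0, 2) with index 7.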
+
static struct minstrel_rate_stats *
minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
struct ieee80211_tx_rate *rate)
@@ -362,6 +386,9 @@ minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
group = MINSTREL_CCK_GROUP;
for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) {
+ if (!(mi->supported[group] & BIT(idx)))
+ continue;
+
if (rate->idx != mp->cck_rates[idx])
continue;
@@ -382,6 +409,50 @@ out:
return &mi->groups[group].rates[idx];
}
+/*
+ * Get the minstrel rate statistics for specified STA and rate info.
+ */
+static struct minstrel_rate_stats *
+minstrel_ht_ri_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
+ struct ieee80211_rate_status *rate_status)
+{
+ int group, idx;
+ struct rate_info *rate = &rate_status->rate_idx;
+
+ if (rate->flags & RATE_INFO_FLAGS_MCS) {
+ group = minstrel_ht_ri_get_group_idx(rate);
+ idx = rate->mcs % 8;
+ goto out;
+ }
+
+ if (rate->flags & RATE_INFO_FLAGS_VHT_MCS) {
+ group = minstrel_vht_ri_get_group_idx(rate);
+ idx = rate->mcs;
+ goto out;
+ }
+
+ group = MINSTREL_CCK_GROUP;
+ for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) {
+		if (rate->legacy != minstrel_cck_bitrates[mp->cck_rates[idx]])
+ continue;
+
+ /* short preamble */
+ if ((mi->supported[group] & BIT(idx + 4)) &&
+ mi->use_short_preamble)
+ idx += 4;
+ goto out;
+ }
+
+ group = MINSTREL_OFDM_GROUP;
+ for (idx = 0; idx < ARRAY_SIZE(mp->ofdm_rates[0]); idx++)
+		if (rate->legacy == minstrel_ofdm_bitrates[mp->ofdm_rates[mi->band][idx]])
+ goto out;
+
+ idx = 0;
+out:
+ return &mi->groups[group].rates[idx];
+}
+
static inline struct minstrel_rate_stats *
minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index)
{
@@ -603,7 +674,7 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi)
int tmp_max_streams, group, tmp_idx, tmp_prob;
int tmp_tp = 0;
- if (!mi->sta->ht_cap.ht_supported)
+ if (!mi->sta->deflink.ht_cap.ht_supported)
return;
group = MI_RATE_GROUP(mi->max_tp_rate[0]);
@@ -993,7 +1064,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
u16 tmp_mcs_tp_rate[MAX_THR_RATES], tmp_group_tp_rate[MAX_THR_RATES];
u16 tmp_legacy_tp_rate[MAX_THR_RATES], tmp_max_prob_rate;
u16 index;
- bool ht_supported = mi->sta->ht_cap.ht_supported;
+ bool ht_supported = mi->sta->deflink.ht_cap.ht_supported;
if (mi->ampdu_packets > 0) {
if (!ieee80211_hw_check(mp->hw, TX_STATUS_NO_AMPDU_LEN))
@@ -1149,6 +1220,40 @@ minstrel_ht_txstat_valid(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
return false;
}
+/*
+ * Check whether rate_status contains valid information.
+ */
+static bool
+minstrel_ht_ri_txstat_valid(struct minstrel_priv *mp,
+ struct minstrel_ht_sta *mi,
+ struct ieee80211_rate_status *rate_status)
+{
+ int i;
+
+ if (!rate_status)
+ return false;
+ if (!rate_status->try_count)
+ return false;
+
+ if (rate_status->rate_idx.flags & RATE_INFO_FLAGS_MCS ||
+ rate_status->rate_idx.flags & RATE_INFO_FLAGS_VHT_MCS)
+ return true;
+
+ for (i = 0; i < ARRAY_SIZE(mp->cck_rates); i++) {
+ if (rate_status->rate_idx.legacy ==
+		    minstrel_cck_bitrates[mp->cck_rates[i]])
+ return true;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(mp->ofdm_rates); i++) {
+ if (rate_status->rate_idx.legacy ==
+		    minstrel_ofdm_bitrates[mp->ofdm_rates[mi->band][i]])
+ return true;
+ }
+
+ return false;
+}
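These *_ri_* helpers let minstrel consume transmit status through struct ieee80211_rate_status (a cfg80211 rate_info plus a try count) instead of the legacy ieee80211_tx_rate array, as wired up in minstrel_ht_tx_status() below. A hedged sketch of a driver filling one such entry before calling ieee80211_tx_status_ext():

	/* Hypothetical driver-side report: one VHT MCS7/NSS2/80MHz attempt,
	 * tried twice.
	 */
	static void mydrv_attach_rate_status(struct ieee80211_tx_status *st)
	{
		static struct ieee80211_rate_status rs = {
			.rate_idx = {
				.flags = RATE_INFO_FLAGS_VHT_MCS,
				.mcs = 7,
				.nss = 2,
				.bw = RATE_INFO_BW_80,
			},
			.try_count = 2,
		};

		st->rates = &rs;
		st->n_rates = 1;
	}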
+
static void
minstrel_downgrade_rate(struct minstrel_ht_sta *mi, u16 *idx, bool primary)
{
@@ -1214,16 +1319,34 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
mi->ampdu_packets++;
mi->ampdu_len += info->status.ampdu_len;
- last = !minstrel_ht_txstat_valid(mp, mi, &ar[0]);
- for (i = 0; !last; i++) {
- last = (i == IEEE80211_TX_MAX_RATES - 1) ||
- !minstrel_ht_txstat_valid(mp, mi, &ar[i + 1]);
+ if (st->rates && st->n_rates) {
+ last = !minstrel_ht_ri_txstat_valid(mp, mi, &(st->rates[0]));
+ for (i = 0; !last; i++) {
+ last = (i == st->n_rates - 1) ||
+ !minstrel_ht_ri_txstat_valid(mp, mi,
+ &(st->rates[i + 1]));
+
+ rate = minstrel_ht_ri_get_stats(mp, mi,
+ &(st->rates[i]));
- rate = minstrel_ht_get_stats(mp, mi, &ar[i]);
- if (last)
- rate->success += info->status.ampdu_ack_len;
+ if (last)
+ rate->success += info->status.ampdu_ack_len;
- rate->attempts += ar[i].count * info->status.ampdu_len;
+ rate->attempts += st->rates[i].try_count *
+ info->status.ampdu_len;
+ }
+ } else {
+ last = !minstrel_ht_txstat_valid(mp, mi, &ar[0]);
+ for (i = 0; !last; i++) {
+ last = (i == IEEE80211_TX_MAX_RATES - 1) ||
+ !minstrel_ht_txstat_valid(mp, mi, &ar[i + 1]);
+
+ rate = minstrel_ht_get_stats(mp, mi, &ar[i]);
+ if (last)
+ rate->success += info->status.ampdu_ack_len;
+
+ rate->attempts += ar[i].count * info->status.ampdu_len;
+ }
}
if (mp->hw->max_rates > 1) {
@@ -1356,7 +1479,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
* - for fallback rates, to increase chances of getting through
*/
if (offset > 0 ||
- (mi->sta->smps_mode == IEEE80211_SMPS_DYNAMIC &&
+ (mi->sta->deflink.smps_mode == IEEE80211_SMPS_DYNAMIC &&
group->streams > 1)) {
ratetbl->rate[offset].count = ratetbl->rate[offset].count_rts;
flags |= IEEE80211_TX_RC_USE_RTS_CTS;
@@ -1416,7 +1539,7 @@ minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi)
* the limit here to avoid the complexity of having to de-aggregate
* packets in the queue.
*/
- if (!mi->sta->vht_cap.vht_supported)
+ if (!mi->sta->deflink.vht_cap.vht_supported)
return IEEE80211_MAX_MPDU_LEN_HT_BA;
/* unlimited */
@@ -1428,6 +1551,7 @@ minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
{
struct ieee80211_sta_rates *rates;
int i = 0;
+ int max_rates = min_t(int, mp->hw->max_rates, IEEE80211_TX_RATE_TABLE_SIZE);
rates = kzalloc(sizeof(*rates), GFP_ATOMIC);
if (!rates)
@@ -1436,17 +1560,18 @@ minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
/* Start with max_tp_rate[0] */
minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate[0]);
- if (mp->hw->max_rates >= 3) {
- /* At least 3 tx rates supported, use max_tp_rate[1] next */
- minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate[1]);
- }
+ /* Fill up the remaining entries, keeping one slot for max_prob_rate */
+ for (; i < (max_rates - 1); i++)
+ minstrel_ht_set_rate(mp, mi, rates, i, mi->max_tp_rate[i]);
- if (mp->hw->max_rates >= 2) {
+ if (i < max_rates)
minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_prob_rate);
- }
- mi->sta->max_rc_amsdu_len = minstrel_ht_get_max_amsdu_len(mi);
- rates->rate[i].idx = -1;
+ if (i < IEEE80211_TX_RATE_TABLE_SIZE)
+ rates->rate[i].idx = -1;
+
+ mi->sta->deflink.agg.max_rc_amsdu_len = minstrel_ht_get_max_amsdu_len(mi);
+ ieee80211_sta_recalc_aggregates(mi->sta);
rate_control_set_rates(mp->hw, mi->sta, rates);
}
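
The rewritten fill logic is easier to see in isolation. A sketch under simplified assumptions (plain int indexes; TABLE_SIZE stands in for IEEE80211_TX_RATE_TABLE_SIZE): throughput-ordered rates first, one slot reserved for the probe rate, and a -1 terminator only when space remains.

#include <stdio.h>

#define TABLE_SIZE 4

static void fill_rates(int *table, int max_rates,
		       const int *max_tp_rate, int max_prob_rate)
{
	int i = 0;

	if (max_rates > TABLE_SIZE) /* min_t() in the kernel version */
		max_rates = TABLE_SIZE;

	/* throughput rates, keeping one entry for the probe rate */
	for (; i < max_rates - 1; i++)
		table[i] = max_tp_rate[i];

	if (i < max_rates)
		table[i++] = max_prob_rate;

	if (i < TABLE_SIZE)
		table[i] = -1; /* terminator */
}

int main(void)
{
	int table[TABLE_SIZE], tp[3] = { 7, 6, 5 };

	fill_rates(table, 3, tp, 2);
	printf("%d %d %d %d\n", table[0], table[1], table[2], table[3]); /* 7 6 2 -1 */
	return 0;
}
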
@@ -1533,7 +1658,7 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
if (sband->band != NL80211_BAND_2GHZ)
return;
- if (sta->ht_cap.ht_supported &&
+ if (sta->deflink.ht_cap.ht_supported &&
!ieee80211_hw_check(mp->hw, SUPPORTS_HT_CCK_RATES))
return;
@@ -1556,7 +1681,7 @@ minstrel_ht_update_ofdm(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
const u8 *rates;
int i;
- if (sta->ht_cap.ht_supported)
+ if (sta->deflink.ht_cap.ht_supported)
return;
rates = mp->ofdm_rates[sband->band];
@@ -1576,10 +1701,11 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
{
struct minstrel_priv *mp = priv;
struct minstrel_ht_sta *mi = priv_sta;
- struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs;
- u16 ht_cap = sta->ht_cap.cap;
- struct ieee80211_sta_vht_cap *vht_cap = &sta->vht_cap;
+ struct ieee80211_mcs_info *mcs = &sta->deflink.ht_cap.mcs;
+ u16 ht_cap = sta->deflink.ht_cap.cap;
+ struct ieee80211_sta_vht_cap *vht_cap = &sta->deflink.vht_cap;
const struct ieee80211_rate *ctl_rate;
+ struct sta_info *sta_info;
bool ldpc, erp;
int use_vht;
int n_supported = 0;
@@ -1650,13 +1776,13 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
}
if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH &&
- sta->bandwidth < IEEE80211_STA_RX_BW_40)
+ sta->deflink.bandwidth < IEEE80211_STA_RX_BW_40)
continue;
nss = minstrel_mcs_groups[i].streams;
/* Mark MCS > 7 as unsupported if STA is in static SMPS mode */
- if (sta->smps_mode == IEEE80211_SMPS_STATIC && nss > 1)
+ if (sta->deflink.smps_mode == IEEE80211_SMPS_STATIC && nss > 1)
continue;
/* HT rate */
@@ -1677,7 +1803,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
continue;
if (gflags & IEEE80211_TX_RC_80_MHZ_WIDTH) {
- if (sta->bandwidth < IEEE80211_STA_RX_BW_80 ||
+ if (sta->deflink.bandwidth < IEEE80211_STA_RX_BW_80 ||
((gflags & IEEE80211_TX_RC_SHORT_GI) &&
!(vht_cap->cap & IEEE80211_VHT_CAP_SHORT_GI_80))) {
continue;
@@ -1698,6 +1824,10 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
n_supported++;
}
+ sta_info = container_of(sta, struct sta_info, sta);
+ mi->use_short_preamble = test_sta_flag(sta_info, WLAN_STA_SHORT_PREAMBLE) &&
+ sta_info->sdata->vif.bss_conf.use_short_preamble;
+
minstrel_ht_update_cck(mp, mi, sband, sta);
minstrel_ht_update_ofdm(mp, mi, sband, sta);
@@ -1906,7 +2036,7 @@ static void __init init_sample_table(void)
memset(sample_table, 0xff, sizeof(sample_table));
for (col = 0; col < SAMPLE_COLUMNS; col++) {
- prandom_bytes(rnd, sizeof(rnd));
+ get_random_bytes(rnd, sizeof(rnd));
for (i = 0; i < MCS_GROUP_RATES; i++) {
new_idx = (i + rnd[i]) % MCS_GROUP_RATES;
while (sample_table[col][new_idx] != 0xff)
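
For reference, the loop that get_random_bytes() now feeds builds each column of the sample table as a random permutation, probing forward from a random start slot. A userspace rendition with rand() standing in for the kernel's random bytes:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define COLS  2 /* stand-in for SAMPLE_COLUMNS  */
#define RATES 8 /* stand-in for MCS_GROUP_RATES */

static unsigned char sample_table[COLS][RATES];

static void init_sample_table(void)
{
	int col, i, new_idx;

	memset(sample_table, 0xff, sizeof(sample_table));
	for (col = 0; col < COLS; col++) {
		for (i = 0; i < RATES; i++) {
			/* random byte, as from get_random_bytes() */
			new_idx = (i + (rand() & 0xff)) % RATES;
			/* linear probing: find the next free slot */
			while (sample_table[col][new_idx] != 0xff)
				new_idx = (new_idx + 1) % RATES;
			sample_table[col][new_idx] = i;
		}
	}
}

int main(void)
{
	int i;

	srand(1);
	init_sample_table();
	for (i = 0; i < RATES; i++)
		printf("%d ", sample_table[0][i]); /* a permutation of 0..7 */
	printf("\n");
	return 0;
}
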
diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h
index 06e7126727ad..1766ff0c78d3 100644
--- a/net/mac80211/rc80211_minstrel_ht.h
+++ b/net/mac80211/rc80211_minstrel_ht.h
@@ -180,7 +180,7 @@ struct minstrel_ht_sta {
/* tx flags to add for frames for this sta */
u32 tx_flags;
-
+ bool use_short_preamble;
u8 band;
u8 sample_seq;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 93680af62c47..f99416d2e144 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -6,7 +6,7 @@
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*/
#include <linux/jiffies.h>
@@ -49,7 +49,7 @@ static struct sk_buff *ieee80211_clean_skb(struct sk_buff *skb,
if (present_fcs_len)
__pskb_trim(skb, skb->len - present_fcs_len);
- __pskb_pull(skb, rtap_space);
+ pskb_pull(skb, rtap_space);
hdr = (void *)skb->data;
fc = hdr->frame_control;
@@ -74,7 +74,7 @@ static struct sk_buff *ieee80211_clean_skb(struct sk_buff *skb,
memmove(skb->data + IEEE80211_HT_CTL_LEN, skb->data,
hdrlen - IEEE80211_HT_CTL_LEN);
- __pskb_pull(skb, IEEE80211_HT_CTL_LEN);
+ pskb_pull(skb, IEEE80211_HT_CTL_LEN);
return skb;
}
@@ -215,21 +215,32 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
}
static void __ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata,
+ int link_id,
struct sta_info *sta,
struct sk_buff *skb)
{
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+
+ if (link_id >= 0) {
+ status->link_valid = 1;
+ status->link_id = link_id;
+ } else {
+ status->link_valid = 0;
+ }
+
skb_queue_tail(&sdata->skb_queue, skb);
ieee80211_queue_work(&sdata->local->hw, &sdata->work);
if (sta)
- sta->rx_stats.packets++;
+ sta->deflink.rx_stats.packets++;
}
static void ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata,
+ int link_id,
struct sta_info *sta,
struct sk_buff *skb)
{
skb->protocol = 0;
- __ieee80211_queue_skb_to_iface(sdata, sta, skb);
+ __ieee80211_queue_skb_to_iface(sdata, link_id, sta, skb);
}
static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata,
@@ -272,7 +283,7 @@ static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata,
if (!skb)
return;
- ieee80211_queue_skb_to_iface(sdata, NULL, skb);
+ ieee80211_queue_skb_to_iface(sdata, -1, NULL, skb);
}
/*
@@ -1009,43 +1020,20 @@ static int ieee80211_get_mmie_keyidx(struct sk_buff *skb)
return -1;
}
-static int ieee80211_get_keyid(struct sk_buff *skb,
- const struct ieee80211_cipher_scheme *cs)
+static int ieee80211_get_keyid(struct sk_buff *skb)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
- __le16 fc;
- int hdrlen;
- int minlen;
- u8 key_idx_off;
- u8 key_idx_shift;
+ __le16 fc = hdr->frame_control;
+ int hdrlen = ieee80211_hdrlen(fc);
u8 keyid;
- fc = hdr->frame_control;
- hdrlen = ieee80211_hdrlen(fc);
-
- if (cs) {
- minlen = hdrlen + cs->hdr_len;
- key_idx_off = hdrlen + cs->key_idx_off;
- key_idx_shift = cs->key_idx_shift;
- } else {
- /* WEP, TKIP, CCMP and GCMP */
- minlen = hdrlen + IEEE80211_WEP_IV_LEN;
- key_idx_off = hdrlen + 3;
- key_idx_shift = 6;
- }
-
- if (unlikely(skb->len < minlen))
+ /* WEP, TKIP, CCMP and GCMP */
+ if (unlikely(skb->len < hdrlen + IEEE80211_WEP_IV_LEN))
return -EINVAL;
- skb_copy_bits(skb, key_idx_off, &keyid, 1);
+ skb_copy_bits(skb, hdrlen + 3, &keyid, 1);
- if (cs)
- keyid &= cs->key_idx_mask;
- keyid >>= key_idx_shift;
-
- /* cs could use more than the usual two bits for the keyid */
- if (unlikely(keyid >= NUM_DEFAULT_KEYS))
- return -EINVAL;
+ keyid >>= 6;
return keyid;
}
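
With cipher schemes gone, the key index always sits in bits 6-7 of the fourth IV byte following the 802.11 header. A minimal userspace illustration of that extraction (hypothetical helper; the example assumes a fixed 24-byte header):

#include <stdio.h>

static int get_keyid(const unsigned char *frame, int len, int hdrlen)
{
	if (len < hdrlen + 4) /* need at least the 4-byte IV head */
		return -1;

	/* WEP/TKIP/CCMP/GCMP all keep KeyID in the top two bits of byte 3 */
	return frame[hdrlen + 3] >> 6;
}

int main(void)
{
	unsigned char frame[28] = { 0 };

	frame[24 + 3] = 2 << 6; /* keyid 2 */
	printf("%d\n", get_keyid(frame, (int)sizeof(frame), 24)); /* 2 */
	return 0;
}
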
@@ -1405,8 +1393,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
goto dont_reorder;
/* not part of a BA session */
- if (ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK &&
- ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL)
+ if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_NOACK)
goto dont_reorder;
/* new, potentially un-ordered, ampdu frame - process it */
@@ -1418,7 +1405,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
/* if this mpdu is fragmented - terminate rx aggregation session */
sc = le16_to_cpu(hdr->seq_ctrl);
if (sc & IEEE80211_SCTL_FRAG) {
- ieee80211_queue_skb_to_iface(rx->sdata, NULL, skb);
+ ieee80211_queue_skb_to_iface(rx->sdata, rx->link_id, NULL, skb);
return;
}
@@ -1465,7 +1452,7 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx)
if (unlikely(ieee80211_has_retry(hdr->frame_control) &&
rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) {
I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount);
- rx->sta->rx_stats.num_duplicates++;
+ rx->link_sta->rx_stats.num_duplicates++;
return RX_DROP_UNUSABLE;
} else if (!(status->flag & RX_FLAG_AMSDU_MORE)) {
rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl;
@@ -1589,8 +1576,12 @@ static void sta_ps_start(struct sta_info *sta)
for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) {
struct ieee80211_txq *txq = sta->sta.txq[tid];
+ struct txq_info *txqi = to_txq_info(txq);
- ieee80211_unschedule_txq(&local->hw, txq, false);
+ spin_lock(&local->active_txq_lock[txq->ac]);
+ if (!list_empty(&txqi->schedule_order))
+ list_del_init(&txqi->schedule_order);
+ spin_unlock(&local->active_txq_lock[txq->ac]);
if (txq_has_queue(txq))
set_bit(tid, &sta->txq_buffered_tids);
@@ -1740,12 +1731,13 @@ static ieee80211_rx_result debug_noinline
ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
{
struct sta_info *sta = rx->sta;
+ struct link_sta_info *link_sta = rx->link_sta;
struct sk_buff *skb = rx->skb;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
int i;
- if (!sta)
+ if (!sta || !link_sta)
return RX_CONTINUE;
/*
@@ -1761,46 +1753,47 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
NL80211_IFTYPE_ADHOC);
if (ether_addr_equal(bssid, rx->sdata->u.ibss.bssid) &&
test_sta_flag(sta, WLAN_STA_AUTHORIZED)) {
- sta->rx_stats.last_rx = jiffies;
+ link_sta->rx_stats.last_rx = jiffies;
if (ieee80211_is_data(hdr->frame_control) &&
!is_multicast_ether_addr(hdr->addr1))
- sta->rx_stats.last_rate =
+ link_sta->rx_stats.last_rate =
sta_stats_encode_rate(status);
}
} else if (rx->sdata->vif.type == NL80211_IFTYPE_OCB) {
- sta->rx_stats.last_rx = jiffies;
+ link_sta->rx_stats.last_rx = jiffies;
} else if (!ieee80211_is_s1g_beacon(hdr->frame_control) &&
!is_multicast_ether_addr(hdr->addr1)) {
/*
* Mesh beacons will update last_rx if they are found to
* match the current local configuration when processed.
*/
- sta->rx_stats.last_rx = jiffies;
+ link_sta->rx_stats.last_rx = jiffies;
if (ieee80211_is_data(hdr->frame_control))
- sta->rx_stats.last_rate = sta_stats_encode_rate(status);
+ link_sta->rx_stats.last_rate = sta_stats_encode_rate(status);
}
- sta->rx_stats.fragments++;
+ link_sta->rx_stats.fragments++;
- u64_stats_update_begin(&rx->sta->rx_stats.syncp);
- sta->rx_stats.bytes += rx->skb->len;
- u64_stats_update_end(&rx->sta->rx_stats.syncp);
+ u64_stats_update_begin(&link_sta->rx_stats.syncp);
+ link_sta->rx_stats.bytes += rx->skb->len;
+ u64_stats_update_end(&link_sta->rx_stats.syncp);
if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
- sta->rx_stats.last_signal = status->signal;
- ewma_signal_add(&sta->rx_stats_avg.signal, -status->signal);
+ link_sta->rx_stats.last_signal = status->signal;
+ ewma_signal_add(&link_sta->rx_stats_avg.signal,
+ -status->signal);
}
if (status->chains) {
- sta->rx_stats.chains = status->chains;
+ link_sta->rx_stats.chains = status->chains;
for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) {
int signal = status->chain_signal[i];
if (!(status->chains & BIT(i)))
continue;
- sta->rx_stats.chain_signal_last[i] = signal;
- ewma_signal_add(&sta->rx_stats_avg.chain_signal[i],
+ link_sta->rx_stats.chain_signal_last[i] = signal;
+ ewma_signal_add(&link_sta->rx_stats_avg.chain_signal[i],
-signal);
}
}
@@ -1861,7 +1854,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
* Update counter and free packet here to avoid
* counting this as a dropped packed.
*/
- sta->rx_stats.packets++;
+ link_sta->rx_stats.packets++;
dev_kfree_skb(rx->skb);
return RX_QUEUED;
}
@@ -1873,7 +1866,6 @@ static struct ieee80211_key *
ieee80211_rx_get_bigtk(struct ieee80211_rx_data *rx, int idx)
{
struct ieee80211_key *key = NULL;
- struct ieee80211_sub_if_data *sdata = rx->sdata;
int idx2;
/* Make sure key gets set if either BIGTK key index is set so that
@@ -1892,14 +1884,14 @@ ieee80211_rx_get_bigtk(struct ieee80211_rx_data *rx, int idx)
idx2 = idx - 1;
}
- if (rx->sta)
- key = rcu_dereference(rx->sta->gtk[idx]);
+ if (rx->link_sta)
+ key = rcu_dereference(rx->link_sta->gtk[idx]);
if (!key)
- key = rcu_dereference(sdata->keys[idx]);
- if (!key && rx->sta)
- key = rcu_dereference(rx->sta->gtk[idx2]);
+ key = rcu_dereference(rx->link->gtk[idx]);
+ if (!key && rx->link_sta)
+ key = rcu_dereference(rx->link_sta->gtk[idx2]);
if (!key)
- key = rcu_dereference(sdata->keys[idx2]);
+ key = rcu_dereference(rx->link->gtk[idx2]);
return key;
}
@@ -1916,7 +1908,6 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
struct ieee80211_key *ptk_idx = NULL;
int mmie_keyidx = -1;
__le16 fc;
- const struct ieee80211_cipher_scheme *cs = NULL;
if (ieee80211_is_ext(hdr->frame_control))
return RX_CONTINUE;
@@ -1959,8 +1950,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
if (ieee80211_has_protected(fc) &&
!(status->flag & RX_FLAG_IV_STRIPPED)) {
- cs = rx->sta->cipher_scheme;
- keyid = ieee80211_get_keyid(rx->skb, cs);
+ keyid = ieee80211_get_keyid(rx->skb);
if (unlikely(keyid < 0))
return RX_DROP_UNUSABLE;
@@ -1988,10 +1978,11 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
if (mmie_keyidx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS ||
mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS +
- NUM_DEFAULT_BEACON_KEYS) {
- cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
- skb->data,
- skb->len);
+ NUM_DEFAULT_BEACON_KEYS) {
+ if (rx->sdata->dev)
+ cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
+ skb->data,
+ skb->len);
return RX_DROP_MONITOR; /* unexpected BIP keyidx */
}
@@ -2007,15 +1998,15 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
if (mmie_keyidx < NUM_DEFAULT_KEYS ||
mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
return RX_DROP_MONITOR; /* unexpected BIP keyidx */
- if (rx->sta) {
+ if (rx->link_sta) {
if (ieee80211_is_group_privacy_action(skb) &&
test_sta_flag(rx->sta, WLAN_STA_MFP))
return RX_DROP_MONITOR;
- rx->key = rcu_dereference(rx->sta->gtk[mmie_keyidx]);
+ rx->key = rcu_dereference(rx->link_sta->gtk[mmie_keyidx]);
}
if (!rx->key)
- rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]);
+ rx->key = rcu_dereference(rx->link->gtk[mmie_keyidx]);
} else if (!ieee80211_has_protected(fc)) {
/*
* The frame was not protected, so skip decryption. However, we
@@ -2024,25 +2015,24 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
* have been expected.
*/
struct ieee80211_key *key = NULL;
- struct ieee80211_sub_if_data *sdata = rx->sdata;
int i;
if (ieee80211_is_beacon(fc)) {
key = ieee80211_rx_get_bigtk(rx, -1);
} else if (ieee80211_is_mgmt(fc) &&
is_multicast_ether_addr(hdr->addr1)) {
- key = rcu_dereference(rx->sdata->default_mgmt_key);
+ key = rcu_dereference(rx->link->default_mgmt_key);
} else {
- if (rx->sta) {
+ if (rx->link_sta) {
for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
- key = rcu_dereference(rx->sta->gtk[i]);
+ key = rcu_dereference(rx->link_sta->gtk[i]);
if (key)
break;
}
}
if (!key) {
for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
- key = rcu_dereference(sdata->keys[i]);
+ key = rcu_dereference(rx->link->gtk[i]);
if (key)
break;
}
@@ -2065,18 +2055,21 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
(status->flag & RX_FLAG_IV_STRIPPED))
return RX_CONTINUE;
- keyidx = ieee80211_get_keyid(rx->skb, cs);
+ keyidx = ieee80211_get_keyid(rx->skb);
if (unlikely(keyidx < 0))
return RX_DROP_UNUSABLE;
/* check per-station GTK first, if multicast packet */
- if (is_multicast_ether_addr(hdr->addr1) && rx->sta)
- rx->key = rcu_dereference(rx->sta->gtk[keyidx]);
+ if (is_multicast_ether_addr(hdr->addr1) && rx->link_sta)
+ rx->key = rcu_dereference(rx->link_sta->gtk[keyidx]);
/* if not found, try default key */
if (!rx->key) {
- rx->key = rcu_dereference(rx->sdata->keys[keyidx]);
+ if (is_multicast_ether_addr(hdr->addr1))
+ rx->key = rcu_dereference(rx->link->gtk[keyidx]);
+ if (!rx->key)
+ rx->key = rcu_dereference(rx->sdata->keys[keyidx]);
/*
* RSNA-protected unicast frames should always be
@@ -2131,7 +2124,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
result = ieee80211_crypto_gcmp_decrypt(rx);
break;
default:
- result = ieee80211_crypto_hw_decrypt(rx);
+ result = RX_DROP_UNUSABLE;
}
/* the hdr variable is invalid after the decrypt handlers */
@@ -2139,7 +2132,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
/* either the frame has been decrypted or will be dropped */
status->flag |= RX_FLAG_DECRYPTED;
- if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE))
+ if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE &&
+ rx->sdata->dev))
cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
skb->data, skb->len);
@@ -2398,7 +2392,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
out:
ieee80211_led_rx(rx->local);
if (rx->sta)
- rx->sta->rx_stats.packets++;
+ rx->link_sta->rx_stats.packets++;
return RX_CONTINUE;
}
@@ -2550,6 +2544,35 @@ __ieee80211_data_to_8023(struct ieee80211_rx_data *rx, bool *port_control)
return 0;
}
+bool ieee80211_is_our_addr(struct ieee80211_sub_if_data *sdata,
+ const u8 *addr, int *out_link_id)
+{
+ unsigned int link_id;
+
+ /* non-MLO, or MLD address replaced by hardware */
+ if (ether_addr_equal(sdata->vif.addr, addr))
+ return true;
+
+ if (!sdata->vif.valid_links)
+ return false;
+
+ for (link_id = 0; link_id < ARRAY_SIZE(sdata->vif.link_conf); link_id++) {
+ struct ieee80211_bss_conf *conf;
+
+ conf = rcu_dereference(sdata->vif.link_conf[link_id]);
+
+ if (!conf)
+ continue;
+ if (ether_addr_equal(conf->addr, addr)) {
+ if (out_link_id)
+ *out_link_id = link_id;
+ return true;
+ }
+ }
+
+ return false;
+}
+
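
A standalone model of the new address matching (the iface_addrs struct is hypothetical; the kernel walks vif.link_conf under RCU instead): accept the MLD address directly, otherwise scan the valid-links bitmap for a matching per-link address and report which link it was.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define MAX_LINKS 3

struct iface_addrs {
	unsigned char mld[6];
	unsigned char link[MAX_LINKS][6];
	unsigned int valid_links; /* bitmap */
};

static bool is_our_addr(const struct iface_addrs *ifc,
			const unsigned char *addr, int *out_link_id)
{
	int link_id;

	if (!memcmp(ifc->mld, addr, 6))
		return true; /* non-MLO, or MLD address already translated */

	for (link_id = 0; link_id < MAX_LINKS; link_id++) {
		if (!(ifc->valid_links & (1u << link_id)))
			continue;
		if (!memcmp(ifc->link[link_id], addr, 6)) {
			if (out_link_id)
				*out_link_id = link_id;
			return true;
		}
	}
	return false;
}

int main(void)
{
	struct iface_addrs ifc = {
		.mld  = { 2, 0, 0, 0, 0, 1 },
		.link = { { 2, 0, 0, 0, 0, 2 }, { 2, 0, 0, 0, 0, 3 } },
		.valid_links = 0x3,
	};
	unsigned char da[6] = { 2, 0, 0, 0, 0, 3 };
	int link_id = -1;

	printf("%d link %d\n", is_our_addr(&ifc, da, &link_id), link_id); /* 1 link 1 */
	return 0;
}
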
/*
* requires that rx->skb is a frame with ethernet header
*/
@@ -2565,7 +2588,7 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc)
* all other destination addresses for them.
*/
if (unlikely(ehdr->h_proto == rx->sdata->control_port_protocol))
- return ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) ||
+ return ieee80211_is_our_addr(rx->sdata, ehdr->h_dest, NULL) ||
ether_addr_equal(ehdr->h_dest, pae_group_addr);
if (ieee80211_802_1x_port_control(rx) ||
@@ -2607,7 +2630,8 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb,
* address, so that the authenticator (e.g. hostapd) will see
* the frame, but bridge won't forward it anywhere else. Note
* that due to earlier filtering, the only other address can
- * be the PAE group address.
+ * be the PAE group address, unless the hardware allowed them
+ * through in 802.3 offloaded mode.
*/
if (unlikely(skb->protocol == sdata->control_port_protocol &&
!ether_addr_equal(ehdr->h_dest, sdata->vif.addr)))
@@ -2644,9 +2668,9 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
* for non-QoS-data frames. Here we know it's a data
* frame, so count MSDUs.
*/
- u64_stats_update_begin(&rx->sta->rx_stats.syncp);
- rx->sta->rx_stats.msdu[rx->seqno_idx]++;
- u64_stats_update_end(&rx->sta->rx_stats.syncp);
+ u64_stats_update_begin(&rx->link_sta->rx_stats.syncp);
+ rx->link_sta->rx_stats.msdu[rx->seqno_idx]++;
+ u64_stats_update_end(&rx->link_sta->rx_stats.syncp);
}
if ((sdata->vif.type == NL80211_IFTYPE_AP ||
@@ -2922,13 +2946,13 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
ether_addr_equal(sdata->vif.addr, hdr->addr3))
return RX_CONTINUE;
- ac = ieee80211_select_queue_80211(sdata, skb, hdr);
+ ac = ieee802_1d_to_ac[skb->priority];
q = sdata->vif.hw_queue[ac];
if (ieee80211_queue_stopped(&local->hw, q)) {
IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion);
return RX_DROP_MONITOR;
}
- skb_set_queue_mapping(skb, q);
+ skb_set_queue_mapping(skb, ac);
if (!--mesh_hdr->ttl) {
if (!is_multicast_ether_addr(hdr->addr1))
@@ -2944,7 +2968,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
tailroom = IEEE80211_ENCRYPT_TAILROOM;
fwd_skb = skb_copy_expand(skb, local->tx_headroom +
- sdata->encrypt_headroom,
+ IEEE80211_ENCRYPT_HEADROOM,
tailroom, GFP_ATOMIC);
if (!fwd_skb)
goto out;
@@ -3034,7 +3058,8 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
(tf->action_code == WLAN_TDLS_CHANNEL_SWITCH_REQUEST ||
tf->action_code == WLAN_TDLS_CHANNEL_SWITCH_RESPONSE)) {
rx->skb->protocol = cpu_to_be16(ETH_P_TDLS);
- __ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb);
+ __ieee80211_queue_skb_to_iface(sdata, rx->link_id,
+ rx->sta, rx->skb);
return RX_QUEUED;
}
}
@@ -3145,8 +3170,8 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
return;
}
- if (!ether_addr_equal(mgmt->sa, sdata->u.mgd.bssid) ||
- !ether_addr_equal(mgmt->bssid, sdata->u.mgd.bssid)) {
+ if (!ether_addr_equal(mgmt->sa, sdata->deflink.u.mgd.bssid) ||
+ !ether_addr_equal(mgmt->bssid, sdata->deflink.u.mgd.bssid)) {
/* Not from the current AP or not associated yet. */
return;
}
@@ -3164,7 +3189,7 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
resp = skb_put_zero(skb, 24);
memcpy(resp->da, mgmt->sa, ETH_ALEN);
memcpy(resp->sa, sdata->vif.addr, ETH_ALEN);
- memcpy(resp->bssid, sdata->u.mgd.bssid, ETH_ALEN);
+ memcpy(resp->bssid, sdata->deflink.u.mgd.bssid, ETH_ALEN);
resp->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
IEEE80211_STYPE_ACTION);
skb_put(skb, 1 + sizeof(resp->u.action.u.sa_query));
@@ -3177,6 +3202,50 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
ieee80211_tx_skb(sdata, skb);
}
+static void
+ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx)
+{
+ struct ieee80211_mgmt *mgmt = (void *)rx->skb->data;
+ const struct element *ie;
+ size_t baselen;
+
+ if (!wiphy_ext_feature_isset(rx->local->hw.wiphy,
+ NL80211_EXT_FEATURE_BSS_COLOR))
+ return;
+
+ if (ieee80211_hw_check(&rx->local->hw, DETECTS_COLOR_COLLISION))
+ return;
+
+ if (rx->sdata->vif.bss_conf.csa_active)
+ return;
+
+ baselen = mgmt->u.beacon.variable - rx->skb->data;
+ if (baselen > rx->skb->len)
+ return;
+
+ ie = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION,
+ mgmt->u.beacon.variable,
+ rx->skb->len - baselen);
+ if (ie && ie->datalen >= sizeof(struct ieee80211_he_operation) &&
+ ie->datalen >= ieee80211_he_oper_size(ie->data + 1)) {
+ struct ieee80211_bss_conf *bss_conf = &rx->sdata->vif.bss_conf;
+ const struct ieee80211_he_operation *he_oper;
+ u8 color;
+
+ he_oper = (void *)(ie->data + 1);
+ if (le32_get_bits(he_oper->he_oper_params,
+ IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED))
+ return;
+
+ color = le32_get_bits(he_oper->he_oper_params,
+ IEEE80211_HE_OPERATION_BSS_COLOR_MASK);
+ if (color == bss_conf->he_bss_color.color)
+ ieeee80211_obss_color_collision_notify(&rx->sdata->vif,
+ BIT_ULL(color),
+ GFP_ATOMIC);
+ }
+}
+
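
The color extraction above relies on le32_get_bits(); the same mask-and-shift can be shown standalone (mask values assumed from ieee80211.h; field_get() is a hypothetical helper):

#include <stdint.h>
#include <stdio.h>

#define HE_OPER_BSS_COLOR_MASK     0x3f000000u /* assumed field layout */
#define HE_OPER_BSS_COLOR_DISABLED 0x80000000u

static unsigned int field_get(uint32_t params, uint32_t mask)
{
	return (params & mask) / (mask & -mask); /* shift by mask's low bit */
}

int main(void)
{
	uint32_t he_oper_params = 0x2a000000; /* color 42, not disabled */

	if (!(he_oper_params & HE_OPER_BSS_COLOR_DISABLED))
		printf("color %u\n", field_get(he_oper_params, HE_OPER_BSS_COLOR_MASK));
	return 0;
}
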
static ieee80211_rx_result debug_noinline
ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
{
@@ -3202,6 +3271,9 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
!(rx->flags & IEEE80211_RX_BEACON_REPORTED)) {
int sig = 0;
+ /* sw bss color collision detection */
+ ieee80211_rx_check_bss_color_collision(rx);
+
if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM) &&
!(status->flag & RX_FLAG_NO_SIGNAL_VAL))
sig = status->signal;
@@ -3295,7 +3367,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
switch (mgmt->u.action.category) {
case WLAN_CATEGORY_HT:
/* reject HT action frames from stations not supporting HT */
- if (!rx->sta->sta.ht_cap.ht_supported)
+ if (!rx->link_sta->pub->ht_cap.ht_supported)
goto invalid;
if (sdata->vif.type != NL80211_IFTYPE_STATION &&
@@ -3335,16 +3407,16 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
}
/* if no change do nothing */
- if (rx->sta->sta.smps_mode == smps_mode)
+ if (rx->link_sta->pub->smps_mode == smps_mode)
goto handled;
- rx->sta->sta.smps_mode = smps_mode;
+ rx->link_sta->pub->smps_mode = smps_mode;
sta_opmode.smps_mode =
ieee80211_smps_mode_to_smps_mode(smps_mode);
sta_opmode.changed = STA_OPMODE_SMPS_MODE_CHANGED;
sband = rx->local->hw.wiphy->bands[status->band];
- rate_control_rate_update(local, sband, rx->sta,
+ rate_control_rate_update(local, sband, rx->sta, 0,
IEEE80211_RC_SMPS_CHANGED);
cfg80211_sta_opmode_change_notify(sdata->dev,
rx->sta->addr,
@@ -3359,29 +3431,29 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
struct sta_opmode_info sta_opmode = {};
/* If it doesn't support 40 MHz it can't change ... */
- if (!(rx->sta->sta.ht_cap.cap &
+ if (!(rx->link_sta->pub->ht_cap.cap &
IEEE80211_HT_CAP_SUP_WIDTH_20_40))
goto handled;
if (chanwidth == IEEE80211_HT_CHANWIDTH_20MHZ)
max_bw = IEEE80211_STA_RX_BW_20;
else
- max_bw = ieee80211_sta_cap_rx_bw(rx->sta);
+ max_bw = ieee80211_sta_cap_rx_bw(rx->link_sta);
/* set cur_max_bandwidth and recalc sta bw */
- rx->sta->cur_max_bandwidth = max_bw;
- new_bw = ieee80211_sta_cur_vht_bw(rx->sta);
+ rx->link_sta->cur_max_bandwidth = max_bw;
+ new_bw = ieee80211_sta_cur_vht_bw(rx->link_sta);
- if (rx->sta->sta.bandwidth == new_bw)
+ if (rx->link_sta->pub->bandwidth == new_bw)
goto handled;
- rx->sta->sta.bandwidth = new_bw;
+ rx->link_sta->pub->bandwidth = new_bw;
sband = rx->local->hw.wiphy->bands[status->band];
sta_opmode.bw =
- ieee80211_sta_rx_bw_to_chan_width(rx->sta);
+ ieee80211_sta_rx_bw_to_chan_width(rx->link_sta);
sta_opmode.changed = STA_OPMODE_MAX_BW_CHANGED;
- rate_control_rate_update(local, sband, rx->sta,
+ rate_control_rate_update(local, sband, rx->sta, 0,
IEEE80211_RC_BW_CHANGED);
cfg80211_sta_opmode_change_notify(sdata->dev,
rx->sta->addr,
@@ -3401,7 +3473,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
break;
if (!rx->sta)
break;
- if (!ether_addr_equal(mgmt->bssid, sdata->u.mgd.bssid))
+ if (!ether_addr_equal(mgmt->bssid, sdata->deflink.u.mgd.bssid))
break;
if (mgmt->u.action.u.ext_chan_switch.action_code !=
WLAN_PUB_ACTION_EXT_CHANSW_ANN)
@@ -3502,7 +3574,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
break;
if (sdata->vif.type == NL80211_IFTYPE_STATION)
- bssid = sdata->u.mgd.bssid;
+ bssid = sdata->deflink.u.mgd.bssid;
else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
bssid = sdata->u.ibss.bssid;
else if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
@@ -3572,12 +3644,12 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
handled:
if (rx->sta)
- rx->sta->rx_stats.packets++;
+ rx->link_sta->rx_stats.packets++;
dev_kfree_skb(rx->skb);
return RX_QUEUED;
queue:
- ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb);
+ ieee80211_queue_skb_to_iface(sdata, rx->link_id, rx->sta, rx->skb);
return RX_QUEUED;
}
@@ -3585,7 +3657,13 @@ static ieee80211_rx_result debug_noinline
ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
{
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
- int sig = 0;
+ struct cfg80211_rx_info info = {
+ .freq = ieee80211_rx_status_to_khz(status),
+ .buf = rx->skb->data,
+ .len = rx->skb->len,
+ .link_id = rx->link_id,
+ .have_link_id = rx->link_id >= 0,
+ };
/* skip known-bad action frames and return them in the next handler */
if (status->rx_flags & IEEE80211_RX_MALFORMED_ACTION_FRM)
@@ -3600,13 +3678,17 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM) &&
!(status->flag & RX_FLAG_NO_SIGNAL_VAL))
- sig = status->signal;
+ info.sig_dbm = status->signal;
+
+ if (ieee80211_is_timing_measurement(rx->skb) ||
+ ieee80211_is_ftm(rx->skb)) {
+ info.rx_tstamp = ktime_to_ns(skb_hwtstamps(rx->skb)->hwtstamp);
+ info.ack_tstamp = ktime_to_ns(status->ack_tx_hwtstamp);
+ }
- if (cfg80211_rx_mgmt_khz(&rx->sdata->wdev,
- ieee80211_rx_status_to_khz(status), sig,
- rx->skb->data, rx->skb->len, 0)) {
+ if (cfg80211_rx_mgmt_ext(&rx->sdata->wdev, &info)) {
if (rx->sta)
- rx->sta->rx_stats.packets++;
+ rx->link_sta->rx_stats.packets++;
dev_kfree_skb(rx->skb);
return RX_QUEUED;
}
@@ -3644,7 +3726,7 @@ ieee80211_rx_h_action_post_userspace(struct ieee80211_rx_data *rx)
handled:
if (rx->sta)
- rx->sta->rx_stats.packets++;
+ rx->link_sta->rx_stats.packets++;
dev_kfree_skb(rx->skb);
return RX_QUEUED;
}
@@ -3705,7 +3787,7 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx)
local->hw.offchannel_tx_hw_queue;
}
- __ieee80211_tx_skb_tid_band(rx->sdata, nskb, 7,
+ __ieee80211_tx_skb_tid_band(rx->sdata, nskb, 7, -1,
status->band);
}
dev_kfree_skb(rx->skb);
@@ -3725,7 +3807,7 @@ ieee80211_rx_h_ext(struct ieee80211_rx_data *rx)
return RX_DROP_MONITOR;
/* for now only beacons are ext, so queue them */
- ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb);
+ ieee80211_queue_skb_to_iface(sdata, rx->link_id, rx->sta, rx->skb);
return RX_QUEUED;
}
@@ -3782,7 +3864,7 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
return RX_DROP_MONITOR;
}
- ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb);
+ ieee80211_queue_skb_to_iface(sdata, rx->link_id, rx->sta, rx->skb);
return RX_QUEUED;
}
@@ -3864,7 +3946,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx,
case RX_DROP_MONITOR:
I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop);
if (rx->sta)
- rx->sta->rx_stats.dropped++;
+ rx->link_sta->rx_stats.dropped++;
fallthrough;
case RX_CONTINUE: {
struct ieee80211_rate *rate = NULL;
@@ -3883,7 +3965,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx,
case RX_DROP_UNUSABLE:
I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop);
if (rx->sta)
- rx->sta->rx_stats.dropped++;
+ rx->link_sta->rx_stats.dropped++;
dev_kfree_skb(rx->skb);
break;
case RX_QUEUED:
@@ -3921,6 +4003,9 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx,
*/
rx->skb = skb;
+ if (WARN_ON_ONCE(!rx->link))
+ goto rxh_next;
+
CALL_RXH(ieee80211_rx_h_check_more_data);
CALL_RXH(ieee80211_rx_h_uapsd_and_pspoll);
CALL_RXH(ieee80211_rx_h_sta_process);
@@ -3999,8 +4084,10 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
/* This is OK -- must be QoS data frame */
.security_idx = tid,
.seqno_idx = tid,
+ .link_id = -1,
};
struct tid_ampdu_rx *tid_agg_rx;
+ u8 link_id;
tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
if (!tid_agg_rx)
@@ -4020,6 +4107,10 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
};
drv_event_callback(rx.local, rx.sdata, &event);
}
+ /* FIXME: statistics won't be right with this */
+ link_id = sta->sta.valid_links ? ffs(sta->sta.valid_links) - 1 : 0;
+ rx.link = rcu_dereference(sta->sdata->link[link_id]);
+ rx.link_sta = rcu_dereference(sta->link[link_id]);
ieee80211_rx_handlers(&rx, &frames);
}
@@ -4035,6 +4126,7 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
/* This is OK -- must be QoS data frame */
.security_idx = tid,
.seqno_idx = tid,
+ .link_id = -1,
};
int i, diff;
@@ -4047,6 +4139,7 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
rx.sta = sta;
rx.sdata = sta->sdata;
+ rx.link = &rx.sdata->deflink;
rx.local = sta->local;
rcu_read_lock();
@@ -4105,6 +4198,12 @@ EXPORT_SYMBOL(ieee80211_mark_rx_ba_filtered_frames);
/* main receive path */
+static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr)
+{
+ return ether_addr_equal(raddr, addr) ||
+ is_broadcast_ether_addr(raddr);
+}
+
static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
{
struct ieee80211_sub_if_data *sdata = rx->sdata;
@@ -4123,7 +4222,7 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
return false;
if (multicast)
return true;
- return ether_addr_equal(sdata->vif.addr, hdr->addr1);
+ return ieee80211_is_our_addr(sdata, hdr->addr1, &rx->link_id);
case NL80211_IFTYPE_ADHOC:
if (!bssid)
return false;
@@ -4177,9 +4276,11 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_AP:
if (!bssid)
- return ether_addr_equal(sdata->vif.addr, hdr->addr1);
+ return ieee80211_is_our_addr(sdata, hdr->addr1,
+ &rx->link_id);
- if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) {
+ if (!is_broadcast_ether_addr(bssid) &&
+ !ieee80211_is_our_addr(sdata, bssid, NULL)) {
/*
* Accept public action frames even when the
* BSSID doesn't match, this is used for P2P
@@ -4187,7 +4288,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
* itself never looks at these frames.
*/
if (!multicast &&
- !ether_addr_equal(sdata->vif.addr, hdr->addr1))
+ !ieee80211_is_our_addr(sdata, hdr->addr1,
+ &rx->link_id))
return false;
if (ieee80211_is_public_action(hdr, skb->len))
return true;
@@ -4252,6 +4354,7 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
.vif_type = sdata->vif.type,
.control_port_protocol = sdata->control_port_protocol,
}, *old, *new = NULL;
+ u32 offload_flags;
bool set_offload = false;
bool assign = false;
bool offload;
@@ -4367,10 +4470,10 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
if (assign)
new = kmemdup(&fastrx, sizeof(fastrx), GFP_KERNEL);
- offload = assign &&
- (sdata->vif.offload_flags & IEEE80211_OFFLOAD_DECAP_ENABLED);
+ offload_flags = get_bss_sdata(sdata)->vif.offload_flags;
+ offload = offload_flags & IEEE80211_OFFLOAD_DECAP_ENABLED;
- if (offload)
+ if (assign && offload)
set_offload = !test_and_set_sta_flag(sta, WLAN_STA_DECAP_OFFLOAD);
else
set_offload = test_and_clear_sta_flag(sta, WLAN_STA_DECAP_OFFLOAD);
@@ -4424,6 +4527,15 @@ void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata)
mutex_unlock(&local->sta_mtx);
}
+static bool
+ieee80211_rx_is_valid_sta_link_id(struct ieee80211_sta *sta, u8 link_id)
+{
+ if (!sta->mlo)
+ return false;
+
+ return !!(sta->valid_links & BIT(link_id));
+}
+
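
The check itself is just a bitmap membership test guarded by MLO; a toy version (mock_sta is hypothetical):

#include <stdbool.h>
#include <stdio.h>

struct mock_sta { unsigned int valid_links; bool mlo; };

static bool link_id_valid(const struct mock_sta *sta, int link_id)
{
	if (!sta->mlo) /* non-MLO stations have no link ids at all */
		return false;
	return sta->valid_links & (1u << link_id);
}

int main(void)
{
	struct mock_sta sta = { .valid_links = 0x5, .mlo = true };

	printf("%d %d\n", link_id_valid(&sta, 2), link_id_valid(&sta, 1)); /* 1 0 */
	return 0;
}
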
static void ieee80211_rx_8023(struct ieee80211_rx_data *rx,
struct ieee80211_fast_rx *fast_rx,
int orig_len)
@@ -4431,19 +4543,30 @@ static void ieee80211_rx_8023(struct ieee80211_rx_data *rx,
struct ieee80211_sta_rx_stats *stats;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
struct sta_info *sta = rx->sta;
+ struct link_sta_info *link_sta;
struct sk_buff *skb = rx->skb;
void *sa = skb->data + ETH_ALEN;
void *da = skb->data;
- stats = &sta->rx_stats;
+ if (rx->link_id >= 0) {
+ link_sta = rcu_dereference(sta->link[rx->link_id]);
+ if (WARN_ON_ONCE(!link_sta)) {
+ dev_kfree_skb(rx->skb);
+ return;
+ }
+ } else {
+ link_sta = &sta->deflink;
+ }
+
+ stats = &link_sta->rx_stats;
if (fast_rx->uses_rss)
- stats = this_cpu_ptr(sta->pcpu_rx_stats);
+ stats = this_cpu_ptr(link_sta->pcpu_rx_stats);
/* statistics part of ieee80211_rx_h_sta_process() */
if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
stats->last_signal = status->signal;
if (!fast_rx->uses_rss)
- ewma_signal_add(&sta->rx_stats_avg.signal,
+ ewma_signal_add(&link_sta->rx_stats_avg.signal,
-status->signal);
}
@@ -4459,7 +4582,7 @@ static void ieee80211_rx_8023(struct ieee80211_rx_data *rx,
stats->chain_signal_last[i] = signal;
if (!fast_rx->uses_rss)
- ewma_signal_add(&sta->rx_stats_avg.chain_signal[i],
+ ewma_signal_add(&link_sta->rx_stats_avg.chain_signal[i],
-signal);
}
}
@@ -4514,12 +4637,7 @@ static void ieee80211_rx_8023(struct ieee80211_rx_data *rx,
/* deliver to local stack */
skb->protocol = eth_type_trans(skb, fast_rx->dev);
- memset(skb->cb, 0, sizeof(skb->cb));
- if (rx->list)
- list_add_tail(&skb->list, rx->list);
- else
- netif_receive_skb(skb);
-
+ ieee80211_deliver_skb_to_local_stack(skb, rx);
}
static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
@@ -4540,7 +4658,8 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
u8 da[ETH_ALEN];
u8 sa[ETH_ALEN];
} addrs __aligned(2);
- struct ieee80211_sta_rx_stats *stats = &sta->rx_stats;
+ struct link_sta_info *link_sta;
+ struct ieee80211_sta_rx_stats *stats;
/* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write
* to a common data structure; drivers can implement that per queue
@@ -4592,7 +4711,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
if (!(status->rx_flags & IEEE80211_RX_AMSDU)) {
if (!pskb_may_pull(skb, snap_offs + sizeof(*payload)))
- goto drop;
+ return false;
payload = (void *)(skb->data + snap_offs);
@@ -4629,6 +4748,8 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
/* do the header conversion - first grab the addresses */
ether_addr_copy(addrs.da, skb->data + fast_rx->da_offs);
ether_addr_copy(addrs.sa, skb->data + fast_rx->sa_offs);
+ skb_postpull_rcsum(skb, skb->data + snap_offs,
+ sizeof(rfc1042_header) + 2);
/* remove the SNAP but leave the ethertype */
skb_pull(skb, snap_offs + sizeof(rfc1042_header));
/* push the addresses in front */
@@ -4639,8 +4760,19 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
return true;
drop:
dev_kfree_skb(skb);
+
+ if (rx->link_id >= 0) {
+ link_sta = rcu_dereference(sta->link[rx->link_id]);
+ if (!link_sta)
+ return true;
+ } else {
+ link_sta = &sta->deflink;
+ }
+
if (fast_rx->uses_rss)
- stats = this_cpu_ptr(sta->pcpu_rx_stats);
+ stats = this_cpu_ptr(link_sta->pcpu_rx_stats);
+ else
+ stats = &link_sta->rx_stats;
stats->dropped++;
return true;
@@ -4657,6 +4789,9 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
{
struct ieee80211_local *local = rx->local;
struct ieee80211_sub_if_data *sdata = rx->sdata;
+ struct ieee80211_hdr *hdr = (void *)skb->data;
+ struct link_sta_info *link_sta = NULL;
+ struct ieee80211_link_data *link;
rx->skb = skb;
@@ -4678,9 +4813,40 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
if (!ieee80211_accept_frame(rx))
return false;
+ if (rx->link_id >= 0) {
+ link = rcu_dereference(rx->sdata->link[rx->link_id]);
+
+ /* we might race link removal */
+ if (!link)
+ return true;
+ rx->link = link;
+
+ if (rx->sta) {
+ rx->link_sta =
+ rcu_dereference(rx->sta->link[rx->link_id]);
+ if (!rx->link_sta)
+ return true;
+ }
+ } else {
+ if (rx->sta)
+ rx->link_sta = &rx->sta->deflink;
+
+ rx->link = &sdata->deflink;
+ }
+
+ if (unlikely(!is_multicast_ether_addr(hdr->addr1) &&
+ rx->link_id >= 0 && rx->sta && rx->sta->sta.mlo)) {
+ link_sta = rcu_dereference(rx->sta->link[rx->link_id]);
+
+ if (WARN_ON_ONCE(!link_sta))
+ return true;
+ }
+
if (!consume) {
- skb = skb_copy(skb, GFP_ATOMIC);
- if (!skb) {
+ struct skb_shared_hwtstamps *shwt;
+
+ rx->skb = skb_copy(skb, GFP_ATOMIC);
+ if (!rx->skb) {
if (net_ratelimit())
wiphy_debug(local->hw.wiphy,
"failed to copy skb for %s\n",
@@ -4688,7 +4854,28 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
return true;
}
- rx->skb = skb;
+ /* skb_copy() does not copy the hw timestamp, so copy it
+ * explicitly
+ */
+ shwt = skb_hwtstamps(rx->skb);
+ shwt->hwtstamp = skb_hwtstamps(skb)->hwtstamp;
+ }
+
+ if (unlikely(link_sta)) {
+ /* translate to MLD addresses */
+ if (ether_addr_equal(link->conf->addr, hdr->addr1))
+ ether_addr_copy(hdr->addr1, rx->sdata->vif.addr);
+ if (ether_addr_equal(link_sta->addr, hdr->addr2))
+ ether_addr_copy(hdr->addr2, rx->sta->addr);
+ /* translate A3 only if it's the BSSID */
+ if (!ieee80211_has_tods(hdr->frame_control) &&
+ !ieee80211_has_fromds(hdr->frame_control)) {
+ if (ether_addr_equal(link_sta->addr, hdr->addr3))
+ ether_addr_copy(hdr->addr3, rx->sta->addr);
+ else if (ether_addr_equal(link->conf->addr, hdr->addr3))
+ ether_addr_copy(hdr->addr3, rx->sdata->vif.addr);
+ }
+ /* not needed for A4 since it can only carry the SA */
}
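
A userspace rendition of the translation rules above (hypothetical header layout): A1/A2 are rewritten from link to MLD addresses, and A3 only when neither ToDS nor FromDS is set, i.e. when it carries the BSSID.

#include <stdio.h>
#include <string.h>

struct hdr { unsigned char a1[6], a2[6], a3[6]; int tods, fromds; };

static void translate(struct hdr *h,
		      const unsigned char link_local[6], const unsigned char mld_local[6],
		      const unsigned char link_peer[6], const unsigned char mld_peer[6])
{
	if (!memcmp(h->a1, link_local, 6))
		memcpy(h->a1, mld_local, 6);
	if (!memcmp(h->a2, link_peer, 6))
		memcpy(h->a2, mld_peer, 6);
	/* A3 carries the BSSID only when neither ToDS nor FromDS is set */
	if (!h->tods && !h->fromds) {
		if (!memcmp(h->a3, link_peer, 6))
			memcpy(h->a3, mld_peer, 6);
		else if (!memcmp(h->a3, link_local, 6))
			memcpy(h->a3, mld_local, 6);
	}
}

int main(void)
{
	unsigned char ll[6] = { 2, 0, 0, 0, 0, 1 }, ml[6] = { 2, 0, 0, 0, 0, 9 };
	unsigned char lp[6] = { 2, 0, 0, 0, 0, 2 }, mp[6] = { 2, 0, 0, 0, 0, 8 };
	struct hdr h = { { 2, 0, 0, 0, 0, 1 }, { 2, 0, 0, 0, 0, 2 },
			 { 2, 0, 0, 0, 0, 2 }, 0, 0 };

	translate(&h, ll, ml, lp, mp);
	printf("a1 ends %d, a2 ends %d, a3 ends %d\n",
	       h.a1[5], h.a2[5], h.a3[5]); /* 9 8 8 */
	return 0;
}
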
ieee80211_invoke_rx_handlers(rx);
@@ -4701,6 +4888,7 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw,
struct list_head *list)
{
struct ieee80211_local *local = hw_to_local(hw);
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct ieee80211_fast_rx *fast_rx;
struct ieee80211_rx_data rx;
@@ -4708,6 +4896,7 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw,
rx.skb = skb;
rx.local = local;
rx.list = list;
+ rx.link_id = -1;
I802_DEBUG_INC(local->dot11ReceivedFragmentCount);
@@ -4721,6 +4910,31 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw,
rx.sta = container_of(pubsta, struct sta_info, sta);
rx.sdata = rx.sta->sdata;
+ if (status->link_valid &&
+ !ieee80211_rx_is_valid_sta_link_id(pubsta, status->link_id))
+ goto drop;
+
+ /*
+ * TODO: Should the frame be dropped if the right link_id is not
+ * available? Or is it fine to proceed in the current form, since for
+ * frames in 802.3 format the link_id is only used for statistics,
+ * and updating the stats on the deflink is acceptable?
+ */
+ if (status->link_valid)
+ rx.link_id = status->link_id;
+
+ if (rx.link_id >= 0) {
+ struct ieee80211_link_data *link;
+
+ link = rcu_dereference(rx.sdata->link[rx.link_id]);
+ if (!link)
+ goto drop;
+ rx.link = link;
+ } else {
+ rx.link = &rx.sdata->deflink;
+ }
+
fast_rx = rcu_dereference(rx.sta->fast_rx);
if (!fast_rx)
goto drop;
@@ -4732,6 +4946,41 @@ drop:
dev_kfree_skb(skb);
}
+static bool ieee80211_rx_for_interface(struct ieee80211_rx_data *rx,
+ struct sk_buff *skb, bool consume)
+{
+ struct link_sta_info *link_sta;
+ struct ieee80211_hdr *hdr = (void *)skb->data;
+
+ /*
+ * Look up the link station first, since its link
+ * address might be identical to the MLD address;
+ * that way we'll have the link information if
+ * needed.
+ */
+ link_sta = link_sta_info_get_bss(rx->sdata, hdr->addr2);
+ if (link_sta) {
+ rx->sta = link_sta->sta;
+ rx->link_id = link_sta->link_id;
+ } else {
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+
+ rx->sta = sta_info_get_bss(rx->sdata, hdr->addr2);
+ if (rx->sta) {
+ if (status->link_valid &&
+ !ieee80211_rx_is_valid_sta_link_id(&rx->sta->sta,
+ status->link_id))
+ return false;
+
+ rx->link_id = status->link_valid ? status->link_id : -1;
+ } else {
+ rx->link_id = -1;
+ }
+ }
+
+ return ieee80211_prepare_and_rx_handle(rx, skb, consume);
+}
+
/*
* This is the actual Rx frames handler. as it belongs to Rx path it must
* be called with rcu_read_lock protection.
@@ -4742,6 +4991,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
struct list_head *list)
{
struct ieee80211_local *local = hw_to_local(hw);
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct ieee80211_sub_if_data *sdata;
struct ieee80211_hdr *hdr;
__le16 fc;
@@ -4755,6 +5005,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
rx.skb = skb;
rx.local = local;
rx.list = list;
+ rx.link_id = -1;
if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc))
I802_DEBUG_INC(local->dot11ReceivedFragmentCount);
@@ -4785,10 +5036,39 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
if (ieee80211_is_data(fc)) {
struct sta_info *sta, *prev_sta;
+ u8 link_id = status->link_id;
if (pubsta) {
rx.sta = container_of(pubsta, struct sta_info, sta);
rx.sdata = rx.sta->sdata;
+
+ if (status->link_valid &&
+ !ieee80211_rx_is_valid_sta_link_id(pubsta, link_id))
+ goto out;
+
+ if (status->link_valid)
+ rx.link_id = status->link_id;
+
+ /*
+ * In an MLO connection, fetch the link_id using addr2
+ * when the driver does not pass a link_id in the status.
+ * When the address translation has already been performed
+ * by the driver/hw, a valid link_id must be passed in the
+ * status.
+ */
+
+ if (!status->link_valid && pubsta->mlo) {
+ struct ieee80211_hdr *hdr = (void *)skb->data;
+ struct link_sta_info *link_sta;
+
+ link_sta = link_sta_info_get_bss(rx.sdata,
+ hdr->addr2);
+ if (!link_sta)
+ goto out;
+
+ rx.link_id = link_sta->link_id;
+ }
+
if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
return;
goto out;
@@ -4802,6 +5082,13 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
continue;
}
+ if ((status->link_valid &&
+ !ieee80211_rx_is_valid_sta_link_id(&prev_sta->sta,
+ link_id)) ||
+ (!status->link_valid && prev_sta->sta.mlo))
+ continue;
+
+ rx.link_id = status->link_valid ? link_id : -1;
rx.sta = prev_sta;
rx.sdata = prev_sta->sdata;
ieee80211_prepare_and_rx_handle(&rx, skb, false);
@@ -4810,6 +5097,13 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
}
if (prev_sta) {
+ if ((status->link_valid &&
+ !ieee80211_rx_is_valid_sta_link_id(&prev_sta->sta,
+ link_id)) ||
+ (!status->link_valid && prev_sta->sta.mlo))
+ goto out;
+
+ rx.link_id = status->link_valid ? link_id : -1;
rx.sta = prev_sta;
rx.sdata = prev_sta->sdata;
@@ -4840,18 +5134,16 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
continue;
}
- rx.sta = sta_info_get_bss(prev, hdr->addr2);
rx.sdata = prev;
- ieee80211_prepare_and_rx_handle(&rx, skb, false);
+ ieee80211_rx_for_interface(&rx, skb, false);
prev = sdata;
}
if (prev) {
- rx.sta = sta_info_get_bss(prev, hdr->addr2);
rx.sdata = prev;
- if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
+ if (ieee80211_rx_for_interface(&rx, skb, true))
return;
}
@@ -4954,6 +5246,9 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
}
}
+ if (WARN_ON_ONCE(status->link_id >= IEEE80211_LINK_UNSPECIFIED))
+ goto drop;
+
status->rx_flags = 0;
kcov_remote_start_common(skb_get_kcov_handle(skb));
diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c
index 4141bc80cdfd..c1f964e9991c 100644
--- a/net/mac80211/s1g.c
+++ b/net/mac80211/s1g.c
@@ -11,8 +11,8 @@
void ieee80211_s1g_sta_rate_init(struct sta_info *sta)
{
/* avoid indicating legacy bitrates for S1G STAs */
- sta->tx_stats.last_rate.flags |= IEEE80211_TX_RC_S1G_MCS;
- sta->rx_stats.last_rate =
+ sta->deflink.tx_stats.last_rate.flags |= IEEE80211_TX_RC_S1G_MCS;
+ sta->deflink.rx_stats.last_rate =
STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_S1G);
}
@@ -112,6 +112,9 @@ ieee80211_s1g_rx_twt_setup(struct ieee80211_sub_if_data *sdata,
goto out;
}
+ /* TWT Information not supported yet */
+ twt->control |= IEEE80211_TWT_CONTROL_RX_DISABLED;
+
drv_add_twt_setup(sdata->local, sdata, &sta->sta, twt);
out:
ieee80211_s1g_send_twt_setup(sdata, mgmt->sa, sdata->vif.addr, twt);
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 5e6b275afc9e..dc3cdee51e66 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -177,7 +177,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
rcu_read_lock();
scan_sdata = rcu_dereference(local->scan_sdata);
if (scan_sdata && scan_sdata->vif.type == NL80211_IFTYPE_STATION &&
- scan_sdata->vif.bss_conf.assoc &&
+ scan_sdata->vif.cfg.assoc &&
ieee80211_have_rx_timestamp(rx_status)) {
bss_meta.parent_tsf =
ieee80211_calculate_rx_timestamp(local, rx_status,
@@ -209,8 +209,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
if (baselen > len)
return NULL;
- elems = ieee802_11_parse_elems(elements, len - baselen, false,
- mgmt->bssid, cbss->bssid);
+ elems = ieee802_11_parse_elems(elements, len - baselen, false, cbss);
if (!elems)
return NULL;
@@ -221,16 +220,21 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
bss = (void *)cbss->priv;
ieee80211_update_bss_from_elems(local, bss, elems, rx_status, beacon);
+ kfree(elems);
list_for_each_entry(non_tx_cbss, &cbss->nontrans_list, nontrans_list) {
non_tx_bss = (void *)non_tx_cbss->priv;
+ elems = ieee802_11_parse_elems(elements, len - baselen, false,
+ non_tx_cbss);
+ if (!elems)
+ continue;
+
ieee80211_update_bss_from_elems(local, non_tx_bss, elems,
rx_status, beacon);
+ kfree(elems);
}
- kfree(elems);
-
return bss;
}
@@ -281,6 +285,16 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
if (likely(!sdata1 && !sdata2))
return;
+ if (test_and_clear_bit(SCAN_BEACON_WAIT, &local->scanning)) {
+ /*
+ * we were passive scanning because of radar/no-IR, but
+ * the beacon/proberesp rx gives us an opportunity to upgrade
+ * to active scan
+ */
+ set_bit(SCAN_BEACON_DONE, &local->scanning);
+ ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0);
+ }
+
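
The two new scanning bits form a small state machine: SCAN_BEACON_WAIT is set when a passive dwell starts on a radar/no-IR channel that still has SSIDs to probe, converted to SCAN_BEACON_DONE when a beacon or probe response arrives, and the scan work then jumps straight to probing. A sketch with plain flags instead of the kernel's atomic test_and_clear_bit() (bit values hypothetical):

#include <stdio.h>

enum { SCAN_BEACON_WAIT = 1 << 0, SCAN_BEACON_DONE = 1 << 1 };

static unsigned long scanning;

/* radar/no-IR channel with SSIDs to probe: wait for a beacon first */
static void start_passive_with_ssids(void) { scanning |= SCAN_BEACON_WAIT; }

/* beacon/probe-resp seen: remember that we may upgrade to active scan */
static void rx_beacon(void)
{
	if (scanning & SCAN_BEACON_WAIT) {
		scanning &= ~SCAN_BEACON_WAIT;
		scanning |= SCAN_BEACON_DONE;
	}
}

/* scan work: skip the decision step if the upgrade fired */
static const char *next_state(void)
{
	if (scanning & SCAN_BEACON_DONE) {
		scanning &= ~SCAN_BEACON_DONE;
		return "SEND_PROBE";
	}
	return "DECISION";
}

int main(void)
{
	start_passive_with_ssids();
	rx_beacon();
	printf("%s\n", next_state()); /* SEND_PROBE */
	return 0;
}
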
if (ieee80211_is_probe_resp(mgmt->frame_control)) {
struct cfg80211_scan_request *scan_req;
struct cfg80211_sched_scan_request *sched_scan_req;
@@ -455,20 +469,23 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
scan_req = rcu_dereference_protected(local->scan_req,
lockdep_is_held(&local->mtx));
- if (scan_req != local->int_scan_req) {
- local->scan_info.aborted = aborted;
- cfg80211_scan_done(scan_req, &local->scan_info);
- }
RCU_INIT_POINTER(local->scan_req, NULL);
RCU_INIT_POINTER(local->scan_sdata, NULL);
local->scanning = 0;
local->scan_chandef.chan = NULL;
+ synchronize_rcu();
+
+ if (scan_req != local->int_scan_req) {
+ local->scan_info.aborted = aborted;
+ cfg80211_scan_done(scan_req, &local->scan_info);
+ }
+
/* Set power back to normal operating levels. */
ieee80211_hw_config(local, 0);
- if (!hw_scan) {
+ if (!hw_scan && was_scanning) {
ieee80211_configure_filter(local);
drv_sw_scan_complete(local, scan_sdata);
ieee80211_offchannel_return(local);
@@ -624,7 +641,7 @@ static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata,
if (flags & IEEE80211_PROBE_FLAG_RANDOM_SN) {
struct ieee80211_hdr *hdr = (void *)skb->data;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
- u16 sn = get_random_u32();
+ u16 sn = get_random_u16();
info->control.flags |= IEEE80211_TX_CTRL_NO_SEQNO;
hdr->seq_ctrl =
@@ -787,6 +804,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
IEEE80211_CHAN_RADAR)) ||
!req->n_ssids) {
next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
+ if (req->n_ssids)
+ set_bit(SCAN_BEACON_WAIT, &local->scanning);
} else {
ieee80211_scan_state_send_probe(local, &next_delay);
next_delay = IEEE80211_CHANNEL_TIME;
@@ -998,6 +1017,8 @@ set_channel:
!scan_req->n_ssids) {
*next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
local->next_scan_state = SCAN_DECISION;
+ if (scan_req->n_ssids)
+ set_bit(SCAN_BEACON_WAIT, &local->scanning);
return;
}
@@ -1090,6 +1111,8 @@ void ieee80211_scan_work(struct work_struct *work)
goto out;
}
+ clear_bit(SCAN_BEACON_WAIT, &local->scanning);
+
/*
* as long as no delay is required advance immediately
* without scheduling a new work
@@ -1100,6 +1123,10 @@ void ieee80211_scan_work(struct work_struct *work)
goto out_complete;
}
+ if (test_and_clear_bit(SCAN_BEACON_DONE, &local->scanning) &&
+ local->next_scan_state == SCAN_DECISION)
+ local->next_scan_state = SCAN_SEND_PROBE;
+
switch (local->next_scan_state) {
case SCAN_DECISION:
/* if no more bands/channels left, complete scan */
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 76747bfdaddd..871cdac2d0f4 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2008, Intel Corporation
* Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2018, 2020 Intel Corporation
+ * Copyright (C) 2018, 2020, 2022 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -23,7 +23,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems,
enum nl80211_band current_band,
u32 vht_cap_info,
- u32 sta_flags, u8 *bssid,
+ ieee80211_conn_flags_t conn_flags, u8 *bssid,
struct ieee80211_csa_ie *csa_ie)
{
enum nl80211_band new_band = current_band;
@@ -40,13 +40,13 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
sec_chan_offs = elems->sec_chan_offs;
wide_bw_chansw_ie = elems->wide_bw_chansw_ie;
- if (sta_flags & (IEEE80211_STA_DISABLE_HT |
- IEEE80211_STA_DISABLE_40MHZ)) {
+ if (conn_flags & (IEEE80211_CONN_DISABLE_HT |
+ IEEE80211_CONN_DISABLE_40MHZ)) {
sec_chan_offs = NULL;
wide_bw_chansw_ie = NULL;
}
- if (sta_flags & IEEE80211_STA_DISABLE_VHT)
+ if (conn_flags & IEEE80211_CONN_DISABLE_VHT)
wide_bw_chansw_ie = NULL;
if (elems->ext_chansw_ie) {
@@ -93,7 +93,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
if (sec_chan_offs) {
secondary_channel_offset = sec_chan_offs->sec_chan_offs;
- } else if (!(sta_flags & IEEE80211_STA_DISABLE_HT)) {
+ } else if (!(conn_flags & IEEE80211_CONN_DISABLE_HT)) {
/* If the secondary channel offset IE is not present,
* we can't know what's the post-CSA offset, so the
* best we can do is use 20MHz.
@@ -160,10 +160,10 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
&new_vht_chandef))
new_vht_chandef.chan = NULL;
- if (sta_flags & IEEE80211_STA_DISABLE_80P80MHZ &&
+ if (conn_flags & IEEE80211_CONN_DISABLE_80P80MHZ &&
new_vht_chandef.width == NL80211_CHAN_WIDTH_80P80)
ieee80211_chandef_downgrade(&new_vht_chandef);
- if (sta_flags & IEEE80211_STA_DISABLE_160MHZ &&
+ if (conn_flags & IEEE80211_CONN_DISABLE_160MHZ &&
new_vht_chandef.width == NL80211_CHAN_WIDTH_160)
ieee80211_chandef_downgrade(&new_vht_chandef);
}
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 537535a88990..cebfd148bb40 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -64,6 +64,12 @@
* freed before they are done using it.
*/
+struct sta_link_alloc {
+ struct link_sta_info info;
+ struct ieee80211_link_sta sta;
+ struct rcu_head rcu_head;
+};
+
static const struct rhashtable_params sta_rht_params = {
.nelem_hint = 3, /* start small */
.automatic_shrinking = true,
@@ -73,6 +79,15 @@ static const struct rhashtable_params sta_rht_params = {
.max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE,
};
+static const struct rhashtable_params link_sta_rht_params = {
+ .nelem_hint = 3, /* start small */
+ .automatic_shrinking = true,
+ .head_offset = offsetof(struct link_sta_info, link_hash_node),
+ .key_offset = offsetof(struct link_sta_info, addr),
+ .key_len = ETH_ALEN,
+ .max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE,
+};
+
/* Caller must hold local->sta_mtx */
static int sta_info_hash_del(struct ieee80211_local *local,
struct sta_info *sta)
@@ -81,6 +96,24 @@ static int sta_info_hash_del(struct ieee80211_local *local,
sta_rht_params);
}
+static int link_sta_info_hash_add(struct ieee80211_local *local,
+ struct link_sta_info *link_sta)
+{
+ lockdep_assert_held(&local->sta_mtx);
+ return rhltable_insert(&local->link_sta_hash,
+ &link_sta->link_hash_node,
+ link_sta_rht_params);
+}
+
+static int link_sta_info_hash_del(struct ieee80211_local *local,
+ struct link_sta_info *link_sta)
+{
+ lockdep_assert_held(&local->sta_mtx);
+ return rhltable_remove(&local->link_sta_hash,
+ &link_sta->link_hash_node,
+ link_sta_rht_params);
+}
+
static void __cleanup_single_sta(struct sta_info *sta)
{
int ac, i;
@@ -210,6 +243,74 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata,
return NULL;
}
+struct rhlist_head *link_sta_info_hash_lookup(struct ieee80211_local *local,
+ const u8 *addr)
+{
+ return rhltable_lookup(&local->link_sta_hash, addr,
+ link_sta_rht_params);
+}
+
+struct link_sta_info *
+link_sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct rhlist_head *tmp;
+ struct link_sta_info *link_sta;
+
+ rcu_read_lock();
+ for_each_link_sta_info(local, addr, link_sta, tmp) {
+ struct sta_info *sta = link_sta->sta;
+
+ if (sta->sdata == sdata ||
+ (sta->sdata->bss && sta->sdata->bss == sdata->bss)) {
+ rcu_read_unlock();
+ /* this is safe as the caller must already hold
+ * another rcu read section or the mutex
+ */
+ return link_sta;
+ }
+ }
+ rcu_read_unlock();
+ return NULL;
+}
+
+struct ieee80211_sta *
+ieee80211_find_sta_by_link_addrs(struct ieee80211_hw *hw,
+ const u8 *addr,
+ const u8 *localaddr,
+ unsigned int *link_id)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct link_sta_info *link_sta;
+ struct rhlist_head *tmp;
+
+ for_each_link_sta_info(local, addr, link_sta, tmp) {
+ struct sta_info *sta = link_sta->sta;
+ struct ieee80211_link_data *link;
+ u8 _link_id = link_sta->link_id;
+
+ if (!localaddr) {
+ if (link_id)
+ *link_id = _link_id;
+ return &sta->sta;
+ }
+
+ link = rcu_dereference(sta->sdata->link[_link_id]);
+ if (!link)
+ continue;
+
+ if (memcmp(link->conf->addr, localaddr, ETH_ALEN))
+ continue;
+
+ if (link_id)
+ *link_id = _link_id;
+ return &sta->sta;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_link_addrs);
+
struct sta_info *sta_info_get_by_addrs(struct ieee80211_local *local,
const u8 *sta_addr, const u8 *vif_addr)
{
@@ -245,6 +346,40 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata,
return NULL;
}
+static void sta_info_free_link(struct link_sta_info *link_sta)
+{
+ free_percpu(link_sta->pcpu_rx_stats);
+}
+
+static void sta_remove_link(struct sta_info *sta, unsigned int link_id,
+ bool unhash)
+{
+ struct sta_link_alloc *alloc = NULL;
+ struct link_sta_info *link_sta;
+
+ link_sta = rcu_dereference_protected(sta->link[link_id],
+ lockdep_is_held(&sta->local->sta_mtx));
+
+ if (WARN_ON(!link_sta))
+ return;
+
+ if (unhash)
+ link_sta_info_hash_del(sta->local, link_sta);
+
+ if (link_sta != &sta->deflink)
+ alloc = container_of(link_sta, typeof(*alloc), info);
+
+ sta->sta.valid_links &= ~BIT(link_id);
+ RCU_INIT_POINTER(sta->link[link_id], NULL);
+ RCU_INIT_POINTER(sta->sta.link[link_id], NULL);
+ if (alloc) {
+ sta_info_free_link(&alloc->info);
+ kfree_rcu(alloc, rcu_head);
+ }
+
+ ieee80211_sta_recalc_aggregates(&sta->sta);
+}
+
/**
* sta_info_free - free STA
*
@@ -258,6 +393,15 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata,
*/
void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(sta->link); i++) {
+ if (!(sta->sta.valid_links & BIT(i)))
+ continue;
+
+ sta_remove_link(sta, i, false);
+ }
+
/*
* If we had used sta_info_pre_move_state() then we might not
* have gone through the state transitions down again, so do
@@ -287,7 +431,8 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
#ifdef CONFIG_MAC80211_MESH
kfree(sta->mesh);
#endif
- free_percpu(sta->pcpu_rx_stats);
+
+ sta_info_free_link(&sta->deflink);
kfree(sta);
}
@@ -333,8 +478,51 @@ static int sta_prepare_rate_control(struct ieee80211_local *local,
return 0;
}
-struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
- const u8 *addr, gfp_t gfp)
+static int sta_info_alloc_link(struct ieee80211_local *local,
+ struct link_sta_info *link_info,
+ gfp_t gfp)
+{
+ struct ieee80211_hw *hw = &local->hw;
+ int i;
+
+ if (ieee80211_hw_check(hw, USES_RSS)) {
+ link_info->pcpu_rx_stats =
+ alloc_percpu_gfp(struct ieee80211_sta_rx_stats, gfp);
+ if (!link_info->pcpu_rx_stats)
+ return -ENOMEM;
+ }
+
+ link_info->rx_stats.last_rx = jiffies;
+ u64_stats_init(&link_info->rx_stats.syncp);
+
+ ewma_signal_init(&link_info->rx_stats_avg.signal);
+ ewma_avg_signal_init(&link_info->status_stats.avg_ack_signal);
+ for (i = 0; i < ARRAY_SIZE(link_info->rx_stats_avg.chain_signal); i++)
+ ewma_signal_init(&link_info->rx_stats_avg.chain_signal[i]);
+
+ return 0;
+}
+
+static void sta_info_add_link(struct sta_info *sta,
+ unsigned int link_id,
+ struct link_sta_info *link_info,
+ struct ieee80211_link_sta *link_sta)
+{
+ link_info->sta = sta;
+ link_info->link_id = link_id;
+ link_info->pub = link_sta;
+ link_sta->link_id = link_id;
+ rcu_assign_pointer(sta->link[link_id], link_info);
+ rcu_assign_pointer(sta->sta.link[link_id], link_sta);
+
+ link_sta->smps_mode = IEEE80211_SMPS_OFF;
+ link_sta->agg.max_rc_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_BA;
+}
+
+static struct sta_info *
+__sta_info_alloc(struct ieee80211_sub_if_data *sdata,
+ const u8 *addr, int link_id, const u8 *link_addr,
+ gfp_t gfp)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_hw *hw = &local->hw;
@@ -345,13 +533,22 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
if (!sta)
return NULL;
- if (ieee80211_hw_check(hw, USES_RSS)) {
- sta->pcpu_rx_stats =
- alloc_percpu_gfp(struct ieee80211_sta_rx_stats, gfp);
- if (!sta->pcpu_rx_stats)
- goto free;
+ sta->local = local;
+ sta->sdata = sdata;
+
+ if (sta_info_alloc_link(local, &sta->deflink, gfp))
+ goto free;
+
+ if (link_id >= 0) {
+ sta_info_add_link(sta, link_id, &sta->deflink,
+ &sta->sta.deflink);
+ sta->sta.valid_links = BIT(link_id);
+ } else {
+ sta_info_add_link(sta, 0, &sta->deflink, &sta->sta.deflink);
}
+ sta->sta.cur = &sta->sta.deflink.agg;
+
spin_lock_init(&sta->lock);
spin_lock_init(&sta->ps_lock);
INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames);
@@ -364,8 +561,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
goto free;
sta->mesh->plink_sta = sta;
spin_lock_init(&sta->mesh->plink_lock);
- if (ieee80211_vif_is_mesh(&sdata->vif) &&
- !sdata->u.mesh.user_mpm)
+ if (!sdata->u.mesh.user_mpm)
timer_setup(&sta->mesh->plink_timer, mesh_plink_timer,
0);
sta->mesh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE;
@@ -374,9 +570,13 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
memcpy(sta->addr, addr, ETH_ALEN);
memcpy(sta->sta.addr, addr, ETH_ALEN);
+ memcpy(sta->deflink.addr, link_addr, ETH_ALEN);
+ memcpy(sta->sta.deflink.addr, link_addr, ETH_ALEN);
sta->sta.max_rx_aggregation_subframes =
local->hw.max_rx_aggregation_subframes;
+ /* TODO: link-specific allocation and assignments for the MLO link STA */
+
/* Extended Key ID needs to install keys for keyid 0 and 1 Rx-only.
* The Tx path starts to use a key as soon as the key slot ptk_idx
* references to is not NULL. To not use the initial Rx-only key
@@ -386,11 +586,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
BUILD_BUG_ON(ARRAY_SIZE(sta->ptk) <= INVALID_PTK_KEYIDX);
sta->ptk_idx = INVALID_PTK_KEYIDX;
- sta->local = local;
- sta->sdata = sdata;
- sta->rx_stats.last_rx = jiffies;
-
- u64_stats_init(&sta->rx_stats.syncp);
ieee80211_init_frag_cache(&sta->frags);
@@ -400,10 +595,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
sta->reserved_tid = IEEE80211_TID_UNRESERVED;
sta->last_connected = ktime_get_seconds();
- ewma_signal_init(&sta->rx_stats_avg.signal);
- ewma_avg_signal_init(&sta->status_stats.avg_ack_signal);
- for (i = 0; i < ARRAY_SIZE(sta->rx_stats_avg.chain_signal); i++)
- ewma_signal_init(&sta->rx_stats_avg.chain_signal[i]);
if (local->ops->wake_tx_queue) {
void *txq_data;
@@ -425,11 +616,15 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
if (sta_prepare_rate_control(local, sta, gfp))
goto free_txq;
+ sta->airtime_weight = IEEE80211_DEFAULT_AIRTIME_WEIGHT;
for (i = 0; i < IEEE80211_NUM_ACS; i++) {
skb_queue_head_init(&sta->ps_tx_buf[i]);
skb_queue_head_init(&sta->tx_filtered[i]);
- init_airtime_info(&sta->airtime[i], &local->airtime[i]);
+ sta->airtime[i].deficit = sta->airtime_weight;
+ atomic_set(&sta->airtime[i].aql_tx_pending, 0);
+ sta->airtime[i].aql_limit_low = local->aql_txq_limit_low[i];
+ sta->airtime[i].aql_limit_high = local->aql_txq_limit_high[i];
}
for (i = 0; i < IEEE80211_NUM_TIDS; i++)
@@ -473,43 +668,10 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
if (!(rate->flags & mandatory))
continue;
- sta->sta.supp_rates[i] |= BIT(r);
- }
- }
-
- sta->sta.smps_mode = IEEE80211_SMPS_OFF;
- if (sdata->vif.type == NL80211_IFTYPE_AP ||
- sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
- struct ieee80211_supported_band *sband;
- u8 smps;
-
- sband = ieee80211_get_sband(sdata);
- if (!sband)
- goto free_txq;
-
- smps = (sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS) >>
- IEEE80211_HT_CAP_SM_PS_SHIFT;
- /*
- * Assume that hostapd advertises our caps in the beacon and
- * this is the known_smps_mode for a station that just assciated
- */
- switch (smps) {
- case WLAN_HT_SMPS_CONTROL_DISABLED:
- sta->known_smps_mode = IEEE80211_SMPS_OFF;
- break;
- case WLAN_HT_SMPS_CONTROL_STATIC:
- sta->known_smps_mode = IEEE80211_SMPS_STATIC;
- break;
- case WLAN_HT_SMPS_CONTROL_DYNAMIC:
- sta->known_smps_mode = IEEE80211_SMPS_DYNAMIC;
- break;
- default:
- WARN_ON(1);
+ sta->sta.deflink.supp_rates[i] |= BIT(r);
}
}
- sta->sta.max_rc_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_BA;
-
sta->cparams.ce_threshold = CODEL_DISABLED_THRESHOLD;
sta->cparams.target = MS2TIME(20);
sta->cparams.interval = MS2TIME(100);
@@ -525,7 +687,7 @@ free_txq:
if (sta->sta.txq[0])
kfree(to_txq_info(sta->sta.txq[0]));
free:
- free_percpu(sta->pcpu_rx_stats);
+ sta_info_free_link(&sta->deflink);
#ifdef CONFIG_MAC80211_MESH
kfree(sta->mesh);
#endif
@@ -533,6 +695,21 @@ free:
return NULL;
}
+struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
+ const u8 *addr, gfp_t gfp)
+{
+ return __sta_info_alloc(sdata, addr, -1, addr, gfp);
+}
+
+struct sta_info *sta_info_alloc_with_link(struct ieee80211_sub_if_data *sdata,
+ const u8 *mld_addr,
+ unsigned int link_id,
+ const u8 *link_addr,
+ gfp_t gfp)
+{
+ return __sta_info_alloc(sdata, mld_addr, link_id, link_addr, gfp);
+}
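
For illustration, a sketch of the two allocation entry points; addr, mld_addr and link_addr are assumed buffers, not from this patch:

struct sta_info *sta;

/* either the non-MLO entry point: link[0] points at deflink and
 * valid_links stays 0 ...
 */
sta = sta_info_alloc(sdata, addr, GFP_KERNEL);

/* ... or an MLD peer whose first link is link 2: link[2] points
 * at deflink and valid_links == BIT(2)
 */
sta = sta_info_alloc_with_link(sdata, mld_addr, 2, link_addr, GFP_KERNEL);
if (!sta)
	return -ENOMEM;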
+
static int sta_info_insert_check(struct sta_info *sta)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
@@ -623,7 +800,8 @@ ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) {
sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps;
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ BSS_CHANGED_P2P_PS);
}
}
@@ -665,13 +843,21 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
if (err)
goto out_drop_sta;
+ if (sta->sta.valid_links) {
+ err = link_sta_info_hash_add(local, &sta->deflink);
+ if (err) {
+ sta_info_hash_del(local, sta);
+ goto out_drop_sta;
+ }
+ }
+
list_add_tail_rcu(&sta->list, &local->sta_list);
/* update channel context before notifying the driver about state
* change, this enables driver using the updated channel context right away.
*/
if (sta->sta_state >= IEEE80211_STA_ASSOC) {
- ieee80211_recalc_min_chandef(sta->sdata);
+ ieee80211_recalc_min_chandef(sta->sdata, -1);
if (!sta->sta.support_p2p_ps)
ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
}
@@ -704,6 +890,8 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
return 0;
out_remove:
+ if (sta->sta.valid_links)
+ link_sta_info_hash_del(local, &sta->deflink);
sta_info_hash_del(local, sta);
list_del_rcu(&sta->list);
out_drop_sta:
@@ -998,7 +1186,7 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta)
{
struct ieee80211_local *local;
struct ieee80211_sub_if_data *sdata;
- int ret;
+ int ret, i;
might_sleep();
@@ -1026,6 +1214,18 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta)
*/
drv_sync_rx_queues(local, sta);
+ for (i = 0; i < ARRAY_SIZE(sta->link); i++) {
+ struct link_sta_info *link_sta;
+
+ if (!(sta->sta.valid_links & BIT(i)))
+ continue;
+
+ link_sta = rcu_dereference_protected(sta->link[i],
+ lockdep_is_held(&local->sta_mtx));
+
+ link_sta_info_hash_del(local, link_sta);
+ }
+
ret = sta_info_hash_del(local, sta);
if (WARN_ON(ret))
return ret;
@@ -1182,6 +1382,12 @@ int sta_info_init(struct ieee80211_local *local)
if (err)
return err;
+ err = rhltable_init(&local->link_sta_hash, &link_sta_rht_params);
+ if (err) {
+ rhltable_destroy(&local->sta_hash);
+ return err;
+ }
+
spin_lock_init(&local->tim_lock);
mutex_init(&local->sta_mtx);
INIT_LIST_HEAD(&local->sta_list);
@@ -1194,6 +1400,7 @@ void sta_info_stop(struct ieee80211_local *local)
{
del_timer_sync(&local->sta_cleanup);
rhltable_destroy(&local->sta_hash);
+ rhltable_destroy(&local->link_sta_hash);
}
@@ -1460,7 +1667,7 @@ static void ieee80211_send_null_response(struct sta_info *sta, int tid,
skb->dev = sdata->dev;
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
if (WARN_ON(!chanctx_conf)) {
rcu_read_unlock();
kfree_skb(skb);
@@ -1894,61 +2101,69 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta,
}
EXPORT_SYMBOL(ieee80211_sta_set_buffered);
-void ieee80211_register_airtime(struct ieee80211_txq *txq,
- u32 tx_airtime, u32 rx_airtime)
+void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
+ u32 tx_airtime, u32 rx_airtime)
{
- struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->vif);
- struct ieee80211_local *local = sdata->local;
- u64 weight_sum, weight_sum_reciprocal;
- struct airtime_sched_info *air_sched;
- struct airtime_info *air_info;
+ struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
+ struct ieee80211_local *local = sta->sdata->local;
+ u8 ac = ieee80211_ac_from_tid(tid);
u32 airtime = 0;
+ u32 diff;
- air_sched = &local->airtime[txq->ac];
- air_info = to_airtime_info(txq);
-
- if (local->airtime_flags & AIRTIME_USE_TX)
+ if (sta->local->airtime_flags & AIRTIME_USE_TX)
airtime += tx_airtime;
- if (local->airtime_flags & AIRTIME_USE_RX)
+ if (sta->local->airtime_flags & AIRTIME_USE_RX)
airtime += rx_airtime;
- /* Weights scale so the unit weight is 256 */
- airtime <<= 8;
+ spin_lock_bh(&local->active_txq_lock[ac]);
+ sta->airtime[ac].tx_airtime += tx_airtime;
+ sta->airtime[ac].rx_airtime += rx_airtime;
- spin_lock_bh(&air_sched->lock);
+ diff = (u32)jiffies - sta->airtime[ac].last_active;
+ if (diff <= AIRTIME_ACTIVE_DURATION)
+ sta->airtime[ac].deficit -= airtime;
- air_info->tx_airtime += tx_airtime;
- air_info->rx_airtime += rx_airtime;
-
- if (air_sched->weight_sum) {
- weight_sum = air_sched->weight_sum;
- weight_sum_reciprocal = air_sched->weight_sum_reciprocal;
- } else {
- weight_sum = air_info->weight;
- weight_sum_reciprocal = air_info->weight_reciprocal;
- }
-
- /* Round the calculation of global vt */
- air_sched->v_t += (u64)((airtime + (weight_sum >> 1)) *
- weight_sum_reciprocal) >> IEEE80211_RECIPROCAL_SHIFT_64;
- air_info->v_t += (u32)((airtime + (air_info->weight >> 1)) *
- air_info->weight_reciprocal) >> IEEE80211_RECIPROCAL_SHIFT_32;
- ieee80211_resort_txq(&local->hw, txq);
-
- spin_unlock_bh(&air_sched->lock);
+ spin_unlock_bh(&local->active_txq_lock[ac]);
}
+EXPORT_SYMBOL(ieee80211_sta_register_airtime);
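
A worked reading of the deficit update above, under the assumption that the restored round-robin scheduler replenishes one airtime_weight per visit (values illustrative):

/* airtime_weight defaults to IEEE80211_DEFAULT_AIRTIME_WEIGHT (256),
 * so each AC's deficit starts at 256. A station that was active
 * within AIRTIME_ACTIVE_DURATION and just used 300 units of airtime
 * drops to a deficit of -44, and the scheduler skips it until the
 * deficit has been topped up by another weight's worth.
 */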
-void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
- u32 tx_airtime, u32 rx_airtime)
+void ieee80211_sta_recalc_aggregates(struct ieee80211_sta *pubsta)
{
- struct ieee80211_txq *txq = pubsta->txq[tid];
+ struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
+ struct ieee80211_link_sta *link_sta;
+ int link_id, i;
+ bool first = true;
- if (!txq)
+ if (!pubsta->valid_links || !pubsta->mlo) {
+ pubsta->cur = &pubsta->deflink.agg;
return;
+ }
+
+ rcu_read_lock();
+ for_each_sta_active_link(&sta->sdata->vif, pubsta, link_sta, link_id) {
+ if (first) {
+ sta->cur = pubsta->deflink.agg;
+ first = false;
+ continue;
+ }
+
+ sta->cur.max_amsdu_len =
+ min(sta->cur.max_amsdu_len,
+ link_sta->agg.max_amsdu_len);
+ sta->cur.max_rc_amsdu_len =
+ min(sta->cur.max_rc_amsdu_len,
+ link_sta->agg.max_rc_amsdu_len);
- ieee80211_register_airtime(txq, tx_airtime, rx_airtime);
+ for (i = 0; i < ARRAY_SIZE(sta->cur.max_tid_amsdu_len); i++)
+ sta->cur.max_tid_amsdu_len[i] =
+ min(sta->cur.max_tid_amsdu_len[i],
+ link_sta->agg.max_tid_amsdu_len[i]);
+ }
+ rcu_read_unlock();
+
+ pubsta->cur = &sta->cur;
}
-EXPORT_SYMBOL(ieee80211_sta_register_airtime);
+EXPORT_SYMBOL(ieee80211_sta_recalc_aggregates);
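
The loop takes the minimum of every aggregation limit across the active links, since a queued A-MSDU may be transmitted on any of them. For example:

/* Illustrative: an MLD STA with two active links advertising
 *   link A: agg.max_amsdu_len == 7991
 *   link B: agg.max_amsdu_len == 3839
 * ends up with pubsta->cur->max_amsdu_len == 3839; a non-MLO
 * station skips the loop entirely and keeps pubsta->cur pointing
 * at deflink.agg.
 */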
void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
struct sta_info *sta, u8 ac,
@@ -1965,6 +2180,7 @@ void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
&sta->airtime[ac].aql_tx_pending);
atomic_add(tx_airtime, &local->aql_total_pending_airtime);
+ atomic_add(tx_airtime, &local->aql_ac_pending_airtime[ac]);
return;
}
@@ -1976,14 +2192,17 @@ void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
tx_pending, 0);
}
+ atomic_sub(tx_airtime, &local->aql_total_pending_airtime);
tx_pending = atomic_sub_return(tx_airtime,
- &local->aql_total_pending_airtime);
+ &local->aql_ac_pending_airtime[ac]);
if (WARN_ONCE(tx_pending < 0,
"Device %s AC %d pending airtime underflow: %u, %u",
wiphy_name(local->hw.wiphy), ac, tx_pending,
- tx_airtime))
- atomic_cmpxchg(&local->aql_total_pending_airtime,
+ tx_airtime)) {
+ atomic_cmpxchg(&local->aql_ac_pending_airtime[ac],
tx_pending, 0);
+ atomic_sub(tx_pending, &local->aql_total_pending_airtime);
+ }
}
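
With the per-AC split above, the global and per-AC counters are meant to move in lock-step; the invariant the error path preserves can be stated as:

/* Invariant: sum over all ac of aql_ac_pending_airtime[ac]
 *            == aql_total_pending_airtime
 * Both are incremented by tx_airtime when a frame is queued and
 * decremented on completion; if the per-AC counter would underflow,
 * it is reset to 0 and the same correction is applied to the total,
 * so the two cannot drift apart.
 */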
int sta_info_move_state(struct sta_info *sta,
@@ -2046,7 +2265,7 @@ int sta_info_move_state(struct sta_info *sta,
set_bit(WLAN_STA_AUTH, &sta->_flags);
} else if (sta->sta_state == IEEE80211_STA_ASSOC) {
clear_bit(WLAN_STA_ASSOC, &sta->_flags);
- ieee80211_recalc_min_chandef(sta->sdata);
+ ieee80211_recalc_min_chandef(sta->sdata, -1);
if (!sta->sta.support_p2p_ps)
ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
}
@@ -2055,7 +2274,7 @@ int sta_info_move_state(struct sta_info *sta,
if (sta->sta_state == IEEE80211_STA_AUTH) {
set_bit(WLAN_STA_ASSOC, &sta->_flags);
sta->assoc_at = ktime_get_boottime_ns();
- ieee80211_recalc_min_chandef(sta->sdata);
+ ieee80211_recalc_min_chandef(sta->sdata, -1);
if (!sta->sta.support_p2p_ps)
ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
} else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) {
@@ -2086,54 +2305,19 @@ int sta_info_move_state(struct sta_info *sta,
return 0;
}
-u8 sta_info_tx_streams(struct sta_info *sta)
-{
- struct ieee80211_sta_ht_cap *ht_cap = &sta->sta.ht_cap;
- u8 rx_streams;
-
- if (!sta->sta.ht_cap.ht_supported)
- return 1;
-
- if (sta->sta.vht_cap.vht_supported) {
- int i;
- u16 tx_mcs_map =
- le16_to_cpu(sta->sta.vht_cap.vht_mcs.tx_mcs_map);
-
- for (i = 7; i >= 0; i--)
- if ((tx_mcs_map & (0x3 << (i * 2))) !=
- IEEE80211_VHT_MCS_NOT_SUPPORTED)
- return i + 1;
- }
-
- if (ht_cap->mcs.rx_mask[3])
- rx_streams = 4;
- else if (ht_cap->mcs.rx_mask[2])
- rx_streams = 3;
- else if (ht_cap->mcs.rx_mask[1])
- rx_streams = 2;
- else
- rx_streams = 1;
-
- if (!(ht_cap->mcs.tx_params & IEEE80211_HT_MCS_TX_RX_DIFF))
- return rx_streams;
-
- return ((ht_cap->mcs.tx_params & IEEE80211_HT_MCS_TX_MAX_STREAMS_MASK)
- >> IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT) + 1;
-}
-
static struct ieee80211_sta_rx_stats *
sta_get_last_rx_stats(struct sta_info *sta)
{
- struct ieee80211_sta_rx_stats *stats = &sta->rx_stats;
+ struct ieee80211_sta_rx_stats *stats = &sta->deflink.rx_stats;
int cpu;
- if (!sta->pcpu_rx_stats)
+ if (!sta->deflink.pcpu_rx_stats)
return stats;
for_each_possible_cpu(cpu) {
struct ieee80211_sta_rx_stats *cpustats;
- cpustats = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
+ cpustats = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu);
if (time_after(cpustats->last_rx, stats->last_rx))
stats = cpustats;
@@ -2212,9 +2396,9 @@ static inline u64 sta_get_tidstats_msdu(struct ieee80211_sta_rx_stats *rxstats,
u64 value;
do {
- start = u64_stats_fetch_begin(&rxstats->syncp);
+ start = u64_stats_fetch_begin_irq(&rxstats->syncp);
value = rxstats->msdu[tid];
- } while (u64_stats_fetch_retry(&rxstats->syncp, start));
+ } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start));
return value;
}
@@ -2227,13 +2411,15 @@ static void sta_set_tidstats(struct sta_info *sta,
int cpu;
if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) {
- tidstats->rx_msdu += sta_get_tidstats_msdu(&sta->rx_stats, tid);
+ tidstats->rx_msdu += sta_get_tidstats_msdu(&sta->deflink.rx_stats,
+ tid);
- if (sta->pcpu_rx_stats) {
+ if (sta->deflink.pcpu_rx_stats) {
for_each_possible_cpu(cpu) {
struct ieee80211_sta_rx_stats *cpurxs;
- cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
+ cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats,
+ cpu);
tidstats->rx_msdu +=
sta_get_tidstats_msdu(cpurxs, tid);
}
@@ -2244,19 +2430,19 @@ static void sta_set_tidstats(struct sta_info *sta,
if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) {
tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU);
- tidstats->tx_msdu = sta->tx_stats.msdu[tid];
+ tidstats->tx_msdu = sta->deflink.tx_stats.msdu[tid];
}
if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_RETRIES)) &&
ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_RETRIES);
- tidstats->tx_msdu_retries = sta->status_stats.msdu_retries[tid];
+ tidstats->tx_msdu_retries = sta->deflink.status_stats.msdu_retries[tid];
}
if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_FAILED)) &&
ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_FAILED);
- tidstats->tx_msdu_failed = sta->status_stats.msdu_failed[tid];
+ tidstats->tx_msdu_failed = sta->deflink.status_stats.msdu_failed[tid];
}
if (local->ops->wake_tx_queue && tid < IEEE80211_NUM_TIDS) {
@@ -2278,9 +2464,9 @@ static inline u64 sta_get_stats_bytes(struct ieee80211_sta_rx_stats *rxstats)
u64 value;
do {
- start = u64_stats_fetch_begin(&rxstats->syncp);
+ start = u64_stats_fetch_begin_irq(&rxstats->syncp);
value = rxstats->bytes;
- } while (u64_stats_fetch_retry(&rxstats->syncp, start));
+ } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start));
return value;
}
@@ -2303,7 +2489,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
* (or just modify the value entirely, of course)
*/
if (sdata->vif.type == NL80211_IFTYPE_STATION)
- sinfo->rx_beacon = sdata->u.mgd.count_beacon_signal;
+ sinfo->rx_beacon = sdata->deflink.u.mgd.count_beacon_signal;
drv_sta_statistics(local, sdata, &sta->sta, sinfo);
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_INACTIVE_TIME) |
@@ -2314,7 +2500,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC);
if (sdata->vif.type == NL80211_IFTYPE_STATION) {
- sinfo->beacon_loss_count = sdata->u.mgd.beacon_loss_count;
+ sinfo->beacon_loss_count =
+ sdata->deflink.u.mgd.beacon_loss_count;
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_LOSS);
}
@@ -2327,26 +2514,27 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
BIT_ULL(NL80211_STA_INFO_TX_BYTES)))) {
sinfo->tx_bytes = 0;
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
- sinfo->tx_bytes += sta->tx_stats.bytes[ac];
+ sinfo->tx_bytes += sta->deflink.tx_stats.bytes[ac];
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES64);
}
if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_PACKETS))) {
sinfo->tx_packets = 0;
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
- sinfo->tx_packets += sta->tx_stats.packets[ac];
+ sinfo->tx_packets += sta->deflink.tx_stats.packets[ac];
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS);
}
if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES64) |
BIT_ULL(NL80211_STA_INFO_RX_BYTES)))) {
- sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats);
+ sinfo->rx_bytes += sta_get_stats_bytes(&sta->deflink.rx_stats);
- if (sta->pcpu_rx_stats) {
+ if (sta->deflink.pcpu_rx_stats) {
for_each_possible_cpu(cpu) {
struct ieee80211_sta_rx_stats *cpurxs;
- cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
+ cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats,
+ cpu);
sinfo->rx_bytes += sta_get_stats_bytes(cpurxs);
}
}
@@ -2355,12 +2543,13 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
}
if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_PACKETS))) {
- sinfo->rx_packets = sta->rx_stats.packets;
- if (sta->pcpu_rx_stats) {
+ sinfo->rx_packets = sta->deflink.rx_stats.packets;
+ if (sta->deflink.pcpu_rx_stats) {
for_each_possible_cpu(cpu) {
struct ieee80211_sta_rx_stats *cpurxs;
- cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
+ cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats,
+ cpu);
sinfo->rx_packets += cpurxs->packets;
}
}
@@ -2368,12 +2557,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
}
if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_RETRIES))) {
- sinfo->tx_retries = sta->status_stats.retry_count;
+ sinfo->tx_retries = sta->deflink.status_stats.retry_count;
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_RETRIES);
}
if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED))) {
- sinfo->tx_failed = sta->status_stats.retry_failed;
+ sinfo->tx_failed = sta->deflink.status_stats.retry_failed;
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED);
}
@@ -2390,16 +2579,16 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
}
if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT))) {
- sinfo->airtime_weight = sta->airtime[0].weight;
+ sinfo->airtime_weight = sta->airtime_weight;
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT);
}
- sinfo->rx_dropped_misc = sta->rx_stats.dropped;
- if (sta->pcpu_rx_stats) {
+ sinfo->rx_dropped_misc = sta->deflink.rx_stats.dropped;
+ if (sta->deflink.pcpu_rx_stats) {
for_each_possible_cpu(cpu) {
struct ieee80211_sta_rx_stats *cpurxs;
- cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
+ cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu);
sinfo->rx_dropped_misc += cpurxs->dropped;
}
}
@@ -2418,10 +2607,10 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL);
}
- if (!sta->pcpu_rx_stats &&
+ if (!sta->deflink.pcpu_rx_stats &&
!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG))) {
sinfo->signal_avg =
- -ewma_signal_read(&sta->rx_stats_avg.signal);
+ -ewma_signal_read(&sta->deflink.rx_stats_avg.signal);
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG);
}
}
@@ -2434,7 +2623,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL) |
BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) {
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL);
- if (!sta->pcpu_rx_stats)
+ if (!sta->deflink.pcpu_rx_stats)
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG);
sinfo->chains = last_rxstats->chains;
@@ -2443,17 +2632,19 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
sinfo->chain_signal[i] =
last_rxstats->chain_signal_last[i];
sinfo->chain_signal_avg[i] =
- -ewma_signal_read(&sta->rx_stats_avg.chain_signal[i]);
+ -ewma_signal_read(&sta->deflink.rx_stats_avg.chain_signal[i]);
}
}
- if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) {
- sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate,
+ if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)) &&
+ !sta->sta.valid_links) {
+ sta_set_rate_info_tx(sta, &sta->deflink.tx_stats.last_rate,
&sinfo->txrate);
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
}
- if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE))) {
+ if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE)) &&
+ !sta->sta.valid_links) {
if (sta_set_rate_info_rx(sta, &sinfo->rxrate) == 0)
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE);
}
@@ -2530,16 +2721,16 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
}
if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL)) &&
- sta->status_stats.ack_signal_filled) {
- sinfo->ack_signal = sta->status_stats.last_ack_signal;
+ sta->deflink.status_stats.ack_signal_filled) {
+ sinfo->ack_signal = sta->deflink.status_stats.last_ack_signal;
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL);
}
if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG)) &&
- sta->status_stats.ack_signal_filled) {
+ sta->deflink.status_stats.ack_signal_filled) {
sinfo->avg_ack_signal =
-(s8)ewma_avg_signal_read(
- &sta->status_stats.avg_ack_signal);
+ &sta->deflink.status_stats.avg_ack_signal);
sinfo->filled |=
BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG);
}
@@ -2574,10 +2765,10 @@ unsigned long ieee80211_sta_last_active(struct sta_info *sta)
{
struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta);
- if (!sta->status_stats.last_ack ||
- time_after(stats->last_rx, sta->status_stats.last_ack))
+ if (!sta->deflink.status_stats.last_ack ||
+ time_after(stats->last_rx, sta->deflink.status_stats.last_ack))
return stats->last_rx;
- return sta->status_stats.last_ack;
+ return sta->deflink.status_stats.last_ack;
}
static void sta_update_codel_params(struct sta_info *sta, u32 thr)
@@ -2603,3 +2794,136 @@ void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta,
sta_update_codel_params(sta, thr);
}
+
+int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id)
+{
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct sta_link_alloc *alloc;
+ int ret;
+
+ lockdep_assert_held(&sdata->local->sta_mtx);
+
+ /* must represent an MLD from the start */
+ if (WARN_ON(!sta->sta.valid_links))
+ return -EINVAL;
+
+ if (WARN_ON(sta->sta.valid_links & BIT(link_id) ||
+ sta->link[link_id]))
+ return -EBUSY;
+
+ alloc = kzalloc(sizeof(*alloc), GFP_KERNEL);
+ if (!alloc)
+ return -ENOMEM;
+
+ ret = sta_info_alloc_link(sdata->local, &alloc->info, GFP_KERNEL);
+ if (ret) {
+ kfree(alloc);
+ return ret;
+ }
+
+ sta_info_add_link(sta, link_id, &alloc->info, &alloc->sta);
+
+ return 0;
+}
+
+void ieee80211_sta_free_link(struct sta_info *sta, unsigned int link_id)
+{
+ lockdep_assert_held(&sta->sdata->local->sta_mtx);
+
+ sta_remove_link(sta, link_id, false);
+}
+
+int ieee80211_sta_activate_link(struct sta_info *sta, unsigned int link_id)
+{
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct link_sta_info *link_sta;
+ u16 old_links = sta->sta.valid_links;
+ u16 new_links = old_links | BIT(link_id);
+ int ret;
+
+ link_sta = rcu_dereference_protected(sta->link[link_id],
+ lockdep_is_held(&sdata->local->sta_mtx));
+
+ if (WARN_ON(old_links == new_links || !link_sta))
+ return -EINVAL;
+
+ rcu_read_lock();
+ if (link_sta_info_hash_lookup(sdata->local, link_sta->addr)) {
+ rcu_read_unlock();
+ return -EALREADY;
+ }
+ /* we only modify under the mutex so this is fine */
+ rcu_read_unlock();
+
+ sta->sta.valid_links = new_links;
+
+ if (!test_sta_flag(sta, WLAN_STA_INSERTED))
+ goto hash;
+
+ /* Ensure the values are updated for the driver; on failure
+ * sta_remove_link() recalculates them again.
+ */
+ ieee80211_sta_recalc_aggregates(&sta->sta);
+
+ ret = drv_change_sta_links(sdata->local, sdata, &sta->sta,
+ old_links, new_links);
+ if (ret) {
+ sta->sta.valid_links = old_links;
+ sta_remove_link(sta, link_id, false);
+ return ret;
+ }
+
+hash:
+ ret = link_sta_info_hash_add(sdata->local, link_sta);
+ WARN_ON(ret);
+ return 0;
+}
+
+void ieee80211_sta_remove_link(struct sta_info *sta, unsigned int link_id)
+{
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+ u16 old_links = sta->sta.valid_links;
+
+ lockdep_assert_held(&sdata->local->sta_mtx);
+
+ sta->sta.valid_links &= ~BIT(link_id);
+
+ if (test_sta_flag(sta, WLAN_STA_INSERTED))
+ drv_change_sta_links(sdata->local, sdata, &sta->sta,
+ old_links, sta->sta.valid_links);
+
+ sta_remove_link(sta, link_id, true);
+}
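
Taken together, the helpers above imply a call order for growing an MLD station; a sketch assuming sta_mtx is held, as the lockdep assertions require:

/* Sketch: add link @link_id to an existing MLD STA */
int ret;

ret = ieee80211_sta_allocate_link(sta, link_id);
if (ret)
	return ret;

/* ...fill in sta->link[link_id]->addr and capabilities here... */

ret = ieee80211_sta_activate_link(sta, link_id);
if (ret)
	/* no ieee80211_sta_free_link() here: a failed activation
	 * already tore the link down via sta_remove_link()
	 */
	return ret;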
+
+void ieee80211_sta_set_max_amsdu_subframes(struct sta_info *sta,
+ const u8 *ext_capab,
+ unsigned int ext_capab_len)
+{
+ u8 val;
+
+ sta->sta.max_amsdu_subframes = 0;
+
+ if (ext_capab_len < 8)
+ return;
+
+ /* The sender might not have sent the last bit, consider it to be 0 */
+ val = u8_get_bits(ext_capab[7], WLAN_EXT_CAPA8_MAX_MSDU_IN_AMSDU_LSB);
+
+ /* we did get all the bits, take the MSB as well */
+ if (ext_capab_len >= 9)
+ val |= u8_get_bits(ext_capab[8],
+ WLAN_EXT_CAPA9_MAX_MSDU_IN_AMSDU_MSB) << 1;
+
+ if (val)
+ sta->sta.max_amsdu_subframes = 4 << val;
+}
+
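The decoding above packs the extended-capability LSB and MSB into a two-bit value; the resulting limits work out as:

/* val = LSB | (MSB << 1), limit = 4 << val:
 *   val 0 -> max_amsdu_subframes stays 0 (no limit advertised)
 *   val 1 -> 8 subframes
 *   val 2 -> 16 subframes
 *   val 3 -> 32 subframes
 */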
+#ifdef CONFIG_LOCKDEP
+bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta)
+{
+ struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
+
+ return lockdep_is_held(&sta->local->sta_mtx);
+}
+EXPORT_SYMBOL(lockdep_sta_mutex_held);
+#endif
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 379fd367197f..2517ea714dc4 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -3,7 +3,7 @@
* Copyright 2002-2005, Devicescape Software, Inc.
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright(c) 2020-2021 Intel Corporation
+ * Copyright(c) 2020-2022 Intel Corporation
*/
#ifndef STA_INFO_H
@@ -135,25 +135,19 @@ enum ieee80211_agg_stop_reason {
#define AIRTIME_USE_TX BIT(0)
#define AIRTIME_USE_RX BIT(1)
-
struct airtime_info {
u64 rx_airtime;
u64 tx_airtime;
- u64 v_t;
- u64 last_scheduled;
- struct list_head list;
+ u32 last_active;
+ s32 deficit;
atomic_t aql_tx_pending; /* Estimated airtime for frames pending */
u32 aql_limit_low;
u32 aql_limit_high;
- u32 weight_reciprocal;
- u16 weight;
};
void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
struct sta_info *sta, u8 ac,
u16 tx_airtime, bool tx_completed);
-void ieee80211_register_airtime(struct ieee80211_txq *txq,
- u32 tx_airtime, u32 rx_airtime);
struct sta_info;
@@ -484,6 +478,92 @@ struct ieee80211_fragment_cache {
#define STA_SLOW_THRESHOLD 6000 /* 6 Mbps */
/**
+ * struct link_sta_info - Link STA information
+ * All link-specific STA info is stored here; a non-MLD STA has a
+ * single entry, while an MLD STA has one entry per link
+ * @addr: Link MAC address - may equal the MLD STA MAC address, and always
+ * does for a non-MLD STA. Used as the key when searching for a link STA
+ * @link_id: Link ID uniquely identifying the link STA. This is 0 for a
+ * non-MLD STA and set to the corresponding vif link ID for an MLD STA
+ * @link_hash_node: hash node for rhashtable
+ * @sta: Points to the STA info
+ * @gtk: group keys negotiated with this station, if any
+ * @tx_stats: TX statistics
+ * @tx_stats.packets: # of packets transmitted
+ * @tx_stats.bytes: # of bytes in all packets transmitted
+ * @tx_stats.last_rate: last TX rate
+ * @tx_stats.msdu: # of transmitted MSDUs per TID
+ * @rx_stats: RX statistics
+ * @rx_stats_avg: averaged RX statistics
+ * @rx_stats_avg.signal: averaged signal
+ * @rx_stats_avg.chain_signal: averaged per-chain signal
+ * @pcpu_rx_stats: per-CPU RX statistics, assigned only if the driver needs
+ * this (by advertising the USES_RSS hw flag)
+ * @status_stats: TX status statistics
+ * @status_stats.filtered: # of filtered frames
+ * @status_stats.retry_failed: # of frames that failed after retry
+ * @status_stats.retry_count: # of retries attempted
+ * @status_stats.lost_packets: # of lost packets
+ * @status_stats.last_pkt_time: timestamp of last ACKed packet
+ * @status_stats.msdu_retries: # of MSDU retries
+ * @status_stats.msdu_failed: # of failed MSDUs
+ * @status_stats.last_ack: last ack timestamp (jiffies)
+ * @status_stats.last_ack_signal: last ACK signal
+ * @status_stats.ack_signal_filled: last ACK signal validity
+ * @status_stats.avg_ack_signal: average ACK signal
+ * @cur_max_bandwidth: maximum bandwidth to use for TX to the station,
+ * taken from HT/VHT capabilities or VHT operating mode notification
+ * @pub: public (driver visible) link STA data
+ * TODO Move other link params from sta_info as required for MLD operation
+ */
+struct link_sta_info {
+ u8 addr[ETH_ALEN];
+ u8 link_id;
+
+ struct rhlist_head link_hash_node;
+
+ struct sta_info *sta;
+ struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS +
+ NUM_DEFAULT_MGMT_KEYS +
+ NUM_DEFAULT_BEACON_KEYS];
+ struct ieee80211_sta_rx_stats __percpu *pcpu_rx_stats;
+
+ /* Updated from RX path only, no locking requirements */
+ struct ieee80211_sta_rx_stats rx_stats;
+ struct {
+ struct ewma_signal signal;
+ struct ewma_signal chain_signal[IEEE80211_MAX_CHAINS];
+ } rx_stats_avg;
+
+ /* Updated from TX status path only, no locking requirements */
+ struct {
+ unsigned long filtered;
+ unsigned long retry_failed, retry_count;
+ unsigned int lost_packets;
+ unsigned long last_pkt_time;
+ u64 msdu_retries[IEEE80211_NUM_TIDS + 1];
+ u64 msdu_failed[IEEE80211_NUM_TIDS + 1];
+ unsigned long last_ack;
+ s8 last_ack_signal;
+ bool ack_signal_filled;
+ struct ewma_avg_signal avg_ack_signal;
+ } status_stats;
+
+ /* Updated from TX path only, no locking requirements */
+ struct {
+ u64 packets[IEEE80211_NUM_ACS];
+ u64 bytes[IEEE80211_NUM_ACS];
+ struct ieee80211_tx_rate last_rate;
+ struct rate_info last_rate_info;
+ u64 msdu[IEEE80211_NUM_TIDS + 1];
+ } tx_stats;
+
+ enum ieee80211_sta_rx_bandwidth cur_max_bandwidth;
+
+ struct ieee80211_link_sta *pub;
+};
+
+/**
* struct sta_info - STA information
*
* This structure collects information about a station that
@@ -498,7 +578,6 @@ struct ieee80211_fragment_cache {
* @sdata: virtual interface this station belongs to
* @ptk: peer keys negotiated with this station, if any
* @ptk_idx: last installed peer key index
- * @gtk: group keys negotiated with this station, if any
* @rate_ctrl: rate control algorithm reference
* @rate_ctrl_lock: spinlock used to protect rate control data
* (data inside the algorithm, so serializes calls there)
@@ -524,6 +603,7 @@ struct ieee80211_fragment_cache {
* @tid_seq: per-TID sequence numbers for sending to this STA
* @airtime: per-AC struct airtime_info describing airtime statistics for this
* station
+ * @airtime_weight: station weight for airtime fairness calculation purposes
* @ampdu_mlme: A-MPDU state machine state
* @mesh: mesh STA information
* @debugfs_dir: debug filesystem directory dentry
@@ -535,39 +615,26 @@ struct ieee80211_fragment_cache {
* @rcu_head: RCU head used for freeing this station struct
* @cur_max_bandwidth: maximum bandwidth to use for TX to the station,
* taken from HT/VHT capabilities or VHT operating mode notification
- * @known_smps_mode: the smps_mode the client thinks we are in. Relevant for
- * AP only.
- * @cipher_scheme: optional cipher scheme for this station
* @cparams: CoDel parameters for this station.
* @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED)
* @fast_tx: TX fastpath information
* @fast_rx: RX fastpath information
* @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to
* the BSS one.
- * @tx_stats: TX statistics
- * @tx_stats.packets: # of packets transmitted
- * @tx_stats.bytes: # of bytes in all packets transmitted
- * @tx_stats.last_rate: last TX rate
- * @tx_stats.msdu: # of transmitted MSDUs per TID
- * @rx_stats: RX statistics
- * @rx_stats_avg: averaged RX statistics
- * @rx_stats_avg.signal: averaged signal
- * @rx_stats_avg.chain_signal: averaged per-chain signal
- * @pcpu_rx_stats: per-CPU RX statistics, assigned only if the driver needs
- * this (by advertising the USES_RSS hw flag)
- * @status_stats: TX status statistics
- * @status_stats.filtered: # of filtered frames
- * @status_stats.retry_failed: # of frames that failed after retry
- * @status_stats.retry_count: # of retries attempted
- * @status_stats.lost_packets: # of lost packets
- * @status_stats.last_pkt_time: timestamp of last ACKed packet
- * @status_stats.msdu_retries: # of MSDU retries
- * @status_stats.msdu_failed: # of failed MSDUs
- * @status_stats.last_ack: last ack timestamp (jiffies)
- * @status_stats.last_ack_signal: last ACK signal
- * @status_stats.ack_signal_filled: last ACK signal validity
- * @status_stats.avg_ack_signal: average ACK signal
* @frags: fragment cache
+ * @cur: storage for aggregation data
+ * &struct ieee80211_sta points either here or to deflink.agg.
+ * @deflink: the default link STA information. For a non-MLO STA all
+ * link-specific STA information is accessed through @deflink (or
+ * through link[0], which points to @deflink). For an MLO STA the
+ * first link STA added also points to @deflink.
+ * @link: array of link STA entries. For a non-MLO STA only link[0] is
+ * set, to the address of @deflink; the remaining entries are NULL
+ * and all link information is accessed via @deflink. For an MLO STA
+ * the first link STA added points to @deflink, and further links are
+ * allocated and assigned to link[link_id], where link_id is the ID
+ * assigned by the AP.
*/
struct sta_info {
/* General information, mostly static */
@@ -577,9 +644,6 @@ struct sta_info {
u8 addr[ETH_ALEN];
struct ieee80211_local *local;
struct ieee80211_sub_if_data *sdata;
- struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS +
- NUM_DEFAULT_MGMT_KEYS +
- NUM_DEFAULT_BEACON_KEYS];
struct ieee80211_key __rcu *ptk[NUM_DEFAULT_KEYS];
u8 ptk_idx;
struct rate_control_ref *rate_ctrl;
@@ -589,7 +653,6 @@ struct sta_info {
struct ieee80211_fast_tx __rcu *fast_tx;
struct ieee80211_fast_rx __rcu *fast_rx;
- struct ieee80211_sta_rx_stats __percpu *pcpu_rx_stats;
#ifdef CONFIG_MAC80211_MESH
struct mesh_sta *mesh;
@@ -619,41 +682,13 @@ struct sta_info {
u64 assoc_at;
long last_connected;
- /* Updated from RX path only, no locking requirements */
- struct ieee80211_sta_rx_stats rx_stats;
- struct {
- struct ewma_signal signal;
- struct ewma_signal chain_signal[IEEE80211_MAX_CHAINS];
- } rx_stats_avg;
-
/* Plus 1 for non-QoS frames */
__le16 last_seq_ctrl[IEEE80211_NUM_TIDS + 1];
- /* Updated from TX status path only, no locking requirements */
- struct {
- unsigned long filtered;
- unsigned long retry_failed, retry_count;
- unsigned int lost_packets;
- unsigned long last_pkt_time;
- u64 msdu_retries[IEEE80211_NUM_TIDS + 1];
- u64 msdu_failed[IEEE80211_NUM_TIDS + 1];
- unsigned long last_ack;
- s8 last_ack_signal;
- bool ack_signal_filled;
- struct ewma_avg_signal avg_ack_signal;
- } status_stats;
-
- /* Updated from TX path only, no locking requirements */
- struct {
- u64 packets[IEEE80211_NUM_ACS];
- u64 bytes[IEEE80211_NUM_ACS];
- struct ieee80211_tx_rate last_rate;
- struct rate_info last_rate_info;
- u64 msdu[IEEE80211_NUM_TIDS + 1];
- } tx_stats;
u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
struct airtime_info airtime[IEEE80211_NUM_ACS];
+ u16 airtime_weight;
/*
* Aggregation information, locked with lock.
@@ -664,11 +699,6 @@ struct sta_info {
struct dentry *debugfs_dir;
#endif
- enum ieee80211_sta_rx_bandwidth cur_max_bandwidth;
-
- enum ieee80211_smps_mode known_smps_mode;
- const struct ieee80211_cipher_scheme *cipher_scheme;
-
struct codel_params cparams;
u8 reserved_tid;
@@ -677,6 +707,10 @@ struct sta_info {
struct ieee80211_fragment_cache frags;
+ struct ieee80211_sta_aggregates cur;
+ struct link_sta_info deflink;
+ struct link_sta_info __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS];
+
/* keep last! */
struct ieee80211_sta sta;
};
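
A compact restatement of the @deflink/@link relationship documented above, as invariants to check against the allocation code in sta_info.c (illustrative, not compiled):

/* non-MLO: rcu_access_pointer(sta->link[0]) == &sta->deflink,
 *          sta->sta.valid_links == 0
 * MLO:     every set bit i in sta->sta.valid_links has a non-NULL
 *          sta->link[i]; the first link added uses &sta->deflink,
 *          later ones come from a kzalloc'ed struct sta_link_alloc
 */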
@@ -788,6 +822,17 @@ struct sta_info *sta_info_get_by_addrs(struct ieee80211_local *local,
rhl_for_each_entry_rcu(_sta, _tmp, \
sta_info_hash_lookup(local, _addr), hash_node)
+struct rhlist_head *link_sta_info_hash_lookup(struct ieee80211_local *local,
+ const u8 *addr);
+
+#define for_each_link_sta_info(local, _addr, _sta, _tmp) \
+ rhl_for_each_entry_rcu(_sta, _tmp, \
+ link_sta_info_hash_lookup(local, _addr), \
+ link_hash_node)
+
+struct link_sta_info *
+link_sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr);
+
/*
* Get STA info by index, BROKEN!
*/
@@ -799,6 +844,11 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata,
*/
struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
const u8 *addr, gfp_t gfp);
+struct sta_info *sta_info_alloc_with_link(struct ieee80211_sub_if_data *sdata,
+ const u8 *mld_addr,
+ unsigned int link_id,
+ const u8 *link_addr,
+ gfp_t gfp);
void sta_info_free(struct ieee80211_local *local, struct sta_info *sta);
@@ -856,7 +906,11 @@ u32 sta_get_expected_throughput(struct sta_info *sta);
void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
unsigned long exp_time);
-u8 sta_info_tx_streams(struct sta_info *sta);
+
+int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id);
+void ieee80211_sta_free_link(struct sta_info *sta, unsigned int link_id);
+int ieee80211_sta_activate_link(struct sta_info *sta, unsigned int link_id);
+void ieee80211_sta_remove_link(struct sta_info *sta, unsigned int link_id);
void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta);
void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta);
@@ -864,6 +918,10 @@ void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta);
unsigned long ieee80211_sta_last_active(struct sta_info *sta);
+void ieee80211_sta_set_max_amsdu_subframes(struct sta_info *sta,
+ const u8 *ext_capab,
+ unsigned int ext_capab_len);
+
enum sta_stats_type {
STA_STATS_RATE_TYPE_INVALID = 0,
STA_STATS_RATE_TYPE_LEGACY,
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index f6f63a0b1b72..3f9ddd7f04b6 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -5,6 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2008-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright 2021-2022 Intel Corporation
*/
#include <linux/export.h>
@@ -71,7 +72,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
info->flags |= IEEE80211_TX_INTFL_RETRANSMISSION;
info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS;
- sta->status_stats.filtered++;
+ sta->deflink.status_stats.filtered++;
/*
* Clear more-data bit on filtered frames, it might be set
@@ -222,11 +223,8 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
* only be the AP. And the only other place updating
* this variable in managed mode is before association.
*/
- sdata->smps_mode = smps_mode;
+ sdata->deflink.smps_mode = smps_mode;
ieee80211_queue_work(&local->hw, &sdata->recalc_smps);
- } else if (sdata->vif.type == NL80211_IFTYPE_AP ||
- sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
- sta->known_smps_mode = smps_mode;
}
}
}
@@ -246,15 +244,19 @@ static void ieee80211_set_bar_pending(struct sta_info *sta, u8 tid, u16 ssn)
static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info,
struct ieee80211_tx_status *status)
{
+ struct ieee80211_rate_status *status_rate = NULL;
int len = sizeof(struct ieee80211_radiotap_header);
+ if (status && status->n_rates)
+ status_rate = &status->rates[status->n_rates - 1];
+
/* IEEE80211_RADIOTAP_RATE rate */
- if (status && status->rate && !(status->rate->flags &
- (RATE_INFO_FLAGS_MCS |
- RATE_INFO_FLAGS_DMG |
- RATE_INFO_FLAGS_EDMG |
- RATE_INFO_FLAGS_VHT_MCS |
- RATE_INFO_FLAGS_HE_MCS)))
+ if (status_rate && !(status_rate->rate_idx.flags &
+ (RATE_INFO_FLAGS_MCS |
+ RATE_INFO_FLAGS_DMG |
+ RATE_INFO_FLAGS_EDMG |
+ RATE_INFO_FLAGS_VHT_MCS |
+ RATE_INFO_FLAGS_HE_MCS)))
len += 2;
else if (info->status.rates[0].idx >= 0 &&
!(info->status.rates[0].flags &
@@ -269,12 +271,12 @@ static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info,
/* IEEE80211_RADIOTAP_MCS
* IEEE80211_RADIOTAP_VHT */
- if (status && status->rate) {
- if (status->rate->flags & RATE_INFO_FLAGS_MCS)
+ if (status_rate) {
+ if (status_rate->rate_idx.flags & RATE_INFO_FLAGS_MCS)
len += 3;
- else if (status->rate->flags & RATE_INFO_FLAGS_VHT_MCS)
+ else if (status_rate->rate_idx.flags & RATE_INFO_FLAGS_VHT_MCS)
len = ALIGN(len, 2) + 12;
- else if (status->rate->flags & RATE_INFO_FLAGS_HE_MCS)
+ else if (status_rate->rate_idx.flags & RATE_INFO_FLAGS_HE_MCS)
len = ALIGN(len, 2) + 12;
} else if (info->status.rates[0].idx >= 0) {
if (info->status.rates[0].flags & IEEE80211_TX_RC_MCS)
@@ -288,7 +290,6 @@ static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info,
static void
ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
- struct ieee80211_supported_band *sband,
struct sk_buff *skb, int retry_count,
int rtap_len, int shift,
struct ieee80211_tx_status *status)
@@ -296,10 +297,14 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct ieee80211_radiotap_header *rthdr;
+ struct ieee80211_rate_status *status_rate = NULL;
unsigned char *pos;
u16 legacy_rate = 0;
u16 txflags;
+ if (status && status->n_rates)
+ status_rate = &status->rates[status->n_rates - 1];
+
rthdr = skb_push(skb, rtap_len);
memset(rthdr, 0, rtap_len);
@@ -317,18 +322,23 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
/* IEEE80211_RADIOTAP_RATE */
- if (status && status->rate) {
- if (!(status->rate->flags & (RATE_INFO_FLAGS_MCS |
- RATE_INFO_FLAGS_DMG |
- RATE_INFO_FLAGS_EDMG |
- RATE_INFO_FLAGS_VHT_MCS |
- RATE_INFO_FLAGS_HE_MCS)))
- legacy_rate = status->rate->legacy;
+ if (status_rate) {
+ if (!(status_rate->rate_idx.flags &
+ (RATE_INFO_FLAGS_MCS |
+ RATE_INFO_FLAGS_DMG |
+ RATE_INFO_FLAGS_EDMG |
+ RATE_INFO_FLAGS_VHT_MCS |
+ RATE_INFO_FLAGS_HE_MCS)))
+ legacy_rate = status_rate->rate_idx.legacy;
} else if (info->status.rates[0].idx >= 0 &&
!(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS |
- IEEE80211_TX_RC_VHT_MCS)))
+ IEEE80211_TX_RC_VHT_MCS))) {
+ struct ieee80211_supported_band *sband;
+
+ sband = local->hw.wiphy->bands[info->band];
legacy_rate =
sband->bitrates[info->status.rates[0].idx].bitrate;
+ }
if (legacy_rate) {
rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_RATE));
@@ -356,20 +366,21 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
*pos = retry_count;
pos++;
- if (status && status->rate &&
- (status->rate->flags & RATE_INFO_FLAGS_MCS)) {
+ if (status_rate && (status_rate->rate_idx.flags & RATE_INFO_FLAGS_MCS))
+ {
rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_MCS));
pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS |
IEEE80211_RADIOTAP_MCS_HAVE_GI |
IEEE80211_RADIOTAP_MCS_HAVE_BW;
- if (status->rate->flags & RATE_INFO_FLAGS_SHORT_GI)
+ if (status_rate->rate_idx.flags & RATE_INFO_FLAGS_SHORT_GI)
pos[1] |= IEEE80211_RADIOTAP_MCS_SGI;
- if (status->rate->bw == RATE_INFO_BW_40)
+ if (status_rate->rate_idx.bw == RATE_INFO_BW_40)
pos[1] |= IEEE80211_RADIOTAP_MCS_BW_40;
- pos[2] = status->rate->mcs;
+ pos[2] = status_rate->rate_idx.mcs;
pos += 3;
- } else if (status && status->rate &&
- (status->rate->flags & RATE_INFO_FLAGS_VHT_MCS)) {
+ } else if (status_rate && (status_rate->rate_idx.flags &
+ RATE_INFO_FLAGS_VHT_MCS))
+ {
u16 known = local->hw.radiotap_vht_details &
(IEEE80211_RADIOTAP_VHT_KNOWN_GI |
IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH);
@@ -384,12 +395,12 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
pos += 2;
/* u8 flags - IEEE80211_RADIOTAP_VHT_FLAG_* */
- if (status->rate->flags & RATE_INFO_FLAGS_SHORT_GI)
+ if (status_rate->rate_idx.flags & RATE_INFO_FLAGS_SHORT_GI)
*pos |= IEEE80211_RADIOTAP_VHT_FLAG_SGI;
pos++;
/* u8 bandwidth */
- switch (status->rate->bw) {
+ switch (status_rate->rate_idx.bw) {
case RATE_INFO_BW_160:
*pos = 11;
break;
@@ -406,7 +417,8 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
pos++;
/* u8 mcs_nss[4] */
- *pos = (status->rate->mcs << 4) | status->rate->nss;
+ *pos = (status_rate->rate_idx.mcs << 4) |
+ status_rate->rate_idx.nss;
pos += 4;
/* u8 coding */
@@ -415,8 +427,9 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
pos++;
/* u16 partial_aid */
pos += 2;
- } else if (status && status->rate &&
- (status->rate->flags & RATE_INFO_FLAGS_HE_MCS)) {
+ } else if (status_rate && (status_rate->rate_idx.flags &
+ RATE_INFO_FLAGS_HE_MCS))
+ {
struct ieee80211_radiotap_he *he;
rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_HE));
@@ -434,7 +447,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
#define HE_PREP(f, val) le16_encode_bits(val, IEEE80211_RADIOTAP_HE_##f)
- he->data6 |= HE_PREP(DATA6_NSTS, status->rate->nss);
+ he->data6 |= HE_PREP(DATA6_NSTS, status_rate->rate_idx.nss);
#define CHECK_GI(s) \
BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_GI_##s != \
@@ -444,12 +457,12 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
CHECK_GI(1_6);
CHECK_GI(3_2);
- he->data3 |= HE_PREP(DATA3_DATA_MCS, status->rate->mcs);
- he->data3 |= HE_PREP(DATA3_DATA_DCM, status->rate->he_dcm);
+ he->data3 |= HE_PREP(DATA3_DATA_MCS, status_rate->rate_idx.mcs);
+ he->data3 |= HE_PREP(DATA3_DATA_DCM, status_rate->rate_idx.he_dcm);
- he->data5 |= HE_PREP(DATA5_GI, status->rate->he_gi);
+ he->data5 |= HE_PREP(DATA5_GI, status_rate->rate_idx.he_gi);
- switch (status->rate->bw) {
+ switch (status_rate->rate_idx.bw) {
case RATE_INFO_BW_20:
he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_20MHZ);
@@ -480,16 +493,16 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
CHECK_RU_ALLOC(2x996);
he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
- status->rate->he_ru_alloc + 4);
+ status_rate->rate_idx.he_ru_alloc + 4);
break;
default:
- WARN_ONCE(1, "Invalid SU BW %d\n", status->rate->bw);
+ WARN_ONCE(1, "Invalid SU BW %d\n", status_rate->rate_idx.bw);
}
pos += sizeof(struct ieee80211_radiotap_he);
}
- if ((status && status->rate) || info->status.rates[0].idx < 0)
+ if (status_rate || info->status.rates[0].idx < 0)
return;
/* IEEE80211_RADIOTAP_MCS
@@ -611,9 +624,11 @@ ieee80211_sdata_from_skb(struct ieee80211_local *local, struct sk_buff *skb)
}
static void ieee80211_report_ack_skb(struct ieee80211_local *local,
- struct ieee80211_tx_info *info,
- bool acked, bool dropped)
+ struct sk_buff *orig_skb,
+ bool acked, bool dropped,
+ ktime_t ack_hwtstamp)
{
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(orig_skb);
struct sk_buff *skb;
unsigned long flags;
@@ -628,6 +643,21 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
u64 cookie = IEEE80211_SKB_CB(skb)->ack.cookie;
struct ieee80211_sub_if_data *sdata;
struct ieee80211_hdr *hdr = (void *)skb->data;
+ bool is_valid_ack_signal =
+ !!(info->status.flags & IEEE80211_TX_STATUS_ACK_SIGNAL_VALID);
+ struct cfg80211_tx_status status = {
+ .cookie = cookie,
+ .buf = skb->data,
+ .len = skb->len,
+ .ack = acked,
+ };
+
+ if (ieee80211_is_timing_measurement(orig_skb) ||
+ ieee80211_is_ftm(orig_skb)) {
+ status.tx_tstamp =
+ ktime_to_ns(skb_hwtstamps(orig_skb)->hwtstamp);
+ status.ack_tstamp = ktime_to_ns(ack_hwtstamp);
+ }
rcu_read_lock();
sdata = ieee80211_sdata_from_skb(local, skb);
@@ -644,12 +674,12 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
cfg80211_probe_status(sdata->dev, hdr->addr1,
cookie, acked,
info->status.ack_signal,
- info->status.is_valid_ack_signal,
+ is_valid_ack_signal,
GFP_ATOMIC);
else if (ieee80211_is_mgmt(hdr->frame_control))
- cfg80211_mgmt_tx_status(&sdata->wdev, cookie,
- skb->data, skb->len,
- acked, GFP_ATOMIC);
+ cfg80211_mgmt_tx_status_ext(&sdata->wdev,
+ &status,
+ GFP_ATOMIC);
else
pr_warn("Unknown status report in ack skb\n");
@@ -666,7 +696,8 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
}
static void ieee80211_report_used_skb(struct ieee80211_local *local,
- struct sk_buff *skb, bool dropped)
+ struct sk_buff *skb, bool dropped,
+ ktime_t ack_hwtstamp)
{
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
u16 tx_time_est = ieee80211_info_get_tx_time_est(info);
@@ -698,7 +729,7 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
if (!sdata) {
skb->dev = NULL;
- } else {
+ } else if (!dropped) {
unsigned int hdr_size =
ieee80211_hdrlen(hdr->frame_control);
@@ -729,7 +760,8 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
rcu_read_unlock();
} else if (info->ack_frame_id) {
- ieee80211_report_ack_skb(local, info, acked, dropped);
+ ieee80211_report_ack_skb(local, skb, acked, dropped,
+ ack_hwtstamp);
}
if (!dropped && skb->destructor) {
@@ -754,7 +786,6 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
*/
#define STA_LOST_PKT_THRESHOLD 50
#define STA_LOST_PKT_TIME HZ /* 1 sec since last ACK */
-#define STA_LOST_TDLS_PKT_THRESHOLD 10
#define STA_LOST_TDLS_PKT_TIME (10*HZ) /* 10secs since last ACK */
static void ieee80211_lost_packet(struct sta_info *sta,
@@ -774,26 +805,27 @@ static void ieee80211_lost_packet(struct sta_info *sta,
!(info->flags & IEEE80211_TX_STAT_AMPDU))
return;
- sta->status_stats.lost_packets++;
+ sta->deflink.status_stats.lost_packets++;
if (sta->sta.tdls) {
pkt_time = STA_LOST_TDLS_PKT_TIME;
pkt_thr = STA_LOST_PKT_THRESHOLD;
}
/*
- * If we're in TDLS mode, make sure that all STA_LOST_TDLS_PKT_THRESHOLD
+ * If we're in TDLS mode, make sure that all STA_LOST_PKT_THRESHOLD
* of the last packets were lost, and that no ACK was received in the
* last STA_LOST_TDLS_PKT_TIME ms, before triggering the CQM packet-loss
* mechanism.
* For non-TDLS, use STA_LOST_PKT_THRESHOLD and STA_LOST_PKT_TIME
*/
- if (sta->status_stats.lost_packets < pkt_thr ||
- !time_after(jiffies, sta->status_stats.last_pkt_time + pkt_time))
+ if (sta->deflink.status_stats.lost_packets < pkt_thr ||
+ !time_after(jiffies, sta->deflink.status_stats.last_pkt_time + pkt_time))
return;
cfg80211_cqm_pktloss_notify(sta->sdata->dev, sta->sta.addr,
- sta->status_stats.lost_packets, GFP_ATOMIC);
- sta->status_stats.lost_packets = 0;
+ sta->deflink.status_stats.lost_packets,
+ GFP_ATOMIC);
+ sta->deflink.status_stats.lost_packets = 0;
}
static int ieee80211_tx_get_rates(struct ieee80211_hw *hw,
@@ -830,7 +862,6 @@ static int ieee80211_tx_get_rates(struct ieee80211_hw *hw,
}
void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
- struct ieee80211_supported_band *sband,
int retry_count, int shift, bool send_to_cooked,
struct ieee80211_tx_status *status)
{
@@ -847,7 +878,7 @@ void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
dev_kfree_skb(skb);
return;
}
- ieee80211_add_tx_radiotap_header(local, sband, skb, retry_count,
+ ieee80211_add_tx_radiotap_header(local, skb, retry_count,
rtap_len, shift, status);
/* XXX: is this sufficient for BPF? */
@@ -897,7 +928,6 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
struct ieee80211_tx_info *info = status->info;
struct sta_info *sta;
__le16 fc;
- struct ieee80211_supported_band *sband;
bool send_to_cooked;
bool acked;
bool noack_success;
@@ -905,7 +935,6 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
int shift = 0;
int tid = IEEE80211_NUM_TIDS;
- sband = local->hw.wiphy->bands[info->band];
fc = hdr->frame_control;
if (status->sta) {
@@ -928,7 +957,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) &&
(ieee80211_is_data(hdr->frame_control)) &&
(rates_idx != -1))
- sta->tx_stats.last_rate =
+ sta->deflink.tx_stats.last_rate =
info->status.rates[rates_idx];
if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) &&
@@ -974,34 +1003,15 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
return;
} else if (ieee80211_is_data_present(fc)) {
if (!acked && !noack_success)
- sta->status_stats.msdu_failed[tid]++;
+ sta->deflink.status_stats.msdu_failed[tid]++;
- sta->status_stats.msdu_retries[tid] +=
+ sta->deflink.status_stats.msdu_retries[tid] +=
retry_count;
}
if (!(info->flags & IEEE80211_TX_CTL_INJECTED) && acked)
ieee80211_frame_acked(sta, skb);
- } else if (wiphy_ext_feature_isset(local->hw.wiphy,
- NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) {
- struct ieee80211_sub_if_data *sdata;
- struct ieee80211_txq *txq;
- u32 airtime;
-
- /* Account airtime to multicast queue */
- sdata = ieee80211_sdata_from_skb(local, skb);
-
- if (sdata && (txq = sdata->vif.txq)) {
- airtime = info->status.tx_time ?:
- ieee80211_calc_expected_tx_airtime(hw,
- &sdata->vif,
- NULL,
- skb->len,
- false);
-
- ieee80211_register_airtime(txq, airtime, 0);
- }
}
/* SNMP counters
@@ -1045,7 +1055,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
jiffies + msecs_to_jiffies(10));
}
- ieee80211_report_used_skb(local, skb, false);
+ ieee80211_report_used_skb(local, skb, false, status->ack_hwtstamp);
/* this was a transmitted frame, but now we want to reuse it */
skb_orphan(skb);
@@ -1067,7 +1077,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
}
/* send to monitor interfaces */
- ieee80211_tx_monitor(local, skb, sband, retry_count, shift,
+ ieee80211_tx_monitor(local, skb, retry_count, shift,
send_to_cooked, status);
}
@@ -1099,17 +1109,17 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
struct ieee80211_tx_info *info = status->info;
struct ieee80211_sta *pubsta = status->sta;
struct sk_buff *skb = status->skb;
- struct ieee80211_supported_band *sband;
struct sta_info *sta = NULL;
int rates_idx, retry_count;
- bool acked, noack_success;
+ bool acked, noack_success, ack_signal_valid;
u16 tx_time_est;
if (pubsta) {
sta = container_of(pubsta, struct sta_info, sta);
- if (status->rate)
- sta->tx_stats.last_rate_info = *status->rate;
+ if (status->n_rates)
+ sta->deflink.tx_stats.last_rate_info =
+ status->rates[status->n_rates - 1].rate_idx;
}
if (skb && (tx_time_est =
@@ -1129,17 +1139,17 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count);
- sband = hw->wiphy->bands[info->band];
-
acked = !!(info->flags & IEEE80211_TX_STAT_ACK);
noack_success = !!(info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED);
+ ack_signal_valid =
+ !!(info->status.flags & IEEE80211_TX_STATUS_ACK_SIGNAL_VALID);
if (pubsta) {
struct ieee80211_sub_if_data *sdata = sta->sdata;
if (!acked && !noack_success)
- sta->status_stats.retry_failed++;
- sta->status_stats.retry_count += retry_count;
+ sta->deflink.status_stats.retry_failed++;
+ sta->deflink.status_stats.retry_count += retry_count;
if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
if (sdata->vif.type == NL80211_IFTYPE_STATION &&
@@ -1148,24 +1158,24 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
acked, info->status.tx_time);
if (acked) {
- sta->status_stats.last_ack = jiffies;
+ sta->deflink.status_stats.last_ack = jiffies;
- if (sta->status_stats.lost_packets)
- sta->status_stats.lost_packets = 0;
+ if (sta->deflink.status_stats.lost_packets)
+ sta->deflink.status_stats.lost_packets = 0;
/* Track when last packet was ACKed */
- sta->status_stats.last_pkt_time = jiffies;
+ sta->deflink.status_stats.last_pkt_time = jiffies;
/* Reset connection monitor */
if (sdata->vif.type == NL80211_IFTYPE_STATION &&
unlikely(sdata->u.mgd.probe_send_count > 0))
sdata->u.mgd.probe_send_count = 0;
- if (info->status.is_valid_ack_signal) {
- sta->status_stats.last_ack_signal =
+ if (ack_signal_valid) {
+ sta->deflink.status_stats.last_ack_signal =
(s8)info->status.ack_signal;
- sta->status_stats.ack_signal_filled = true;
- ewma_avg_signal_add(&sta->status_stats.avg_ack_signal,
+ sta->deflink.status_stats.ack_signal_filled = true;
+ ewma_avg_signal_add(&sta->deflink.status_stats.avg_ack_signal,
-info->status.ack_signal);
}
} else if (test_sta_flag(sta, WLAN_STA_PS_STA)) {
@@ -1183,7 +1193,7 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
}
}
- rate_control_tx_status(local, sband, status);
+ rate_control_tx_status(local, status);
if (ieee80211_vif_is_mesh(&sta->sdata->vif))
ieee80211s_update_metric(local, sta, status);
}
@@ -1208,7 +1218,7 @@ free:
if (!skb)
return;
- ieee80211_report_used_skb(local, skb, false);
+ ieee80211_report_used_skb(local, skb, false, status->ack_hwtstamp);
if (status->free_list)
list_add_tail(&skb->list, status->free_list);
else
@@ -1221,17 +1231,16 @@ void ieee80211_tx_rate_update(struct ieee80211_hw *hw,
struct ieee80211_tx_info *info)
{
struct ieee80211_local *local = hw_to_local(hw);
- struct ieee80211_supported_band *sband = hw->wiphy->bands[info->band];
struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
struct ieee80211_tx_status status = {
.info = info,
.sta = pubsta,
};
- rate_control_tx_status(local, sband, &status);
+ rate_control_tx_status(local, &status);
if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL))
- sta->tx_stats.last_rate = info->status.rates[0];
+ sta->deflink.tx_stats.last_rate = info->status.rates[0];
}
EXPORT_SYMBOL(ieee80211_tx_rate_update);
@@ -1270,8 +1279,9 @@ EXPORT_SYMBOL(ieee80211_report_low_ack);
void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb)
{
struct ieee80211_local *local = hw_to_local(hw);
+ ktime_t kt = ktime_set(0, 0);
- ieee80211_report_used_skb(local, skb, true);
+ ieee80211_report_used_skb(local, skb, true, kt);
dev_kfree_skb_any(skb);
}
EXPORT_SYMBOL(ieee80211_free_txskb);
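
The recurring sta->status_stats to sta->deflink.status_stats change throughout this file is groundwork for multi-link operation (MLO): per-link counters move under a default link so single-link code keeps one obvious home while MLO can add further links later. A toy illustration of that layout, with simplified stand-in structs rather than the kernel's:

#include <stdbool.h>

struct link_sta_stats {
	unsigned int retry_failed;
	unsigned int retry_count;
	unsigned long last_ack;
};

struct sta_info_sketch {
	struct {
		struct link_sta_stats status_stats;
	} deflink;                   /* default (first) link; MLO can add more */
};

/* Single-link callers simply route through deflink, mirroring the
 * retry_failed/retry_count/last_ack bookkeeping in the hunks above. */
static void record_tx_status(struct sta_info_sketch *sta, int retry_count,
			     bool acked, unsigned long now)
{
	if (!acked)
		sta->deflink.status_stats.retry_failed++;
	sta->deflink.status_stats.retry_count += retry_count;
	if (acked)
		sta->deflink.status_stats.last_ack = now;
}
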
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 137be9ec94af..f4b4d25eef95 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -6,7 +6,7 @@
* Copyright 2014, Intel Corporation
* Copyright 2014 Intel Mobile Communications GmbH
* Copyright 2015 - 2016 Intel Deutschland GmbH
- * Copyright (C) 2019, 2021 Intel Corporation
+ * Copyright (C) 2019, 2021-2022 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -218,7 +218,7 @@ static void ieee80211_tdls_add_link_ie(struct ieee80211_sub_if_data *sdata,
lnkid->ie_type = WLAN_EID_LINK_ID;
lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) - 2;
- memcpy(lnkid->bssid, sdata->u.mgd.bssid, ETH_ALEN);
+ memcpy(lnkid->bssid, sdata->deflink.u.mgd.bssid, ETH_ALEN);
memcpy(lnkid->init_sta, init_addr, ETH_ALEN);
memcpy(lnkid->resp_sta, rsp_addr, ETH_ALEN);
}
@@ -230,7 +230,7 @@ ieee80211_tdls_add_aid(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
*pos++ = WLAN_EID_AID;
*pos++ = 2; /* len */
- put_unaligned_le16(sdata->vif.bss_conf.aid, pos);
+ put_unaligned_le16(sdata->vif.cfg.aid, pos);
}
/* translate numbering in the WMM parameter IE to the mac80211 notation */
@@ -293,7 +293,7 @@ static void ieee80211_tdls_add_wmm_param_ie(struct ieee80211_sub_if_data *sdata,
* doesn't support it, as mandated by 802.11-2012 section 10.22.4
*/
for (i = 0; i < IEEE80211_NUM_ACS; i++) {
- txq = &sdata->tx_conf[ieee80211_ac_from_wmm(i)];
+ txq = &sdata->deflink.tx_conf[ieee80211_ac_from_wmm(i)];
wmm->ac[i].aci_aifsn = ieee80211_wmm_aci_aifsn(txq->aifs,
txq->acm, i);
wmm->ac[i].cw = ieee80211_wmm_ecw(txq->cw_min, txq->cw_max);
@@ -308,7 +308,8 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata,
/* IEEE802.11ac-2013 Table E-4 */
u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 };
struct cfg80211_chan_def uc = sta->tdls_chandef;
- enum nl80211_chan_width max_width = ieee80211_sta_cap_chan_bw(sta);
+ enum nl80211_chan_width max_width =
+ ieee80211_sta_cap_chan_bw(&sta->deflink);
int i;
/* only support upgrading non-narrow channels up to 80Mhz */
@@ -459,9 +460,9 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2);
ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap);
} else if (action_code == WLAN_TDLS_SETUP_RESPONSE &&
- ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) {
+ ht_cap.ht_supported && sta->sta.deflink.ht_cap.ht_supported) {
/* the peer caps are already intersected with our own */
- memcpy(&ht_cap, &sta->sta.ht_cap, sizeof(ht_cap));
+ memcpy(&ht_cap, &sta->sta.deflink.ht_cap, sizeof(ht_cap));
pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2);
ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap);
@@ -510,9 +511,9 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2);
ieee80211_ie_build_vht_cap(pos, &vht_cap, vht_cap.cap);
} else if (action_code == WLAN_TDLS_SETUP_RESPONSE &&
- vht_cap.vht_supported && sta->sta.vht_cap.vht_supported) {
+ vht_cap.vht_supported && sta->sta.deflink.vht_cap.vht_supported) {
/* the peer caps are already intersected with our own */
- memcpy(&vht_cap, &sta->sta.vht_cap, sizeof(vht_cap));
+ memcpy(&vht_cap, &sta->sta.deflink.vht_cap, sizeof(vht_cap));
/* the AID is present only when VHT is implemented */
ieee80211_tdls_add_aid(sdata, skb);
@@ -545,7 +546,6 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
size_t extra_ies_len)
{
struct ieee80211_local *local = sdata->local;
- struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
size_t offset = 0, noffset;
struct sta_info *sta, *ap_sta;
struct ieee80211_supported_band *sband;
@@ -558,7 +558,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
mutex_lock(&local->sta_mtx);
sta = sta_info_get(sdata, peer);
- ap_sta = sta_info_get(sdata, ifmgd->bssid);
+ ap_sta = sta_info_get(sdata, sdata->deflink.u.mgd.bssid);
if (WARN_ON_ONCE(!sta || !ap_sta)) {
mutex_unlock(&local->sta_mtx);
return;
@@ -603,13 +603,13 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
* if HT support is only added in TDLS, we need an HT-operation IE.
* add the IE as required by IEEE802.11-2012 9.23.3.2.
*/
- if (!ap_sta->sta.ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) {
+ if (!ap_sta->sta.deflink.ht_cap.ht_supported && sta->sta.deflink.ht_cap.ht_supported) {
u16 prot = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED |
IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT |
IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT;
pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation));
- ieee80211_ie_build_ht_oper(pos, &sta->sta.ht_cap,
+ ieee80211_ie_build_ht_oper(pos, &sta->sta.deflink.ht_cap,
&sdata->vif.bss_conf.chandef, prot,
true);
}
@@ -618,7 +618,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
/* only include VHT-operation if not on the 2.4GHz band */
if (sband->band != NL80211_BAND_2GHZ &&
- sta->sta.vht_cap.vht_supported) {
+ sta->sta.deflink.vht_cap.vht_supported) {
/*
* if both peers support WIDER_BW, we can expand the chandef to
* a wider compatible one, up to 80MHz
@@ -627,7 +627,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
ieee80211_tdls_chandef_vht_upgrade(sdata, sta);
pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_operation));
- ieee80211_ie_build_vht_oper(pos, &sta->sta.vht_cap,
+ ieee80211_ie_build_vht_oper(pos, &sta->sta.deflink.vht_cap,
&sta->tdls_chandef);
}
@@ -833,7 +833,7 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev,
mgmt = skb_put_zero(skb, 24);
memcpy(mgmt->da, peer, ETH_ALEN);
memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
- memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN);
+ memcpy(mgmt->bssid, sdata->deflink.u.mgd.bssid, ETH_ALEN);
mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
IEEE80211_STYPE_ACTION);
@@ -1054,7 +1054,8 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev,
/* disable bottom halves when entering the Tx path */
local_bh_disable();
- __ieee80211_subif_start_xmit(skb, dev, flags, 0, NULL);
+ __ieee80211_subif_start_xmit(skb, dev, flags,
+ IEEE80211_TX_CTRL_MLO_LINK_UNSPEC, NULL);
local_bh_enable();
return ret;
@@ -1072,7 +1073,8 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
- enum ieee80211_smps_mode smps_mode = sdata->u.mgd.driver_smps_mode;
+ enum ieee80211_smps_mode smps_mode =
+ sdata->deflink.u.mgd.driver_smps_mode;
int ret;
/* don't support setup with forced SMPS mode that's not off */
@@ -1254,7 +1256,7 @@ static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband;
mutex_lock(&local->chanctx_mtx);
- conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+ conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf,
lockdep_is_held(&local->chanctx_mtx));
if (conf) {
width = conf->def.width;
@@ -1268,10 +1270,10 @@ static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata,
enum ieee80211_sta_rx_bandwidth bw;
bw = ieee80211_chan_width_to_rx_bw(conf->def.width);
- bw = min(bw, ieee80211_sta_cap_rx_bw(sta));
- if (bw != sta->sta.bandwidth) {
- sta->sta.bandwidth = bw;
- rate_control_rate_update(local, sband, sta,
+ bw = min(bw, ieee80211_sta_cap_rx_bw(&sta->deflink));
+ if (bw != sta->sta.deflink.bandwidth) {
+ sta->sta.deflink.bandwidth = bw;
+ rate_control_rate_update(local, sband, sta, 0,
IEEE80211_RC_BW_CHANGED);
/*
* if a TDLS peer BW was updated, we need to
@@ -1296,7 +1298,7 @@ static int iee80211_tdls_have_ht_peers(struct ieee80211_sub_if_data *sdata)
if (!sta->sta.tdls || sta->sdata != sdata || !sta->uploaded ||
!test_sta_flag(sta, WLAN_STA_AUTHORIZED) ||
!test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH) ||
- !sta->sta.ht_cap.ht_supported)
+ !sta->sta.deflink.ht_cap.ht_supported)
continue;
result = true;
break;
@@ -1310,7 +1312,6 @@ static void
iee80211_tdls_recalc_ht_protection(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta)
{
- struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
bool tdls_ht;
u16 protection = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED |
IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT |
@@ -1318,10 +1319,10 @@ iee80211_tdls_recalc_ht_protection(struct ieee80211_sub_if_data *sdata,
u16 opmode;
/* Nothing to do if the BSS connection uses HT */
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT))
+ if (!(sdata->deflink.u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT))
return;
- tdls_ht = (sta && sta->sta.ht_cap.ht_supported) ||
+ tdls_ht = (sta && sta->sta.deflink.ht_cap.ht_supported) ||
iee80211_tdls_have_ht_peers(sdata);
opmode = sdata->vif.bss_conf.ht_operation_mode;
@@ -1335,7 +1336,8 @@ iee80211_tdls_recalc_ht_protection(struct ieee80211_sub_if_data *sdata,
return;
sdata->vif.bss_conf.ht_operation_mode = opmode;
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_HT);
+ ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+ BSS_CHANGED_HT);
}
int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
@@ -1372,7 +1374,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
switch (oper) {
case NL80211_TDLS_ENABLE_LINK:
- if (sdata->vif.csa_active) {
+ if (sdata->vif.bss_conf.csa_active) {
tdls_dbg(sdata, "TDLS: disallow link during CSA\n");
ret = -EBUSY;
break;
@@ -1431,7 +1433,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
if (ret == 0)
ieee80211_queue_work(&sdata->local->hw,
- &sdata->u.mgd.request_smps_work);
+ &sdata->deflink.u.mgd.request_smps_work);
mutex_unlock(&local->mtx);
sdata_unlock(sdata);
@@ -1444,7 +1446,7 @@ void ieee80211_tdls_oper_request(struct ieee80211_vif *vif, const u8 *peer,
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
- if (vif->type != NL80211_IFTYPE_STATION || !vif->bss_conf.assoc) {
+ if (vif->type != NL80211_IFTYPE_STATION || !vif->cfg.assoc) {
sdata_err(sdata, "Discarding TDLS oper %d - not STA or disconnected\n",
oper);
return;
@@ -1719,7 +1721,7 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata,
}
elems = ieee802_11_parse_elems(tf->u.chan_switch_resp.variable,
- skb->len - baselen, false, NULL, NULL);
+ skb->len - baselen, false, NULL);
if (!elems) {
ret = -ENOMEM;
goto out;
@@ -1837,7 +1839,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
}
elems = ieee802_11_parse_elems(tf->u.chan_switch_req.variable,
- skb->len - baselen, false, NULL, NULL);
+ skb->len - baselen, false, NULL);
if (!elems)
return -ENOMEM;
@@ -1900,7 +1902,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
}
/* peer should have known better */
- if (!sta->sta.ht_cap.ht_supported && elems->sec_chan_offs &&
+ if (!sta->sta.deflink.ht_cap.ht_supported && elems->sec_chan_offs &&
elems->sec_chan_offs->sec_chan_offs) {
tdls_dbg(sdata, "TDLS chan switch - wide chan unsupported\n");
ret = -ENOTSUPP;
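
One behavioural detail worth noting in iee80211_tdls_recalc_chanctx above: the peer's bandwidth is the channel-context width clamped by the peer's own RX capability, and rate control is only notified when the effective value actually changes. A compilable sketch of that clamp-and-compare pattern (simplified enum and illustrative names only):

/* Ordered so that a smaller value means a narrower channel. */
enum rx_bw_sketch { BW_SKETCH_20, BW_SKETCH_40, BW_SKETCH_80 };

/* Returns nonzero when the caller should fire an IEEE80211_RC_BW_CHANGED
 * style rate-control update, i.e. only when the clamped value differs
 * from the currently programmed one. */
static int tdls_recalc_bw(enum rx_bw_sketch *cur,
			  enum rx_bw_sketch chanctx_bw,
			  enum rx_bw_sketch peer_cap_bw)
{
	enum rx_bw_sketch bw = chanctx_bw < peer_cap_bw ?
			       chanctx_bw : peer_cap_bw;   /* min() of the two */

	if (bw == *cur)
		return 0;          /* unchanged: skip the update */
	*cur = bw;
	return 1;
}
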
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index d91498f77796..9f4377566c42 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -1,9 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
-* Portions of this file
-* Copyright(c) 2016-2017 Intel Deutschland GmbH
-* Copyright (C) 2018 - 2021 Intel Corporation
-*/
+ * Portions of this file
+ * Copyright(c) 2016-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 - 2022 Intel Corporation
+ */
#if !defined(__MAC80211_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
#define __MAC80211_DRIVER_TRACE
@@ -390,22 +390,74 @@ TRACE_EVENT(drv_config,
)
);
-TRACE_EVENT(drv_bss_info_changed,
+TRACE_EVENT(drv_vif_cfg_changed,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
- struct ieee80211_bss_conf *info,
- u32 changed),
+ u64 changed),
- TP_ARGS(local, sdata, info, changed),
+ TP_ARGS(local, sdata, changed),
TP_STRUCT__entry(
LOCAL_ENTRY
VIF_ENTRY
- __field(u32, changed)
+ __field(u64, changed)
__field(bool, assoc)
__field(bool, ibss_joined)
__field(bool, ibss_creator)
__field(u16, aid)
+ __dynamic_array(u32, arp_addr_list,
+ sdata->vif.cfg.arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ?
+ IEEE80211_BSS_ARP_ADDR_LIST_LEN :
+ sdata->vif.cfg.arp_addr_cnt)
+ __field(int, arp_addr_cnt)
+ __dynamic_array(u8, ssid, sdata->vif.cfg.ssid_len)
+ __field(int, s1g)
+ __field(bool, idle)
+ __field(bool, ps)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ __entry->changed = changed;
+ __entry->aid = sdata->vif.cfg.aid;
+ __entry->assoc = sdata->vif.cfg.assoc;
+ __entry->ibss_joined = sdata->vif.cfg.ibss_joined;
+ __entry->ibss_creator = sdata->vif.cfg.ibss_creator;
+ __entry->ps = sdata->vif.cfg.ps;
+
+ __entry->arp_addr_cnt = sdata->vif.cfg.arp_addr_cnt;
+ memcpy(__get_dynamic_array(arp_addr_list),
+ sdata->vif.cfg.arp_addr_list,
+ sizeof(u32) * (sdata->vif.cfg.arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ?
+ IEEE80211_BSS_ARP_ADDR_LIST_LEN :
+ sdata->vif.cfg.arp_addr_cnt));
+ memcpy(__get_dynamic_array(ssid),
+ sdata->vif.cfg.ssid,
+ sdata->vif.cfg.ssid_len);
+ __entry->s1g = sdata->vif.cfg.s1g;
+ __entry->idle = sdata->vif.cfg.idle;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT VIF_PR_FMT " changed:%#llx",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->changed
+ )
+);
+
+TRACE_EVENT(drv_link_info_changed,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *link_conf,
+ u64 changed),
+
+ TP_ARGS(local, sdata, link_conf, changed),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ VIF_ENTRY
+ __field(u64, changed)
+ __field(int, link_id)
__field(bool, cts)
__field(bool, shortpre)
__field(bool, shortslot)
@@ -424,15 +476,7 @@ TRACE_EVENT(drv_bss_info_changed,
__field(u32, channel_width)
__field(u32, channel_cfreq1)
__field(u32, channel_cfreq1_offset)
- __dynamic_array(u32, arp_addr_list,
- info->arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ?
- IEEE80211_BSS_ARP_ADDR_LIST_LEN :
- info->arp_addr_cnt)
- __field(int, arp_addr_cnt)
__field(bool, qos)
- __field(bool, idle)
- __field(bool, ps)
- __dynamic_array(u8, ssid, info->ssid_len)
__field(bool, hidden_ssid)
__field(int, txpower)
__field(u8, p2p_oppps_ctwindow)
@@ -442,46 +486,36 @@ TRACE_EVENT(drv_bss_info_changed,
LOCAL_ASSIGN;
VIF_ASSIGN;
__entry->changed = changed;
- __entry->aid = info->aid;
- __entry->assoc = info->assoc;
- __entry->ibss_joined = info->ibss_joined;
- __entry->ibss_creator = info->ibss_creator;
- __entry->shortpre = info->use_short_preamble;
- __entry->cts = info->use_cts_prot;
- __entry->shortslot = info->use_short_slot;
- __entry->enable_beacon = info->enable_beacon;
- __entry->dtimper = info->dtim_period;
- __entry->bcnint = info->beacon_int;
- __entry->assoc_cap = info->assoc_capability;
- __entry->sync_tsf = info->sync_tsf;
- __entry->sync_device_ts = info->sync_device_ts;
- __entry->sync_dtim_count = info->sync_dtim_count;
- __entry->basic_rates = info->basic_rates;
- memcpy(__entry->mcast_rate, info->mcast_rate,
+ __entry->link_id = link_conf->link_id;
+ __entry->shortpre = link_conf->use_short_preamble;
+ __entry->cts = link_conf->use_cts_prot;
+ __entry->shortslot = link_conf->use_short_slot;
+ __entry->enable_beacon = link_conf->enable_beacon;
+ __entry->dtimper = link_conf->dtim_period;
+ __entry->bcnint = link_conf->beacon_int;
+ __entry->assoc_cap = link_conf->assoc_capability;
+ __entry->sync_tsf = link_conf->sync_tsf;
+ __entry->sync_device_ts = link_conf->sync_device_ts;
+ __entry->sync_dtim_count = link_conf->sync_dtim_count;
+ __entry->basic_rates = link_conf->basic_rates;
+ memcpy(__entry->mcast_rate, link_conf->mcast_rate,
sizeof(__entry->mcast_rate));
- __entry->ht_operation_mode = info->ht_operation_mode;
- __entry->cqm_rssi_thold = info->cqm_rssi_thold;
- __entry->cqm_rssi_hyst = info->cqm_rssi_hyst;
- __entry->channel_width = info->chandef.width;
- __entry->channel_cfreq1 = info->chandef.center_freq1;
- __entry->channel_cfreq1_offset = info->chandef.freq1_offset;
- __entry->arp_addr_cnt = info->arp_addr_cnt;
- memcpy(__get_dynamic_array(arp_addr_list), info->arp_addr_list,
- sizeof(u32) * (info->arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ?
- IEEE80211_BSS_ARP_ADDR_LIST_LEN :
- info->arp_addr_cnt));
- __entry->qos = info->qos;
- __entry->idle = info->idle;
- __entry->ps = info->ps;
- memcpy(__get_dynamic_array(ssid), info->ssid, info->ssid_len);
- __entry->hidden_ssid = info->hidden_ssid;
- __entry->txpower = info->txpower;
- __entry->p2p_oppps_ctwindow = info->p2p_noa_attr.oppps_ctwindow;
+ __entry->ht_operation_mode = link_conf->ht_operation_mode;
+ __entry->cqm_rssi_thold = link_conf->cqm_rssi_thold;
+ __entry->cqm_rssi_hyst = link_conf->cqm_rssi_hyst;
+ __entry->channel_width = link_conf->chandef.width;
+ __entry->channel_cfreq1 = link_conf->chandef.center_freq1;
+ __entry->channel_cfreq1_offset = link_conf->chandef.freq1_offset;
+ __entry->qos = link_conf->qos;
+ __entry->hidden_ssid = link_conf->hidden_ssid;
+ __entry->txpower = link_conf->txpower;
+ __entry->p2p_oppps_ctwindow = link_conf->p2p_noa_attr.oppps_ctwindow;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT " changed:%#x",
- LOCAL_PR_ARG, VIF_PR_ARG, __entry->changed
+ LOCAL_PR_FMT VIF_PR_FMT " link_id:%d, changed:%#llx",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->link_id,
+ __entry->changed
)
);
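
For orientation: this event split follows the new MLO data model. Interface-wide state (assoc, aid, SSID, ARP address list) moves to vif.cfg and is traced by drv_vif_cfg_changed, while per-link state (beacon parameters, rates, chandef) stays in a bss_conf that now carries a link_id and is traced by drv_link_info_changed; note also that the changed bitmap widens from u32 to u64 to leave room for new flags. A rough sketch of the two shapes, with hypothetical field subsets rather than the kernel structs:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct vif_cfg_sketch {          /* interface-wide: one per vif, all links */
	bool assoc;
	uint16_t aid;
	uint8_t ssid[32];
	size_t ssid_len;
};

struct link_conf_sketch {        /* per link: one instance per link_id */
	unsigned int link_id;
	uint8_t dtim_period;
	uint16_t beacon_int;
	uint16_t ht_operation_mode;
};
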
@@ -860,8 +894,8 @@ TRACE_EVENT(drv_sta_set_txpwr,
LOCAL_ASSIGN;
VIF_ASSIGN;
STA_ASSIGN;
- __entry->txpwr = sta->txpwr.power;
- __entry->type = sta->txpwr.type;
+ __entry->txpwr = sta->deflink.txpwr.power;
+ __entry->type = sta->deflink.txpwr.type;
),
TP_printk(
@@ -969,13 +1003,15 @@ DEFINE_EVENT(sta_event, drv_sta_rate_tbl_update,
TRACE_EVENT(drv_conf_tx,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
+ unsigned int link_id,
u16 ac, const struct ieee80211_tx_queue_params *params),
- TP_ARGS(local, sdata, ac, params),
+ TP_ARGS(local, sdata, link_id, ac, params),
TP_STRUCT__entry(
LOCAL_ENTRY
VIF_ENTRY
+ __field(unsigned int, link_id)
__field(u16, ac)
__field(u16, txop)
__field(u16, cw_min)
@@ -987,6 +1023,7 @@ TRACE_EVENT(drv_conf_tx,
TP_fast_assign(
LOCAL_ASSIGN;
VIF_ASSIGN;
+ __entry->link_id = link_id;
__entry->ac = ac;
__entry->txop = params->txop;
__entry->cw_max = params->cw_max;
@@ -996,8 +1033,8 @@ TRACE_EVENT(drv_conf_tx,
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT " AC:%d",
- LOCAL_PR_ARG, VIF_PR_ARG, __entry->ac
+ LOCAL_PR_FMT VIF_PR_FMT " link_id: %d, AC:%d",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->link_id, __entry->ac
)
);
@@ -1592,6 +1629,7 @@ struct trace_chandef_entry {
struct trace_switch_entry {
struct trace_vif_entry vif;
+ unsigned int link_id;
struct trace_chandef_entry old_chandef;
struct trace_chandef_entry new_chandef;
} __packed;
@@ -1631,6 +1669,7 @@ TRACE_EVENT(drv_switch_vif_chanctx,
SWITCH_ENTRY_ASSIGN(vif.vif_type, vif->type);
SWITCH_ENTRY_ASSIGN(vif.p2p, vif->p2p);
+ SWITCH_ENTRY_ASSIGN(link_id, link_conf->link_id);
strncpy(local_vifs[i].vif.vif_name,
sdata->name,
sizeof(local_vifs[i].vif.vif_name));
@@ -1671,77 +1710,105 @@ TRACE_EVENT(drv_switch_vif_chanctx,
DECLARE_EVENT_CLASS(local_sdata_chanctx,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *link_conf,
struct ieee80211_chanctx *ctx),
- TP_ARGS(local, sdata, ctx),
+ TP_ARGS(local, sdata, link_conf, ctx),
TP_STRUCT__entry(
LOCAL_ENTRY
VIF_ENTRY
CHANCTX_ENTRY
+ __field(unsigned int, link_id)
),
TP_fast_assign(
LOCAL_ASSIGN;
VIF_ASSIGN;
CHANCTX_ASSIGN;
+ __entry->link_id = link_conf->link_id;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT CHANCTX_PR_FMT,
- LOCAL_PR_ARG, VIF_PR_ARG, CHANCTX_PR_ARG
+ LOCAL_PR_FMT VIF_PR_FMT " link_id:%d" CHANCTX_PR_FMT,
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->link_id, CHANCTX_PR_ARG
)
);
DEFINE_EVENT(local_sdata_chanctx, drv_assign_vif_chanctx,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *link_conf,
struct ieee80211_chanctx *ctx),
- TP_ARGS(local, sdata, ctx)
+ TP_ARGS(local, sdata, link_conf, ctx)
);
DEFINE_EVENT(local_sdata_chanctx, drv_unassign_vif_chanctx,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *link_conf,
struct ieee80211_chanctx *ctx),
- TP_ARGS(local, sdata, ctx)
+ TP_ARGS(local, sdata, link_conf, ctx)
);
TRACE_EVENT(drv_start_ap,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
- struct ieee80211_bss_conf *info),
+ struct ieee80211_bss_conf *link_conf),
- TP_ARGS(local, sdata, info),
+ TP_ARGS(local, sdata, link_conf),
TP_STRUCT__entry(
LOCAL_ENTRY
VIF_ENTRY
+ __field(u32, link_id)
__field(u8, dtimper)
__field(u16, bcnint)
- __dynamic_array(u8, ssid, info->ssid_len)
+ __dynamic_array(u8, ssid, sdata->vif.cfg.ssid_len)
__field(bool, hidden_ssid)
),
TP_fast_assign(
LOCAL_ASSIGN;
VIF_ASSIGN;
- __entry->dtimper = info->dtim_period;
- __entry->bcnint = info->beacon_int;
- memcpy(__get_dynamic_array(ssid), info->ssid, info->ssid_len);
- __entry->hidden_ssid = info->hidden_ssid;
+ __entry->link_id = link_conf->link_id;
+ __entry->dtimper = link_conf->dtim_period;
+ __entry->bcnint = link_conf->beacon_int;
+ __entry->hidden_ssid = link_conf->hidden_ssid;
+ memcpy(__get_dynamic_array(ssid),
+ sdata->vif.cfg.ssid,
+ sdata->vif.cfg.ssid_len);
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT,
- LOCAL_PR_ARG, VIF_PR_ARG
+ LOCAL_PR_FMT VIF_PR_FMT " link id %u",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->link_id
)
);
-DEFINE_EVENT(local_sdata_evt, drv_stop_ap,
+TRACE_EVENT(drv_stop_ap,
TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata),
- TP_ARGS(local, sdata)
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *link_conf),
+
+ TP_ARGS(local, sdata, link_conf),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ VIF_ENTRY
+ __field(u32, link_id)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ __entry->link_id = link_conf->link_id;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT VIF_PR_FMT " link id %u",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->link_id
+ )
);
TRACE_EVENT(drv_reconfig_complete,
@@ -1786,7 +1853,7 @@ TRACE_EVENT(drv_join_ibss,
VIF_ENTRY
__field(u8, dtimper)
__field(u16, bcnint)
- __dynamic_array(u8, ssid, info->ssid_len)
+ __dynamic_array(u8, ssid, sdata->vif.cfg.ssid_len)
),
TP_fast_assign(
@@ -1794,7 +1861,9 @@ TRACE_EVENT(drv_join_ibss,
VIF_ASSIGN;
__entry->dtimper = info->dtim_period;
__entry->bcnint = info->beacon_int;
- memcpy(__get_dynamic_array(ssid), info->ssid, info->ssid_len);
+ memcpy(__get_dynamic_array(ssid),
+ sdata->vif.cfg.ssid,
+ sdata->vif.cfg.ssid_len);
),
TP_printk(
@@ -1972,933 +2041,991 @@ DEFINE_EVENT(local_sdata_evt, drv_abort_pmsr,
TP_ARGS(local, sdata)
);
-/*
- * Tracing for API calls that drivers call.
- */
-
-TRACE_EVENT(api_start_tx_ba_session,
- TP_PROTO(struct ieee80211_sta *sta, u16 tid),
+TRACE_EVENT(drv_set_default_unicast_key,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ int key_idx),
- TP_ARGS(sta, tid),
+ TP_ARGS(local, sdata, key_idx),
TP_STRUCT__entry(
- STA_ENTRY
- __field(u16, tid)
+ LOCAL_ENTRY
+ VIF_ENTRY
+ __field(int, key_idx)
),
TP_fast_assign(
- STA_ASSIGN;
- __entry->tid = tid;
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ __entry->key_idx = key_idx;
),
- TP_printk(
- STA_PR_FMT " tid:%d",
- STA_PR_ARG, __entry->tid
- )
+ TP_printk(LOCAL_PR_FMT VIF_PR_FMT " key_idx:%d",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->key_idx)
);
-TRACE_EVENT(api_start_tx_ba_cb,
- TP_PROTO(struct ieee80211_sub_if_data *sdata, const u8 *ra, u16 tid),
+TRACE_EVENT(drv_channel_switch_beacon,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_chan_def *chandef),
- TP_ARGS(sdata, ra, tid),
+ TP_ARGS(local, sdata, chandef),
TP_STRUCT__entry(
+ LOCAL_ENTRY
VIF_ENTRY
- __array(u8, ra, ETH_ALEN)
- __field(u16, tid)
+ CHANDEF_ENTRY
),
TP_fast_assign(
+ LOCAL_ASSIGN;
VIF_ASSIGN;
- memcpy(__entry->ra, ra, ETH_ALEN);
- __entry->tid = tid;
+ CHANDEF_ASSIGN(chandef);
),
TP_printk(
- VIF_PR_FMT " ra:%pM tid:%d",
- VIF_PR_ARG, __entry->ra, __entry->tid
+ LOCAL_PR_FMT VIF_PR_FMT " channel switch to " CHANDEF_PR_FMT,
+ LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG
)
);
-TRACE_EVENT(api_stop_tx_ba_session,
- TP_PROTO(struct ieee80211_sta *sta, u16 tid),
+TRACE_EVENT(drv_pre_channel_switch,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_channel_switch *ch_switch),
- TP_ARGS(sta, tid),
+ TP_ARGS(local, sdata, ch_switch),
TP_STRUCT__entry(
- STA_ENTRY
- __field(u16, tid)
+ LOCAL_ENTRY
+ VIF_ENTRY
+ CHANDEF_ENTRY
+ __field(u64, timestamp)
+ __field(u32, device_timestamp)
+ __field(bool, block_tx)
+ __field(u8, count)
),
TP_fast_assign(
- STA_ASSIGN;
- __entry->tid = tid;
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ CHANDEF_ASSIGN(&ch_switch->chandef)
+ __entry->timestamp = ch_switch->timestamp;
+ __entry->device_timestamp = ch_switch->device_timestamp;
+ __entry->block_tx = ch_switch->block_tx;
+ __entry->count = ch_switch->count;
),
TP_printk(
- STA_PR_FMT " tid:%d",
- STA_PR_ARG, __entry->tid
+ LOCAL_PR_FMT VIF_PR_FMT " prepare channel switch to "
+ CHANDEF_PR_FMT " count:%d block_tx:%d timestamp:%llu",
+ LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->count,
+ __entry->block_tx, __entry->timestamp
)
);
-TRACE_EVENT(api_stop_tx_ba_cb,
- TP_PROTO(struct ieee80211_sub_if_data *sdata, const u8 *ra, u16 tid),
+DEFINE_EVENT(local_sdata_evt, drv_post_channel_switch,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata),
+ TP_ARGS(local, sdata)
+);
- TP_ARGS(sdata, ra, tid),
+DEFINE_EVENT(local_sdata_evt, drv_abort_channel_switch,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata),
+ TP_ARGS(local, sdata)
+);
+
+TRACE_EVENT(drv_channel_switch_rx_beacon,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_channel_switch *ch_switch),
+
+ TP_ARGS(local, sdata, ch_switch),
TP_STRUCT__entry(
+ LOCAL_ENTRY
VIF_ENTRY
- __array(u8, ra, ETH_ALEN)
- __field(u16, tid)
+ CHANDEF_ENTRY
+ __field(u64, timestamp)
+ __field(u32, device_timestamp)
+ __field(bool, block_tx)
+ __field(u8, count)
),
TP_fast_assign(
+ LOCAL_ASSIGN;
VIF_ASSIGN;
- memcpy(__entry->ra, ra, ETH_ALEN);
- __entry->tid = tid;
+ CHANDEF_ASSIGN(&ch_switch->chandef)
+ __entry->timestamp = ch_switch->timestamp;
+ __entry->device_timestamp = ch_switch->device_timestamp;
+ __entry->block_tx = ch_switch->block_tx;
+ __entry->count = ch_switch->count;
),
TP_printk(
- VIF_PR_FMT " ra:%pM tid:%d",
- VIF_PR_ARG, __entry->ra, __entry->tid
+ LOCAL_PR_FMT VIF_PR_FMT
+ " received a channel switch beacon to "
+ CHANDEF_PR_FMT " count:%d block_tx:%d timestamp:%llu",
+ LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->count,
+ __entry->block_tx, __entry->timestamp
)
);
-DEFINE_EVENT(local_only_evt, api_restart_hw,
- TP_PROTO(struct ieee80211_local *local),
- TP_ARGS(local)
-);
-
-TRACE_EVENT(api_beacon_loss,
- TP_PROTO(struct ieee80211_sub_if_data *sdata),
+TRACE_EVENT(drv_get_txpower,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ int dbm, int ret),
- TP_ARGS(sdata),
+ TP_ARGS(local, sdata, dbm, ret),
TP_STRUCT__entry(
+ LOCAL_ENTRY
VIF_ENTRY
+ __field(int, dbm)
+ __field(int, ret)
),
TP_fast_assign(
+ LOCAL_ASSIGN;
VIF_ASSIGN;
+ __entry->dbm = dbm;
+ __entry->ret = ret;
),
TP_printk(
- VIF_PR_FMT,
- VIF_PR_ARG
+ LOCAL_PR_FMT VIF_PR_FMT " dbm:%d ret:%d",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->dbm, __entry->ret
)
);
-TRACE_EVENT(api_connection_loss,
- TP_PROTO(struct ieee80211_sub_if_data *sdata),
+TRACE_EVENT(drv_tdls_channel_switch,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta, u8 oper_class,
+ struct cfg80211_chan_def *chandef),
- TP_ARGS(sdata),
+ TP_ARGS(local, sdata, sta, oper_class, chandef),
TP_STRUCT__entry(
+ LOCAL_ENTRY
VIF_ENTRY
+ STA_ENTRY
+ __field(u8, oper_class)
+ CHANDEF_ENTRY
),
TP_fast_assign(
+ LOCAL_ASSIGN;
VIF_ASSIGN;
+ STA_ASSIGN;
+ __entry->oper_class = oper_class;
+ CHANDEF_ASSIGN(chandef)
),
TP_printk(
- VIF_PR_FMT,
- VIF_PR_ARG
+ LOCAL_PR_FMT VIF_PR_FMT " tdls channel switch to"
+ CHANDEF_PR_FMT " oper_class:%d " STA_PR_FMT,
+ LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->oper_class,
+ STA_PR_ARG
)
);
-TRACE_EVENT(api_disconnect,
- TP_PROTO(struct ieee80211_sub_if_data *sdata, bool reconnect),
+TRACE_EVENT(drv_tdls_cancel_channel_switch,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta),
- TP_ARGS(sdata, reconnect),
+ TP_ARGS(local, sdata, sta),
TP_STRUCT__entry(
+ LOCAL_ENTRY
VIF_ENTRY
- __field(int, reconnect)
+ STA_ENTRY
),
TP_fast_assign(
+ LOCAL_ASSIGN;
VIF_ASSIGN;
- __entry->reconnect = reconnect;
+ STA_ASSIGN;
),
TP_printk(
- VIF_PR_FMT " reconnect:%d",
- VIF_PR_ARG, __entry->reconnect
+ LOCAL_PR_FMT VIF_PR_FMT
+ " tdls cancel channel switch with " STA_PR_FMT,
+ LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG
)
);
-TRACE_EVENT(api_cqm_rssi_notify,
- TP_PROTO(struct ieee80211_sub_if_data *sdata,
- enum nl80211_cqm_rssi_threshold_event rssi_event,
- s32 rssi_level),
+TRACE_EVENT(drv_tdls_recv_channel_switch,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_tdls_ch_sw_params *params),
- TP_ARGS(sdata, rssi_event, rssi_level),
+ TP_ARGS(local, sdata, params),
TP_STRUCT__entry(
+ LOCAL_ENTRY
VIF_ENTRY
- __field(u32, rssi_event)
- __field(s32, rssi_level)
+ __field(u8, action_code)
+ STA_ENTRY
+ CHANDEF_ENTRY
+ __field(u32, status)
+ __field(bool, peer_initiator)
+ __field(u32, timestamp)
+ __field(u16, switch_time)
+ __field(u16, switch_timeout)
),
TP_fast_assign(
+ LOCAL_ASSIGN;
VIF_ASSIGN;
- __entry->rssi_event = rssi_event;
- __entry->rssi_level = rssi_level;
+ STA_NAMED_ASSIGN(params->sta);
+ CHANDEF_ASSIGN(params->chandef)
+ __entry->peer_initiator = params->sta->tdls_initiator;
+ __entry->action_code = params->action_code;
+ __entry->status = params->status;
+ __entry->timestamp = params->timestamp;
+ __entry->switch_time = params->switch_time;
+ __entry->switch_timeout = params->switch_timeout;
),
TP_printk(
- VIF_PR_FMT " event:%d rssi:%d",
- VIF_PR_ARG, __entry->rssi_event, __entry->rssi_level
+ LOCAL_PR_FMT VIF_PR_FMT " received tdls channel switch packet"
+ " action:%d status:%d time:%d switch time:%d switch"
+ " timeout:%d initiator: %d chan:" CHANDEF_PR_FMT STA_PR_FMT,
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->action_code, __entry->status,
+ __entry->timestamp, __entry->switch_time,
+ __entry->switch_timeout, __entry->peer_initiator,
+ CHANDEF_PR_ARG, STA_PR_ARG
)
);
-DEFINE_EVENT(local_sdata_evt, api_cqm_beacon_loss_notify,
+TRACE_EVENT(drv_wake_tx_queue,
TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata),
- TP_ARGS(local, sdata)
-);
-
-TRACE_EVENT(api_scan_completed,
- TP_PROTO(struct ieee80211_local *local, bool aborted),
+ struct ieee80211_sub_if_data *sdata,
+ struct txq_info *txq),
- TP_ARGS(local, aborted),
+ TP_ARGS(local, sdata, txq),
TP_STRUCT__entry(
LOCAL_ENTRY
- __field(bool, aborted)
+ VIF_ENTRY
+ STA_ENTRY
+ __field(u8, ac)
+ __field(u8, tid)
),
TP_fast_assign(
+ struct ieee80211_sta *sta = txq->txq.sta;
+
LOCAL_ASSIGN;
- __entry->aborted = aborted;
+ VIF_ASSIGN;
+ STA_ASSIGN;
+ __entry->ac = txq->txq.ac;
+ __entry->tid = txq->txq.tid;
),
TP_printk(
- LOCAL_PR_FMT " aborted:%d",
- LOCAL_PR_ARG, __entry->aborted
+ LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " ac:%d tid:%d",
+ LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->ac, __entry->tid
)
);
-TRACE_EVENT(api_sched_scan_results,
- TP_PROTO(struct ieee80211_local *local),
+TRACE_EVENT(drv_get_ftm_responder_stats,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_ftm_responder_stats *ftm_stats),
- TP_ARGS(local),
+ TP_ARGS(local, sdata, ftm_stats),
TP_STRUCT__entry(
LOCAL_ENTRY
+ VIF_ENTRY
),
TP_fast_assign(
LOCAL_ASSIGN;
+ VIF_ASSIGN;
),
TP_printk(
- LOCAL_PR_FMT, LOCAL_PR_ARG
+ LOCAL_PR_FMT VIF_PR_FMT,
+ LOCAL_PR_ARG, VIF_PR_ARG
)
);
-TRACE_EVENT(api_sched_scan_stopped,
- TP_PROTO(struct ieee80211_local *local),
+DEFINE_EVENT(local_sdata_addr_evt, drv_update_vif_offload,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata),
+ TP_ARGS(local, sdata)
+);
- TP_ARGS(local),
+DECLARE_EVENT_CLASS(sta_flag_evt,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta, bool enabled),
+
+ TP_ARGS(local, sdata, sta, enabled),
TP_STRUCT__entry(
LOCAL_ENTRY
+ VIF_ENTRY
+ STA_ENTRY
+ __field(bool, enabled)
),
TP_fast_assign(
LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ STA_ASSIGN;
+ __entry->enabled = enabled;
),
TP_printk(
- LOCAL_PR_FMT, LOCAL_PR_ARG
+ LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " enabled:%d",
+ LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->enabled
)
);
-TRACE_EVENT(api_sta_block_awake,
+DEFINE_EVENT(sta_flag_evt, drv_sta_set_4addr,
TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sta *sta, bool block),
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta, bool enabled),
- TP_ARGS(local, sta, block),
+ TP_ARGS(local, sdata, sta, enabled)
+);
+
+DEFINE_EVENT(sta_flag_evt, drv_sta_set_decap_offload,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta, bool enabled),
+
+ TP_ARGS(local, sdata, sta, enabled)
+);
+
+TRACE_EVENT(drv_add_twt_setup,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sta *sta,
+ struct ieee80211_twt_setup *twt,
+ struct ieee80211_twt_params *twt_agrt),
+
+ TP_ARGS(local, sta, twt, twt_agrt),
TP_STRUCT__entry(
LOCAL_ENTRY
STA_ENTRY
- __field(bool, block)
+ __field(u8, dialog_token)
+ __field(u8, control)
+ __field(__le16, req_type)
+ __field(__le64, twt)
+ __field(u8, duration)
+ __field(__le16, mantissa)
+ __field(u8, channel)
),
TP_fast_assign(
LOCAL_ASSIGN;
STA_ASSIGN;
- __entry->block = block;
+ __entry->dialog_token = twt->dialog_token;
+ __entry->control = twt->control;
+ __entry->req_type = twt_agrt->req_type;
+ __entry->twt = twt_agrt->twt;
+ __entry->duration = twt_agrt->min_twt_dur;
+ __entry->mantissa = twt_agrt->mantissa;
+ __entry->channel = twt_agrt->channel;
),
TP_printk(
- LOCAL_PR_FMT STA_PR_FMT " block:%d",
- LOCAL_PR_ARG, STA_PR_ARG, __entry->block
+ LOCAL_PR_FMT STA_PR_FMT
+ " token:%d control:0x%02x req_type:0x%04x"
+ " twt:%llu duration:%d mantissa:%d channel:%d",
+ LOCAL_PR_ARG, STA_PR_ARG, __entry->dialog_token,
+ __entry->control, le16_to_cpu(__entry->req_type),
+ le64_to_cpu(__entry->twt), __entry->duration,
+ le16_to_cpu(__entry->mantissa), __entry->channel
)
);
-TRACE_EVENT(api_chswitch_done,
- TP_PROTO(struct ieee80211_sub_if_data *sdata, bool success),
+TRACE_EVENT(drv_twt_teardown_request,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sta *sta, u8 flowid),
- TP_ARGS(sdata, success),
+ TP_ARGS(local, sta, flowid),
TP_STRUCT__entry(
- VIF_ENTRY
- __field(bool, success)
+ LOCAL_ENTRY
+ STA_ENTRY
+ __field(u8, flowid)
),
TP_fast_assign(
- VIF_ASSIGN;
- __entry->success = success;
+ LOCAL_ASSIGN;
+ STA_ASSIGN;
+ __entry->flowid = flowid;
),
TP_printk(
- VIF_PR_FMT " success=%d",
- VIF_PR_ARG, __entry->success
+ LOCAL_PR_FMT STA_PR_FMT " flowid:%d",
+ LOCAL_PR_ARG, STA_PR_ARG, __entry->flowid
)
);
-DEFINE_EVENT(local_only_evt, api_ready_on_channel,
- TP_PROTO(struct ieee80211_local *local),
- TP_ARGS(local)
-);
-
-DEFINE_EVENT(local_only_evt, api_remain_on_channel_expired,
- TP_PROTO(struct ieee80211_local *local),
- TP_ARGS(local)
+DEFINE_EVENT(sta_event, drv_net_fill_forward_path,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta),
+ TP_ARGS(local, sdata, sta)
);
-TRACE_EVENT(api_gtk_rekey_notify,
- TP_PROTO(struct ieee80211_sub_if_data *sdata,
- const u8 *bssid, const u8 *replay_ctr),
+TRACE_EVENT(drv_change_vif_links,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ u16 old_links, u16 new_links),
- TP_ARGS(sdata, bssid, replay_ctr),
+ TP_ARGS(local, sdata, old_links, new_links),
TP_STRUCT__entry(
+ LOCAL_ENTRY
VIF_ENTRY
- __array(u8, bssid, ETH_ALEN)
- __array(u8, replay_ctr, NL80211_REPLAY_CTR_LEN)
+ __field(u16, old_links)
+ __field(u16, new_links)
),
TP_fast_assign(
+ LOCAL_ASSIGN;
VIF_ASSIGN;
- memcpy(__entry->bssid, bssid, ETH_ALEN);
- memcpy(__entry->replay_ctr, replay_ctr, NL80211_REPLAY_CTR_LEN);
+ __entry->old_links = old_links;
+ __entry->new_links = new_links;
),
- TP_printk(VIF_PR_FMT, VIF_PR_ARG)
+ TP_printk(
+ LOCAL_PR_FMT VIF_PR_FMT " old_links:0x%04x, new_links:0x%04x\n",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->old_links, __entry->new_links
+ )
);
-TRACE_EVENT(api_enable_rssi_reports,
- TP_PROTO(struct ieee80211_sub_if_data *sdata,
- int rssi_min_thold, int rssi_max_thold),
+TRACE_EVENT(drv_change_sta_links,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta,
+ u16 old_links, u16 new_links),
- TP_ARGS(sdata, rssi_min_thold, rssi_max_thold),
+ TP_ARGS(local, sdata, sta, old_links, new_links),
TP_STRUCT__entry(
+ LOCAL_ENTRY
VIF_ENTRY
- __field(int, rssi_min_thold)
- __field(int, rssi_max_thold)
+ STA_ENTRY
+ __field(u16, old_links)
+ __field(u16, new_links)
),
TP_fast_assign(
+ LOCAL_ASSIGN;
VIF_ASSIGN;
- __entry->rssi_min_thold = rssi_min_thold;
- __entry->rssi_max_thold = rssi_max_thold;
+ STA_ASSIGN;
+ __entry->old_links = old_links;
+ __entry->new_links = new_links;
),
TP_printk(
- VIF_PR_FMT " rssi_min_thold =%d, rssi_max_thold = %d",
- VIF_PR_ARG, __entry->rssi_min_thold, __entry->rssi_max_thold
+ LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " old_links:0x%04x, new_links:0x%04x\n",
+ LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG,
+ __entry->old_links, __entry->new_links
)
);
-TRACE_EVENT(api_eosp,
- TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sta *sta),
+/*
+ * Tracing for API calls that drivers call.
+ */
- TP_ARGS(local, sta),
+TRACE_EVENT(api_start_tx_ba_session,
+ TP_PROTO(struct ieee80211_sta *sta, u16 tid),
+
+ TP_ARGS(sta, tid),
TP_STRUCT__entry(
- LOCAL_ENTRY
STA_ENTRY
+ __field(u16, tid)
),
TP_fast_assign(
- LOCAL_ASSIGN;
STA_ASSIGN;
+ __entry->tid = tid;
),
TP_printk(
- LOCAL_PR_FMT STA_PR_FMT,
- LOCAL_PR_ARG, STA_PR_ARG
+ STA_PR_FMT " tid:%d",
+ STA_PR_ARG, __entry->tid
)
);
-TRACE_EVENT(api_send_eosp_nullfunc,
- TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sta *sta,
- u8 tid),
+TRACE_EVENT(api_start_tx_ba_cb,
+ TP_PROTO(struct ieee80211_sub_if_data *sdata, const u8 *ra, u16 tid),
- TP_ARGS(local, sta, tid),
+ TP_ARGS(sdata, ra, tid),
TP_STRUCT__entry(
- LOCAL_ENTRY
- STA_ENTRY
- __field(u8, tid)
+ VIF_ENTRY
+ __array(u8, ra, ETH_ALEN)
+ __field(u16, tid)
),
TP_fast_assign(
- LOCAL_ASSIGN;
- STA_ASSIGN;
+ VIF_ASSIGN;
+ memcpy(__entry->ra, ra, ETH_ALEN);
__entry->tid = tid;
),
TP_printk(
- LOCAL_PR_FMT STA_PR_FMT " tid:%d",
- LOCAL_PR_ARG, STA_PR_ARG, __entry->tid
+ VIF_PR_FMT " ra:%pM tid:%d",
+ VIF_PR_ARG, __entry->ra, __entry->tid
)
);
-TRACE_EVENT(api_sta_set_buffered,
- TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sta *sta,
- u8 tid, bool buffered),
+TRACE_EVENT(api_stop_tx_ba_session,
+ TP_PROTO(struct ieee80211_sta *sta, u16 tid),
- TP_ARGS(local, sta, tid, buffered),
+ TP_ARGS(sta, tid),
TP_STRUCT__entry(
- LOCAL_ENTRY
STA_ENTRY
- __field(u8, tid)
- __field(bool, buffered)
+ __field(u16, tid)
),
TP_fast_assign(
- LOCAL_ASSIGN;
STA_ASSIGN;
__entry->tid = tid;
- __entry->buffered = buffered;
),
TP_printk(
- LOCAL_PR_FMT STA_PR_FMT " tid:%d buffered:%d",
- LOCAL_PR_ARG, STA_PR_ARG, __entry->tid, __entry->buffered
+ STA_PR_FMT " tid:%d",
+ STA_PR_ARG, __entry->tid
)
);
-/*
- * Tracing for internal functions
- * (which may also be called in response to driver calls)
- */
-
-TRACE_EVENT(wake_queue,
- TP_PROTO(struct ieee80211_local *local, u16 queue,
- enum queue_stop_reason reason),
+TRACE_EVENT(api_stop_tx_ba_cb,
+ TP_PROTO(struct ieee80211_sub_if_data *sdata, const u8 *ra, u16 tid),
- TP_ARGS(local, queue, reason),
+ TP_ARGS(sdata, ra, tid),
TP_STRUCT__entry(
- LOCAL_ENTRY
- __field(u16, queue)
- __field(u32, reason)
+ VIF_ENTRY
+ __array(u8, ra, ETH_ALEN)
+ __field(u16, tid)
),
TP_fast_assign(
- LOCAL_ASSIGN;
- __entry->queue = queue;
- __entry->reason = reason;
+ VIF_ASSIGN;
+ memcpy(__entry->ra, ra, ETH_ALEN);
+ __entry->tid = tid;
),
TP_printk(
- LOCAL_PR_FMT " queue:%d, reason:%d",
- LOCAL_PR_ARG, __entry->queue, __entry->reason
+ VIF_PR_FMT " ra:%pM tid:%d",
+ VIF_PR_ARG, __entry->ra, __entry->tid
)
);
-TRACE_EVENT(stop_queue,
- TP_PROTO(struct ieee80211_local *local, u16 queue,
- enum queue_stop_reason reason),
+DEFINE_EVENT(local_only_evt, api_restart_hw,
+ TP_PROTO(struct ieee80211_local *local),
+ TP_ARGS(local)
+);
- TP_ARGS(local, queue, reason),
+TRACE_EVENT(api_beacon_loss,
+ TP_PROTO(struct ieee80211_sub_if_data *sdata),
+
+ TP_ARGS(sdata),
TP_STRUCT__entry(
- LOCAL_ENTRY
- __field(u16, queue)
- __field(u32, reason)
+ VIF_ENTRY
),
TP_fast_assign(
- LOCAL_ASSIGN;
- __entry->queue = queue;
- __entry->reason = reason;
+ VIF_ASSIGN;
),
TP_printk(
- LOCAL_PR_FMT " queue:%d, reason:%d",
- LOCAL_PR_ARG, __entry->queue, __entry->reason
+ VIF_PR_FMT,
+ VIF_PR_ARG
)
);
-TRACE_EVENT(drv_set_default_unicast_key,
- TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- int key_idx),
+TRACE_EVENT(api_connection_loss,
+ TP_PROTO(struct ieee80211_sub_if_data *sdata),
- TP_ARGS(local, sdata, key_idx),
+ TP_ARGS(sdata),
TP_STRUCT__entry(
- LOCAL_ENTRY
VIF_ENTRY
- __field(int, key_idx)
),
TP_fast_assign(
- LOCAL_ASSIGN;
VIF_ASSIGN;
- __entry->key_idx = key_idx;
),
- TP_printk(LOCAL_PR_FMT VIF_PR_FMT " key_idx:%d",
- LOCAL_PR_ARG, VIF_PR_ARG, __entry->key_idx)
+ TP_printk(
+ VIF_PR_FMT,
+ VIF_PR_ARG
+ )
);
-TRACE_EVENT(api_radar_detected,
- TP_PROTO(struct ieee80211_local *local),
+TRACE_EVENT(api_disconnect,
+ TP_PROTO(struct ieee80211_sub_if_data *sdata, bool reconnect),
- TP_ARGS(local),
+ TP_ARGS(sdata, reconnect),
TP_STRUCT__entry(
- LOCAL_ENTRY
+ VIF_ENTRY
+ __field(int, reconnect)
),
TP_fast_assign(
- LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ __entry->reconnect = reconnect;
),
TP_printk(
- LOCAL_PR_FMT " radar detected",
- LOCAL_PR_ARG
+ VIF_PR_FMT " reconnect:%d",
+ VIF_PR_ARG, __entry->reconnect
)
);
-TRACE_EVENT(drv_channel_switch_beacon,
- TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct cfg80211_chan_def *chandef),
+TRACE_EVENT(api_cqm_rssi_notify,
+ TP_PROTO(struct ieee80211_sub_if_data *sdata,
+ enum nl80211_cqm_rssi_threshold_event rssi_event,
+ s32 rssi_level),
- TP_ARGS(local, sdata, chandef),
+ TP_ARGS(sdata, rssi_event, rssi_level),
TP_STRUCT__entry(
- LOCAL_ENTRY
VIF_ENTRY
- CHANDEF_ENTRY
+ __field(u32, rssi_event)
+ __field(s32, rssi_level)
),
TP_fast_assign(
- LOCAL_ASSIGN;
VIF_ASSIGN;
- CHANDEF_ASSIGN(chandef);
+ __entry->rssi_event = rssi_event;
+ __entry->rssi_level = rssi_level;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT " channel switch to " CHANDEF_PR_FMT,
- LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG
+ VIF_PR_FMT " event:%d rssi:%d",
+ VIF_PR_ARG, __entry->rssi_event, __entry->rssi_level
)
);
-TRACE_EVENT(drv_pre_channel_switch,
+DEFINE_EVENT(local_sdata_evt, api_cqm_beacon_loss_notify,
TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct ieee80211_channel_switch *ch_switch),
+ struct ieee80211_sub_if_data *sdata),
+ TP_ARGS(local, sdata)
+);
- TP_ARGS(local, sdata, ch_switch),
+TRACE_EVENT(api_scan_completed,
+ TP_PROTO(struct ieee80211_local *local, bool aborted),
+
+ TP_ARGS(local, aborted),
TP_STRUCT__entry(
LOCAL_ENTRY
- VIF_ENTRY
- CHANDEF_ENTRY
- __field(u64, timestamp)
- __field(u32, device_timestamp)
- __field(bool, block_tx)
- __field(u8, count)
+ __field(bool, aborted)
),
TP_fast_assign(
LOCAL_ASSIGN;
- VIF_ASSIGN;
- CHANDEF_ASSIGN(&ch_switch->chandef)
- __entry->timestamp = ch_switch->timestamp;
- __entry->device_timestamp = ch_switch->device_timestamp;
- __entry->block_tx = ch_switch->block_tx;
- __entry->count = ch_switch->count;
+ __entry->aborted = aborted;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT " prepare channel switch to "
- CHANDEF_PR_FMT " count:%d block_tx:%d timestamp:%llu",
- LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->count,
- __entry->block_tx, __entry->timestamp
+ LOCAL_PR_FMT " aborted:%d",
+ LOCAL_PR_ARG, __entry->aborted
)
);
-DEFINE_EVENT(local_sdata_evt, drv_post_channel_switch,
- TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata),
- TP_ARGS(local, sdata)
-);
-
-DEFINE_EVENT(local_sdata_evt, drv_abort_channel_switch,
- TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata),
- TP_ARGS(local, sdata)
-);
-
-TRACE_EVENT(drv_channel_switch_rx_beacon,
- TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct ieee80211_channel_switch *ch_switch),
+TRACE_EVENT(api_sched_scan_results,
+ TP_PROTO(struct ieee80211_local *local),
- TP_ARGS(local, sdata, ch_switch),
+ TP_ARGS(local),
TP_STRUCT__entry(
LOCAL_ENTRY
- VIF_ENTRY
- CHANDEF_ENTRY
- __field(u64, timestamp)
- __field(u32, device_timestamp)
- __field(bool, block_tx)
- __field(u8, count)
),
TP_fast_assign(
LOCAL_ASSIGN;
- VIF_ASSIGN;
- CHANDEF_ASSIGN(&ch_switch->chandef)
- __entry->timestamp = ch_switch->timestamp;
- __entry->device_timestamp = ch_switch->device_timestamp;
- __entry->block_tx = ch_switch->block_tx;
- __entry->count = ch_switch->count;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT
- " received a channel switch beacon to "
- CHANDEF_PR_FMT " count:%d block_tx:%d timestamp:%llu",
- LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->count,
- __entry->block_tx, __entry->timestamp
+ LOCAL_PR_FMT, LOCAL_PR_ARG
)
);
-TRACE_EVENT(drv_get_txpower,
- TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- int dbm, int ret),
+TRACE_EVENT(api_sched_scan_stopped,
+ TP_PROTO(struct ieee80211_local *local),
- TP_ARGS(local, sdata, dbm, ret),
+ TP_ARGS(local),
TP_STRUCT__entry(
LOCAL_ENTRY
- VIF_ENTRY
- __field(int, dbm)
- __field(int, ret)
),
TP_fast_assign(
LOCAL_ASSIGN;
- VIF_ASSIGN;
- __entry->dbm = dbm;
- __entry->ret = ret;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT " dbm:%d ret:%d",
- LOCAL_PR_ARG, VIF_PR_ARG, __entry->dbm, __entry->ret
+ LOCAL_PR_FMT, LOCAL_PR_ARG
)
);
-TRACE_EVENT(drv_tdls_channel_switch,
+TRACE_EVENT(api_sta_block_awake,
TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct ieee80211_sta *sta, u8 oper_class,
- struct cfg80211_chan_def *chandef),
+ struct ieee80211_sta *sta, bool block),
- TP_ARGS(local, sdata, sta, oper_class, chandef),
+ TP_ARGS(local, sta, block),
TP_STRUCT__entry(
LOCAL_ENTRY
- VIF_ENTRY
STA_ENTRY
- __field(u8, oper_class)
- CHANDEF_ENTRY
+ __field(bool, block)
),
TP_fast_assign(
LOCAL_ASSIGN;
- VIF_ASSIGN;
STA_ASSIGN;
- __entry->oper_class = oper_class;
- CHANDEF_ASSIGN(chandef)
+ __entry->block = block;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT " tdls channel switch to"
- CHANDEF_PR_FMT " oper_class:%d " STA_PR_FMT,
- LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->oper_class,
- STA_PR_ARG
+ LOCAL_PR_FMT STA_PR_FMT " block:%d",
+ LOCAL_PR_ARG, STA_PR_ARG, __entry->block
)
);
-TRACE_EVENT(drv_tdls_cancel_channel_switch,
- TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct ieee80211_sta *sta),
+TRACE_EVENT(api_chswitch_done,
+ TP_PROTO(struct ieee80211_sub_if_data *sdata, bool success),
- TP_ARGS(local, sdata, sta),
+ TP_ARGS(sdata, success),
TP_STRUCT__entry(
- LOCAL_ENTRY
VIF_ENTRY
- STA_ENTRY
+ __field(bool, success)
),
TP_fast_assign(
- LOCAL_ASSIGN;
VIF_ASSIGN;
- STA_ASSIGN;
+ __entry->success = success;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT
- " tdls cancel channel switch with " STA_PR_FMT,
- LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG
+ VIF_PR_FMT " success=%d",
+ VIF_PR_ARG, __entry->success
)
);
-TRACE_EVENT(drv_tdls_recv_channel_switch,
- TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct ieee80211_tdls_ch_sw_params *params),
+DEFINE_EVENT(local_only_evt, api_ready_on_channel,
+ TP_PROTO(struct ieee80211_local *local),
+ TP_ARGS(local)
+);
- TP_ARGS(local, sdata, params),
+DEFINE_EVENT(local_only_evt, api_remain_on_channel_expired,
+ TP_PROTO(struct ieee80211_local *local),
+ TP_ARGS(local)
+);
+
+TRACE_EVENT(api_gtk_rekey_notify,
+ TP_PROTO(struct ieee80211_sub_if_data *sdata,
+ const u8 *bssid, const u8 *replay_ctr),
+
+ TP_ARGS(sdata, bssid, replay_ctr),
TP_STRUCT__entry(
- LOCAL_ENTRY
VIF_ENTRY
- __field(u8, action_code)
- STA_ENTRY
- CHANDEF_ENTRY
- __field(u32, status)
- __field(bool, peer_initiator)
- __field(u32, timestamp)
- __field(u16, switch_time)
- __field(u16, switch_timeout)
+ __array(u8, bssid, ETH_ALEN)
+ __array(u8, replay_ctr, NL80211_REPLAY_CTR_LEN)
),
TP_fast_assign(
- LOCAL_ASSIGN;
VIF_ASSIGN;
- STA_NAMED_ASSIGN(params->sta);
- CHANDEF_ASSIGN(params->chandef)
- __entry->peer_initiator = params->sta->tdls_initiator;
- __entry->action_code = params->action_code;
- __entry->status = params->status;
- __entry->timestamp = params->timestamp;
- __entry->switch_time = params->switch_time;
- __entry->switch_timeout = params->switch_timeout;
+ memcpy(__entry->bssid, bssid, ETH_ALEN);
+ memcpy(__entry->replay_ctr, replay_ctr, NL80211_REPLAY_CTR_LEN);
+ ),
+
+ TP_printk(VIF_PR_FMT, VIF_PR_ARG)
+);
+
+TRACE_EVENT(api_enable_rssi_reports,
+ TP_PROTO(struct ieee80211_sub_if_data *sdata,
+ int rssi_min_thold, int rssi_max_thold),
+
+ TP_ARGS(sdata, rssi_min_thold, rssi_max_thold),
+
+ TP_STRUCT__entry(
+ VIF_ENTRY
+ __field(int, rssi_min_thold)
+ __field(int, rssi_max_thold)
+ ),
+
+ TP_fast_assign(
+ VIF_ASSIGN;
+ __entry->rssi_min_thold = rssi_min_thold;
+ __entry->rssi_max_thold = rssi_max_thold;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT " received tdls channel switch packet"
- " action:%d status:%d time:%d switch time:%d switch"
- " timeout:%d initiator: %d chan:" CHANDEF_PR_FMT STA_PR_FMT,
- LOCAL_PR_ARG, VIF_PR_ARG, __entry->action_code, __entry->status,
- __entry->timestamp, __entry->switch_time,
- __entry->switch_timeout, __entry->peer_initiator,
- CHANDEF_PR_ARG, STA_PR_ARG
+ VIF_PR_FMT " rssi_min_thold =%d, rssi_max_thold = %d",
+ VIF_PR_ARG, __entry->rssi_min_thold, __entry->rssi_max_thold
)
);
-TRACE_EVENT(drv_wake_tx_queue,
+TRACE_EVENT(api_eosp,
TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct txq_info *txq),
+ struct ieee80211_sta *sta),
- TP_ARGS(local, sdata, txq),
+ TP_ARGS(local, sta),
TP_STRUCT__entry(
LOCAL_ENTRY
- VIF_ENTRY
STA_ENTRY
- __field(u8, ac)
- __field(u8, tid)
),
TP_fast_assign(
- struct ieee80211_sta *sta = txq->txq.sta;
-
LOCAL_ASSIGN;
- VIF_ASSIGN;
STA_ASSIGN;
- __entry->ac = txq->txq.ac;
- __entry->tid = txq->txq.tid;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " ac:%d tid:%d",
- LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->ac, __entry->tid
+ LOCAL_PR_FMT STA_PR_FMT,
+ LOCAL_PR_ARG, STA_PR_ARG
)
);
-TRACE_EVENT(drv_get_ftm_responder_stats,
+TRACE_EVENT(api_send_eosp_nullfunc,
TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct cfg80211_ftm_responder_stats *ftm_stats),
+ struct ieee80211_sta *sta,
+ u8 tid),
- TP_ARGS(local, sdata, ftm_stats),
+ TP_ARGS(local, sta, tid),
TP_STRUCT__entry(
LOCAL_ENTRY
- VIF_ENTRY
+ STA_ENTRY
+ __field(u8, tid)
),
TP_fast_assign(
LOCAL_ASSIGN;
- VIF_ASSIGN;
+ STA_ASSIGN;
+ __entry->tid = tid;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT,
- LOCAL_PR_ARG, VIF_PR_ARG
+ LOCAL_PR_FMT STA_PR_FMT " tid:%d",
+ LOCAL_PR_ARG, STA_PR_ARG, __entry->tid
)
);
-DEFINE_EVENT(local_sdata_addr_evt, drv_update_vif_offload,
- TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata),
- TP_ARGS(local, sdata)
-);
-
-DECLARE_EVENT_CLASS(sta_flag_evt,
+TRACE_EVENT(api_sta_set_buffered,
TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct ieee80211_sta *sta, bool enabled),
+ struct ieee80211_sta *sta,
+ u8 tid, bool buffered),
- TP_ARGS(local, sdata, sta, enabled),
+ TP_ARGS(local, sta, tid, buffered),
TP_STRUCT__entry(
LOCAL_ENTRY
- VIF_ENTRY
STA_ENTRY
- __field(bool, enabled)
+ __field(u8, tid)
+ __field(bool, buffered)
),
TP_fast_assign(
LOCAL_ASSIGN;
- VIF_ASSIGN;
STA_ASSIGN;
- __entry->enabled = enabled;
+ __entry->tid = tid;
+ __entry->buffered = buffered;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " enabled:%d",
- LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->enabled
+ LOCAL_PR_FMT STA_PR_FMT " tid:%d buffered:%d",
+ LOCAL_PR_ARG, STA_PR_ARG, __entry->tid, __entry->buffered
)
);
-DEFINE_EVENT(sta_flag_evt, drv_sta_set_4addr,
- TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct ieee80211_sta *sta, bool enabled),
+TRACE_EVENT(api_radar_detected,
+ TP_PROTO(struct ieee80211_local *local),
- TP_ARGS(local, sdata, sta, enabled)
-);
+ TP_ARGS(local),
-DEFINE_EVENT(sta_flag_evt, drv_sta_set_decap_offload,
- TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct ieee80211_sta *sta, bool enabled),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ ),
- TP_ARGS(local, sdata, sta, enabled)
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT " radar detected",
+ LOCAL_PR_ARG
+ )
);
-TRACE_EVENT(drv_add_twt_setup,
- TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sta *sta,
- struct ieee80211_twt_setup *twt,
- struct ieee80211_twt_params *twt_agrt),
+/*
+ * Tracing for internal functions
+ * (which may also be called in response to driver calls)
+ */
- TP_ARGS(local, sta, twt, twt_agrt),
+TRACE_EVENT(wake_queue,
+ TP_PROTO(struct ieee80211_local *local, u16 queue,
+ enum queue_stop_reason reason),
+
+ TP_ARGS(local, queue, reason),
TP_STRUCT__entry(
LOCAL_ENTRY
- STA_ENTRY
- __field(u8, dialog_token)
- __field(u8, control)
- __field(__le16, req_type)
- __field(__le64, twt)
- __field(u8, duration)
- __field(__le16, mantissa)
- __field(u8, channel)
+ __field(u16, queue)
+ __field(u32, reason)
),
TP_fast_assign(
LOCAL_ASSIGN;
- STA_ASSIGN;
- __entry->dialog_token = twt->dialog_token;
- __entry->control = twt->control;
- __entry->req_type = twt_agrt->req_type;
- __entry->twt = twt_agrt->twt;
- __entry->duration = twt_agrt->min_twt_dur;
- __entry->mantissa = twt_agrt->mantissa;
- __entry->channel = twt_agrt->channel;
+ __entry->queue = queue;
+ __entry->reason = reason;
),
TP_printk(
- LOCAL_PR_FMT STA_PR_FMT
- " token:%d control:0x%02x req_type:0x%04x"
- " twt:%llu duration:%d mantissa:%d channel:%d",
- LOCAL_PR_ARG, STA_PR_ARG, __entry->dialog_token,
- __entry->control, le16_to_cpu(__entry->req_type),
- le64_to_cpu(__entry->twt), __entry->duration,
- le16_to_cpu(__entry->mantissa), __entry->channel
+ LOCAL_PR_FMT " queue:%d, reason:%d",
+ LOCAL_PR_ARG, __entry->queue, __entry->reason
)
);
-TRACE_EVENT(drv_twt_teardown_request,
- TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sta *sta, u8 flowid),
+TRACE_EVENT(stop_queue,
+ TP_PROTO(struct ieee80211_local *local, u16 queue,
+ enum queue_stop_reason reason),
- TP_ARGS(local, sta, flowid),
+ TP_ARGS(local, queue, reason),
TP_STRUCT__entry(
LOCAL_ENTRY
- STA_ENTRY
- __field(u8, flowid)
+ __field(u16, queue)
+ __field(u32, reason)
),
TP_fast_assign(
LOCAL_ASSIGN;
- STA_ASSIGN;
- __entry->flowid = flowid;
+ __entry->queue = queue;
+ __entry->reason = reason;
),
TP_printk(
- LOCAL_PR_FMT STA_PR_FMT " flowid:%d",
- LOCAL_PR_ARG, STA_PR_ARG, __entry->flowid
+ LOCAL_PR_FMT " queue:%d, reason:%d",
+ LOCAL_PR_ARG, __entry->queue, __entry->reason
)
);
-DEFINE_EVENT(sta_event, drv_net_fill_forward_path,
- TP_PROTO(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct ieee80211_sta *sta),
- TP_ARGS(local, sdata, sta)
-);
-
#endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
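The drv_* events removed above and the api_* and queue events added in their place all follow the same TRACE_EVENT split: TP_STRUCT__entry declares a fixed record layout, TP_fast_assign fills it on the hot path with raw fields only, and TP_printk formats the record lazily when the trace buffer is read. A minimal userspace sketch of that split (plain C, not kernel code; the struct and function names are invented for illustration):

#include <stdio.h>
#include <string.h>

struct trace_stop_queue {        /* mirrors TP_STRUCT__entry */
	char wiphy[16];          /* stand-in for LOCAL_ENTRY */
	unsigned short queue;
	unsigned int reason;
};

/* TP_fast_assign analogue: copy raw fields, no formatting */
static void record_stop_queue(struct trace_stop_queue *e,
			      const char *wiphy, unsigned short queue,
			      unsigned int reason)
{
	strncpy(e->wiphy, wiphy, sizeof(e->wiphy) - 1);
	e->wiphy[sizeof(e->wiphy) - 1] = '\0';
	e->queue = queue;
	e->reason = reason;
}

/* TP_printk analogue: formatting happens only at read time */
static void print_stop_queue(const struct trace_stop_queue *e)
{
	printf("%s queue:%d, reason:%d\n", e->wiphy, e->queue, e->reason);
}

int main(void)
{
	struct trace_stop_queue e;

	record_stop_queue(&e, "phy0", 2, 1);
	print_stop_queue(&e);
	return 0;
}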
diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h
index 40141df09f25..c9dbe9aab7bd 100644
--- a/net/mac80211/trace_msg.h
+++ b/net/mac80211/trace_msg.h
@@ -24,13 +24,11 @@ DECLARE_EVENT_CLASS(mac80211_msg_event,
TP_ARGS(vaf),
TP_STRUCT__entry(
- __dynamic_array(char, msg, MAX_MSG_LEN)
+ __vstring(msg, vaf->fmt, vaf->va)
),
TP_fast_assign(
- WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
- MAX_MSG_LEN, vaf->fmt,
- *vaf->va) >= MAX_MSG_LEN);
+ __assign_vstr(msg, vaf->fmt, vaf->va);
),
TP_printk("%s", __get_str(msg))
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 6d054fed062f..874f2a4d831d 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*
* Transmit and frame generation functions.
*/
@@ -18,7 +18,6 @@
#include <linux/bitmap.h>
#include <linux/rcupdate.h>
#include <linux/export.h>
-#include <linux/timekeeping.h>
#include <net/net_namespace.h>
#include <net/ieee80211_radiotap.h>
#include <net/cfg80211.h>
@@ -57,7 +56,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
return 0;
rcu_read_lock();
- chanctx_conf = rcu_dereference(tx->sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(tx->sdata->vif.bss_conf.chanctx_conf);
if (chanctx_conf) {
shift = ieee80211_chandef_get_shift(&chanctx_conf->def);
rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
@@ -149,7 +148,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
case NL80211_BAND_2GHZ:
case NL80211_BAND_LC: {
u32 flag;
- if (tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
+ if (tx->sdata->deflink.operating_11g_mode)
flag = IEEE80211_RATE_MANDATORY_G;
else
flag = IEEE80211_RATE_MANDATORY_B;
@@ -577,6 +576,51 @@ ieee80211_tx_h_check_control_port_protocol(struct ieee80211_tx_data *tx)
return TX_CONTINUE;
}
+static struct ieee80211_key *
+ieee80211_select_link_key(struct ieee80211_tx_data *tx)
+{
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+ enum {
+ USE_NONE,
+ USE_MGMT_KEY,
+ USE_MCAST_KEY,
+ } which_key = USE_NONE;
+ struct ieee80211_link_data *link;
+ unsigned int link_id;
+
+ if (ieee80211_is_group_privacy_action(tx->skb))
+ which_key = USE_MCAST_KEY;
+ else if (ieee80211_is_mgmt(hdr->frame_control) &&
+ is_multicast_ether_addr(hdr->addr1) &&
+ ieee80211_is_robust_mgmt_frame(tx->skb))
+ which_key = USE_MGMT_KEY;
+ else if (is_multicast_ether_addr(hdr->addr1))
+ which_key = USE_MCAST_KEY;
+ else
+ return NULL;
+
+ link_id = u32_get_bits(info->control.flags, IEEE80211_TX_CTRL_MLO_LINK);
+ if (link_id == IEEE80211_LINK_UNSPECIFIED) {
+ link = &tx->sdata->deflink;
+ } else {
+ link = rcu_dereference(tx->sdata->link[link_id]);
+ if (!link)
+ return NULL;
+ }
+
+ switch (which_key) {
+ case USE_NONE:
+ break;
+ case USE_MGMT_KEY:
+ return rcu_dereference(link->default_mgmt_key);
+ case USE_MCAST_KEY:
+ return rcu_dereference(link->default_multicast_key);
+ }
+
+ return NULL;
+}
+
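+ieee80211_select_link_key() factors the old default-key fallbacks into a classify-then-lookup step that is MLO-link aware: first decide which class of key the frame needs, then resolve the key from the addressed link (or the default link when the link ID is unspecified). A self-contained sketch of that control flow (userspace C; the link table, key names and LINK_UNSPECIFIED value are illustrative, not mac80211's types):

#include <stdio.h>

#define LINK_UNSPECIFIED 0xf
#define MAX_LINKS 15

enum key_use { USE_NONE, USE_MGMT_KEY, USE_MCAST_KEY };

struct link_keys {
	const char *default_mgmt_key;
	const char *default_multicast_key;
};

static const char *select_link_key(const struct link_keys links[],
				   const struct link_keys *deflink,
				   unsigned int link_id,
				   int multicast, int robust_mgmt)
{
	enum key_use which = USE_NONE;
	const struct link_keys *link;

	if (multicast && robust_mgmt)
		which = USE_MGMT_KEY;
	else if (multicast)
		which = USE_MCAST_KEY;
	else
		return NULL;	/* unicast: PTK is handled elsewhere */

	link = (link_id == LINK_UNSPECIFIED) ? deflink : &links[link_id];

	switch (which) {
	case USE_MGMT_KEY:
		return link->default_mgmt_key;
	case USE_MCAST_KEY:
		return link->default_multicast_key;
	default:
		return NULL;
	}
}

int main(void)
{
	struct link_keys links[MAX_LINKS] = {
		[1] = { "igtk-link1", "gtk-link1" },
	};
	struct link_keys deflink = { "igtk-def", "gtk-def" };

	printf("%s\n", select_link_key(links, &deflink, 1, 1, 0));
	printf("%s\n", select_link_key(links, &deflink,
				       LINK_UNSPECIFIED, 1, 1));
	return 0;
}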
static ieee80211_tx_result debug_noinline
ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
{
@@ -592,16 +636,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
if (tx->sta &&
(key = rcu_dereference(tx->sta->ptk[tx->sta->ptk_idx])))
tx->key = key;
- else if (ieee80211_is_group_privacy_action(tx->skb) &&
- (key = rcu_dereference(tx->sdata->default_multicast_key)))
- tx->key = key;
- else if (ieee80211_is_mgmt(hdr->frame_control) &&
- is_multicast_ether_addr(hdr->addr1) &&
- ieee80211_is_robust_mgmt_frame(tx->skb) &&
- (key = rcu_dereference(tx->sdata->default_mgmt_key)))
- tx->key = key;
- else if (is_multicast_ether_addr(hdr->addr1) &&
- (key = rcu_dereference(tx->sdata->default_multicast_key)))
+ else if ((key = ieee80211_select_link_key(tx)))
tx->key = key;
else if (!is_multicast_ether_addr(hdr->addr1) &&
(key = rcu_dereference(tx->sdata->default_unicast_key)))
@@ -768,9 +803,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
if (txrc.reported_rate.idx < 0) {
txrc.reported_rate = tx->rate;
if (tx->sta && ieee80211_is_tx_data(tx->skb))
- tx->sta->tx_stats.last_rate = txrc.reported_rate;
+ tx->sta->deflink.tx_stats.last_rate = txrc.reported_rate;
} else if (tx->sta)
- tx->sta->tx_stats.last_rate = txrc.reported_rate;
+ tx->sta->deflink.tx_stats.last_rate = txrc.reported_rate;
if (ratetbl)
return TX_CONTINUE;
@@ -823,6 +858,16 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
if (info->control.flags & IEEE80211_TX_CTRL_NO_SEQNO)
return TX_CONTINUE;
+ /* SNS11 from 802.11be 10.3.2.14 */
+ if (unlikely(is_multicast_ether_addr(hdr->addr1) &&
+ info->control.vif->valid_links &&
+ info->control.vif->type == NL80211_IFTYPE_AP)) {
+ if (info->control.flags & IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX)
+ tx->sdata->mld_mcast_seq += 0x10;
+ hdr->seq_ctrl = cpu_to_le16(tx->sdata->mld_mcast_seq);
+ return TX_CONTINUE;
+ }
+
/*
* Anything but QoS data that has a sequence number field
* (is long enough) gets a sequence number from the global
@@ -837,7 +882,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
hdr->seq_ctrl = cpu_to_le16(tx->sdata->sequence_number);
tx->sdata->sequence_number += 0x10;
if (tx->sta)
- tx->sta->tx_stats.msdu[IEEE80211_NUM_TIDS]++;
+ tx->sta->deflink.tx_stats.msdu[IEEE80211_NUM_TIDS]++;
return TX_CONTINUE;
}
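The new branch gives MLD multicast its own counter, bumped by 0x10 only on the first-link transmission of each frame. The 0x10 step follows from the seq_ctrl layout: fragment number in bits 0-3, sequence number in bits 4-15, so adding 0x10 advances the sequence number by one. A small demo of that encoding:

#include <stdio.h>
#include <stdint.h>

#define SEQ_FRAG_MASK 0x000f
#define SEQ_SEQ_SHIFT 4

int main(void)
{
	uint16_t mld_mcast_seq = 0;
	int i;

	for (i = 0; i < 3; i++) {
		mld_mcast_seq += 0x10;	/* one bump per frame */
		printf("seq_ctrl=0x%04x seq=%u frag=%u\n",
		       mld_mcast_seq,
		       mld_mcast_seq >> SEQ_SEQ_SHIFT,
		       mld_mcast_seq & SEQ_FRAG_MASK);
	}
	return 0;
}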
@@ -851,7 +896,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
/* include per-STA, per-TID sequence counter */
tid = ieee80211_get_tid(hdr);
- tx->sta->tx_stats.msdu[tid]++;
+ tx->sta->deflink.tx_stats.msdu[tid]++;
hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
@@ -882,7 +927,7 @@ static int ieee80211_fragment(struct ieee80211_tx_data *tx,
rem -= fraglen;
tmp = dev_alloc_skb(local->tx_headroom +
frag_threshold +
- tx->sdata->encrypt_headroom +
+ IEEE80211_ENCRYPT_HEADROOM +
IEEE80211_ENCRYPT_TAILROOM);
if (!tmp)
return -ENOMEM;
@@ -890,7 +935,7 @@ static int ieee80211_fragment(struct ieee80211_tx_data *tx,
__skb_queue_tail(&tx->skbs, tmp);
skb_reserve(tmp,
- local->tx_headroom + tx->sdata->encrypt_headroom);
+ local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM);
/* copy control information */
memcpy(tmp->cb, skb->cb, sizeof(tmp->cb));
@@ -1004,10 +1049,10 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
skb_queue_walk(&tx->skbs, skb) {
ac = skb_get_queue_mapping(skb);
- tx->sta->tx_stats.bytes[ac] += skb->len;
+ tx->sta->deflink.tx_stats.bytes[ac] += skb->len;
}
if (ac >= 0)
- tx->sta->tx_stats.packets[ac]++;
+ tx->sta->deflink.tx_stats.packets[ac]++;
return TX_CONTINUE;
}
@@ -1040,8 +1085,6 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx)
case WLAN_CIPHER_SUITE_GCMP:
case WLAN_CIPHER_SUITE_GCMP_256:
return ieee80211_crypto_gcmp_encrypt(tx);
- default:
- return ieee80211_crypto_hw_encrypt(tx);
}
return TX_DROP;
@@ -1159,7 +1202,7 @@ ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata,
if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER))
return;
- if (!sta || !sta->sta.ht_cap.ht_supported ||
+ if (!sta || !sta->sta.deflink.ht_cap.ht_supported ||
!sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO ||
skb->protocol == sdata->control_port_protocol)
return;
@@ -1481,7 +1524,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
codel_vars_init(&txqi->def_cvars);
codel_stats_init(&txqi->cstats);
__skb_queue_head_init(&txqi->frags);
- RB_CLEAR_NODE(&txqi->schedule_order);
+ INIT_LIST_HEAD(&txqi->schedule_order);
txqi->txq.vif = &sdata->vif;
@@ -1525,7 +1568,9 @@ void ieee80211_txq_purge(struct ieee80211_local *local,
ieee80211_purge_tx_queue(&local->hw, &txqi->frags);
spin_unlock_bh(&fq->lock);
- ieee80211_unschedule_txq(&local->hw, &txqi->txq, true);
+ spin_lock_bh(&local->active_txq_lock[txqi->txq.ac]);
+ list_del_init(&txqi->schedule_order);
+ spin_unlock_bh(&local->active_txq_lock[txqi->txq.ac]);
}
void ieee80211_txq_set_params(struct ieee80211_local *local)
@@ -2013,7 +2058,7 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
headroom = local->tx_headroom;
if (encrypt != ENCRYPT_NO)
- headroom += sdata->encrypt_headroom;
+ headroom += IEEE80211_ENCRYPT_HEADROOM;
headroom -= skb_headroom(skb);
headroom = max_t(int, 0, headroom);
@@ -2274,6 +2319,10 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
u16 len_rthdr;
int hdrlen;
+ sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ if (unlikely(!ieee80211_sdata_running(sdata)))
+ goto fail;
+
memset(info, 0, sizeof(*info));
info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
IEEE80211_TX_CTL_INJECTED;
@@ -2333,8 +2382,6 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
* This is necessary, for example, for old hostapd versions that
* don't use nl80211-based management TX/RX.
*/
- sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
list_for_each_entry_rcu(tmp_sdata, &local->interfaces, list) {
if (!ieee80211_sdata_running(tmp_sdata))
continue;
@@ -2347,12 +2394,12 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
}
}
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
if (!chanctx_conf) {
tmp_sdata = rcu_dereference(local->monitor_sdata);
if (tmp_sdata)
chanctx_conf =
- rcu_dereference(tmp_sdata->vif.chanctx_conf);
+ rcu_dereference(tmp_sdata->vif.bss_conf.chanctx_conf);
}
if (chanctx_conf)
@@ -2479,7 +2526,7 @@ int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata,
}
- sta = sta_info_get(sdata, sdata->u.mgd.bssid);
+ sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr);
if (!sta)
return -ENOLINK;
break;
@@ -2568,10 +2615,10 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
bool tdls_peer;
bool multicast;
u16 info_id = 0;
- struct ieee80211_chanctx_conf *chanctx_conf;
- struct ieee80211_sub_if_data *ap_sdata;
+ struct ieee80211_chanctx_conf *chanctx_conf = NULL;
enum nl80211_band band;
int ret;
+ u8 link_id = u32_get_bits(ctrl_flags, IEEE80211_TX_CTRL_MLO_LINK);
if (IS_ERR(sta))
sta = NULL;
@@ -2586,6 +2633,10 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
ethertype = (skb->data[12] << 8) | skb->data[13];
fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA);
+ if (!sdata->vif.valid_links)
+ chanctx_conf =
+ rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
+
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP_VLAN:
if (sdata->wdev.use_4addr) {
@@ -2599,31 +2650,51 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
authorized = test_sta_flag(sta, WLAN_STA_AUTHORIZED);
wme_sta = sta->sta.wme;
}
- ap_sdata = container_of(sdata->bss, struct ieee80211_sub_if_data,
- u.ap);
- chanctx_conf = rcu_dereference(ap_sdata->vif.chanctx_conf);
- if (!chanctx_conf) {
- ret = -ENOTCONN;
- goto free;
+ if (!sdata->vif.valid_links) {
+ struct ieee80211_sub_if_data *ap_sdata;
+
+ /* override chanctx_conf from AP (we don't have one) */
+ ap_sdata = container_of(sdata->bss,
+ struct ieee80211_sub_if_data,
+ u.ap);
+ chanctx_conf =
+ rcu_dereference(ap_sdata->vif.bss_conf.chanctx_conf);
}
- band = chanctx_conf->def.chan->band;
if (sdata->wdev.use_4addr)
break;
fallthrough;
case NL80211_IFTYPE_AP:
- if (sdata->vif.type == NL80211_IFTYPE_AP)
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
- if (!chanctx_conf) {
- ret = -ENOTCONN;
- goto free;
- }
fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
/* DA BSSID SA */
memcpy(hdr.addr1, skb->data, ETH_ALEN);
- memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN);
+
+ if (sdata->vif.valid_links && sta && !sta->sta.mlo) {
+ struct ieee80211_link_data *link;
+
+ link_id = sta->deflink.link_id;
+ link = rcu_dereference(sdata->link[link_id]);
+ if (WARN_ON(!link)) {
+ ret = -ENOLINK;
+ goto free;
+ }
+ memcpy(hdr.addr2, link->conf->addr, ETH_ALEN);
+ } else if (link_id == IEEE80211_LINK_UNSPECIFIED ||
+ (sta && sta->sta.mlo)) {
+ memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN);
+ } else {
+ struct ieee80211_bss_conf *conf;
+
+ conf = rcu_dereference(sdata->vif.link_conf[link_id]);
+ if (unlikely(!conf)) {
+ ret = -ENOLINK;
+ goto free;
+ }
+
+ memcpy(hdr.addr2, conf->addr, ETH_ALEN);
+ }
+
memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN);
hdrlen = 24;
- band = chanctx_conf->def.chan->band;
break;
#ifdef CONFIG_MAC80211_MESH
case NL80211_IFTYPE_MESH_POINT:
@@ -2691,12 +2762,6 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
skb->data + ETH_ALEN);
}
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
- if (!chanctx_conf) {
- ret = -ENOTCONN;
- goto free;
- }
- band = chanctx_conf->def.chan->band;
/* For injected frames, fill RA right away as nexthop lookup
* will be skipped.
@@ -2714,14 +2779,14 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
/* DA SA BSSID */
memcpy(hdr.addr1, skb->data, ETH_ALEN);
memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
- memcpy(hdr.addr3, sdata->u.mgd.bssid, ETH_ALEN);
+ memcpy(hdr.addr3, sdata->deflink.u.mgd.bssid, ETH_ALEN);
hdrlen = 24;
} else if (sdata->u.mgd.use_4addr &&
cpu_to_be16(ethertype) != sdata->control_port_protocol) {
fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS |
IEEE80211_FCTL_TODS);
/* RA TA DA SA */
- memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN);
+ memcpy(hdr.addr1, sdata->deflink.u.mgd.bssid, ETH_ALEN);
memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN);
memcpy(hdr.addr3, skb->data, ETH_ALEN);
memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN);
@@ -2729,17 +2794,11 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
} else {
fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
/* BSSID SA DA */
- memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN);
+ memcpy(hdr.addr1, sdata->vif.cfg.ap_addr, ETH_ALEN);
memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
memcpy(hdr.addr3, skb->data, ETH_ALEN);
hdrlen = 24;
}
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
- if (!chanctx_conf) {
- ret = -ENOTCONN;
- goto free;
- }
- band = chanctx_conf->def.chan->band;
break;
case NL80211_IFTYPE_OCB:
/* DA SA BSSID */
@@ -2747,12 +2806,6 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
eth_broadcast_addr(hdr.addr3);
hdrlen = 24;
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
- if (!chanctx_conf) {
- ret = -ENOTCONN;
- goto free;
- }
- band = chanctx_conf->def.chan->band;
break;
case NL80211_IFTYPE_ADHOC:
/* DA SA BSSID */
@@ -2760,18 +2813,23 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
memcpy(hdr.addr3, sdata->u.ibss.bssid, ETH_ALEN);
hdrlen = 24;
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
- if (!chanctx_conf) {
- ret = -ENOTCONN;
- goto free;
- }
- band = chanctx_conf->def.chan->band;
break;
default:
ret = -EINVAL;
goto free;
}
+ if (!chanctx_conf) {
+ if (!sdata->vif.valid_links) {
+ ret = -ENOTCONN;
+ goto free;
+ }
+ /* MLD transmissions must not rely on the band */
+ band = 0;
+ } else {
+ band = chanctx_conf->def.chan->band;
+ }
+
multicast = is_multicast_ether_addr(hdr.addr1);
/* sta is always NULL for mesh */
@@ -2797,7 +2855,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
(sdata->vif.type != NL80211_IFTYPE_OCB) &&
!multicast && !authorized &&
(cpu_to_be16(ethertype) != sdata->control_port_protocol ||
- !ether_addr_equal(sdata->vif.addr, skb->data + ETH_ALEN)))) {
+ !ieee80211_is_our_addr(sdata, skb->data + ETH_ALEN, NULL)))) {
#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
net_info_ratelimited("%s: dropped frame to %pM (unauthorized port)\n",
sdata->name, hdr.addr1);
@@ -2818,19 +2876,10 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
/*
* If the skb is shared we need to obtain our own copy.
*/
- if (skb_shared(skb)) {
- struct sk_buff *tmp_skb = skb;
-
- /* can't happen -- skb is a clone if info_id != 0 */
- WARN_ON(info_id);
-
- skb = skb_clone(skb, GFP_ATOMIC);
- kfree_skb(tmp_skb);
-
- if (!skb) {
- ret = -ENOMEM;
- goto free;
- }
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(!skb)) {
+ ret = -ENOMEM;
+ goto free;
}
hdr.frame_control = fc;
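skb_share_check() replaces the open-coded clone-and-free above: it returns the buffer unchanged when we are the sole owner, and otherwise hands back a private clone while dropping our reference to the shared original. A toy refcount model of that contract (plain C with an invented struct, not the skb API):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct buf {
	int users;
	char data[32];
};

static struct buf *buf_share_check(struct buf *b)
{
	struct buf *clone;

	if (b->users == 1)
		return b;	/* sole owner: nothing to do */

	clone = malloc(sizeof(*clone));
	if (clone) {
		memcpy(clone, b, sizeof(*clone));
		clone->users = 1;
	}
	b->users--;		/* drop our reference either way */
	return clone;		/* NULL on allocation failure */
}

int main(void)
{
	struct buf *shared = malloc(sizeof(*shared));
	struct buf *mine;

	shared->users = 2;	/* someone else also holds it */
	strcpy(shared->data, "frame");
	mine = buf_share_check(shared);
	printf("got %s copy (users on original: %d)\n",
	       mine == shared ? "the same" : "a private", shared->users);
	free(mine);
	free(shared);
	return 0;
}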
@@ -2867,7 +2916,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
*/
if (head_need > 0 || skb_cloned(skb)) {
- head_need += sdata->encrypt_headroom;
+ head_need += IEEE80211_ENCRYPT_HEADROOM;
head_need += local->tx_headroom;
head_need = max_t(int, 0, head_need);
if (ieee80211_skb_resize(sdata, skb, head_need, ENCRYPT_DATA)) {
@@ -2906,6 +2955,34 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
info->flags = info_flags;
info->ack_frame_id = info_id;
info->band = band;
+
+ if (likely(!cookie)) {
+ ctrl_flags |= u32_encode_bits(link_id,
+ IEEE80211_TX_CTRL_MLO_LINK);
+ } else {
+ unsigned int pre_conf_link_id;
+
+ /*
+ * ctrl_flags have already been set by
+ * ieee80211_tx_control_port(); here
+ * we just sanity-check them
+ */
+
+ pre_conf_link_id = u32_get_bits(ctrl_flags,
+ IEEE80211_TX_CTRL_MLO_LINK);
+
+ if (pre_conf_link_id != link_id &&
+ link_id != IEEE80211_LINK_UNSPECIFIED) {
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+ net_info_ratelimited("%s: dropped frame to %pM with bad link ID request (%d vs. %d)\n",
+ sdata->name, hdr.addr1,
+ pre_conf_link_id, link_id);
+#endif
+ ret = -EINVAL;
+ goto free;
+ }
+ }
+
info->control.flags = ctrl_flags;
return skb;
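The link ID travels in info->control.flags via u32_encode_bits()/u32_get_bits() against the IEEE80211_TX_CTRL_MLO_LINK mask, which is what the sanity check above compares. A userspace model of that field packing (the mask value here is an assumption for illustration):

#include <stdio.h>
#include <stdint.h>

#define TX_CTRL_MLO_LINK 0xf0000000u	/* assumed mask */

static unsigned int field_shift(uint32_t mask)
{
	unsigned int shift = 0;

	while (!(mask & 1)) {
		mask >>= 1;
		shift++;
	}
	return shift;
}

static uint32_t encode_bits(uint32_t val, uint32_t mask)
{
	return (val << field_shift(mask)) & mask;
}

static uint32_t get_bits(uint32_t word, uint32_t mask)
{
	return (word & mask) >> field_shift(mask);
}

int main(void)
{
	uint32_t ctrl_flags = 0;

	ctrl_flags |= encode_bits(2, TX_CTRL_MLO_LINK);	/* link_id = 2 */
	printf("link_id=%u\n", get_bits(ctrl_flags, TX_CTRL_MLO_LINK));
	return 0;
}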
@@ -2982,14 +3059,20 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
!ieee80211_hw_check(&local->hw, SUPPORTS_TX_FRAG))
goto out;
- rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
- if (!chanctx_conf) {
+ if (!sdata->vif.valid_links) {
+ rcu_read_lock();
+ chanctx_conf =
+ rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
+ if (!chanctx_conf) {
+ rcu_read_unlock();
+ goto out;
+ }
+ build.band = chanctx_conf->def.chan->band;
rcu_read_unlock();
- goto out;
+ } else {
+ /* MLD transmissions must not rely on the band */
+ build.band = 0;
}
- build.band = chanctx_conf->def.chan->band;
- rcu_read_unlock();
fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA);
@@ -3006,7 +3089,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
/* DA SA BSSID */
build.da_offs = offsetof(struct ieee80211_hdr, addr1);
build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
- memcpy(hdr->addr3, sdata->u.mgd.bssid, ETH_ALEN);
+ memcpy(hdr->addr3, sdata->deflink.u.mgd.bssid, ETH_ALEN);
build.hdr_len = 24;
break;
}
@@ -3016,7 +3099,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS |
IEEE80211_FCTL_TODS);
/* RA TA DA SA */
- memcpy(hdr->addr1, sdata->u.mgd.bssid, ETH_ALEN);
+ memcpy(hdr->addr1, sdata->deflink.u.mgd.bssid, ETH_ALEN);
memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
build.da_offs = offsetof(struct ieee80211_hdr, addr3);
build.sa_offs = offsetof(struct ieee80211_hdr, addr4);
@@ -3025,7 +3108,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
}
fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
/* BSSID SA DA */
- memcpy(hdr->addr1, sdata->u.mgd.bssid, ETH_ALEN);
+ memcpy(hdr->addr1, sdata->vif.cfg.ap_addr, ETH_ALEN);
build.da_offs = offsetof(struct ieee80211_hdr, addr3);
build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
build.hdr_len = 24;
@@ -3047,7 +3130,21 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
/* DA BSSID SA */
build.da_offs = offsetof(struct ieee80211_hdr, addr1);
- memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
+ if (sta->sta.mlo || !sdata->vif.valid_links) {
+ memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
+ } else {
+ unsigned int link_id = sta->deflink.link_id;
+ struct ieee80211_link_data *link;
+
+ rcu_read_lock();
+ link = rcu_dereference(sdata->link[link_id]);
+ if (WARN_ON(!link)) {
+ rcu_read_unlock();
+ goto out;
+ }
+ memcpy(hdr->addr2, link->conf->addr, ETH_ALEN);
+ rcu_read_unlock();
+ }
build.sa_offs = offsetof(struct ieee80211_hdr, addr3);
build.hdr_len = 24;
break;
@@ -3128,15 +3225,6 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
/* we don't know how to generate IVs for this at all */
if (WARN_ON(gen_iv))
goto out;
- /* pure hardware keys are OK, of course */
- if (!(build.key->flags & KEY_FLAG_CIPHER_SCHEME))
- break;
- /* cipher scheme might require space allocation */
- if (iv_spc &&
- build.key->conf.iv_len > IEEE80211_FAST_XMIT_MAX_IV)
- goto out;
- if (iv_spc)
- build.hdr_len += build.key->conf.iv_len;
}
fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
@@ -3150,8 +3238,6 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
fast_tx = kmemdup(&build, sizeof(build), GFP_ATOMIC);
/* if the kmemdup fails, continue w/o fast_tx */
- if (!fast_tx)
- goto out;
out:
/* we might have raced against another call to this function */
@@ -3263,7 +3349,7 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata,
*/
switch (sdata->vif.type) {
case NL80211_IFTYPE_STATION:
- bssid = sdata->u.mgd.bssid;
+ bssid = sdata->vif.cfg.ap_addr;
break;
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_AP_VLAN:
@@ -3303,7 +3389,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
int subframe_len = skb->len - ETH_ALEN;
u8 max_subframes = sta->sta.max_amsdu_subframes;
int max_frags = local->hw.max_tx_fragments;
- int max_amsdu_len = sta->sta.max_amsdu_len;
+ int max_amsdu_len = sta->sta.cur->max_amsdu_len;
int orig_truesize;
u32 flow_idx;
__be16 len;
@@ -3329,13 +3415,13 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
if (test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags))
return false;
- if (sta->sta.max_rc_amsdu_len)
+ if (sta->sta.cur->max_rc_amsdu_len)
max_amsdu_len = min_t(int, max_amsdu_len,
- sta->sta.max_rc_amsdu_len);
+ sta->sta.cur->max_rc_amsdu_len);
- if (sta->sta.max_tid_amsdu_len[tid])
+ if (sta->sta.cur->max_tid_amsdu_len[tid])
max_amsdu_len = min_t(int, max_amsdu_len,
- sta->sta.max_tid_amsdu_len[tid]);
+ sta->sta.cur->max_tid_amsdu_len[tid]);
flow_idx = fq_flow_idx(fq, skb);
@@ -3462,18 +3548,18 @@ ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
}
if (skb_shinfo(skb)->gso_size)
- sta->tx_stats.msdu[tid] +=
+ sta->deflink.tx_stats.msdu[tid] +=
DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size);
else
- sta->tx_stats.msdu[tid]++;
+ sta->deflink.tx_stats.msdu[tid]++;
info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
/* statistics normally done by ieee80211_tx_h_stats (but that
* has to consider fragmentation, so is more complex)
*/
- sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len;
- sta->tx_stats.packets[skb_get_queue_mapping(skb)]++;
+ sta->deflink.tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len;
+ sta->deflink.tx_stats.packets[skb_get_queue_mapping(skb)]++;
if (pn_offs) {
u64 pn;
@@ -3541,15 +3627,9 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
/* after this point (skb is modified) we cannot return false */
- if (skb_shared(skb)) {
- struct sk_buff *tmp_skb = skb;
-
- skb = skb_clone(skb, GFP_ATOMIC);
- kfree_skb(tmp_skb);
-
- if (!skb)
- return true;
- }
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(!skb))
+ return true;
if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) &&
ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb))
@@ -3580,7 +3660,9 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT |
IEEE80211_TX_CTL_DONTFRAG |
(tid_tx ? IEEE80211_TX_CTL_AMPDU : 0);
- info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT;
+ info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT |
+ u32_encode_bits(IEEE80211_LINK_UNSPECIFIED,
+ IEEE80211_TX_CTRL_MLO_LINK);
#ifdef CONFIG_MAC80211_DEBUGFS
if (local->force_tx_status)
@@ -3692,8 +3774,8 @@ begin:
!test_sta_flag(tx.sta, WLAN_STA_AUTHORIZED) &&
(!(info->control.flags &
IEEE80211_TX_CTRL_PORT_CTRL_PROTO) ||
- !ether_addr_equal(tx.sdata->vif.addr,
- hdr->addr2)))) {
+ !ieee80211_is_our_addr(tx.sdata, hdr->addr2,
+ NULL)))) {
I802_DEBUG_INC(local->tx_handlers_drop_unauth_port);
ieee80211_free_txskb(&local->hw, skb);
goto begin;
@@ -3792,7 +3874,7 @@ begin:
encap_out:
IEEE80211_SKB_CB(skb)->control.vif = vif;
- if (vif &&
+ if (tx.sta &&
wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
bool ampdu = txq->ac != IEEE80211_AC_VO;
u32 airtime;
@@ -3817,262 +3899,147 @@ out:
}
EXPORT_SYMBOL(ieee80211_tx_dequeue);
-struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
-{
- struct ieee80211_local *local = hw_to_local(hw);
- struct airtime_sched_info *air_sched;
- u64 now = ktime_get_coarse_boottime_ns();
- struct ieee80211_txq *ret = NULL;
- struct airtime_info *air_info;
- struct txq_info *txqi = NULL;
- struct rb_node *node;
- bool first = false;
-
- air_sched = &local->airtime[ac];
- spin_lock_bh(&air_sched->lock);
-
- node = air_sched->schedule_pos;
-
-begin:
- if (!node) {
- node = rb_first_cached(&air_sched->active_txqs);
- first = true;
- } else {
- node = rb_next(node);
- }
-
- if (!node)
- goto out;
-
- txqi = container_of(node, struct txq_info, schedule_order);
- air_info = to_airtime_info(&txqi->txq);
-
- if (air_info->v_t > air_sched->v_t &&
- (!first || !airtime_catchup_v_t(air_sched, air_info->v_t, now)))
- goto out;
-
- if (!ieee80211_txq_airtime_check(hw, &txqi->txq)) {
- first = false;
- goto begin;
- }
-
- air_sched->schedule_pos = node;
- air_sched->last_schedule_activity = now;
- ret = &txqi->txq;
-out:
- spin_unlock_bh(&air_sched->lock);
- return ret;
-}
-EXPORT_SYMBOL(ieee80211_next_txq);
-
-static void __ieee80211_insert_txq(struct rb_root_cached *root,
- struct txq_info *txqi)
+static inline s32 ieee80211_sta_deficit(struct sta_info *sta, u8 ac)
{
- struct rb_node **new = &root->rb_root.rb_node;
- struct airtime_info *old_air, *new_air;
- struct rb_node *parent = NULL;
- struct txq_info *__txqi;
- bool leftmost = true;
-
- while (*new) {
- parent = *new;
- __txqi = rb_entry(parent, struct txq_info, schedule_order);
- old_air = to_airtime_info(&__txqi->txq);
- new_air = to_airtime_info(&txqi->txq);
-
- if (new_air->v_t <= old_air->v_t) {
- new = &parent->rb_left;
- } else {
- new = &parent->rb_right;
- leftmost = false;
- }
- }
+ struct airtime_info *air_info = &sta->airtime[ac];
- rb_link_node(&txqi->schedule_order, parent, new);
- rb_insert_color_cached(&txqi->schedule_order, root, leftmost);
+ return air_info->deficit - atomic_read(&air_info->aql_tx_pending);
}
-void ieee80211_resort_txq(struct ieee80211_hw *hw,
- struct ieee80211_txq *txq)
+static void
+ieee80211_txq_set_active(struct txq_info *txqi)
{
- struct airtime_info *air_info = to_airtime_info(txq);
- struct ieee80211_local *local = hw_to_local(hw);
- struct txq_info *txqi = to_txq_info(txq);
- struct airtime_sched_info *air_sched;
-
- air_sched = &local->airtime[txq->ac];
-
- lockdep_assert_held(&air_sched->lock);
-
- if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
- struct airtime_info *a_prev = NULL, *a_next = NULL;
- struct txq_info *t_prev, *t_next;
- struct rb_node *n_prev, *n_next;
-
- /* Erasing a node can cause an expensive rebalancing operation,
- * so we check the previous and next nodes first and only remove
- * and re-insert if the current node is not already in the
- * correct position.
- */
- if ((n_prev = rb_prev(&txqi->schedule_order)) != NULL) {
- t_prev = container_of(n_prev, struct txq_info,
- schedule_order);
- a_prev = to_airtime_info(&t_prev->txq);
- }
-
- if ((n_next = rb_next(&txqi->schedule_order)) != NULL) {
- t_next = container_of(n_next, struct txq_info,
- schedule_order);
- a_next = to_airtime_info(&t_next->txq);
- }
-
- if ((!a_prev || a_prev->v_t <= air_info->v_t) &&
- (!a_next || a_next->v_t > air_info->v_t))
- return;
+ struct sta_info *sta;
- if (air_sched->schedule_pos == &txqi->schedule_order)
- air_sched->schedule_pos = n_prev;
+ if (!txqi->txq.sta)
+ return;
- rb_erase_cached(&txqi->schedule_order,
- &air_sched->active_txqs);
- RB_CLEAR_NODE(&txqi->schedule_order);
- __ieee80211_insert_txq(&air_sched->active_txqs, txqi);
- }
+ sta = container_of(txqi->txq.sta, struct sta_info, sta);
+ sta->airtime[txqi->txq.ac].last_active = (u32)jiffies;
}
-void ieee80211_update_airtime_weight(struct ieee80211_local *local,
- struct airtime_sched_info *air_sched,
- u64 now, bool force)
+static bool
+ieee80211_txq_keep_active(struct txq_info *txqi)
{
- struct airtime_info *air_info, *tmp;
- u64 weight_sum = 0;
+ struct sta_info *sta;
+ u32 diff;
- if (unlikely(!now))
- now = ktime_get_coarse_boottime_ns();
+ if (!txqi->txq.sta)
+ return false;
- lockdep_assert_held(&air_sched->lock);
+ sta = container_of(txqi->txq.sta, struct sta_info, sta);
+ if (ieee80211_sta_deficit(sta, txqi->txq.ac) >= 0)
+ return false;
- if (!force && (air_sched->last_weight_update <
- now - AIRTIME_ACTIVE_DURATION))
- return;
+ diff = (u32)jiffies - sta->airtime[txqi->txq.ac].last_active;
- list_for_each_entry_safe(air_info, tmp,
- &air_sched->active_list, list) {
- if (airtime_is_active(air_info, now))
- weight_sum += air_info->weight;
- else
- list_del_init(&air_info->list);
- }
- airtime_weight_sum_set(air_sched, weight_sum);
- air_sched->last_weight_update = now;
+ return diff <= AIRTIME_ACTIVE_DURATION;
}
-void ieee80211_schedule_txq(struct ieee80211_hw *hw,
- struct ieee80211_txq *txq)
- __acquires(txq_lock) __releases(txq_lock)
+struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
{
struct ieee80211_local *local = hw_to_local(hw);
- struct txq_info *txqi = to_txq_info(txq);
- struct airtime_sched_info *air_sched;
- u64 now = ktime_get_coarse_boottime_ns();
- struct airtime_info *air_info;
- u8 ac = txq->ac;
- bool was_active;
-
- air_sched = &local->airtime[ac];
- air_info = to_airtime_info(txq);
+ struct ieee80211_txq *ret = NULL;
+ struct txq_info *txqi = NULL, *head = NULL;
+ bool found_eligible_txq = false;
- spin_lock_bh(&air_sched->lock);
- was_active = airtime_is_active(air_info, now);
- airtime_set_active(air_sched, air_info, now);
+ spin_lock_bh(&local->active_txq_lock[ac]);
- if (!RB_EMPTY_NODE(&txqi->schedule_order))
+ if (!local->schedule_round[ac])
goto out;
- /* If the station has been inactive for a while, catch up its v_t so it
- * doesn't get indefinite priority; see comment above the definition of
- * AIRTIME_MAX_BEHIND.
- */
- if ((!was_active && air_info->v_t < air_sched->v_t) ||
- air_info->v_t < air_sched->v_t - AIRTIME_MAX_BEHIND)
- air_info->v_t = air_sched->v_t;
+ begin:
+ txqi = list_first_entry_or_null(&local->active_txqs[ac],
+ struct txq_info,
+ schedule_order);
+ if (!txqi)
+ goto out;
- ieee80211_update_airtime_weight(local, air_sched, now, !was_active);
- __ieee80211_insert_txq(&air_sched->active_txqs, txqi);
+ if (txqi == head) {
+ if (!found_eligible_txq)
+ goto out;
+ else
+ found_eligible_txq = false;
+ }
-out:
- spin_unlock_bh(&air_sched->lock);
-}
-EXPORT_SYMBOL(ieee80211_schedule_txq);
+ if (!head)
+ head = txqi;
-static void __ieee80211_unschedule_txq(struct ieee80211_hw *hw,
- struct ieee80211_txq *txq,
- bool purge)
-{
- struct ieee80211_local *local = hw_to_local(hw);
- struct txq_info *txqi = to_txq_info(txq);
- struct airtime_sched_info *air_sched;
- struct airtime_info *air_info;
+ if (txqi->txq.sta) {
+ struct sta_info *sta = container_of(txqi->txq.sta,
+ struct sta_info, sta);
+ bool aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq);
+ s32 deficit = ieee80211_sta_deficit(sta, txqi->txq.ac);
- air_sched = &local->airtime[txq->ac];
- air_info = to_airtime_info(&txqi->txq);
+ if (aql_check)
+ found_eligible_txq = true;
- lockdep_assert_held(&air_sched->lock);
+ if (deficit < 0)
+ sta->airtime[txqi->txq.ac].deficit +=
+ sta->airtime_weight;
- if (purge) {
- list_del_init(&air_info->list);
- ieee80211_update_airtime_weight(local, air_sched, 0, true);
+ if (deficit < 0 || !aql_check) {
+ list_move_tail(&txqi->schedule_order,
+ &local->active_txqs[txqi->txq.ac]);
+ goto begin;
+ }
}
- if (RB_EMPTY_NODE(&txqi->schedule_order))
- return;
-
- if (air_sched->schedule_pos == &txqi->schedule_order)
- air_sched->schedule_pos = rb_prev(&txqi->schedule_order);
+ if (txqi->schedule_round == local->schedule_round[ac])
+ goto out;
- if (!purge)
- airtime_set_active(air_sched, air_info,
- ktime_get_coarse_boottime_ns());
+ list_del_init(&txqi->schedule_order);
+ txqi->schedule_round = local->schedule_round[ac];
+ ret = &txqi->txq;
- rb_erase_cached(&txqi->schedule_order,
- &air_sched->active_txqs);
- RB_CLEAR_NODE(&txqi->schedule_order);
+out:
+ spin_unlock_bh(&local->active_txq_lock[ac]);
+ return ret;
}
+EXPORT_SYMBOL(ieee80211_next_txq);
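The rbtree/virtual-time scheduler is replaced by list-based deficit round robin: the head TXQ may transmit while its deficit is non-negative; a negative deficit is topped up by the station's weight and the TXQ rotates to the tail. A compilable toy model of that loop (illustrative weights and airtime costs, not mac80211 structures):

#include <stdio.h>

struct txq {
	const char *name;
	int deficit;
	int weight;
	int pending;	/* airtime cost of the next frame */
};

static struct txq *next_txq(struct txq *q[], int n)
{
	int i, rounds;

	for (rounds = 0; rounds < 2 * n; rounds++) {
		struct txq *head = q[0];

		if (head->deficit >= 0)
			return head;
		head->deficit += head->weight;	/* replenish */
		for (i = 0; i < n - 1; i++)	/* move to tail */
			q[i] = q[i + 1];
		q[n - 1] = head;
	}
	return NULL;
}

int main(void)
{
	struct txq a = { "sta-a", 0, 256, 300 };
	struct txq b = { "sta-b", 0, 256, 100 };
	struct txq *order[2] = { &a, &b };
	int i;

	for (i = 0; i < 5; i++) {
		struct txq *t = next_txq(order, 2);

		printf("tx %s (deficit %d)\n", t->name, t->deficit);
		t->deficit -= t->pending;	/* charge airtime used */
	}
	return 0;
}

Running this shows the expensive station (sta-a) transmitting less often than the cheap one, which is the fairness property the DRR list buys over the old virtual-time tree.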
-void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
+void __ieee80211_schedule_txq(struct ieee80211_hw *hw,
struct ieee80211_txq *txq,
- bool purge)
- __acquires(txq_lock) __releases(txq_lock)
-{
- struct ieee80211_local *local = hw_to_local(hw);
-
- spin_lock_bh(&local->airtime[txq->ac].lock);
- __ieee80211_unschedule_txq(hw, txq, purge);
- spin_unlock_bh(&local->airtime[txq->ac].lock);
-}
-
-void ieee80211_return_txq(struct ieee80211_hw *hw,
- struct ieee80211_txq *txq, bool force)
+ bool force)
{
struct ieee80211_local *local = hw_to_local(hw);
struct txq_info *txqi = to_txq_info(txq);
+ bool has_queue;
+
+ spin_lock_bh(&local->active_txq_lock[txq->ac]);
+
+ has_queue = force || txq_has_queue(txq);
+ if (list_empty(&txqi->schedule_order) &&
+ (has_queue || ieee80211_txq_keep_active(txqi))) {
+ /* If airtime accounting is active, always enqueue STAs at the
+ * head of the list to ensure that they only get moved to the
+ * back by the airtime DRR scheduler once they have a negative
+ * deficit. A station that already has a negative deficit will
+ * get immediately moved to the back of the list on the next
+ * call to ieee80211_next_txq().
+ */
+ if (txqi->txq.sta && local->airtime_flags && has_queue &&
+ wiphy_ext_feature_isset(local->hw.wiphy,
+ NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
+ list_add(&txqi->schedule_order,
+ &local->active_txqs[txq->ac]);
+ else
+ list_add_tail(&txqi->schedule_order,
+ &local->active_txqs[txq->ac]);
+ if (has_queue)
+ ieee80211_txq_set_active(txqi);
+ }
- spin_lock_bh(&local->airtime[txq->ac].lock);
-
- if (!RB_EMPTY_NODE(&txqi->schedule_order) && !force &&
- !txq_has_queue(txq))
- __ieee80211_unschedule_txq(hw, txq, false);
-
- spin_unlock_bh(&local->airtime[txq->ac].lock);
+ spin_unlock_bh(&local->active_txq_lock[txq->ac]);
}
-EXPORT_SYMBOL(ieee80211_return_txq);
+EXPORT_SYMBOL(__ieee80211_schedule_txq);
DEFINE_STATIC_KEY_FALSE(aql_disable);
bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw,
struct ieee80211_txq *txq)
{
- struct airtime_info *air_info = to_airtime_info(txq);
+ struct sta_info *sta;
struct ieee80211_local *local = hw_to_local(hw);
if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))
@@ -4087,74 +4054,108 @@ bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw,
if (unlikely(txq->tid == IEEE80211_NUM_TIDS))
return true;
- if (atomic_read(&air_info->aql_tx_pending) < air_info->aql_limit_low)
+ sta = container_of(txq->sta, struct sta_info, sta);
+ if (atomic_read(&sta->airtime[txq->ac].aql_tx_pending) <
+ sta->airtime[txq->ac].aql_limit_low)
return true;
if (atomic_read(&local->aql_total_pending_airtime) <
local->aql_threshold &&
- atomic_read(&air_info->aql_tx_pending) < air_info->aql_limit_high)
+ atomic_read(&sta->airtime[txq->ac].aql_tx_pending) <
+ sta->airtime[txq->ac].aql_limit_high)
return true;
return false;
}
EXPORT_SYMBOL(ieee80211_txq_airtime_check);
+static bool
+ieee80211_txq_schedule_airtime_check(struct ieee80211_local *local, u8 ac)
+{
+ unsigned int num_txq = 0;
+ struct txq_info *txq;
+ u32 aql_limit;
+
+ if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))
+ return true;
+
+ list_for_each_entry(txq, &local->active_txqs[ac], schedule_order)
+ num_txq++;
+
+ aql_limit = (num_txq - 1) * local->aql_txq_limit_low[ac] / 2 +
+ local->aql_txq_limit_high[ac];
+
+ return atomic_read(&local->aql_ac_pending_airtime[ac]) < aql_limit;
+}
+
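+ieee80211_txq_schedule_airtime_check() bounds the total pending airtime per AC: the first active TXQ contributes the high limit, and each additional TXQ adds half the low limit to the budget. A worked example of that bound with assumed microsecond values:

#include <stdio.h>

int main(void)
{
	unsigned int limit_low = 5000;	/* us, illustrative */
	unsigned int limit_high = 12000;
	unsigned int num_txq;

	for (num_txq = 1; num_txq <= 4; num_txq++)
		printf("%u txqs -> aql_limit %u us\n", num_txq,
		       (num_txq - 1) * limit_low / 2 + limit_high);
	return 0;
}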
bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
struct ieee80211_txq *txq)
{
- struct txq_info *first_txqi = NULL, *txqi = to_txq_info(txq);
struct ieee80211_local *local = hw_to_local(hw);
- struct airtime_sched_info *air_sched;
- struct airtime_info *air_info;
- struct rb_node *node = NULL;
- bool ret = false;
- u64 now;
-
+ struct txq_info *iter, *tmp, *txqi = to_txq_info(txq);
+ struct sta_info *sta;
+ u8 ac = txq->ac;
- if (!ieee80211_txq_airtime_check(hw, txq))
- return false;
+ spin_lock_bh(&local->active_txq_lock[ac]);
- air_sched = &local->airtime[txq->ac];
- spin_lock_bh(&air_sched->lock);
+ if (!txqi->txq.sta)
+ goto out;
- if (RB_EMPTY_NODE(&txqi->schedule_order))
+ if (list_empty(&txqi->schedule_order))
goto out;
- now = ktime_get_coarse_boottime_ns();
+ if (!ieee80211_txq_schedule_airtime_check(local, ac))
+ goto out;
- /* Like in ieee80211_next_txq(), make sure the first station in the
- * scheduling order is eligible for transmission to avoid starvation.
- */
- node = rb_first_cached(&air_sched->active_txqs);
- if (node) {
- first_txqi = container_of(node, struct txq_info,
- schedule_order);
- air_info = to_airtime_info(&first_txqi->txq);
+ list_for_each_entry_safe(iter, tmp, &local->active_txqs[ac],
+ schedule_order) {
+ if (iter == txqi)
+ break;
- if (air_sched->v_t < air_info->v_t)
- airtime_catchup_v_t(air_sched, air_info->v_t, now);
+ if (!iter->txq.sta) {
+ list_move_tail(&iter->schedule_order,
+ &local->active_txqs[ac]);
+ continue;
+ }
+ sta = container_of(iter->txq.sta, struct sta_info, sta);
+ if (ieee80211_sta_deficit(sta, ac) < 0)
+ sta->airtime[ac].deficit += sta->airtime_weight;
+ list_move_tail(&iter->schedule_order, &local->active_txqs[ac]);
}
- air_info = to_airtime_info(&txqi->txq);
- if (air_info->v_t <= air_sched->v_t) {
- air_sched->last_schedule_activity = now;
- ret = true;
- }
+ sta = container_of(txqi->txq.sta, struct sta_info, sta);
+ if (sta->airtime[ac].deficit >= 0)
+ goto out;
+
+ sta->airtime[ac].deficit += sta->airtime_weight;
+ list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]);
+ spin_unlock_bh(&local->active_txq_lock[ac]);
+ return false;
out:
- spin_unlock_bh(&air_sched->lock);
- return ret;
+ if (!list_empty(&txqi->schedule_order))
+ list_del_init(&txqi->schedule_order);
+ spin_unlock_bh(&local->active_txq_lock[ac]);
+
+ return true;
}
EXPORT_SYMBOL(ieee80211_txq_may_transmit);
void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
{
struct ieee80211_local *local = hw_to_local(hw);
- struct airtime_sched_info *air_sched = &local->airtime[ac];
- spin_lock_bh(&air_sched->lock);
- air_sched->schedule_pos = NULL;
- spin_unlock_bh(&air_sched->lock);
+ spin_lock_bh(&local->active_txq_lock[ac]);
+
+ if (ieee80211_txq_schedule_airtime_check(local, ac)) {
+ local->schedule_round[ac]++;
+ if (!local->schedule_round[ac])
+ local->schedule_round[ac]++;
+ } else {
+ local->schedule_round[ac] = 0;
+ }
+
+ spin_unlock_bh(&local->active_txq_lock[ac]);
}
EXPORT_SYMBOL(ieee80211_txq_schedule_start);
@@ -4170,7 +4171,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
struct sk_buff *next;
int len = skb->len;
- if (unlikely(skb->len < ETH_HLEN)) {
+ if (unlikely(!ieee80211_sdata_running(sdata) || skb->len < ETH_HLEN)) {
kfree_skb(skb);
return;
}
@@ -4280,9 +4281,6 @@ static bool ieee80211_multicast_to_unicast(struct sk_buff *skb,
const struct vlan_ethhdr *ethvlan = (void *)skb->data;
__be16 ethertype;
- if (likely(!is_multicast_ether_addr(eth->h_dest)))
- return false;
-
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP_VLAN:
if (sdata->u.vlan.sta)
@@ -4366,6 +4364,44 @@ out:
rcu_read_unlock();
}
+static void ieee80211_mlo_multicast_tx_one(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb, u32 ctrl_flags,
+ unsigned int link_id)
+{
+ struct sk_buff *out;
+
+ out = skb_copy(skb, GFP_ATOMIC);
+ if (!out)
+ return;
+
+ ctrl_flags |= u32_encode_bits(link_id, IEEE80211_TX_CTRL_MLO_LINK);
+ __ieee80211_subif_start_xmit(out, sdata->dev, 0, ctrl_flags, NULL);
+}
+
+static void ieee80211_mlo_multicast_tx(struct net_device *dev,
+ struct sk_buff *skb)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ unsigned long links = sdata->vif.valid_links;
+ unsigned int link;
+ u32 ctrl_flags = IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX;
+
+ if (hweight16(links) == 1) {
+ ctrl_flags |= u32_encode_bits(ffs(links) - 1,
+ IEEE80211_TX_CTRL_MLO_LINK);
+
+ __ieee80211_subif_start_xmit(skb, sdata->dev, 0, ctrl_flags,
+ NULL);
+ return;
+ }
+
+ for_each_set_bit(link, &links, IEEE80211_MLD_MAX_NUM_LINKS) {
+ ieee80211_mlo_multicast_tx_one(sdata, skb, ctrl_flags, link);
+ ctrl_flags = 0;
+ }
+ kfree_skb(skb);
+}
+
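+ieee80211_mlo_multicast_tx() fans a multicast frame out across all valid links, clearing IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX after the first copy so the MLD multicast sequence counter advances exactly once per frame. A sketch of the bitmap walk (plain C stand-in for for_each_set_bit(), with a made-up link mask):

#include <stdio.h>

#define MAX_LINKS 16

int main(void)
{
	unsigned long links = 0x5;	/* links 0 and 2 are valid */
	int first = 1;
	unsigned int link;

	for (link = 0; link < MAX_LINKS; link++) {
		if (!(links & (1UL << link)))
			continue;
		printf("copy on link %u%s\n", link,
		       first ? " (counts for mld_mcast_seq)" : "");
		first = 0;
	}
	return 0;
}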
/**
* ieee80211_subif_start_xmit - netif start_xmit function for 802.3 vifs
* @skb: packet to be sent
@@ -4376,15 +4412,35 @@ out:
netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ const struct ethhdr *eth = (void *)skb->data;
+
+ if (likely(!is_multicast_ether_addr(eth->h_dest)))
+ goto normal;
+
+ if (unlikely(!ieee80211_sdata_running(sdata))) {
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+
if (unlikely(ieee80211_multicast_to_unicast(skb, dev))) {
struct sk_buff_head queue;
__skb_queue_head_init(&queue);
ieee80211_convert_to_unicast(skb, dev, &queue);
while ((skb = __skb_dequeue(&queue)))
- __ieee80211_subif_start_xmit(skb, dev, 0, 0, NULL);
+ __ieee80211_subif_start_xmit(skb, dev, 0,
+ IEEE80211_TX_CTRL_MLO_LINK_UNSPEC,
+ NULL);
+ } else if (sdata->vif.valid_links &&
+ sdata->vif.type == NL80211_IFTYPE_AP &&
+ !ieee80211_hw_check(&sdata->local->hw, MLO_MCAST_MULTI_LINK_TX)) {
+ ieee80211_mlo_multicast_tx(dev, skb);
} else {
- __ieee80211_subif_start_xmit(skb, dev, 0, 0, NULL);
+normal:
+ __ieee80211_subif_start_xmit(skb, dev, 0,
+ IEEE80211_TX_CTRL_MLO_LINK_UNSPEC,
+ NULL);
}
return NETDEV_TX_OK;
@@ -4439,7 +4495,7 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
struct net_device *dev, struct sta_info *sta,
struct ieee80211_key *key, struct sk_buff *skb)
{
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct ieee80211_tx_info *info;
struct ieee80211_local *local = sdata->local;
struct tid_ampdu_tx *tid_tx;
u8 tid;
@@ -4454,6 +4510,11 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))
goto out_free;
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(!skb))
+ return;
+
+ info = IEEE80211_SKB_CB(skb);
memset(info, 0, sizeof(*info));
ieee80211_aggr_check(sdata, sta, skb);
@@ -4463,7 +4524,9 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
if (tid_tx) {
if (!test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) {
/* fall back to non-offload slow path */
- __ieee80211_subif_start_xmit(skb, dev, 0, 0, NULL);
+ __ieee80211_subif_start_xmit(skb, dev, 0,
+ IEEE80211_TX_CTRL_MLO_LINK_UNSPEC,
+ NULL);
return;
}
@@ -4481,8 +4544,8 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
dev_sw_netstats_tx_add(dev, 1, skb->len);
- sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len;
- sta->tx_stats.packets[skb_get_queue_mapping(skb)]++;
+ sta->deflink.tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len;
+ sta->deflink.tx_stats.packets[skb_get_queue_mapping(skb)]++;
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
sdata = container_of(sdata->bss,
@@ -4510,7 +4573,7 @@ netdev_tx_t ieee80211_subif_start_xmit_8023(struct sk_buff *skb,
struct ieee80211_key *key;
struct sta_info *sta;
- if (unlikely(skb->len < ETH_HLEN)) {
+ if (unlikely(!ieee80211_sdata_running(sdata) || skb->len < ETH_HLEN)) {
kfree_skb(skb);
return NETDEV_TX_OK;
}
@@ -4565,7 +4628,8 @@ ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata,
goto out;
}
- skb = ieee80211_build_hdr(sdata, skb, info_flags, sta, 0, NULL);
+ skb = ieee80211_build_hdr(sdata, skb, info_flags, sta,
+ IEEE80211_TX_CTRL_MLO_LINK_UNSPEC, NULL);
if (IS_ERR(skb))
goto out;
@@ -4617,12 +4681,16 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local,
sdata = vif_to_sdata(info->control.vif);
if (info->control.flags & IEEE80211_TX_INTCFL_NEED_TXPROCESSING) {
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
- if (unlikely(!chanctx_conf)) {
- dev_kfree_skb(skb);
- return true;
+ /* update band only for non-MLD */
+ if (!sdata->vif.valid_links) {
+ chanctx_conf =
+ rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
+ if (unlikely(!chanctx_conf)) {
+ dev_kfree_skb(skb);
+ return true;
+ }
+ info->band = chanctx_conf->def.chan->band;
}
- info->band = chanctx_conf->def.chan->band;
result = ieee80211_tx(sdata, NULL, skb, true);
} else if (info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) {
if (ieee80211_lookup_ra_sta(sdata, skb, &sta)) {
@@ -4702,12 +4770,14 @@ void ieee80211_tx_pending(struct tasklet_struct *t)
/* functions for drivers to get certain frames */
static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
struct ps_data *ps, struct sk_buff *skb,
bool is_template)
{
u8 *pos, *tim;
int aid0 = 0;
int i, have_bits = 0, n1, n2;
+ struct ieee80211_bss_conf *link_conf = link->conf;
/* Generate bitmap for TIM only if there are any STAs in power save
* mode. */
@@ -4718,7 +4788,7 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
IEEE80211_MAX_AID+1);
if (!is_template) {
if (ps->dtim_count == 0)
- ps->dtim_count = sdata->vif.bss_conf.dtim_period - 1;
+ ps->dtim_count = link_conf->dtim_period - 1;
else
ps->dtim_count--;
}
@@ -4727,7 +4797,7 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
*pos++ = WLAN_EID_TIM;
*pos++ = 4;
*pos++ = ps->dtim_count;
- *pos++ = sdata->vif.bss_conf.dtim_period;
+ *pos++ = link_conf->dtim_period;
if (ps->dtim_count == 0 && !skb_queue_empty(&ps->bc_buf))
aid0 = 1;
@@ -4767,6 +4837,7 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
}
static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
struct ps_data *ps, struct sk_buff *skb,
bool is_template)
{
@@ -4780,10 +4851,10 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
* of the tim bitmap in mac80211 and the driver.
*/
if (local->tim_in_locked_section) {
- __ieee80211_beacon_add_tim(sdata, ps, skb, is_template);
+ __ieee80211_beacon_add_tim(sdata, link, ps, skb, is_template);
} else {
spin_lock_bh(&local->tim_lock);
- __ieee80211_beacon_add_tim(sdata, ps, skb, is_template);
+ __ieee80211_beacon_add_tim(sdata, link, ps, skb, is_template);
spin_unlock_bh(&local->tim_lock);
}
@@ -4791,7 +4862,8 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
}
static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata,
- struct beacon_data *beacon)
+ struct beacon_data *beacon,
+ struct ieee80211_link_data *link)
{
u8 *beacon_data, count, max_count = 1;
struct probe_resp *resp;
@@ -4816,20 +4888,17 @@ static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata,
return;
}
- rcu_read_lock();
- resp = rcu_dereference(sdata->u.ap.probe_resp);
+ resp = rcu_dereference(link->u.ap.probe_resp);
bcn_offsets = beacon->cntdwn_counter_offsets;
count = beacon->cntdwn_current_counter;
- if (sdata->vif.csa_active)
+ if (link->conf->csa_active)
max_count = IEEE80211_MAX_CNTDWN_COUNTERS_NUM;
for (i = 0; i < max_count; ++i) {
if (bcn_offsets[i]) {
- if (WARN_ON_ONCE(bcn_offsets[i] >= beacon_data_len)) {
- rcu_read_unlock();
+ if (WARN_ON_ONCE(bcn_offsets[i] >= beacon_data_len))
return;
- }
beacon_data[bcn_offsets[i]] = count;
}
@@ -4839,7 +4908,6 @@ static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata,
resp->data[resp_offsets[i]] = count;
}
}
- rcu_read_unlock();
}
static u8 __ieee80211_beacon_update_cntdwn(struct beacon_data *beacon)
@@ -4861,7 +4929,7 @@ u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif)
rcu_read_lock();
if (sdata->vif.type == NL80211_IFTYPE_AP)
- beacon = rcu_dereference(sdata->u.ap.beacon);
+ beacon = rcu_dereference(sdata->deflink.u.ap.beacon);
else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
beacon = rcu_dereference(sdata->u.ibss.presp);
else if (ieee80211_vif_is_mesh(&sdata->vif))
@@ -4886,7 +4954,7 @@ void ieee80211_beacon_set_cntdwn(struct ieee80211_vif *vif, u8 counter)
rcu_read_lock();
if (sdata->vif.type == NL80211_IFTYPE_AP)
- beacon = rcu_dereference(sdata->u.ap.beacon);
+ beacon = rcu_dereference(sdata->deflink.u.ap.beacon);
else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
beacon = rcu_dereference(sdata->u.ibss.presp);
else if (ieee80211_vif_is_mesh(&sdata->vif))
@@ -4916,9 +4984,7 @@ bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif)
rcu_read_lock();
if (vif->type == NL80211_IFTYPE_AP) {
- struct ieee80211_if_ap *ap = &sdata->u.ap;
-
- beacon = rcu_dereference(ap->beacon);
+ beacon = rcu_dereference(sdata->deflink.u.ap.beacon);
if (WARN_ON(!beacon || !beacon->tail))
goto out;
beacon_data = beacon->tail;
@@ -4964,14 +5030,15 @@ EXPORT_SYMBOL(ieee80211_beacon_cntdwn_is_complete);
static int ieee80211_beacon_protect(struct sk_buff *skb,
struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link)
{
ieee80211_tx_result res;
struct ieee80211_tx_data tx;
struct sk_buff *check_skb;
memset(&tx, 0, sizeof(tx));
- tx.key = rcu_dereference(sdata->default_beacon_key);
+ tx.key = rcu_dereference(link->default_beacon_key);
if (!tx.key)
return 0;
tx.local = local;
@@ -4991,6 +5058,7 @@ static int ieee80211_beacon_protect(struct sk_buff *skb,
static void
ieee80211_beacon_get_finish(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
+ struct ieee80211_link_data *link,
struct ieee80211_mutable_offsets *offs,
struct beacon_data *beacon,
struct sk_buff *skb,
@@ -5026,7 +5094,7 @@ ieee80211_beacon_get_finish(struct ieee80211_hw *hw,
memset(&txrc, 0, sizeof(txrc));
txrc.hw = hw;
txrc.sband = local->hw.wiphy->bands[band];
- txrc.bss_conf = &sdata->vif.bss_conf;
+ txrc.bss_conf = link->conf;
txrc.skb = skb;
txrc.reported_rate.idx = -1;
if (sdata->beacon_rate_set && sdata->beacon_rateidx_mask[band])
@@ -5037,14 +5105,30 @@ ieee80211_beacon_get_finish(struct ieee80211_hw *hw,
rate_control_get_rate(sdata, NULL, &txrc);
info->control.vif = vif;
+ info->control.flags |= u32_encode_bits(link->link_id,
+ IEEE80211_TX_CTRL_MLO_LINK);
info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT |
IEEE80211_TX_CTL_ASSIGN_SEQ |
IEEE80211_TX_CTL_FIRST_FRAGMENT;
}
+static void
+ieee80211_beacon_add_mbssid(struct sk_buff *skb, struct beacon_data *beacon)
+{
+ int i;
+
+ if (!beacon->mbssid_ies)
+ return;
+
+ for (i = 0; i < beacon->mbssid_ies->cnt; i++)
+ skb_put_data(skb, beacon->mbssid_ies->elem[i].data,
+ beacon->mbssid_ies->elem[i].len);
+}
+
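+ieee80211_beacon_add_mbssid() appends the MBSSID elements after the beacon head and TIM, and offs->mbssid_off records where they start, i.e. the current length minus their total size. The offset arithmetic, with made-up element sizes:

#include <stdio.h>

struct elem { unsigned int len; };

int main(void)
{
	struct elem mbssid[] = { { 40 }, { 36 } };
	unsigned int head_len = 80, tim_len = 6;
	unsigned int mbssid_len = 0, skb_len, i;

	for (i = 0; i < sizeof(mbssid) / sizeof(mbssid[0]); i++)
		mbssid_len += mbssid[i].len;

	skb_len = head_len + tim_len + mbssid_len;
	printf("mbssid_len=%u mbssid_off=%u\n",
	       mbssid_len, skb_len - mbssid_len);
	return 0;
}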
static struct sk_buff *
ieee80211_beacon_get_ap(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
+ struct ieee80211_link_data *link,
struct ieee80211_mutable_offsets *offs,
bool is_template,
struct beacon_data *beacon,
@@ -5055,33 +5139,40 @@ ieee80211_beacon_get_ap(struct ieee80211_hw *hw,
struct ieee80211_if_ap *ap = &sdata->u.ap;
struct sk_buff *skb = NULL;
u16 csa_off_base = 0;
+ int mbssid_len;
if (beacon->cntdwn_counter_offsets[0]) {
if (!is_template)
ieee80211_beacon_update_cntdwn(vif);
- ieee80211_set_beacon_cntdwn(sdata, beacon);
+ ieee80211_set_beacon_cntdwn(sdata, beacon, link);
}
/* headroom, head length,
- * tail length and maximum TIM length
+ * tail length, maximum TIM length and multiple BSSID length
*/
+ mbssid_len = ieee80211_get_mbssid_beacon_len(beacon->mbssid_ies);
skb = dev_alloc_skb(local->tx_headroom + beacon->head_len +
beacon->tail_len + 256 +
- local->hw.extra_beacon_tailroom);
+ local->hw.extra_beacon_tailroom + mbssid_len);
if (!skb)
return NULL;
skb_reserve(skb, local->tx_headroom);
skb_put_data(skb, beacon->head, beacon->head_len);
- ieee80211_beacon_add_tim(sdata, &ap->ps, skb, is_template);
+ ieee80211_beacon_add_tim(sdata, link, &ap->ps, skb, is_template);
if (offs) {
offs->tim_offset = beacon->head_len;
offs->tim_length = skb->len - beacon->head_len;
offs->cntdwn_counter_offs[0] = beacon->cntdwn_counter_offsets[0];
+ if (mbssid_len) {
+ ieee80211_beacon_add_mbssid(skb, beacon);
+ offs->mbssid_off = skb->len - mbssid_len;
+ }
+
/* for AP the csa offsets are from tail */
csa_off_base = skb->len;
}
@@ -5089,11 +5180,11 @@ ieee80211_beacon_get_ap(struct ieee80211_hw *hw,
if (beacon->tail)
skb_put_data(skb, beacon->tail, beacon->tail_len);
- if (ieee80211_beacon_protect(skb, local, sdata) < 0)
+ if (ieee80211_beacon_protect(skb, local, sdata, link) < 0)
return NULL;
- ieee80211_beacon_get_finish(hw, vif, offs, beacon, skb, chanctx_conf,
- csa_off_base);
+ ieee80211_beacon_get_finish(hw, vif, link, offs, beacon, skb,
+ chanctx_conf, csa_off_base);
return skb;
}
@@ -5101,18 +5192,24 @@ static struct sk_buff *
__ieee80211_beacon_get(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_mutable_offsets *offs,
- bool is_template)
+ bool is_template,
+ unsigned int link_id)
{
struct ieee80211_local *local = hw_to_local(hw);
struct beacon_data *beacon = NULL;
struct sk_buff *skb = NULL;
struct ieee80211_sub_if_data *sdata = NULL;
struct ieee80211_chanctx_conf *chanctx_conf;
+ struct ieee80211_link_data *link;
rcu_read_lock();
sdata = vif_to_sdata(vif);
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ link = rcu_dereference(sdata->link[link_id]);
+ if (!link)
+ goto out;
+ chanctx_conf = rcu_dereference(link->conf->chanctx_conf);
if (!ieee80211_sdata_running(sdata) || !chanctx_conf)
goto out;
@@ -5121,13 +5218,11 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
memset(offs, 0, sizeof(*offs));
if (sdata->vif.type == NL80211_IFTYPE_AP) {
- struct ieee80211_if_ap *ap = &sdata->u.ap;
-
- beacon = rcu_dereference(ap->beacon);
+ beacon = rcu_dereference(link->u.ap.beacon);
if (!beacon)
goto out;
- skb = ieee80211_beacon_get_ap(hw, vif, offs, is_template,
+ skb = ieee80211_beacon_get_ap(hw, vif, link, offs, is_template,
beacon, chanctx_conf);
} else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
@@ -5141,7 +5236,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
if (!is_template)
__ieee80211_beacon_update_cntdwn(beacon);
- ieee80211_set_beacon_cntdwn(sdata, beacon);
+ ieee80211_set_beacon_cntdwn(sdata, beacon, link);
}
skb = dev_alloc_skb(local->tx_headroom + beacon->head_len +
@@ -5155,7 +5250,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
IEEE80211_STYPE_BEACON);
- ieee80211_beacon_get_finish(hw, vif, offs, beacon, skb,
+ ieee80211_beacon_get_finish(hw, vif, link, offs, beacon, skb,
chanctx_conf, 0);
} else if (ieee80211_vif_is_mesh(&sdata->vif)) {
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
@@ -5173,7 +5268,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
*/
__ieee80211_beacon_update_cntdwn(beacon);
- ieee80211_set_beacon_cntdwn(sdata, beacon);
+ ieee80211_set_beacon_cntdwn(sdata, beacon, link);
}
if (ifmsh->sync_ops)
@@ -5188,7 +5283,8 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
goto out;
skb_reserve(skb, local->tx_headroom);
skb_put_data(skb, beacon->head, beacon->head_len);
- ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb, is_template);
+ ieee80211_beacon_add_tim(sdata, link, &ifmsh->ps, skb,
+ is_template);
if (offs) {
offs->tim_offset = beacon->head_len;
@@ -5196,7 +5292,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
}
skb_put_data(skb, beacon->tail, beacon->tail_len);
- ieee80211_beacon_get_finish(hw, vif, offs, beacon, skb,
+ ieee80211_beacon_get_finish(hw, vif, link, offs, beacon, skb,
chanctx_conf, 0);
} else {
WARN_ON(1);
@@ -5212,20 +5308,22 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
struct sk_buff *
ieee80211_beacon_get_template(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
- struct ieee80211_mutable_offsets *offs)
+ struct ieee80211_mutable_offsets *offs,
+ unsigned int link_id)
{
- return __ieee80211_beacon_get(hw, vif, offs, true);
+ return __ieee80211_beacon_get(hw, vif, offs, true, link_id);
}
EXPORT_SYMBOL(ieee80211_beacon_get_template);
struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
- u16 *tim_offset, u16 *tim_length)
+ u16 *tim_offset, u16 *tim_length,
+ unsigned int link_id)
{
struct ieee80211_mutable_offsets offs = {};
- struct sk_buff *bcn = __ieee80211_beacon_get(hw, vif, &offs, false);
+ struct sk_buff *bcn = __ieee80211_beacon_get(hw, vif, &offs, false,
+ link_id);
struct sk_buff *copy;
- struct ieee80211_supported_band *sband;
int shift;
if (!bcn)
@@ -5247,12 +5345,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
return bcn;
shift = ieee80211_vif_get_shift(vif);
- sband = ieee80211_get_sband(vif_to_sdata(vif));
- if (!sband)
- return bcn;
-
- ieee80211_tx_monitor(hw_to_local(hw), copy, sband, 1, shift, false,
- NULL);
+ ieee80211_tx_monitor(hw_to_local(hw), copy, 1, shift, false, NULL);
return bcn;
}
@@ -5261,7 +5354,6 @@ EXPORT_SYMBOL(ieee80211_beacon_get_tim);
struct sk_buff *ieee80211_proberesp_get(struct ieee80211_hw *hw,
struct ieee80211_vif *vif)
{
- struct ieee80211_if_ap *ap = NULL;
struct sk_buff *skb = NULL;
struct probe_resp *presp = NULL;
struct ieee80211_hdr *hdr;
@@ -5271,9 +5363,7 @@ struct sk_buff *ieee80211_proberesp_get(struct ieee80211_hw *hw,
return NULL;
rcu_read_lock();
-
- ap = &sdata->u.ap;
- presp = rcu_dereference(ap->probe_resp);
+ presp = rcu_dereference(sdata->deflink.u.ap.probe_resp);
if (!presp)
goto out;
@@ -5303,7 +5393,7 @@ struct sk_buff *ieee80211_get_fils_discovery_tmpl(struct ieee80211_hw *hw,
return NULL;
rcu_read_lock();
- tmpl = rcu_dereference(sdata->u.ap.fils_discovery);
+ tmpl = rcu_dereference(sdata->deflink.u.ap.fils_discovery);
if (!tmpl) {
rcu_read_unlock();
return NULL;
@@ -5332,7 +5422,7 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw,
return NULL;
rcu_read_lock();
- tmpl = rcu_dereference(sdata->u.ap.unsol_bcast_probe_resp);
+ tmpl = rcu_dereference(sdata->deflink.u.ap.unsol_bcast_probe_resp);
if (!tmpl) {
rcu_read_unlock();
return NULL;
@@ -5353,7 +5443,6 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw,
struct ieee80211_vif *vif)
{
struct ieee80211_sub_if_data *sdata;
- struct ieee80211_if_managed *ifmgd;
struct ieee80211_pspoll *pspoll;
struct ieee80211_local *local;
struct sk_buff *skb;
@@ -5362,7 +5451,6 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw,
return NULL;
sdata = vif_to_sdata(vif);
- ifmgd = &sdata->u.mgd;
local = sdata->local;
skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*pspoll));
@@ -5374,12 +5462,12 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw,
pspoll = skb_put_zero(skb, sizeof(*pspoll));
pspoll->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL |
IEEE80211_STYPE_PSPOLL);
- pspoll->aid = cpu_to_le16(sdata->vif.bss_conf.aid);
+ pspoll->aid = cpu_to_le16(sdata->vif.cfg.aid);
/* aid in PS-Poll has its two MSBs each set to 1 */
pspoll->aid |= cpu_to_le16(1 << 15 | 1 << 14);
- memcpy(pspoll->bssid, ifmgd->bssid, ETH_ALEN);
+ memcpy(pspoll->bssid, sdata->deflink.u.mgd.bssid, ETH_ALEN);
memcpy(pspoll->ta, vif->addr, ETH_ALEN);
return skb;
@@ -5388,35 +5476,39 @@ EXPORT_SYMBOL(ieee80211_pspoll_get);
struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
- bool qos_ok)
+ int link_id, bool qos_ok)
{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_link_data *link = NULL;
struct ieee80211_hdr_3addr *nullfunc;
- struct ieee80211_sub_if_data *sdata;
- struct ieee80211_if_managed *ifmgd;
- struct ieee80211_local *local;
struct sk_buff *skb;
bool qos = false;
if (WARN_ON(vif->type != NL80211_IFTYPE_STATION))
return NULL;
- sdata = vif_to_sdata(vif);
- ifmgd = &sdata->u.mgd;
- local = sdata->local;
+ skb = dev_alloc_skb(local->hw.extra_tx_headroom +
+ sizeof(*nullfunc) + 2);
+ if (!skb)
+ return NULL;
+ rcu_read_lock();
if (qos_ok) {
struct sta_info *sta;
- rcu_read_lock();
- sta = sta_info_get(sdata, ifmgd->bssid);
+ sta = sta_info_get(sdata, vif->cfg.ap_addr);
qos = sta && sta->sta.wme;
- rcu_read_unlock();
}
- skb = dev_alloc_skb(local->hw.extra_tx_headroom +
- sizeof(*nullfunc) + 2);
- if (!skb)
- return NULL;
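+ /* when a specific link is requested, resolve its link data under RCU */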
+ if (link_id >= 0) {
+ link = rcu_dereference(sdata->link[link_id]);
+ if (WARN_ON_ONCE(!link)) {
+ rcu_read_unlock();
+ kfree_skb(skb);
+ return NULL;
+ }
+ }
skb_reserve(skb, local->hw.extra_tx_headroom);
@@ -5437,9 +5529,16 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
skb_put_data(skb, &qoshdr, sizeof(qoshdr));
}
- memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN);
- memcpy(nullfunc->addr2, vif->addr, ETH_ALEN);
- memcpy(nullfunc->addr3, ifmgd->bssid, ETH_ALEN);
+ if (link) {
+ memcpy(nullfunc->addr1, link->conf->bssid, ETH_ALEN);
+ memcpy(nullfunc->addr2, link->conf->addr, ETH_ALEN);
+ memcpy(nullfunc->addr3, link->conf->bssid, ETH_ALEN);
+ } else {
+ memcpy(nullfunc->addr1, vif->cfg.ap_addr, ETH_ALEN);
+ memcpy(nullfunc->addr2, vif->addr, ETH_ALEN);
+ memcpy(nullfunc->addr3, vif->cfg.ap_addr, ETH_ALEN);
+ }
+ rcu_read_unlock();
return skb;
}
@@ -5529,14 +5628,14 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
sdata = vif_to_sdata(vif);
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
if (!chanctx_conf)
goto out;
if (sdata->vif.type == NL80211_IFTYPE_AP) {
struct beacon_data *beacon =
- rcu_dereference(sdata->u.ap.beacon);
+ rcu_dereference(sdata->deflink.u.ap.beacon);
if (!beacon || !beacon->head)
goto out;
@@ -5680,10 +5779,12 @@ void ieee80211_unreserve_tid(struct ieee80211_sta *pubsta, u8 tid)
EXPORT_SYMBOL(ieee80211_unreserve_tid);
void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb, int tid,
+ struct sk_buff *skb, int tid, int link_id,
enum nl80211_band band)
{
+ const struct ieee80211_hdr *hdr = (void *)skb->data;
int ac = ieee80211_ac_from_tid(tid);
+ unsigned int link;
skb_reset_mac_header(skb);
skb_set_queue_mapping(skb, ac);
@@ -5691,6 +5792,38 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
skb->dev = sdata->dev;
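+ /*
+ * the "unspecified link" marker must not collide with a valid
+ * link ID and must still fit into the MLO link control field
+ */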
+ BUILD_BUG_ON(IEEE80211_LINK_UNSPECIFIED < IEEE80211_MLD_MAX_NUM_LINKS);
+ BUILD_BUG_ON(!FIELD_FIT(IEEE80211_TX_CTRL_MLO_LINK,
+ IEEE80211_LINK_UNSPECIFIED));
+
+ if (!sdata->vif.valid_links) {
+ link = 0;
+ } else if (link_id >= 0) {
+ link = link_id;
+ } else if (memcmp(sdata->vif.addr, hdr->addr2, ETH_ALEN) == 0) {
+ /* address from the MLD */
+ link = IEEE80211_LINK_UNSPECIFIED;
+ } else {
+ /* otherwise must be addressed from a link */
+ rcu_read_lock();
+ for (link = 0; link < ARRAY_SIZE(sdata->vif.link_conf); link++) {
+ struct ieee80211_bss_conf *link_conf;
+
+ link_conf = rcu_dereference(sdata->vif.link_conf[link]);
+ if (!link_conf)
+ continue;
+ if (memcmp(link_conf->addr, hdr->addr2, ETH_ALEN) == 0)
+ break;
+ }
+ rcu_read_unlock();
+
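+ /* no link address matched the SA: warn and use the lowest valid link */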
+ if (WARN_ON_ONCE(link == ARRAY_SIZE(sdata->vif.link_conf)))
+ link = ffs(sdata->vif.valid_links) - 1;
+ }
+
+ IEEE80211_SKB_CB(skb)->control.flags |=
+ u32_encode_bits(link, IEEE80211_TX_CTRL_MLO_LINK);
+
/*
* The other path calling ieee80211_xmit is from the tasklet,
* and while we can handle concurrent transmissions locking
@@ -5702,10 +5835,38 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
local_bh_enable();
}
+void ieee80211_tx_skb_tid(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb, int tid, int link_id)
+{
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ enum nl80211_band band;
+
+ rcu_read_lock();
+ if (!sdata->vif.valid_links) {
+ WARN_ON(link_id >= 0);
+ chanctx_conf =
+ rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
+ if (WARN_ON(!chanctx_conf)) {
+ rcu_read_unlock();
+ kfree_skb(skb);
+ return;
+ }
+ band = chanctx_conf->def.chan->band;
+ } else {
+ WARN_ON(link_id >= 0 &&
+ !(sdata->vif.valid_links & BIT(link_id)));
+ /* MLD transmissions must not rely on the band */
+ band = 0;
+ }
+
+ __ieee80211_tx_skb_tid_band(sdata, skb, tid, link_id, band);
+ rcu_read_unlock();
+}
+
int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
const u8 *buf, size_t len,
const u8 *dest, __be16 proto, bool unencrypted,
- u64 *cookie)
+ int link_id, u64 *cookie)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
@@ -5714,6 +5875,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
struct ethhdr *ehdr;
u32 ctrl_flags = 0;
u32 flags = 0;
+ int err;
/* Only accept CONTROL_PORT_PROTOCOL configured in CONNECT/ASSOCIATE
* or Pre-Authentication
@@ -5745,7 +5907,29 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
ehdr = skb_push(skb, sizeof(struct ethhdr));
memcpy(ehdr->h_dest, dest, ETH_ALEN);
- memcpy(ehdr->h_source, sdata->vif.addr, ETH_ALEN);
+
+ /* we may override the SA for MLO STA later */
+ if (link_id < 0) {
+ ctrl_flags |= u32_encode_bits(IEEE80211_LINK_UNSPECIFIED,
+ IEEE80211_TX_CTRL_MLO_LINK);
+ memcpy(ehdr->h_source, sdata->vif.addr, ETH_ALEN);
+ } else {
+ struct ieee80211_bss_conf *link_conf;
+
+ ctrl_flags |= u32_encode_bits(link_id,
+ IEEE80211_TX_CTRL_MLO_LINK);
+
+ rcu_read_lock();
+ link_conf = rcu_dereference(sdata->vif.link_conf[link_id]);
+ if (!link_conf) {
+ dev_kfree_skb(skb);
+ rcu_read_unlock();
+ return -ENOLINK;
+ }
+ memcpy(ehdr->h_source, link_conf->addr, ETH_ALEN);
+ rcu_read_unlock();
+ }
+
ehdr->h_proto = proto;
skb->dev = dev;
@@ -5753,21 +5937,37 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
skb_reset_network_header(skb);
skb_reset_mac_header(skb);
+ if (local->hw.queues < IEEE80211_NUM_ACS)
+ goto start_xmit;
+
/* update QoS header to prioritize control port frames if possible,
* prioritization also happens for control port frames sent over
* AF_PACKET
*/
rcu_read_lock();
+ err = ieee80211_lookup_ra_sta(sdata, skb, &sta);
+ if (err) {
+ dev_kfree_skb(skb);
+ rcu_read_unlock();
+ return err;
+ }
- if (ieee80211_lookup_ra_sta(sdata, skb, &sta) == 0 && !IS_ERR(sta)) {
+ if (!IS_ERR(sta)) {
u16 queue = __ieee80211_select_queue(sdata, sta, skb);
skb_set_queue_mapping(skb, queue);
skb_get_hash(skb);
- }
+ /*
+ * for MLO STA, the SA should be the AP MLD address, but
+ * the link ID has been selected already
+ */
+ if (sta && sta->sta.mlo)
+ memcpy(ehdr->h_source, sdata->vif.addr, ETH_ALEN);
+ }
rcu_read_unlock();
+start_xmit:
/* mutex lock is only needed for incrementing the cookie counter */
mutex_lock(&local->mtx);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index f71b042a5c8b..b512cb37aafb 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -6,7 +6,7 @@
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*
* utilities for mac80211
*/
@@ -191,7 +191,7 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw,
if (vif) {
sdata = vif_to_sdata(vif);
short_preamble = sdata->vif.bss_conf.use_short_preamble;
- if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
+ if (sdata->deflink.operating_11g_mode)
erp = rate->flags & IEEE80211_RATE_ERP_G;
shift = ieee80211_vif_get_shift(vif);
}
@@ -225,7 +225,7 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
if (vif) {
sdata = vif_to_sdata(vif);
short_preamble = sdata->vif.bss_conf.use_short_preamble;
- if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
+ if (sdata->deflink.operating_11g_mode)
erp = rate->flags & IEEE80211_RATE_ERP_G;
shift = ieee80211_vif_get_shift(vif);
}
@@ -268,7 +268,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
if (vif) {
sdata = vif_to_sdata(vif);
short_preamble = sdata->vif.bss_conf.use_short_preamble;
- if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
+ if (sdata->deflink.operating_11g_mode)
erp = rate->flags & IEEE80211_RATE_ERP_G;
shift = ieee80211_vif_get_shift(vif);
}
@@ -301,11 +301,14 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
local_bh_disable();
spin_lock(&fq->lock);
+ sdata->vif.txqs_stopped[ac] = false;
+
+ if (!test_bit(SDATA_STATE_RUNNING, &sdata->state))
+ goto out;
+
if (sdata->vif.type == NL80211_IFTYPE_AP)
ps = &sdata->bss->ps;
- sdata->vif.txqs_stopped[ac] = false;
-
list_for_each_entry_rcu(sta, &local->sta_list, list) {
if (sdata != sta->sdata)
continue;
@@ -951,9 +954,11 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw,
}
EXPORT_SYMBOL(ieee80211_queue_delayed_work);
-static void ieee80211_parse_extension_element(u32 *crc,
- const struct element *elem,
- struct ieee802_11_elems *elems)
+static void
+ieee80211_parse_extension_element(u32 *crc,
+ const struct element *elem,
+ struct ieee802_11_elems *elems,
+ struct ieee80211_elems_parse_params *params)
{
const void *data = elem->data + 1;
u8 len;
@@ -973,8 +978,10 @@ static void ieee80211_parse_extension_element(u32 *crc,
}
break;
case WLAN_EID_EXT_HE_CAPABILITY:
- elems->he_cap = data;
- elems->he_cap_len = len;
+ if (ieee80211_he_capa_size_ok(data, len)) {
+ elems->he_cap = data;
+ elems->he_cap_len = len;
+ }
break;
case WLAN_EID_EXT_HE_OPERATION:
if (len >= sizeof(*elems->he_operation) &&
@@ -1006,23 +1013,39 @@ static void ieee80211_parse_extension_element(u32 *crc,
if (len >= sizeof(*elems->he_6ghz_capa))
elems->he_6ghz_capa = data;
break;
+ case WLAN_EID_EXT_EHT_CAPABILITY:
+ if (ieee80211_eht_capa_size_ok(elems->he_cap,
+ data, len,
+ params->from_ap)) {
+ elems->eht_cap = data;
+ elems->eht_cap_len = len;
+ }
+ break;
+ case WLAN_EID_EXT_EHT_OPERATION:
+ if (ieee80211_eht_oper_size_ok(data, len))
+ elems->eht_operation = data;
+ break;
+ case WLAN_EID_EXT_EHT_MULTI_LINK:
+ if (ieee80211_mle_size_ok(data, len))
+ elems->multi_link = (void *)data;
+ break;
}
}
static u32
-_ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
- struct ieee802_11_elems *elems,
- u64 filter, u32 crc,
- const struct element *check_inherit)
+_ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params,
+ struct ieee802_11_elems *elems,
+ const struct element *check_inherit)
{
const struct element *elem;
- bool calc_crc = filter != 0;
+ bool calc_crc = params->filter != 0;
DECLARE_BITMAP(seen_elems, 256);
+ u32 crc = params->crc;
const u8 *ie;
bitmap_zero(seen_elems, 256);
- for_each_element(elem, start, len) {
+ for_each_element(elem, params->start, params->len) {
bool elem_parse_failed;
u8 id = elem->id;
u8 elen = elem->datalen;
@@ -1085,7 +1108,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
break;
}
- if (calc_crc && id < 64 && (filter & (1ULL << id)))
+ if (calc_crc && id < 64 && (params->filter & (1ULL << id)))
crc = crc32_be(crc, pos - 2, elen + 2);
elem_parse_failed = false;
@@ -1266,7 +1289,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
elems->mesh_chansw_params_ie = (void *)pos;
break;
case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
- if (!action ||
+ if (!params->action ||
elen < sizeof(*elems->wide_bw_chansw_ie)) {
elem_parse_failed = true;
break;
@@ -1274,7 +1297,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
elems->wide_bw_chansw_ie = (void *)pos;
break;
case WLAN_EID_CHANNEL_SWITCH_WRAPPER:
- if (action) {
+ if (params->action) {
elem_parse_failed = true;
break;
}
@@ -1365,7 +1388,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
case WLAN_EID_EXTENSION:
ieee80211_parse_extension_element(calc_crc ?
&crc : NULL,
- elem, elems);
+ elem, elems, params);
break;
case WLAN_EID_S1G_CAPABILITIES:
if (elen >= sizeof(*elems->s1g_capab))
@@ -1401,7 +1424,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
__set_bit(id, seen_elems);
}
- if (!for_each_element_completed(elem, start, len))
+ if (!for_each_element_completed(elem, params->start, params->len))
elems->parse_error = true;
return crc;
@@ -1409,20 +1432,21 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len,
struct ieee802_11_elems *elems,
- const u8 *transmitter_bssid,
- const u8 *bss_bssid,
+ struct cfg80211_bss *bss,
u8 *nontransmitted_profile)
{
const struct element *elem, *sub;
size_t profile_len = 0;
bool found = false;
- if (!bss_bssid || !transmitter_bssid)
+ if (!bss || !bss->transmitted_bss)
return profile_len;
for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, start, len) {
if (elem->datalen < 2)
continue;
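+ /* the first octet is the MaxBSSID indicator, valid range is 1..8 */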
+ if (elem->data[0] < 1 || elem->data[0] > 8)
+ continue;
for_each_element(sub, elem->data + 1, elem->datalen - 1) {
u8 new_bssid[ETH_ALEN];
@@ -1459,11 +1483,11 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len,
continue;
}
- cfg80211_gen_new_bssid(transmitter_bssid,
+ cfg80211_gen_new_bssid(bss->transmitted_bss->bssid,
elem->data[0],
index[2],
new_bssid);
- if (ether_addr_equal(new_bssid, bss_bssid)) {
+ if (ether_addr_equal(new_bssid, bss->bssid)) {
found = true;
elems->bssid_index_len = index[1];
elems->bssid_index = (void *)&index[2];
@@ -1475,44 +1499,47 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len,
return found ? profile_len : 0;
}
-struct ieee802_11_elems *ieee802_11_parse_elems_crc(const u8 *start, size_t len,
- bool action, u64 filter,
- u32 crc,
- const u8 *transmitter_bssid,
- const u8 *bss_bssid)
+struct ieee802_11_elems *
+ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params)
{
struct ieee802_11_elems *elems;
const struct element *non_inherit = NULL;
u8 *nontransmitted_profile;
int nontransmitted_profile_len = 0;
+ size_t scratch_len = params->len;
- elems = kzalloc(sizeof(*elems), GFP_ATOMIC);
+ elems = kzalloc(sizeof(*elems) + scratch_len, GFP_ATOMIC);
if (!elems)
return NULL;
- elems->ie_start = start;
- elems->total_len = len;
-
- nontransmitted_profile = kmalloc(len, GFP_ATOMIC);
- if (nontransmitted_profile) {
- nontransmitted_profile_len =
- ieee802_11_find_bssid_profile(start, len, elems,
- transmitter_bssid,
- bss_bssid,
- nontransmitted_profile);
- non_inherit =
- cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
- nontransmitted_profile,
- nontransmitted_profile_len);
- }
-
- crc = _ieee802_11_parse_elems_crc(start, len, action, elems, filter,
- crc, non_inherit);
+ elems->ie_start = params->start;
+ elems->total_len = params->len;
+ elems->scratch_len = scratch_len;
+ elems->scratch_pos = elems->scratch;
+
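+ /* the nontransmitted BSSID profile is reassembled into the scratch area */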
+ nontransmitted_profile = elems->scratch_pos;
+ nontransmitted_profile_len =
+ ieee802_11_find_bssid_profile(params->start, params->len,
+ elems, params->bss,
+ nontransmitted_profile);
+ elems->scratch_pos += nontransmitted_profile_len;
+ elems->scratch_len -= nontransmitted_profile_len;
+ non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
+ nontransmitted_profile,
+ nontransmitted_profile_len);
+
+ elems->crc = _ieee802_11_parse_elems_full(params, elems, non_inherit);
/* Override with nontransmitted profile, if found */
- if (nontransmitted_profile_len)
- _ieee802_11_parse_elems_crc(nontransmitted_profile,
- nontransmitted_profile_len,
- action, elems, 0, 0, NULL);
+ if (nontransmitted_profile_len) {
+ struct ieee80211_elems_parse_params sub = {
+ .start = nontransmitted_profile,
+ .len = nontransmitted_profile_len,
+ .action = params->action,
+ .link_id = params->link_id,
+ };
+
+ _ieee802_11_parse_elems_full(&sub, elems, NULL);
+ }
if (elems->tim && !elems->parse_error) {
const struct ieee80211_tim_ie *tim_ie = elems->tim;
@@ -1532,10 +1559,6 @@ struct ieee802_11_elems *ieee802_11_parse_elems_crc(const u8 *start, size_t len,
offsetofend(struct ieee80211_bssid_index, dtim_count))
elems->dtim_count = elems->bssid_index->dtim_count;
- kfree(nontransmitted_profile);
-
- elems->crc = crc;
-
return elems;
}
@@ -1553,7 +1576,7 @@ void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
return;
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
if (chanctx_conf)
center_freq = chanctx_conf->def.chan->center_freq;
@@ -1580,9 +1603,10 @@ void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
rcu_read_unlock();
}
-void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
+void ieee80211_set_wmm_default(struct ieee80211_link_data *link,
bool bss_notify, bool enable_qos)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_tx_queue_params qparam;
struct ieee80211_chanctx_conf *chanctx_conf;
@@ -1600,10 +1624,10 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
memset(&qparam, 0, sizeof(qparam));
rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ chanctx_conf = rcu_dereference(link->conf->chanctx_conf);
use_11b = (chanctx_conf &&
chanctx_conf->def.chan->band == NL80211_BAND_2GHZ) &&
- !(sdata->flags & IEEE80211_SDATA_OPERATING_GMODE);
+ !link->operating_11g_mode;
rcu_read_unlock();
is_ocb = (sdata->vif.type == NL80211_IFTYPE_OCB);
@@ -1677,17 +1701,17 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
qparam.uapsd = false;
- sdata->tx_conf[ac] = qparam;
- drv_conf_tx(local, sdata, ac, &qparam);
+ link->tx_conf[ac] = qparam;
+ drv_conf_tx(local, link, ac, &qparam);
}
if (sdata->vif.type != NL80211_IFTYPE_MONITOR &&
sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
sdata->vif.type != NL80211_IFTYPE_NAN) {
- sdata->vif.bss_conf.qos = enable_qos;
+ link->conf->qos = enable_qos;
if (bss_notify)
- ieee80211_bss_info_change_notify(sdata,
- BSS_CHANGED_QOS);
+ ieee80211_link_info_change_notify(sdata, link,
+ BSS_CHANGED_QOS);
}
}
@@ -1700,11 +1724,28 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct sk_buff *skb;
struct ieee80211_mgmt *mgmt;
+ bool multi_link = sdata->vif.valid_links;
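+ /* a minimal basic multi-link element carrying only the MLD address */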
+ struct {
+ u8 id;
+ u8 len;
+ u8 ext_id;
+ struct ieee80211_multi_link_elem ml;
+ struct ieee80211_mle_basic_common_info basic;
+ } __packed mle = {
+ .id = WLAN_EID_EXTENSION,
+ .len = sizeof(mle) - 2,
+ .ext_id = WLAN_EID_EXT_EHT_MULTI_LINK,
+ .ml.control = cpu_to_le16(IEEE80211_ML_CONTROL_TYPE_BASIC),
+ .basic.len = sizeof(mle.basic),
+ };
int err;
+ memcpy(mle.basic.mld_mac_addr, sdata->vif.addr, ETH_ALEN);
+
/* 24 + 6 = header + auth_algo + auth_transaction + status_code */
skb = dev_alloc_skb(local->hw.extra_tx_headroom + IEEE80211_WEP_IV_LEN +
- 24 + 6 + extra_len + IEEE80211_WEP_ICV_LEN);
+ 24 + 6 + extra_len + IEEE80211_WEP_ICV_LEN +
+ multi_link * sizeof(mle));
if (!skb)
return;
@@ -1721,6 +1762,8 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
mgmt->u.auth.status_code = cpu_to_le16(status);
if (extra)
skb_put_data(skb, extra, extra_len);
+ if (multi_link)
+ skb_put_data(skb, &mle, sizeof(mle));
if (auth_alg == WLAN_AUTH_SHARED_KEY && transaction == 3) {
mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
@@ -1799,6 +1842,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
const struct ieee80211_sta_he_cap *he_cap;
+ const struct ieee80211_sta_eht_cap *eht_cap;
u8 *pos = buffer, *end = buffer + buffer_len;
size_t noffset;
int supp_rates_len, i;
@@ -1974,7 +2018,20 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
if (he_cap &&
cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band),
IEEE80211_CHAN_NO_HE)) {
- pos = ieee80211_ie_build_he_cap(pos, he_cap, end);
+ pos = ieee80211_ie_build_he_cap(0, pos, he_cap, end);
+ if (!pos)
+ goto out_err;
+ }
+
+ eht_cap = ieee80211_get_eht_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif));
+
+ if (eht_cap &&
+ cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band),
+ IEEE80211_CHAN_NO_HE |
+ IEEE80211_CHAN_NO_EHT)) {
+ pos = ieee80211_ie_build_eht_cap(pos, he_cap, eht_cap, end,
+ sdata->vif.type == NL80211_IFTYPE_AP);
if (!pos)
goto out_err;
}
@@ -1991,7 +2048,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
if (he_cap) {
enum nl80211_iftype iftype =
ieee80211_vif_type_p2p(&sdata->vif);
- __le16 cap = ieee80211_get_he_6ghz_capa(sband, iftype);
+ __le16 cap = ieee80211_get_he_6ghz_capa(sband6, iftype);
pos = ieee80211_write_he_6ghz_cap(pos, cap, end);
}
@@ -2229,7 +2286,8 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local)
}
static void ieee80211_assign_chanctx(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link)
{
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *ctx;
@@ -2238,11 +2296,11 @@ static void ieee80211_assign_chanctx(struct ieee80211_local *local,
return;
mutex_lock(&local->chanctx_mtx);
- conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+ conf = rcu_dereference_protected(link->conf->chanctx_conf,
lockdep_is_held(&local->chanctx_mtx));
if (conf) {
ctx = container_of(conf, struct ieee80211_chanctx, conf);
- drv_assign_vif_chanctx(local, sdata, ctx);
+ drv_assign_vif_chanctx(local, sdata, link->conf, ctx);
}
mutex_unlock(&local->chanctx_mtx);
}
@@ -2321,6 +2379,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
struct cfg80211_sched_scan_request *sched_scan_req;
bool sched_scan_stopped = false;
bool suspended = local->suspended;
+ bool in_reconfig = false;
/* nothing to do if HW shouldn't run */
if (!local->open_count)
@@ -2447,7 +2506,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
sdata = wiphy_dereference(local->hw.wiphy,
local->monitor_sdata);
if (sdata && ieee80211_sdata_running(sdata))
- ieee80211_assign_chanctx(local, sdata);
+ ieee80211_assign_chanctx(local, sdata, &sdata->deflink);
}
/* reconfigure hardware */
@@ -2457,19 +2516,29 @@ int ieee80211_reconfig(struct ieee80211_local *local)
/* Finally also reconfigure all the BSS information */
list_for_each_entry(sdata, &local->interfaces, list) {
+ unsigned int link_id;
u32 changed;
if (!ieee80211_sdata_running(sdata))
continue;
- ieee80211_assign_chanctx(local, sdata);
+ sdata_lock(sdata);
+ for (link_id = 0;
+ link_id < ARRAY_SIZE(sdata->vif.link_conf);
+ link_id++) {
+ struct ieee80211_link_data *link;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (link)
+ ieee80211_assign_chanctx(local, sdata, link);
+ }
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_MONITOR:
break;
case NL80211_IFTYPE_ADHOC:
- if (sdata->vif.bss_conf.ibss_joined)
+ if (sdata->vif.cfg.ibss_joined)
WARN_ON(drv_join_ibss(local, sdata));
fallthrough;
default:
@@ -2477,10 +2546,11 @@ int ieee80211_reconfig(struct ieee80211_local *local)
fallthrough;
case NL80211_IFTYPE_AP: /* AP stations are handled later */
for (i = 0; i < IEEE80211_NUM_ACS; i++)
- drv_conf_tx(local, sdata, i,
- &sdata->tx_conf[i]);
+ drv_conf_tx(local, &sdata->deflink, i,
+ &sdata->deflink.tx_conf[i]);
break;
}
+ sdata_unlock(sdata);
/* common change flags for all interface types */
changed = BSS_CHANGED_ERP_CTS_PROT |
@@ -2496,7 +2566,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
BSS_CHANGED_TXPOWER |
BSS_CHANGED_MCAST_RATE;
- if (sdata->vif.mu_mimo_owner)
+ if (sdata->vif.bss_conf.mu_mimo_owner)
changed |= BSS_CHANGED_MU_GROUPS;
switch (sdata->vif.type) {
@@ -2506,7 +2576,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
BSS_CHANGED_PS;
/* Re-send beacon info report to the driver */
- if (sdata->u.mgd.have_beacon)
+ if (sdata->deflink.u.mgd.have_beacon)
changed |= BSS_CHANGED_BEACON_INFO;
if (sdata->vif.bss_conf.max_idle_period ||
@@ -2535,8 +2605,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
if (sdata->vif.type == NL80211_IFTYPE_AP) {
changed |= BSS_CHANGED_AP_PROBE_RESP;
- if (rcu_access_pointer(sdata->u.ap.beacon))
- drv_start_ap(local, sdata);
+ if (rcu_access_pointer(sdata->deflink.u.ap.beacon))
+ drv_start_ap(local, sdata,
+ sdata->deflink.conf);
}
fallthrough;
case NL80211_IFTYPE_MESH_POINT:
@@ -2588,23 +2659,21 @@ int ieee80211_reconfig(struct ieee80211_local *local)
}
/* APs are now beaconing, add back stations */
- mutex_lock(&local->sta_mtx);
- list_for_each_entry(sta, &local->sta_list, list) {
- enum ieee80211_sta_state state;
-
- if (!sta->uploaded)
- continue;
-
- if (sta->sdata->vif.type != NL80211_IFTYPE_AP &&
- sta->sdata->vif.type != NL80211_IFTYPE_AP_VLAN)
+ list_for_each_entry(sdata, &local->interfaces, list) {
+ if (!ieee80211_sdata_running(sdata))
continue;
- for (state = IEEE80211_STA_NOTEXIST;
- state < sta->sta_state; state++)
- WARN_ON(drv_sta_state(local, sta->sdata, sta, state,
- state + 1));
+ sdata_lock(sdata);
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_AP_VLAN:
+ case NL80211_IFTYPE_AP:
+ ieee80211_reconfig_stations(sdata);
+ break;
+ default:
+ break;
+ }
+ sdata_unlock(sdata);
}
- mutex_unlock(&local->sta_mtx);
/* add back keys */
list_for_each_entry(sdata, &local->interfaces, list)
@@ -2672,6 +2741,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_RESTART);
if (local->in_reconfig) {
+ in_reconfig = local->in_reconfig;
local->in_reconfig = false;
barrier();
@@ -2689,6 +2759,15 @@ int ieee80211_reconfig(struct ieee80211_local *local)
IEEE80211_QUEUE_STOP_REASON_SUSPEND,
false);
+ if (in_reconfig) {
+ list_for_each_entry(sdata, &local->interfaces, list) {
+ if (!ieee80211_sdata_running(sdata))
+ continue;
+ if (sdata->vif.type == NL80211_IFTYPE_STATION)
+ ieee80211_sta_restart(sdata);
+ }
+ }
+
if (!suspended)
return 0;
@@ -2718,7 +2797,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
return 0;
}
-void ieee80211_resume_disconnect(struct ieee80211_vif *vif)
+static void ieee80211_reconfig_disconnect(struct ieee80211_vif *vif, u8 flag)
{
struct ieee80211_sub_if_data *sdata;
struct ieee80211_local *local;
@@ -2730,22 +2809,39 @@ void ieee80211_resume_disconnect(struct ieee80211_vif *vif)
sdata = vif_to_sdata(vif);
local = sdata->local;
- if (WARN_ON(!local->resuming))
+ if (WARN_ON(flag & IEEE80211_SDATA_DISCONNECT_RESUME &&
+ !local->resuming))
+ return;
+
+ if (WARN_ON(flag & IEEE80211_SDATA_DISCONNECT_HW_RESTART &&
+ !local->in_reconfig))
return;
if (WARN_ON(vif->type != NL80211_IFTYPE_STATION))
return;
- sdata->flags |= IEEE80211_SDATA_DISCONNECT_RESUME;
+ sdata->flags |= flag;
mutex_lock(&local->key_mtx);
list_for_each_entry(key, &sdata->key_list, list)
key->flags |= KEY_FLAG_TAINTED;
mutex_unlock(&local->key_mtx);
}
+
+void ieee80211_hw_restart_disconnect(struct ieee80211_vif *vif)
+{
+ ieee80211_reconfig_disconnect(vif, IEEE80211_SDATA_DISCONNECT_HW_RESTART);
+}
+EXPORT_SYMBOL_GPL(ieee80211_hw_restart_disconnect);
+
+void ieee80211_resume_disconnect(struct ieee80211_vif *vif)
+{
+ ieee80211_reconfig_disconnect(vif, IEEE80211_SDATA_DISCONNECT_RESUME);
+}
EXPORT_SYMBOL_GPL(ieee80211_resume_disconnect);
-void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata)
+void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx_conf *chanctx_conf;
@@ -2753,8 +2849,8 @@ void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata)
mutex_lock(&local->chanctx_mtx);
- chanctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
+ chanctx_conf = rcu_dereference_protected(link->conf->chanctx_conf,
+ lockdep_is_held(&local->chanctx_mtx));
/*
* This function can be called from a work, thus it may be possible
@@ -2771,22 +2867,48 @@ void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata)
mutex_unlock(&local->chanctx_mtx);
}
-void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata)
+void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata,
+ int link_id)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx_conf *chanctx_conf;
struct ieee80211_chanctx *chanctx;
+ int i;
mutex_lock(&local->chanctx_mtx);
- chanctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
- lockdep_is_held(&local->chanctx_mtx));
+ for (i = 0; i < ARRAY_SIZE(sdata->vif.link_conf); i++) {
+ struct ieee80211_bss_conf *bss_conf;
- if (WARN_ON_ONCE(!chanctx_conf))
- goto unlock;
+ if (link_id >= 0 && link_id != i)
+ continue;
- chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf);
- ieee80211_recalc_chanctx_min_def(local, chanctx);
+ rcu_read_lock();
+ bss_conf = rcu_dereference(sdata->vif.link_conf[i]);
+ if (!bss_conf) {
+ rcu_read_unlock();
+ continue;
+ }
+
+ chanctx_conf = rcu_dereference_protected(bss_conf->chanctx_conf,
+ lockdep_is_held(&local->chanctx_mtx));
+ /*
+ * Since we hold the chanctx_mtx (checked above)
+ * we can take the chanctx_conf pointer out of the
+ * RCU critical section, it cannot go away without
+ * the mutex. Just the way we reached it could - in
+ * theory - go away, but we don't really care and
+ * it really shouldn't happen anyway.
+ */
+ rcu_read_unlock();
+
+ if (!chanctx_conf)
+ goto unlock;
+
+ chanctx = container_of(chanctx_conf, struct ieee80211_chanctx,
+ conf);
+ ieee80211_recalc_chanctx_min_def(local, chanctx);
+ }
unlock:
mutex_unlock(&local->chanctx_mtx);
}
@@ -2801,46 +2923,6 @@ size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset)
return pos;
}
-static void _ieee80211_enable_rssi_reports(struct ieee80211_sub_if_data *sdata,
- int rssi_min_thold,
- int rssi_max_thold)
-{
- trace_api_enable_rssi_reports(sdata, rssi_min_thold, rssi_max_thold);
-
- if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
- return;
-
- /*
- * Scale up threshold values before storing it, as the RSSI averaging
- * algorithm uses a scaled up value as well. Change this scaling
- * factor if the RSSI averaging algorithm changes.
- */
- sdata->u.mgd.rssi_min_thold = rssi_min_thold*16;
- sdata->u.mgd.rssi_max_thold = rssi_max_thold*16;
-}
-
-void ieee80211_enable_rssi_reports(struct ieee80211_vif *vif,
- int rssi_min_thold,
- int rssi_max_thold)
-{
- struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
-
- WARN_ON(rssi_min_thold == rssi_max_thold ||
- rssi_min_thold > rssi_max_thold);
-
- _ieee80211_enable_rssi_reports(sdata, rssi_min_thold,
- rssi_max_thold);
-}
-EXPORT_SYMBOL(ieee80211_enable_rssi_reports);
-
-void ieee80211_disable_rssi_reports(struct ieee80211_vif *vif)
-{
- struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
-
- _ieee80211_enable_rssi_reports(sdata, 0, 0);
-}
-EXPORT_SYMBOL(ieee80211_disable_rssi_reports);
-
u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
u16 cap)
{
@@ -2918,10 +3000,11 @@ u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype)
he_cap->he_cap_elem.phy_cap_info);
}
-u8 *ieee80211_ie_build_he_cap(u8 *pos,
+u8 *ieee80211_ie_build_he_cap(ieee80211_conn_flags_t disable_flags, u8 *pos,
const struct ieee80211_sta_he_cap *he_cap,
u8 *end)
{
+ struct ieee80211_he_cap_elem elem;
u8 n;
u8 ie_len;
u8 *orig_pos = pos;
@@ -2934,7 +3017,23 @@ u8 *ieee80211_ie_build_he_cap(u8 *pos,
if (!he_cap)
return orig_pos;
- n = ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem);
+ /* modify on stack first to calculate 'n' and 'ie_len' correctly */
+ elem = he_cap->he_cap_elem;
+
+ if (disable_flags & IEEE80211_CONN_DISABLE_40MHZ)
+ elem.phy_cap_info[0] &=
+ ~(IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G);
+
+ if (disable_flags & IEEE80211_CONN_DISABLE_160MHZ)
+ elem.phy_cap_info[0] &=
+ ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
+
+ if (disable_flags & IEEE80211_CONN_DISABLE_80P80MHZ)
+ elem.phy_cap_info[0] &=
+ ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G;
+
+ n = ieee80211_he_mcs_nss_size(&elem);
ie_len = 2 + 1 +
sizeof(he_cap->he_cap_elem) + n +
ieee80211_he_ppe_size(he_cap->ppe_thres[0],
@@ -2948,8 +3047,8 @@ u8 *ieee80211_ie_build_he_cap(u8 *pos,
*pos++ = WLAN_EID_EXT_HE_CAPABILITY;
/* Fixed data */
- memcpy(pos, &he_cap->he_cap_elem, sizeof(he_cap->he_cap_elem));
- pos += sizeof(he_cap->he_cap_elem);
+ memcpy(pos, &elem, sizeof(elem));
+ pos += sizeof(elem);
memcpy(pos, &he_cap->he_mcs_nss_supp, n);
pos += n;
@@ -2985,6 +3084,7 @@ end:
}
void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
+ enum ieee80211_smps_mode smps_mode,
struct sk_buff *skb)
{
struct ieee80211_supported_band *sband;
@@ -3011,7 +3111,7 @@ void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
cap = le16_to_cpu(iftd->he_6ghz_capa.capa);
cap &= ~IEEE80211_HE_6GHZ_CAP_SM_PS;
- switch (sdata->smps_mode) {
+ switch (smps_mode) {
case IEEE80211_SMPS_AUTOMATIC:
case IEEE80211_SMPS_NUM_MODES:
WARN_ON(1);
@@ -3056,6 +3156,10 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
else
ht_oper->ht_param = IEEE80211_HT_PARAM_CHA_SEC_BELOW;
break;
+ case NL80211_CHAN_WIDTH_320:
+ /* HT information element should not be included on 6GHz */
+ WARN_ON(1);
+ return pos;
default:
ht_oper->ht_param = IEEE80211_HT_PARAM_CHA_SEC_NONE;
break;
@@ -3095,6 +3199,10 @@ void ieee80211_ie_build_wide_bw_cs(u8 *pos,
case NL80211_CHAN_WIDTH_80P80:
*pos++ = IEEE80211_VHT_CHANWIDTH_80P80MHZ;
break;
+ case NL80211_CHAN_WIDTH_320:
+ /* The behavior is not defined for 320 MHz channels */
+ WARN_ON(1);
+ fallthrough;
default:
*pos++ = IEEE80211_VHT_CHANWIDTH_USE_HT;
}
@@ -3147,6 +3255,10 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
case NL80211_CHAN_WIDTH_80:
vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ;
break;
+ case NL80211_CHAN_WIDTH_320:
+ /* VHT information element should not be included on 6GHz */
+ WARN_ON(1);
+ return pos;
default:
vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_USE_HT;
break;
@@ -3207,6 +3319,13 @@ u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef)
he_6ghz_op->ccfs1 = 0;
switch (chandef->width) {
+ case NL80211_CHAN_WIDTH_320:
+ /*
+ * TODO: mesh operation is not defined over 6GHz 320 MHz
+ * channels.
+ */
+ WARN_ON(1);
+ break;
case NL80211_CHAN_WIDTH_160:
/* Convert 160 MHz channel width to new style as interop
* workaround.
@@ -3262,7 +3381,6 @@ bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper,
channel_type = NL80211_CHAN_HT40MINUS;
break;
default:
- channel_type = NL80211_CHAN_NO_HT;
return false;
}
@@ -3394,19 +3512,72 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info,
return true;
}
+void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation *eht_oper,
+ bool support_160, bool support_320,
+ struct cfg80211_chan_def *chandef)
+{
+ struct ieee80211_eht_operation_info *info = (void *)eht_oper->optional;
+
+ chandef->center_freq1 =
+ ieee80211_channel_to_frequency(info->ccfs0,
+ chandef->chan->band);
+
+ switch (u8_get_bits(info->control,
+ IEEE80211_EHT_OPER_CHAN_WIDTH)) {
+ case IEEE80211_EHT_OPER_CHAN_WIDTH_20MHZ:
+ chandef->width = NL80211_CHAN_WIDTH_20;
+ break;
+ case IEEE80211_EHT_OPER_CHAN_WIDTH_40MHZ:
+ chandef->width = NL80211_CHAN_WIDTH_40;
+ break;
+ case IEEE80211_EHT_OPER_CHAN_WIDTH_80MHZ:
+ chandef->width = NL80211_CHAN_WIDTH_80;
+ break;
+ case IEEE80211_EHT_OPER_CHAN_WIDTH_160MHZ:
+ if (support_160) {
+ chandef->width = NL80211_CHAN_WIDTH_160;
+ chandef->center_freq1 =
+ ieee80211_channel_to_frequency(info->ccfs1,
+ chandef->chan->band);
+ } else {
+ chandef->width = NL80211_CHAN_WIDTH_80;
+ }
+ break;
+ case IEEE80211_EHT_OPER_CHAN_WIDTH_320MHZ:
+ if (support_320) {
+ chandef->width = NL80211_CHAN_WIDTH_320;
+ chandef->center_freq1 =
+ ieee80211_channel_to_frequency(info->ccfs1,
+ chandef->chan->band);
+ } else if (support_160) {
+ chandef->width = NL80211_CHAN_WIDTH_160;
+ } else {
+ chandef->width = NL80211_CHAN_WIDTH_80;
+
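+ /*
+ * ccfs0 gave the centre of the 160 MHz segment containing the
+ * primary channel; step 40 MHz towards the primary channel to
+ * land on the 80 MHz segment centre
+ */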
+ if (chandef->center_freq1 > chandef->chan->center_freq)
+ chandef->center_freq1 -= 40;
+ else
+ chandef->center_freq1 += 40;
+ }
+ break;
+ }
+}
+
bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
const struct ieee80211_he_operation *he_oper,
+ const struct ieee80211_eht_operation *eht_oper,
struct cfg80211_chan_def *chandef)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
const struct ieee80211_sta_he_cap *he_cap;
+ const struct ieee80211_sta_eht_cap *eht_cap;
struct cfg80211_chan_def he_chandef = *chandef;
const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
- bool support_80_80, support_160;
- u8 he_phy_cap;
+ bool support_80_80, support_160, support_320;
+ u8 he_phy_cap, eht_phy_cap;
u32 freq;
if (chandef->chan->band != NL80211_BAND_6GHZ)
@@ -3435,6 +3606,12 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
return false;
}
+ eht_cap = ieee80211_get_eht_iftype_cap(sband, iftype);
+ if (!eht_cap) {
+ sdata_info(sdata, "Missing iftype sband data/EHT cap");
+ eht_oper = NULL;
+ }
+
he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper);
if (!he_6ghz_oper) {
@@ -3444,6 +3621,11 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
return false;
}
+ /*
+ * The EHT operation IE does not contain the primary channel, so the
+ * primary channel frequency should be taken from the 6 GHz operation
+ * information.
+ */
freq = ieee80211_channel_to_frequency(he_6ghz_oper->primary,
NL80211_BAND_6GHZ);
he_chandef.chan = ieee80211_get_channel(sdata->local->hw.wiphy, freq);
@@ -3461,43 +3643,53 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
break;
}
- switch (u8_get_bits(he_6ghz_oper->control,
- IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH)) {
- case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ:
- he_chandef.width = NL80211_CHAN_WIDTH_20;
- break;
- case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_40MHZ:
- he_chandef.width = NL80211_CHAN_WIDTH_40;
- break;
- case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ:
- he_chandef.width = NL80211_CHAN_WIDTH_80;
- break;
- case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ:
- he_chandef.width = NL80211_CHAN_WIDTH_80;
- if (!he_6ghz_oper->ccfs1)
+ if (!eht_oper ||
+ !(eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) {
+ switch (u8_get_bits(he_6ghz_oper->control,
+ IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH)) {
+ case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ:
+ he_chandef.width = NL80211_CHAN_WIDTH_20;
+ break;
+ case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_40MHZ:
+ he_chandef.width = NL80211_CHAN_WIDTH_40;
+ break;
+ case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ:
+ he_chandef.width = NL80211_CHAN_WIDTH_80;
+ break;
+ case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ:
+ he_chandef.width = NL80211_CHAN_WIDTH_80;
+ if (!he_6ghz_oper->ccfs1)
+ break;
+ if (abs(he_6ghz_oper->ccfs1 - he_6ghz_oper->ccfs0) == 8) {
+ if (support_160)
+ he_chandef.width = NL80211_CHAN_WIDTH_160;
+ } else {
+ if (support_80_80)
+ he_chandef.width = NL80211_CHAN_WIDTH_80P80;
+ }
break;
- if (abs(he_6ghz_oper->ccfs1 - he_6ghz_oper->ccfs0) == 8) {
- if (support_160)
- he_chandef.width = NL80211_CHAN_WIDTH_160;
- } else {
- if (support_80_80)
- he_chandef.width = NL80211_CHAN_WIDTH_80P80;
}
- break;
- }
- if (he_chandef.width == NL80211_CHAN_WIDTH_160) {
- he_chandef.center_freq1 =
- ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1,
- NL80211_BAND_6GHZ);
- } else {
- he_chandef.center_freq1 =
- ieee80211_channel_to_frequency(he_6ghz_oper->ccfs0,
- NL80211_BAND_6GHZ);
- if (support_80_80 || support_160)
- he_chandef.center_freq2 =
+ if (he_chandef.width == NL80211_CHAN_WIDTH_160) {
+ he_chandef.center_freq1 =
ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1,
NL80211_BAND_6GHZ);
+ } else {
+ he_chandef.center_freq1 =
+ ieee80211_channel_to_frequency(he_6ghz_oper->ccfs0,
+ NL80211_BAND_6GHZ);
+ if (support_80_80 || support_160)
+ he_chandef.center_freq2 =
+ ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1,
+ NL80211_BAND_6GHZ);
+ }
+ } else {
+ eht_phy_cap = eht_cap->eht_cap_elem.phy_cap_info[0];
+ support_320 =
+ eht_phy_cap & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ;
+
+ ieee80211_chandef_eht_oper(eht_oper, support_160,
+ support_320, &he_chandef);
}
if (!cfg80211_chandef_valid(&he_chandef)) {
@@ -3551,12 +3743,12 @@ bool ieee80211_chandef_s1g_oper(const struct ieee80211_s1g_oper_ie *oper,
return true;
}
-int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
+int ieee80211_parse_bitrates(enum nl80211_chan_width width,
const struct ieee80211_supported_band *sband,
const u8 *srates, int srates_len, u32 *rates)
{
- u32 rate_flags = ieee80211_chandef_rate_flags(chandef);
- int shift = ieee80211_chandef_get_shift(chandef);
+ u32 rate_flags = ieee80211_chanwidth_rate_flags(width);
+ int shift = ieee80211_chanwidth_get_shift(width);
struct ieee80211_rate *br;
int brate, rate, i, j, count = 0;
@@ -3677,13 +3869,11 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
int ieee80211_ave_rssi(struct ieee80211_vif *vif)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
- struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION)) {
- /* non-managed type inferfaces */
+ if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION))
return 0;
- }
- return -ewma_beacon_signal_read(&ifmgd->ave_beacon_signal);
+
+ return -ewma_beacon_signal_read(&sdata->deflink.u.mgd.ave_beacon_signal);
}
EXPORT_SYMBOL_GPL(ieee80211_ave_rssi);
@@ -3875,11 +4065,11 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local)
* by the time it gets it, sdata->wdev.cac_started
* will no longer be true
*/
- cancel_delayed_work(&sdata->dfs_cac_timer_work);
+ cancel_delayed_work(&sdata->deflink.dfs_cac_timer_work);
if (sdata->wdev.cac_started) {
chandef = sdata->vif.bss_conf.chandef;
- ieee80211_vif_release_channel(sdata);
+ ieee80211_link_release_channel(&sdata->deflink);
cfg80211_cac_event(sdata->dev,
&chandef,
NL80211_RADAR_CAC_ABORTED,
@@ -3928,21 +4118,21 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw)
}
EXPORT_SYMBOL(ieee80211_radar_detected);
-u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c)
+ieee80211_conn_flags_t ieee80211_chandef_downgrade(struct cfg80211_chan_def *c)
{
- u32 ret;
+ ieee80211_conn_flags_t ret;
int tmp;
switch (c->width) {
case NL80211_CHAN_WIDTH_20:
c->width = NL80211_CHAN_WIDTH_20_NOHT;
- ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
+ ret = IEEE80211_CONN_DISABLE_HT | IEEE80211_CONN_DISABLE_VHT;
break;
case NL80211_CHAN_WIDTH_40:
c->width = NL80211_CHAN_WIDTH_20;
c->center_freq1 = c->chan->center_freq;
- ret = IEEE80211_STA_DISABLE_40MHZ |
- IEEE80211_STA_DISABLE_VHT;
+ ret = IEEE80211_CONN_DISABLE_40MHZ |
+ IEEE80211_CONN_DISABLE_VHT;
break;
case NL80211_CHAN_WIDTH_80:
tmp = (30 + c->chan->center_freq - c->center_freq1)/20;
@@ -3951,13 +4141,13 @@ u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c)
/* freq_P40 */
c->center_freq1 = c->center_freq1 - 20 + 40 * tmp;
c->width = NL80211_CHAN_WIDTH_40;
- ret = IEEE80211_STA_DISABLE_VHT;
+ ret = IEEE80211_CONN_DISABLE_VHT;
break;
case NL80211_CHAN_WIDTH_80P80:
c->center_freq2 = 0;
c->width = NL80211_CHAN_WIDTH_80;
- ret = IEEE80211_STA_DISABLE_80P80MHZ |
- IEEE80211_STA_DISABLE_160MHZ;
+ ret = IEEE80211_CONN_DISABLE_80P80MHZ |
+ IEEE80211_CONN_DISABLE_160MHZ;
break;
case NL80211_CHAN_WIDTH_160:
/* n_P20 */
@@ -3966,14 +4156,23 @@ u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c)
tmp /= 4;
c->center_freq1 = c->center_freq1 - 40 + 80 * tmp;
c->width = NL80211_CHAN_WIDTH_80;
- ret = IEEE80211_STA_DISABLE_80P80MHZ |
- IEEE80211_STA_DISABLE_160MHZ;
+ ret = IEEE80211_CONN_DISABLE_80P80MHZ |
+ IEEE80211_CONN_DISABLE_160MHZ;
+ break;
+ case NL80211_CHAN_WIDTH_320:
+ /* n_P20 */
+ tmp = (150 + c->chan->center_freq - c->center_freq1) / 20;
+ /* n_P160 */
+ tmp /= 8;
+ c->center_freq1 = c->center_freq1 - 80 + 160 * tmp;
+ c->width = NL80211_CHAN_WIDTH_160;
+ ret = IEEE80211_CONN_DISABLE_320MHZ;
break;
default:
case NL80211_CHAN_WIDTH_20_NOHT:
WARN_ON_ONCE(1);
c->width = NL80211_CHAN_WIDTH_20_NOHT;
- ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
+ ret = IEEE80211_CONN_DISABLE_HT | IEEE80211_CONN_DISABLE_VHT;
break;
case NL80211_CHAN_WIDTH_1:
case NL80211_CHAN_WIDTH_2:
@@ -3984,7 +4183,7 @@ u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c)
case NL80211_CHAN_WIDTH_10:
WARN_ON_ONCE(1);
/* keep c->width */
- ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
+ ret = IEEE80211_CONN_DISABLE_HT | IEEE80211_CONN_DISABLE_VHT;
break;
}
@@ -4105,74 +4304,6 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
return 0;
}
-bool ieee80211_cs_valid(const struct ieee80211_cipher_scheme *cs)
-{
- return !(cs == NULL || cs->cipher == 0 ||
- cs->hdr_len < cs->pn_len + cs->pn_off ||
- cs->hdr_len <= cs->key_idx_off ||
- cs->key_idx_shift > 7 ||
- cs->key_idx_mask == 0);
-}
-
-bool ieee80211_cs_list_valid(const struct ieee80211_cipher_scheme *cs, int n)
-{
- int i;
-
- /* Ensure we have enough iftype bitmap space for all iftype values */
- WARN_ON((NUM_NL80211_IFTYPES / 8 + 1) > sizeof(cs[0].iftype));
-
- for (i = 0; i < n; i++)
- if (!ieee80211_cs_valid(&cs[i]))
- return false;
-
- return true;
-}
-
-const struct ieee80211_cipher_scheme *
-ieee80211_cs_get(struct ieee80211_local *local, u32 cipher,
- enum nl80211_iftype iftype)
-{
- const struct ieee80211_cipher_scheme *l = local->hw.cipher_schemes;
- int n = local->hw.n_cipher_schemes;
- int i;
- const struct ieee80211_cipher_scheme *cs = NULL;
-
- for (i = 0; i < n; i++) {
- if (l[i].cipher == cipher) {
- cs = &l[i];
- break;
- }
- }
-
- if (!cs || !(cs->iftype & BIT(iftype)))
- return NULL;
-
- return cs;
-}
-
-int ieee80211_cs_headroom(struct ieee80211_local *local,
- struct cfg80211_crypto_settings *crypto,
- enum nl80211_iftype iftype)
-{
- const struct ieee80211_cipher_scheme *cs;
- int headroom = IEEE80211_ENCRYPT_HEADROOM;
- int i;
-
- for (i = 0; i < crypto->n_ciphers_pairwise; i++) {
- cs = ieee80211_cs_get(local, crypto->ciphers_pairwise[i],
- iftype);
-
- if (cs && headroom < cs->hdr_len)
- headroom = cs->hdr_len;
- }
-
- cs = ieee80211_cs_get(local, crypto->cipher_group, iftype);
- if (cs && headroom < cs->hdr_len)
- headroom = cs->hdr_len;
-
- return headroom;
-}
-
static bool
ieee80211_extend_noa_desc(struct ieee80211_noa_data *data, u32 tsf, int i)
{
@@ -4364,7 +4495,7 @@ void ieee80211_recalc_dtim(struct ieee80211_local *local,
static u8 ieee80211_chanctx_radar_detect(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx)
{
- struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_link_data *link;
u8 radar_detect = 0;
lockdep_assert_held(&local->chanctx_mtx);
@@ -4372,20 +4503,24 @@ static u8 ieee80211_chanctx_radar_detect(struct ieee80211_local *local,
if (WARN_ON(ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED))
return 0;
- list_for_each_entry(sdata, &ctx->reserved_vifs, reserved_chanctx_list)
- if (sdata->reserved_radar_required)
- radar_detect |= BIT(sdata->reserved_chandef.width);
+ list_for_each_entry(link, &ctx->reserved_links, reserved_chanctx_list)
+ if (link->reserved_radar_required)
+ radar_detect |= BIT(link->reserved_chandef.width);
/*
* An in-place reservation context should not have any assigned vifs
* until it replaces the other context.
*/
WARN_ON(ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER &&
- !list_empty(&ctx->assigned_vifs));
+ !list_empty(&ctx->assigned_links));
+
+ list_for_each_entry(link, &ctx->assigned_links, assigned_chanctx_list) {
+ if (!link->radar_required)
+ continue;
- list_for_each_entry(sdata, &ctx->assigned_vifs, assigned_chanctx_list)
- if (sdata->radar_required)
- radar_detect |= BIT(sdata->vif.bss_conf.chandef.width);
+ radar_detect |=
+ BIT(link->conf->chandef.width);
+ }
return radar_detect;
}
@@ -4633,3 +4768,104 @@ u16 ieee80211_encode_usf(int listen_interval)
return (u16) listen_interval;
}
+
+u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata, u8 iftype)
+{
+ const struct ieee80211_sta_he_cap *he_cap;
+ const struct ieee80211_sta_eht_cap *eht_cap;
+ struct ieee80211_supported_band *sband;
+ bool is_ap;
+ u8 n;
+
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return 0;
+
+ he_cap = ieee80211_get_he_iftype_cap(sband, iftype);
+ eht_cap = ieee80211_get_eht_iftype_cap(sband, iftype);
+ if (!he_cap || !eht_cap)
+ return 0;
+
+ is_ap = iftype == NL80211_IFTYPE_AP ||
+ iftype == NL80211_IFTYPE_P2P_GO;
+
+ n = ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem,
+ &eht_cap->eht_cap_elem,
+ is_ap);
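+ /* EID + length + extension ID + fixed caps + MCS/NSS set + PPE thresholds */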
+ return 2 + 1 +
+ sizeof(eht_cap->eht_cap_elem) + n +
+ ieee80211_eht_ppe_size(eht_cap->eht_ppe_thres[0],
+ eht_cap->eht_cap_elem.phy_cap_info);
+}
+
+u8 *ieee80211_ie_build_eht_cap(u8 *pos,
+ const struct ieee80211_sta_he_cap *he_cap,
+ const struct ieee80211_sta_eht_cap *eht_cap,
+ u8 *end,
+ bool for_ap)
+{
+ u8 mcs_nss_len, ppet_len;
+ u8 ie_len;
+ u8 *orig_pos = pos;
+
+ /* Make sure we have room for the IE */
+ if (!he_cap || !eht_cap)
+ return orig_pos;
+
+ mcs_nss_len = ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem,
+ &eht_cap->eht_cap_elem,
+ for_ap);
+ ppet_len = ieee80211_eht_ppe_size(eht_cap->eht_ppe_thres[0],
+ eht_cap->eht_cap_elem.phy_cap_info);
+
+ ie_len = 2 + 1 + sizeof(eht_cap->eht_cap_elem) + mcs_nss_len + ppet_len;
+ if ((end - pos) < ie_len)
+ return orig_pos;
+
+ *pos++ = WLAN_EID_EXTENSION;
+ *pos++ = ie_len - 2;
+ *pos++ = WLAN_EID_EXT_EHT_CAPABILITY;
+
+ /* Fixed data */
+ memcpy(pos, &eht_cap->eht_cap_elem, sizeof(eht_cap->eht_cap_elem));
+ pos += sizeof(eht_cap->eht_cap_elem);
+
+ memcpy(pos, &eht_cap->eht_mcs_nss_supp, mcs_nss_len);
+ pos += mcs_nss_len;
+
+ if (ppet_len) {
+ memcpy(pos, &eht_cap->eht_ppe_thres, ppet_len);
+ pos += ppet_len;
+ }
+
+ return pos;
+}
+
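
For orientation, the element emitted above is laid out as: extension EID,
length octet, EHT extended EID, the fixed capability fields, the MCS/NSS
set (whose size depends on the supported widths), then optional PPE
thresholds. A standalone sketch of the same layout and length arithmetic;
the buffer sizes here are assumed for illustration, not the kernel's
struct sizes:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define WLAN_EID_EXTENSION		255
#define WLAN_EID_EXT_EHT_CAPABILITY	108

int main(void)
{
	uint8_t fixed[11] = { 0 };	/* assumed fixed-caps size */
	uint8_t mcs_nss[9] = { 0 };	/* width-dependent in reality */
	uint8_t ppet[4] = { 0 };	/* optional; may be empty */
	uint8_t buf[64], *pos = buf;
	uint8_t ie_len = 2 + 1 + sizeof(fixed) + sizeof(mcs_nss) + sizeof(ppet);

	*pos++ = WLAN_EID_EXTENSION;
	*pos++ = ie_len - 2;	/* length excludes the EID/length octets */
	*pos++ = WLAN_EID_EXT_EHT_CAPABILITY;
	memcpy(pos, fixed, sizeof(fixed));     pos += sizeof(fixed);
	memcpy(pos, mcs_nss, sizeof(mcs_nss)); pos += sizeof(mcs_nss);
	memcpy(pos, ppet, sizeof(ppet));       pos += sizeof(ppet);

	printf("element: %td bytes total, length octet %u\n",
	       pos - buf, buf[1]);	/* 27 bytes, length octet 25 */
	return 0;
}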
+void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos)
+{
+ unsigned int elem_len;
+
+ if (!len_pos)
+ return;
+
+ elem_len = skb->data + skb->len - len_pos - 1;
+
+ while (elem_len > 255) {
+ /* this one is 255 */
+ *len_pos = 255;
+ /* remaining data gets smaller */
+ elem_len -= 255;
+ /* make space for the fragment ID/len in SKB */
+ skb_put(skb, 2);
+ /* shift back the remaining data to place fragment ID/len */
+ memmove(len_pos + 255 + 3, len_pos + 255 + 1, elem_len);
+ /* place the fragment ID */
+ len_pos += 255 + 1;
+ *len_pos = WLAN_EID_FRAGMENT;
+ /* and point to fragment length to update later */
+ len_pos++;
+ }
+
+ *len_pos = elem_len;
+}
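
A quick standalone check of the fragmentation rule implemented above: any
element body longer than 255 octets keeps its first 255 octets under the
original length octet, and every further run is prefixed with a two-octet
WLAN_EID_FRAGMENT (242) sub-header. The payload length below is an
arbitrary example:

#include <stdio.h>

int main(void)
{
	unsigned int elem_len = 600;	/* assumed element body length */
	unsigned int frags = 0;

	while (elem_len > 255) {
		frags++;		/* one more fragment header */
		elem_len -= 255;
	}
	/* prints: 2 full chunk(s), final fragment 90 bytes, 4 header bytes */
	printf("%u full chunk(s), final fragment %u bytes, %u header bytes\n",
	       frags, elem_len, frags * 2);
	return 0;
}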
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index e856f9092137..803de5881485 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -4,7 +4,7 @@
*
* Portions of this file
* Copyright(c) 2015 - 2016 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2020 Intel Corporation
+ * Copyright (C) 2018 - 2022 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -116,16 +116,16 @@ void
ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
const struct ieee80211_vht_cap *vht_cap_ie,
- struct sta_info *sta)
+ struct link_sta_info *link_sta)
{
- struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap;
+ struct ieee80211_sta_vht_cap *vht_cap = &link_sta->pub->vht_cap;
struct ieee80211_sta_vht_cap own_cap;
u32 cap_info, i;
bool have_80mhz;
memset(vht_cap, 0, sizeof(*vht_cap));
- if (!sta->sta.ht_cap.ht_supported)
+ if (!link_sta->pub->ht_cap.ht_supported)
return;
if (!vht_cap_ie || !sband->vht_cap.vht_supported)
@@ -162,7 +162,7 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
* our own capabilities and then use those below.
*/
if (sdata->vif.type == NL80211_IFTYPE_STATION &&
- !test_sta_flag(sta, WLAN_STA_TDLS_PEER))
+ !test_sta_flag(link_sta->sta, WLAN_STA_TDLS_PEER))
ieee80211_apply_vhtcap_overrides(sdata, &own_cap);
/* take some capabilities as-is */
@@ -286,8 +286,9 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
*/
if (vht_cap->vht_mcs.rx_mcs_map == cpu_to_le16(0xFFFF)) {
vht_cap->vht_supported = false;
- sdata_info(sdata, "Ignoring VHT IE from %pM due to invalid rx_mcs_map\n",
- sta->addr);
+ sdata_info(sdata,
+ "Ignoring VHT IE from %pM (link:%pM) due to invalid rx_mcs_map\n",
+ link_sta->sta->addr, link_sta->addr);
return;
}
@@ -295,10 +296,10 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {
case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ:
case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ:
- sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
+ link_sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
break;
default:
- sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80;
+ link_sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80;
if (!(vht_cap->vht_mcs.tx_highest &
cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE)))
@@ -310,54 +311,86 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
* above) between 160 and 80+80 yet.
*/
if (cap_info & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK)
- sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
+ link_sta->cur_max_bandwidth =
+ IEEE80211_STA_RX_BW_160;
}
- sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta);
+ link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta);
+ /*
+ * FIXME - should the amsdu len be per link? store per link
+ * and maintain a minimum?
+ */
switch (vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK) {
case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454:
- sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454;
+ link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454;
break;
case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991:
- sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_7991;
+ link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_7991;
break;
case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895:
default:
- sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_3895;
+ link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_3895;
break;
}
+
+ ieee80211_sta_recalc_aggregates(&link_sta->sta->sta);
}
-/* FIXME: move this to some better location - parses HE now */
-enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta)
+/* FIXME: move this to some better location - parses HE/EHT now */
+enum ieee80211_sta_rx_bandwidth
+ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta)
{
- struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap;
- struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap;
+ unsigned int link_id = link_sta->link_id;
+ struct ieee80211_sub_if_data *sdata = link_sta->sta->sdata;
+ struct ieee80211_sta_vht_cap *vht_cap = &link_sta->pub->vht_cap;
+ struct ieee80211_sta_he_cap *he_cap = &link_sta->pub->he_cap;
+ struct ieee80211_sta_eht_cap *eht_cap = &link_sta->pub->eht_cap;
u32 cap_width;
if (he_cap->has_he) {
- u8 info = he_cap->he_cap_elem.phy_cap_info[0];
+ struct ieee80211_bss_conf *link_conf;
+ enum ieee80211_sta_rx_bandwidth ret;
+ u8 info;
+
+ rcu_read_lock();
+ link_conf = rcu_dereference(sdata->vif.link_conf[link_id]);
+
+ if (eht_cap->has_eht &&
+ link_conf->chandef.chan->band == NL80211_BAND_6GHZ) {
+ info = eht_cap->eht_cap_elem.phy_cap_info[0];
- if (sta->sdata->vif.bss_conf.chandef.chan->band ==
- NL80211_BAND_2GHZ) {
+ if (info & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ) {
+ ret = IEEE80211_STA_RX_BW_320;
+ goto out;
+ }
+ }
+
+ info = he_cap->he_cap_elem.phy_cap_info[0];
+
+ if (link_conf->chandef.chan->band == NL80211_BAND_2GHZ) {
if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G)
- return IEEE80211_STA_RX_BW_40;
+ ret = IEEE80211_STA_RX_BW_40;
else
- return IEEE80211_STA_RX_BW_20;
+ ret = IEEE80211_STA_RX_BW_20;
+ goto out;
}
if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G ||
info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G)
- return IEEE80211_STA_RX_BW_160;
+ ret = IEEE80211_STA_RX_BW_160;
else if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G)
- return IEEE80211_STA_RX_BW_80;
+ ret = IEEE80211_STA_RX_BW_80;
+ else
+ ret = IEEE80211_STA_RX_BW_20;
+out:
+ rcu_read_unlock();
- return IEEE80211_STA_RX_BW_20;
+ return ret;
}
if (!vht_cap->vht_supported)
- return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
+ return link_sta->pub->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
IEEE80211_STA_RX_BW_40 :
IEEE80211_STA_RX_BW_20;
@@ -378,16 +411,17 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta)
return IEEE80211_STA_RX_BW_80;
}
-enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta)
+enum nl80211_chan_width
+ieee80211_sta_cap_chan_bw(struct link_sta_info *link_sta)
{
- struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap;
+ struct ieee80211_sta_vht_cap *vht_cap = &link_sta->pub->vht_cap;
u32 cap_width;
if (!vht_cap->vht_supported) {
- if (!sta->sta.ht_cap.ht_supported)
+ if (!link_sta->pub->ht_cap.ht_supported)
return NL80211_CHAN_WIDTH_20_NOHT;
- return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
+ return link_sta->pub->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
NL80211_CHAN_WIDTH_40 : NL80211_CHAN_WIDTH_20;
}
@@ -402,15 +436,17 @@ enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta)
}
enum nl80211_chan_width
-ieee80211_sta_rx_bw_to_chan_width(struct sta_info *sta)
+ieee80211_sta_rx_bw_to_chan_width(struct link_sta_info *link_sta)
{
- enum ieee80211_sta_rx_bandwidth cur_bw = sta->sta.bandwidth;
- struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap;
+ enum ieee80211_sta_rx_bandwidth cur_bw =
+ link_sta->pub->bandwidth;
+ struct ieee80211_sta_vht_cap *vht_cap =
+ &link_sta->pub->vht_cap;
u32 cap_width;
switch (cur_bw) {
case IEEE80211_STA_RX_BW_20:
- if (!sta->sta.ht_cap.ht_supported)
+ if (!link_sta->pub->ht_cap.ht_supported)
return NL80211_CHAN_WIDTH_20_NOHT;
else
return NL80211_CHAN_WIDTH_20;
@@ -445,6 +481,8 @@ ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width)
case NL80211_CHAN_WIDTH_160:
case NL80211_CHAN_WIDTH_80P80:
return IEEE80211_STA_RX_BW_160;
+ case NL80211_CHAN_WIDTH_320:
+ return IEEE80211_STA_RX_BW_320;
default:
WARN_ON_ONCE(1);
return IEEE80211_STA_RX_BW_20;
@@ -452,14 +490,24 @@ ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width)
}
/* FIXME: rename/move - this deals with everything not just VHT */
-enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta)
+enum ieee80211_sta_rx_bandwidth
+ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta)
{
- struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct sta_info *sta = link_sta->sta;
+ struct ieee80211_bss_conf *link_conf;
+ enum nl80211_chan_width bss_width;
enum ieee80211_sta_rx_bandwidth bw;
- enum nl80211_chan_width bss_width = sdata->vif.bss_conf.chandef.width;
- bw = ieee80211_sta_cap_rx_bw(sta);
- bw = min(bw, sta->cur_max_bandwidth);
+ rcu_read_lock();
+ link_conf = rcu_dereference(sta->sdata->vif.link_conf[link_sta->link_id]);
+ if (WARN_ON(!link_conf))
+ bss_width = NL80211_CHAN_WIDTH_20_NOHT;
+ else
+ bss_width = link_conf->chandef.width;
+ rcu_read_unlock();
+
+ bw = ieee80211_sta_cap_rx_bw(link_sta);
+ bw = min(bw, link_sta->cur_max_bandwidth);
/* Don't consider AP's bandwidth for TDLS peers, section 11.23.1 of
* IEEE80211-2016 specification makes higher bandwidth operation
@@ -481,19 +529,30 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta)
return bw;
}
-void ieee80211_sta_set_rx_nss(struct sta_info *sta)
+void ieee80211_sta_set_rx_nss(struct link_sta_info *link_sta)
{
- u8 ht_rx_nss = 0, vht_rx_nss = 0, he_rx_nss = 0, rx_nss;
+ u8 ht_rx_nss = 0, vht_rx_nss = 0, he_rx_nss = 0, eht_rx_nss = 0, rx_nss;
bool support_160;
/* if we received a notification already don't overwrite it */
- if (sta->sta.rx_nss)
+ if (link_sta->pub->rx_nss)
return;
- if (sta->sta.he_cap.has_he) {
+ if (link_sta->pub->eht_cap.has_eht) {
+ int i;
+ const u8 *rx_nss_mcs = (void *)&link_sta->pub->eht_cap.eht_mcs_nss_supp;
+
+ /* get the max nss for EHT over all possible bandwidths and mcs */
+ for (i = 0; i < sizeof(struct ieee80211_eht_mcs_nss_supp); i++)
+ eht_rx_nss = max_t(u8, eht_rx_nss,
+ u8_get_bits(rx_nss_mcs[i],
+ IEEE80211_EHT_MCS_NSS_RX));
+ }
+
+ if (link_sta->pub->he_cap.has_he) {
int i;
u8 rx_mcs_80 = 0, rx_mcs_160 = 0;
- const struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap;
+ const struct ieee80211_sta_he_cap *he_cap = &link_sta->pub->he_cap;
u16 mcs_160_map =
le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_160);
u16 mcs_80_map = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_80);
@@ -501,7 +560,7 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta)
for (i = 7; i >= 0; i--) {
u8 mcs_160 = (mcs_160_map >> (2 * i)) & 3;
- if (mcs_160 != IEEE80211_VHT_MCS_NOT_SUPPORTED) {
+ if (mcs_160 != IEEE80211_HE_MCS_NOT_SUPPORTED) {
rx_mcs_160 = i + 1;
break;
}
@@ -509,7 +568,7 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta)
for (i = 7; i >= 0; i--) {
u8 mcs_80 = (mcs_80_map >> (2 * i)) & 3;
- if (mcs_80 != IEEE80211_VHT_MCS_NOT_SUPPORTED) {
+ if (mcs_80 != IEEE80211_HE_MCS_NOT_SUPPORTED) {
rx_mcs_80 = i + 1;
break;
}
@@ -524,23 +583,23 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta)
he_rx_nss = rx_mcs_80;
}
- if (sta->sta.ht_cap.ht_supported) {
- if (sta->sta.ht_cap.mcs.rx_mask[0])
+ if (link_sta->pub->ht_cap.ht_supported) {
+ if (link_sta->pub->ht_cap.mcs.rx_mask[0])
ht_rx_nss++;
- if (sta->sta.ht_cap.mcs.rx_mask[1])
+ if (link_sta->pub->ht_cap.mcs.rx_mask[1])
ht_rx_nss++;
- if (sta->sta.ht_cap.mcs.rx_mask[2])
+ if (link_sta->pub->ht_cap.mcs.rx_mask[2])
ht_rx_nss++;
- if (sta->sta.ht_cap.mcs.rx_mask[3])
+ if (link_sta->pub->ht_cap.mcs.rx_mask[3])
ht_rx_nss++;
/* FIXME: consider rx_highest? */
}
- if (sta->sta.vht_cap.vht_supported) {
+ if (link_sta->pub->vht_cap.vht_supported) {
int i;
u16 rx_mcs_map;
- rx_mcs_map = le16_to_cpu(sta->sta.vht_cap.vht_mcs.rx_mcs_map);
+ rx_mcs_map = le16_to_cpu(link_sta->pub->vht_cap.vht_mcs.rx_mcs_map);
for (i = 7; i >= 0; i--) {
u8 mcs = (rx_mcs_map >> (2 * i)) & 3;
@@ -555,12 +614,13 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta)
rx_nss = max(vht_rx_nss, ht_rx_nss);
rx_nss = max(he_rx_nss, rx_nss);
- sta->sta.rx_nss = max_t(u8, 1, rx_nss);
+ rx_nss = max(eht_rx_nss, rx_nss);
+ link_sta->pub->rx_nss = max_t(u8, 1, rx_nss);
}
u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta, u8 opmode,
- enum nl80211_band band)
+ struct link_sta_info *link_sta,
+ u8 opmode, enum nl80211_band band)
{
enum ieee80211_sta_rx_bandwidth new_bw;
struct sta_opmode_info sta_opmode = {};
@@ -575,8 +635,8 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT;
nss += 1;
- if (sta->sta.rx_nss != nss) {
- sta->sta.rx_nss = nss;
+ if (link_sta->pub->rx_nss != nss) {
+ link_sta->pub->rx_nss = nss;
sta_opmode.rx_nss = nss;
changed |= IEEE80211_RC_NSS_CHANGED;
sta_opmode.changed |= STA_OPMODE_N_SS_CHANGED;
@@ -585,88 +645,97 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) {
case IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ:
/* ignore IEEE80211_OPMODE_NOTIF_BW_160_80P80 must not be set */
- sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_20;
+ link_sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_20;
break;
case IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ:
/* ignore IEEE80211_OPMODE_NOTIF_BW_160_80P80 must not be set */
- sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_40;
+ link_sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_40;
break;
case IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ:
if (opmode & IEEE80211_OPMODE_NOTIF_BW_160_80P80)
- sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
+ link_sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
else
- sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80;
+ link_sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80;
break;
case IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ:
/* legacy only, no longer used by newer spec */
- sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
+ link_sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
break;
}
- new_bw = ieee80211_sta_cur_vht_bw(sta);
- if (new_bw != sta->sta.bandwidth) {
- sta->sta.bandwidth = new_bw;
- sta_opmode.bw = ieee80211_sta_rx_bw_to_chan_width(sta);
+ new_bw = ieee80211_sta_cur_vht_bw(link_sta);
+ if (new_bw != link_sta->pub->bandwidth) {
+ link_sta->pub->bandwidth = new_bw;
+ sta_opmode.bw = ieee80211_sta_rx_bw_to_chan_width(link_sta);
changed |= IEEE80211_RC_BW_CHANGED;
sta_opmode.changed |= STA_OPMODE_MAX_BW_CHANGED;
}
if (sta_opmode.changed)
- cfg80211_sta_opmode_change_notify(sdata->dev, sta->addr,
+ cfg80211_sta_opmode_change_notify(sdata->dev, link_sta->addr,
&sta_opmode, GFP_KERNEL);
return changed;
}
void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
struct ieee80211_mgmt *mgmt)
{
- struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
+ struct ieee80211_bss_conf *link_conf = link->conf;
- if (!sdata->vif.mu_mimo_owner)
+ if (!link_conf->mu_mimo_owner)
return;
if (!memcmp(mgmt->u.action.u.vht_group_notif.position,
- bss_conf->mu_group.position, WLAN_USER_POSITION_LEN) &&
+ link_conf->mu_group.position, WLAN_USER_POSITION_LEN) &&
!memcmp(mgmt->u.action.u.vht_group_notif.membership,
- bss_conf->mu_group.membership, WLAN_MEMBERSHIP_LEN))
+ link_conf->mu_group.membership, WLAN_MEMBERSHIP_LEN))
return;
- memcpy(bss_conf->mu_group.membership,
+ memcpy(link_conf->mu_group.membership,
mgmt->u.action.u.vht_group_notif.membership,
WLAN_MEMBERSHIP_LEN);
- memcpy(bss_conf->mu_group.position,
+ memcpy(link_conf->mu_group.position,
mgmt->u.action.u.vht_group_notif.position,
WLAN_USER_POSITION_LEN);
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_MU_GROUPS);
+ ieee80211_link_info_change_notify(sdata, link,
+ BSS_CHANGED_MU_GROUPS);
}
-void ieee80211_update_mu_groups(struct ieee80211_vif *vif,
+void ieee80211_update_mu_groups(struct ieee80211_vif *vif, unsigned int link_id,
const u8 *membership, const u8 *position)
{
- struct ieee80211_bss_conf *bss_conf = &vif->bss_conf;
+ struct ieee80211_bss_conf *link_conf;
- if (WARN_ON_ONCE(!vif->mu_mimo_owner))
- return;
+ rcu_read_lock();
+ link_conf = rcu_dereference(vif->link_conf[link_id]);
- memcpy(bss_conf->mu_group.membership, membership, WLAN_MEMBERSHIP_LEN);
- memcpy(bss_conf->mu_group.position, position, WLAN_USER_POSITION_LEN);
+ if (!WARN_ON_ONCE(!link_conf || !link_conf->mu_mimo_owner)) {
+ memcpy(link_conf->mu_group.membership, membership,
+ WLAN_MEMBERSHIP_LEN);
+ memcpy(link_conf->mu_group.position, position,
+ WLAN_USER_POSITION_LEN);
+ }
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ieee80211_update_mu_groups);
void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta, u8 opmode,
- enum nl80211_band band)
+ struct link_sta_info *link_sta,
+ u8 opmode, enum nl80211_band band)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
- u32 changed = __ieee80211_vht_handle_opmode(sdata, sta, opmode, band);
+ u32 changed = __ieee80211_vht_handle_opmode(sdata, link_sta,
+ opmode, band);
if (changed > 0) {
- ieee80211_recalc_min_chandef(sdata);
- rate_control_rate_update(local, sband, sta, changed);
+ ieee80211_recalc_min_chandef(sdata, link_sta->link_id);
+ rate_control_rate_update(local, sband, link_sta->sta,
+ link_sta->link_id, changed);
}
}
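
The NSS derivation above scans each rx MCS map from the top: VHT and HE
pack two bits per spatial stream, with 0x3 meaning "not supported", so the
first field below 0x3 gives the stream count. A standalone sketch of that
scan (the map value is an arbitrary example):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t mcs_map = 0xfffa;	/* streams 1-2 usable, rest 0x3 */
	int nss = 0;
	int i;

	for (i = 7; i >= 0; i--) {
		uint8_t mcs = (mcs_map >> (2 * i)) & 3;

		if (mcs != 3) {		/* 3 == MCS_NOT_SUPPORTED */
			nss = i + 1;
			break;
		}
	}
	printf("rx_nss = %d\n", nss);	/* 2 */
	return 0;
}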
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 62c6733e0792..ecc1de2e68a5 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -2,6 +2,7 @@
/*
* Copyright 2004, Instant802 Networks, Inc.
* Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright (C) 2022 Intel Corporation
*/
#include <linux/netdevice.h>
@@ -147,8 +148,8 @@ u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
bool qos;
/* all mesh/ocb stations are required to support WME */
- if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
- sdata->vif.type == NL80211_IFTYPE_OCB)
+ if (sta && (sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
+ sdata->vif.type == NL80211_IFTYPE_OCB))
qos = true;
else if (sta)
qos = sta->sta.wme;
@@ -210,7 +211,7 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
if (sta)
break;
- ra = sdata->u.mgd.bssid;
+ ra = sdata->deflink.u.mgd.bssid;
break;
case NL80211_IFTYPE_ADHOC:
ra = skb->data;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 7ed0d268aff2..20f742b5503b 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -3,7 +3,7 @@
* Copyright 2002-2004, Instant802 Networks, Inc.
* Copyright 2008, Jouni Malinen <j@w1.fi>
* Copyright (C) 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2020-2021 Intel Corporation
+ * Copyright (C) 2020-2022 Intel Corporation
*/
#include <linux/netdevice.h>
@@ -311,19 +311,21 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
return RX_CONTINUE;
}
-
-static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad)
+/*
+ * Calculate the AAD for CCMP/GCMP, returning qos_tid since
+ * CCMP also needs it to build b_0.
+ */
+static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad)
{
+ struct ieee80211_hdr *hdr = (void *)skb->data;
__le16 mask_fc;
int a4_included, mgmt;
u8 qos_tid;
- u16 len_a;
- unsigned int hdrlen;
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+ u16 len_a = 22;
/*
* Mask FC: zero subtype b4 b5 b6 (if not mgmt)
- * Retry, PwrMgt, MoreData; set Protected
+ * Retry, PwrMgt, MoreData, Order (if QoS Data); set Protected
*/
mgmt = ieee80211_is_mgmt(hdr->frame_control);
mask_fc = hdr->frame_control;
@@ -333,36 +335,23 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad)
mask_fc &= ~cpu_to_le16(0x0070);
mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
- hdrlen = ieee80211_hdrlen(hdr->frame_control);
- len_a = hdrlen - 2;
a4_included = ieee80211_has_a4(hdr->frame_control);
+ if (a4_included)
+ len_a += 6;
- if (ieee80211_is_data_qos(hdr->frame_control))
+ if (ieee80211_is_data_qos(hdr->frame_control)) {
qos_tid = ieee80211_get_tid(hdr);
- else
+ mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_ORDER);
+ len_a += 2;
+ } else {
qos_tid = 0;
-
- /* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC
- * mode authentication are not allowed to collide, yet both are derived
- * from this vector b_0. We only set L := 1 here to indicate that the
- * data size can be represented in (L+1) bytes. The CCM layer will take
- * care of storing the data length in the top (L+1) bytes and setting
- * and clearing the other bits as is required to derive the two IVs.
- */
- b_0[0] = 0x1;
-
- /* Nonce: Nonce Flags | A2 | PN
- * Nonce Flags: Priority (b0..b3) | Management (b4) | Reserved (b5..b7)
- */
- b_0[1] = qos_tid | (mgmt << 4);
- memcpy(&b_0[2], hdr->addr2, ETH_ALEN);
- memcpy(&b_0[8], pn, IEEE80211_CCMP_PN_LEN);
+ }
/* AAD (extra authenticate-only data) / masked 802.11 header
* FC | A1 | A2 | A3 | SC | [A4] | [QC] */
put_unaligned_be16(len_a, &aad[0]);
put_unaligned(mask_fc, (__le16 *)&aad[2]);
- memcpy(&aad[4], &hdr->addr1, 3 * ETH_ALEN);
+ memcpy(&aad[4], &hdr->addrs, 3 * ETH_ALEN);
/* Mask Seq#, leave Frag# */
aad[22] = *((u8 *) &hdr->seq_ctrl) & 0x0f;
@@ -376,8 +365,31 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad)
memset(&aad[24], 0, ETH_ALEN + IEEE80211_QOS_CTL_LEN);
aad[24] = qos_tid;
}
+
+ return qos_tid;
}
+static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad)
+{
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+ u8 qos_tid = ccmp_gcmp_aad(skb, aad);
+
+ /* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC
+ * mode authentication are not allowed to collide, yet both are derived
+ * from this vector b_0. We only set L := 1 here to indicate that the
+ * data size can be represented in (L+1) bytes. The CCM layer will take
+ * care of storing the data length in the top (L+1) bytes and setting
+ * and clearing the other bits as is required to derive the two IVs.
+ */
+ b_0[0] = 0x1;
+
+ /* Nonce: Nonce Flags | A2 | PN
+ * Nonce Flags: Priority (b0..b3) | Management (b4) | Reserved (b5..b7)
+ */
+ b_0[1] = qos_tid | (ieee80211_is_mgmt(hdr->frame_control) << 4);
+ memcpy(&b_0[2], hdr->addr2, ETH_ALEN);
+ memcpy(&b_0[8], pn, IEEE80211_CCMP_PN_LEN);
+}
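
For reference, the b_0 block built above follows the CCM layout: one flags
octet, a 13-octet nonce (priority/management flags, transmitter address,
packet number), and two zero octets the CCM layer fills with the message
length. A standalone sketch with made-up address and PN values:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ETH_ALEN	6
#define CCMP_PN_LEN	6

int main(void)
{
	uint8_t b_0[16] = { 0 };
	uint8_t addr2[ETH_ALEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
	uint8_t pn[CCMP_PN_LEN] = { 0, 0, 0, 0, 0, 42 };
	uint8_t qos_tid = 5;
	int mgmt = 0;
	int i;

	b_0[0] = 0x1;			/* L' = 1: 2-byte length field */
	b_0[1] = qos_tid | (mgmt << 4);	/* nonce flags */
	memcpy(&b_0[2], addr2, ETH_ALEN);
	memcpy(&b_0[8], pn, CCMP_PN_LEN);
	/* b_0[14..15] stay zero; the CCM layer stores the length there */

	for (i = 0; i < 16; i++)
		printf("%02x%c", b_0[i], i == 15 ? '\n' : ' ');
	return 0;
}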
static inline void ccmp_pn2hdr(u8 *hdr, u8 *pn, int key_id)
{
@@ -571,9 +583,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad)
{
- __le16 mask_fc;
- u8 qos_tid;
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+ struct ieee80211_hdr *hdr = (void *)skb->data;
memcpy(j_0, hdr->addr2, ETH_ALEN);
memcpy(&j_0[ETH_ALEN], pn, IEEE80211_GCMP_PN_LEN);
@@ -581,40 +591,7 @@ static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad)
j_0[14] = 0;
j_0[AES_BLOCK_SIZE - 1] = 0x01;
- /* AAD (extra authenticate-only data) / masked 802.11 header
- * FC | A1 | A2 | A3 | SC | [A4] | [QC]
- */
- put_unaligned_be16(ieee80211_hdrlen(hdr->frame_control) - 2, &aad[0]);
- /* Mask FC: zero subtype b4 b5 b6 (if not mgmt)
- * Retry, PwrMgt, MoreData; set Protected
- */
- mask_fc = hdr->frame_control;
- mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_RETRY |
- IEEE80211_FCTL_PM | IEEE80211_FCTL_MOREDATA);
- if (!ieee80211_is_mgmt(hdr->frame_control))
- mask_fc &= ~cpu_to_le16(0x0070);
- mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
-
- put_unaligned(mask_fc, (__le16 *)&aad[2]);
- memcpy(&aad[4], &hdr->addr1, 3 * ETH_ALEN);
-
- /* Mask Seq#, leave Frag# */
- aad[22] = *((u8 *)&hdr->seq_ctrl) & 0x0f;
- aad[23] = 0;
-
- if (ieee80211_is_data_qos(hdr->frame_control))
- qos_tid = ieee80211_get_tid(hdr);
- else
- qos_tid = 0;
-
- if (ieee80211_has_a4(hdr->frame_control)) {
- memcpy(&aad[24], hdr->addr4, ETH_ALEN);
- aad[30] = qos_tid;
- aad[31] = 0;
- } else {
- memset(&aad[24], 0, ETH_ALEN + IEEE80211_QOS_CTL_LEN);
- aad[24] = qos_tid;
- }
+ ccmp_gcmp_aad(skb, aad);
}
static inline void gcmp_pn2hdr(u8 *hdr, const u8 *pn, int key_id)
@@ -801,102 +778,6 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
return RX_CONTINUE;
}
-static ieee80211_tx_result
-ieee80211_crypto_cs_encrypt(struct ieee80211_tx_data *tx,
- struct sk_buff *skb)
-{
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
- struct ieee80211_key *key = tx->key;
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
- int hdrlen;
- u8 *pos, iv_len = key->conf.iv_len;
-
- if (info->control.hw_key &&
- !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) {
- /* hwaccel has no need for preallocated head room */
- return TX_CONTINUE;
- }
-
- if (unlikely(skb_headroom(skb) < iv_len &&
- pskb_expand_head(skb, iv_len, 0, GFP_ATOMIC)))
- return TX_DROP;
-
- hdrlen = ieee80211_hdrlen(hdr->frame_control);
-
- pos = skb_push(skb, iv_len);
- memmove(pos, pos + iv_len, hdrlen);
-
- return TX_CONTINUE;
-}
-
-static inline int ieee80211_crypto_cs_pn_compare(u8 *pn1, u8 *pn2, int len)
-{
- int i;
-
- /* pn is little endian */
- for (i = len - 1; i >= 0; i--) {
- if (pn1[i] < pn2[i])
- return -1;
- else if (pn1[i] > pn2[i])
- return 1;
- }
-
- return 0;
-}
-
-static ieee80211_rx_result
-ieee80211_crypto_cs_decrypt(struct ieee80211_rx_data *rx)
-{
- struct ieee80211_key *key = rx->key;
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
- const struct ieee80211_cipher_scheme *cs = NULL;
- int hdrlen = ieee80211_hdrlen(hdr->frame_control);
- struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
- int data_len;
- u8 *rx_pn;
- u8 *skb_pn;
- u8 qos_tid;
-
- if (!rx->sta || !rx->sta->cipher_scheme ||
- !(status->flag & RX_FLAG_DECRYPTED))
- return RX_DROP_UNUSABLE;
-
- if (!ieee80211_is_data(hdr->frame_control))
- return RX_CONTINUE;
-
- cs = rx->sta->cipher_scheme;
-
- data_len = rx->skb->len - hdrlen - cs->hdr_len;
-
- if (data_len < 0)
- return RX_DROP_UNUSABLE;
-
- if (ieee80211_is_data_qos(hdr->frame_control))
- qos_tid = ieee80211_get_tid(hdr);
- else
- qos_tid = 0;
-
- if (skb_linearize(rx->skb))
- return RX_DROP_UNUSABLE;
-
- rx_pn = key->u.gen.rx_pn[qos_tid];
- skb_pn = rx->skb->data + hdrlen + cs->pn_off;
-
- if (ieee80211_crypto_cs_pn_compare(skb_pn, rx_pn, cs->pn_len) <= 0)
- return RX_DROP_UNUSABLE;
-
- memcpy(rx_pn, skb_pn, cs->pn_len);
-
- /* remove security header and MIC */
- if (pskb_trim(rx->skb, rx->skb->len - cs->mic_len))
- return RX_DROP_UNUSABLE;
-
- memmove(rx->skb->data + cs->hdr_len, rx->skb->data, hdrlen);
- skb_pull(rx->skb, cs->hdr_len);
-
- return RX_CONTINUE;
-}
-
static void bip_aad(struct sk_buff *skb, u8 *aad)
{
__le16 mask_fc;
@@ -911,7 +792,7 @@ static void bip_aad(struct sk_buff *skb, u8 *aad)
IEEE80211_FCTL_MOREDATA);
put_unaligned(mask_fc, (__le16 *) &aad[0]);
/* A1 || A2 || A3 */
- memcpy(aad + 2, &hdr->addr1, 3 * ETH_ALEN);
+ memcpy(aad + 2, &hdr->addrs, 3 * ETH_ALEN);
}
@@ -1235,38 +1116,3 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx)
return RX_CONTINUE;
}
-
-ieee80211_tx_result
-ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx)
-{
- struct sk_buff *skb;
- struct ieee80211_tx_info *info = NULL;
- ieee80211_tx_result res;
-
- skb_queue_walk(&tx->skbs, skb) {
- info = IEEE80211_SKB_CB(skb);
-
- /* handle hw-only algorithm */
- if (!info->control.hw_key)
- return TX_DROP;
-
- if (tx->key->flags & KEY_FLAG_CIPHER_SCHEME) {
- res = ieee80211_crypto_cs_encrypt(tx, skb);
- if (res != TX_CONTINUE)
- return res;
- }
- }
-
- ieee80211_tx_set_protected(tx);
-
- return TX_CONTINUE;
-}
-
-ieee80211_rx_result
-ieee80211_crypto_hw_decrypt(struct ieee80211_rx_data *rx)
-{
- if (rx->sta && rx->sta->cipher_scheme)
- return ieee80211_crypto_cs_decrypt(rx);
-
- return RX_DROP_UNUSABLE;
-}
diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h
index af3272284e85..a9a81abb5479 100644
--- a/net/mac80211/wpa.h
+++ b/net/mac80211/wpa.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright 2002-2004, Instant802 Networks, Inc.
+ * Copyright (C) 2022 Intel Corporation
*/
#ifndef WPA_H
@@ -39,10 +40,6 @@ ieee80211_tx_result
ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx);
ieee80211_rx_result
ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx);
-ieee80211_tx_result
-ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx);
-ieee80211_rx_result
-ieee80211_crypto_hw_decrypt(struct ieee80211_rx_data *rx);
ieee80211_tx_result
ieee80211_crypto_gcmp_encrypt(struct ieee80211_tx_data *tx);
diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c
index fbeebe3bc31d..1e4a9f74ed43 100644
--- a/net/mac802154/cfg.c
+++ b/net/mac802154/cfg.c
@@ -118,6 +118,7 @@ ieee802154_set_channel(struct wpan_phy *wpan_phy, u8 page, u8 channel)
if (!ret) {
wpan_phy->current_page = page;
wpan_phy->current_channel = channel;
+ ieee802154_configure_durations(wpan_phy);
}
return ret;
diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h
index 702560acc8ce..1381e6a5e180 100644
--- a/net/mac802154/ieee802154_i.h
+++ b/net/mac802154/ieee802154_i.h
@@ -56,6 +56,8 @@ struct ieee802154_local {
struct sk_buff *tx_skb;
struct work_struct tx_work;
+ /* A negative Linux error code or a null/positive MLME error status */
+ int tx_result;
};
enum {
diff --git a/net/mac802154/main.c b/net/mac802154/main.c
index 520cedc594e1..bd7bdb1219dd 100644
--- a/net/mac802154/main.c
+++ b/net/mac802154/main.c
@@ -113,6 +113,50 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops)
}
EXPORT_SYMBOL(ieee802154_alloc_hw);
+void ieee802154_configure_durations(struct wpan_phy *phy)
+{
+ u32 duration = 0;
+
+ switch (phy->current_page) {
+ case 0:
+ if (BIT(phy->current_channel) & 0x1)
+ /* 868 MHz BPSK 802.15.4-2003: 20 ksym/s */
+ duration = 50 * NSEC_PER_USEC;
+ else if (BIT(phy->current_channel) & 0x7FE)
+ /* 915 MHz BPSK 802.15.4-2003: 40 ksym/s */
+ duration = 25 * NSEC_PER_USEC;
+ else if (BIT(phy->current_channel) & 0x7FFF800)
+ /* 2400 MHz O-QPSK 802.15.4-2006: 62.5 ksym/s */
+ duration = 16 * NSEC_PER_USEC;
+ break;
+ case 2:
+ if (BIT(phy->current_channel) & 0x1)
+ /* 868 MHz O-QPSK 802.15.4-2006: 25 ksym/s */
+ duration = 40 * NSEC_PER_USEC;
+ else if (BIT(phy->current_channel) & 0x7FE)
+ /* 915 MHz O-QPSK 802.15.4-2006: 62.5 ksym/s */
+ duration = 16 * NSEC_PER_USEC;
+ break;
+ case 3:
+ if (BIT(phy->current_channel) & 0x3FFF)
+ /* 2.4 GHz CSS 802.15.4a-2007: 1/6 Msym/s */
+ duration = 6 * NSEC_PER_USEC;
+ break;
+ default:
+ break;
+ }
+
+ if (!duration) {
+ pr_debug("Unknown PHY symbol duration\n");
+ return;
+ }
+
+ phy->symbol_duration = duration;
+	phy->lifs_period = (IEEE802154_LIFS_PERIOD * phy->symbol_duration) / NSEC_PER_USEC;
+	phy->sifs_period = (IEEE802154_SIFS_PERIOD * phy->symbol_duration) / NSEC_PER_USEC;
+}
+EXPORT_SYMBOL(ieee802154_configure_durations);
+
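
The page-0 durations above are simply the reciprocal of each PHY's symbol
rate, expressed in nanoseconds; a quick arithmetic check:

#include <stdio.h>

int main(void)
{
	/* 868 MHz BPSK, 915 MHz BPSK, 2.4 GHz O-QPSK symbol rates */
	unsigned int rates[] = { 20000, 40000, 62500 };	/* sym/s */
	unsigned int i;

	for (i = 0; i < 3; i++)
		printf("%u sym/s -> %u ns/symbol\n",
		       rates[i], 1000000000U / rates[i]);
	/* prints 50000, 25000 and 16000 ns, matching the cases above */
	return 0;
}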
void ieee802154_free_hw(struct ieee802154_hw *hw)
{
struct ieee802154_local *local = hw_to_local(hw);
@@ -131,10 +175,10 @@ static void ieee802154_setup_wpan_phy_pib(struct wpan_phy *wpan_phy)
* Should be done when all drivers sets this value.
*/
- wpan_phy->lifs_period = IEEE802154_LIFS_PERIOD *
- wpan_phy->symbol_duration;
- wpan_phy->sifs_period = IEEE802154_SIFS_PERIOD *
- wpan_phy->symbol_duration;
+ wpan_phy->lifs_period =
+ (IEEE802154_LIFS_PERIOD * wpan_phy->symbol_duration) / 1000;
+ wpan_phy->sifs_period =
+ (IEEE802154_SIFS_PERIOD * wpan_phy->symbol_duration) / 1000;
}
int ieee802154_register_hw(struct ieee802154_hw *hw)
@@ -157,6 +201,8 @@ int ieee802154_register_hw(struct ieee802154_hw *hw)
ieee802154_setup_wpan_phy_pib(local->phy);
+ ieee802154_configure_durations(local->phy);
+
if (!(hw->flags & IEEE802154_HW_CSMA_PARAMS)) {
local->phy->supported.min_csma_backoffs = 4;
local->phy->supported.max_csma_backoffs = 4;
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index b8ce84618a55..726b47a4611b 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -44,7 +44,7 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata,
switch (mac_cb(skb)->dest.mode) {
case IEEE802154_ADDR_NONE:
- if (mac_cb(skb)->dest.mode != IEEE802154_ADDR_NONE)
+ if (hdr->source.mode != IEEE802154_ADDR_NONE)
/* FIXME: check if we are PAN coordinator */
skb->pkt_type = PACKET_OTHERHOST;
else
@@ -132,7 +132,7 @@ static int
ieee802154_parse_frame_start(struct sk_buff *skb, struct ieee802154_hdr *hdr)
{
int hlen;
- struct ieee802154_mac_cb *cb = mac_cb_init(skb);
+ struct ieee802154_mac_cb *cb = mac_cb(skb);
skb_reset_mac_header(skb);
@@ -294,8 +294,9 @@ void
ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb, u8 lqi)
{
struct ieee802154_local *local = hw_to_local(hw);
+ struct ieee802154_mac_cb *cb = mac_cb_init(skb);
- mac_cb(skb)->lqi = lqi;
+ cb->lqi = lqi;
skb->pkt_type = IEEE802154_RX_MSG;
skb_queue_tail(&local->skb_queue, skb);
tasklet_schedule(&local->tasklet);
diff --git a/net/mac802154/util.c b/net/mac802154/util.c
index f2078238718b..9f024d85563b 100644
--- a/net/mac802154/util.c
+++ b/net/mac802154/util.c
@@ -58,8 +58,11 @@ enum hrtimer_restart ieee802154_xmit_ifs_timer(struct hrtimer *timer)
void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb,
bool ifs_handling)
{
+ struct ieee802154_local *local = hw_to_local(hw);
+
+ local->tx_result = IEEE802154_SUCCESS;
+
if (ifs_handling) {
- struct ieee802154_local *local = hw_to_local(hw);
u8 max_sifs_size;
/* If transceiver sets CRC on his own we need to use lifs
@@ -88,6 +91,23 @@ void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb,
}
EXPORT_SYMBOL(ieee802154_xmit_complete);
+void ieee802154_xmit_error(struct ieee802154_hw *hw, struct sk_buff *skb,
+ int reason)
+{
+ struct ieee802154_local *local = hw_to_local(hw);
+
+ local->tx_result = reason;
+ ieee802154_wake_queue(hw);
+ dev_kfree_skb_any(skb);
+}
+EXPORT_SYMBOL(ieee802154_xmit_error);
+
+void ieee802154_xmit_hw_error(struct ieee802154_hw *hw, struct sk_buff *skb)
+{
+ ieee802154_xmit_error(hw, skb, IEEE802154_SYSTEM_ERROR);
+}
+EXPORT_SYMBOL(ieee802154_xmit_hw_error);
+
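
A minimal sketch of how a driver's TX-completion path might use the new
error hooks; the handler name and the crc_failed flag are hypothetical,
only the ieee802154_xmit_*() calls are the API added above:

#include <net/mac802154.h>

static void my_driver_xmit_done(struct ieee802154_hw *hw,
				struct sk_buff *skb, bool crc_failed)
{
	if (crc_failed)
		/* report an MLME-visible failure and free the skb */
		ieee802154_xmit_error(hw, skb, IEEE802154_SYSTEM_ERROR);
	else
		/* success path, no inter-frame spacing handling here */
		ieee802154_xmit_complete(hw, skb, false);
}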
void ieee802154_stop_device(struct ieee802154_local *local)
{
flush_workqueue(local->workqueue);
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index c921de63b494..fc9e728b6333 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -6,6 +6,7 @@
* Copyright (c) 2021 Google
*/
+#include <linux/compat.h>
#include <linux/if_arp.h>
#include <linux/net.h>
#include <linux/mctp.h>
@@ -21,6 +22,8 @@
/* socket implementation */
+static void mctp_sk_expire_keys(struct timer_list *timer);
+
static int mctp_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -90,22 +93,29 @@ out_release:
static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
- const int hlen = MCTP_HEADER_MAXLEN + sizeof(struct mctp_hdr);
int rc, addrlen = msg->msg_namelen;
struct sock *sk = sock->sk;
struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
struct mctp_skb_cb *cb;
struct mctp_route *rt;
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
+ int hlen;
if (addr) {
+ const u8 tagbits = MCTP_TAG_MASK | MCTP_TAG_OWNER |
+ MCTP_TAG_PREALLOC;
+
if (addrlen < sizeof(struct sockaddr_mctp))
return -EINVAL;
if (addr->smctp_family != AF_MCTP)
return -EINVAL;
if (!mctp_sockaddr_is_ok(addr))
return -EINVAL;
- if (addr->smctp_tag & ~(MCTP_TAG_MASK | MCTP_TAG_OWNER))
+ if (addr->smctp_tag & ~tagbits)
+ return -EINVAL;
+ /* can't preallocate a non-owned tag */
+ if (addr->smctp_tag & MCTP_TAG_PREALLOC &&
+ !(addr->smctp_tag & MCTP_TAG_OWNER))
return -EINVAL;
} else {
@@ -119,6 +129,34 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (addr->smctp_network == MCTP_NET_ANY)
addr->smctp_network = mctp_default_net(sock_net(sk));
+ /* direct addressing */
+ if (msk->addr_ext && addrlen >= sizeof(struct sockaddr_mctp_ext)) {
+ DECLARE_SOCKADDR(struct sockaddr_mctp_ext *,
+ extaddr, msg->msg_name);
+ struct net_device *dev;
+
+ rc = -EINVAL;
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(sock_net(sk), extaddr->smctp_ifindex);
+ /* check for correct halen */
+ if (dev && extaddr->smctp_halen == dev->addr_len) {
+ hlen = LL_RESERVED_SPACE(dev) + sizeof(struct mctp_hdr);
+ rc = 0;
+ }
+ rcu_read_unlock();
+ if (rc)
+ goto err_free;
+ rt = NULL;
+ } else {
+ rt = mctp_route_lookup(sock_net(sk), addr->smctp_network,
+ addr->smctp_addr.s_addr);
+ if (!rt) {
+ rc = -EHOSTUNREACH;
+ goto err_free;
+ }
+ hlen = LL_RESERVED_SPACE(rt->dev->dev) + sizeof(struct mctp_hdr);
+ }
+
skb = sock_alloc_send_skb(sk, hlen + 1 + len,
msg->msg_flags & MSG_DONTWAIT, &rc);
if (!skb)
@@ -137,8 +175,8 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
cb = __mctp_cb(skb);
cb->net = addr->smctp_network;
- /* direct addressing */
- if (msk->addr_ext && addrlen >= sizeof(struct sockaddr_mctp_ext)) {
+ if (!rt) {
+ /* fill extended address in cb */
DECLARE_SOCKADDR(struct sockaddr_mctp_ext *,
extaddr, msg->msg_name);
@@ -149,17 +187,9 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
}
cb->ifindex = extaddr->smctp_ifindex;
+ /* smctp_halen is checked above */
cb->halen = extaddr->smctp_halen;
memcpy(cb->haddr, extaddr->smctp_haddr, cb->halen);
-
- rt = NULL;
- } else {
- rt = mctp_route_lookup(sock_net(sk), addr->smctp_network,
- addr->smctp_addr.s_addr);
- if (!rt) {
- rc = -EHOSTUNREACH;
- goto err_free;
- }
}
rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr,
@@ -186,7 +216,7 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK))
return -EOPNOTSUPP;
- skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &rc);
+ skb = skb_recv_datagram(sk, flags, &rc);
if (!skb)
return rc;
@@ -208,7 +238,7 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (rc < 0)
goto out_free;
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (addr) {
struct mctp_skb_cb *cb = mctp_cb(skb);
@@ -248,6 +278,33 @@ out_free:
return rc;
}
+/* We're done with the key; invalidate, stop reassembly, and remove from lists.
+ */
+static void __mctp_key_remove(struct mctp_sk_key *key, struct net *net,
+ unsigned long flags, unsigned long reason)
+__releases(&key->lock)
+__must_hold(&net->mctp.keys_lock)
+{
+ struct sk_buff *skb;
+
+ trace_mctp_key_release(key, reason);
+ skb = key->reasm_head;
+ key->reasm_head = NULL;
+ key->reasm_dead = true;
+ key->valid = false;
+ mctp_dev_release_key(key->dev, key);
+ spin_unlock_irqrestore(&key->lock, flags);
+
+ if (!hlist_unhashed(&key->hlist)) {
+ hlist_del_init(&key->hlist);
+ hlist_del_init(&key->sklist);
+ /* unref for the lists */
+ mctp_key_unref(key);
+ }
+
+ kfree_skb(skb);
+}
+
static int mctp_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -293,6 +350,123 @@ static int mctp_getsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
}
+static int mctp_ioctl_alloctag(struct mctp_sock *msk, unsigned long arg)
+{
+ struct net *net = sock_net(&msk->sk);
+ struct mctp_sk_key *key = NULL;
+ struct mctp_ioc_tag_ctl ctl;
+ unsigned long flags;
+ u8 tag;
+
+ if (copy_from_user(&ctl, (void __user *)arg, sizeof(ctl)))
+ return -EFAULT;
+
+ if (ctl.tag)
+ return -EINVAL;
+
+ if (ctl.flags)
+ return -EINVAL;
+
+ key = mctp_alloc_local_tag(msk, ctl.peer_addr, MCTP_ADDR_ANY,
+ true, &tag);
+ if (IS_ERR(key))
+ return PTR_ERR(key);
+
+ ctl.tag = tag | MCTP_TAG_OWNER | MCTP_TAG_PREALLOC;
+ if (copy_to_user((void __user *)arg, &ctl, sizeof(ctl))) {
+ unsigned long fl2;
+ /* Unwind our key allocation: the keys list lock needs to be
+ * taken before the individual key locks, and we need a valid
+ * flags value (fl2) to pass to __mctp_key_remove, hence the
+ * second spin_lock_irqsave() rather than a plain spin_lock().
+ */
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
+ spin_lock_irqsave(&key->lock, fl2);
+ __mctp_key_remove(key, net, fl2, MCTP_TRACE_KEY_DROPPED);
+ mctp_key_unref(key);
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+ return -EFAULT;
+ }
+
+ mctp_key_unref(key);
+ return 0;
+}
+
+static int mctp_ioctl_droptag(struct mctp_sock *msk, unsigned long arg)
+{
+ struct net *net = sock_net(&msk->sk);
+ struct mctp_ioc_tag_ctl ctl;
+ unsigned long flags, fl2;
+ struct mctp_sk_key *key;
+ struct hlist_node *tmp;
+ int rc;
+ u8 tag;
+
+ if (copy_from_user(&ctl, (void __user *)arg, sizeof(ctl)))
+ return -EFAULT;
+
+ if (ctl.flags)
+ return -EINVAL;
+
+ /* Must be a local tag, TO set, preallocated */
+ if ((ctl.tag & ~MCTP_TAG_MASK) != (MCTP_TAG_OWNER | MCTP_TAG_PREALLOC))
+ return -EINVAL;
+
+ tag = ctl.tag & MCTP_TAG_MASK;
+ rc = -EINVAL;
+
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
+ hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
+ /* we do an irqsave here, even though we know the irq state,
+ * so we have the flags to pass to __mctp_key_remove
+ */
+ spin_lock_irqsave(&key->lock, fl2);
+ if (key->manual_alloc &&
+ ctl.peer_addr == key->peer_addr &&
+ tag == key->tag) {
+ __mctp_key_remove(key, net, fl2,
+ MCTP_TRACE_KEY_DROPPED);
+ rc = 0;
+ } else {
+ spin_unlock_irqrestore(&key->lock, fl2);
+ }
+ }
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+
+ return rc;
+}
+
+static int mctp_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+ struct mctp_sock *msk = container_of(sock->sk, struct mctp_sock, sk);
+
+ switch (cmd) {
+ case SIOCMCTPALLOCTAG:
+ return mctp_ioctl_alloctag(msk, arg);
+ case SIOCMCTPDROPTAG:
+ return mctp_ioctl_droptag(msk, arg);
+ }
+
+ return -EINVAL;
+}
+
+#ifdef CONFIG_COMPAT
+static int mctp_compat_ioctl(struct socket *sock, unsigned int cmd,
+ unsigned long arg)
+{
+ void __user *argp = compat_ptr(arg);
+
+ switch (cmd) {
+ /* These have compatible ptr layouts */
+ case SIOCMCTPALLOCTAG:
+ case SIOCMCTPDROPTAG:
+ return mctp_ioctl(sock, cmd, (unsigned long)argp);
+ }
+
+ return -ENOIOCTLCMD;
+}
+#endif
+
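
A hedged userspace sketch of the new tag ioctls: allocate a preallocated
tag towards a peer, then drop it again. Error handling is elided; the
types and ioctl numbers come from the <linux/mctp.h> uapi header, and
AF_MCTP (45) may need defining on older libcs:

#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/mctp.h>

#ifndef AF_MCTP
#define AF_MCTP 45
#endif

int main(void)
{
	struct mctp_ioc_tag_ctl ctl = { .peer_addr = 9 };	/* example EID */
	int sd = socket(AF_MCTP, SOCK_DGRAM, 0);

	if (sd < 0 || ioctl(sd, SIOCMCTPALLOCTAG, &ctl) < 0)
		return 1;

	/* tag comes back with MCTP_TAG_OWNER | MCTP_TAG_PREALLOC set */
	printf("allocated tag 0x%02x\n", ctl.tag);
	/* ... sendmsg() with addr.smctp_tag = ctl.tag ... */

	ioctl(sd, SIOCMCTPDROPTAG, &ctl);	/* same ctl drops it */
	close(sd);
	return 0;
}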
static const struct proto_ops mctp_dgram_ops = {
.family = PF_MCTP,
.release = mctp_release,
@@ -302,7 +476,7 @@ static const struct proto_ops mctp_dgram_ops = {
.accept = sock_no_accept,
.getname = sock_no_getname,
.poll = datagram_poll,
- .ioctl = sock_no_ioctl,
+ .ioctl = mctp_ioctl,
.gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -312,6 +486,9 @@ static const struct proto_ops mctp_dgram_ops = {
.recvmsg = mctp_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = mctp_compat_ioctl,
+#endif
};
static void mctp_sk_expire_keys(struct timer_list *timer)
@@ -319,7 +496,7 @@ static void mctp_sk_expire_keys(struct timer_list *timer)
struct mctp_sock *msk = container_of(timer, struct mctp_sock,
key_expiry);
struct net *net = sock_net(&msk->sk);
- unsigned long next_expiry, flags;
+ unsigned long next_expiry, flags, fl2;
struct mctp_sk_key *key;
struct hlist_node *tmp;
bool next_expiry_valid = false;
@@ -327,15 +504,16 @@ static void mctp_sk_expire_keys(struct timer_list *timer)
spin_lock_irqsave(&net->mctp.keys_lock, flags);
hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
- spin_lock(&key->lock);
+		/* manually-allocated keys don't expire; manual_alloc is
+		 * immutable, so no locking is required to read it.
+		 */
+ if (key->manual_alloc)
+ continue;
+ spin_lock_irqsave(&key->lock, fl2);
if (!time_after_eq(key->expiry, jiffies)) {
- trace_mctp_key_release(key, MCTP_TRACE_KEY_TIMEOUT);
- key->valid = false;
- hlist_del_rcu(&key->hlist);
- hlist_del_rcu(&key->sklist);
- spin_unlock(&key->lock);
- mctp_key_unref(key);
+ __mctp_key_remove(key, net, fl2,
+ MCTP_TRACE_KEY_TIMEOUT);
continue;
}
@@ -346,7 +524,7 @@ static void mctp_sk_expire_keys(struct timer_list *timer)
next_expiry = key->expiry;
next_expiry_valid = true;
}
- spin_unlock(&key->lock);
+ spin_unlock_irqrestore(&key->lock, fl2);
}
spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
@@ -387,9 +565,9 @@ static void mctp_sk_unhash(struct sock *sk)
{
struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
struct net *net = sock_net(sk);
+ unsigned long flags, fl2;
struct mctp_sk_key *key;
struct hlist_node *tmp;
- unsigned long flags;
/* remove from any type-based binds */
mutex_lock(&net->mctp.bind_lock);
@@ -399,20 +577,8 @@ static void mctp_sk_unhash(struct sock *sk)
/* remove tag allocations */
spin_lock_irqsave(&net->mctp.keys_lock, flags);
hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
- hlist_del(&key->sklist);
- hlist_del(&key->hlist);
-
- trace_mctp_key_release(key, MCTP_TRACE_KEY_CLOSED);
-
- spin_lock(&key->lock);
- kfree_skb(key->reasm_head);
- key->reasm_head = NULL;
- key->reasm_dead = true;
- key->valid = false;
- spin_unlock(&key->lock);
-
- /* key is no longer on the lookup lists, unref */
- mctp_key_unref(key);
+ spin_lock_irqsave(&key->lock, fl2);
+ __mctp_key_remove(key, net, fl2, MCTP_TRACE_KEY_CLOSED);
}
spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
}
@@ -499,12 +665,14 @@ static __init int mctp_init(void)
rc = mctp_neigh_init();
if (rc)
- goto err_unreg_proto;
+ goto err_unreg_routes;
mctp_device_init();
return 0;
+err_unreg_routes:
+ mctp_routes_exit();
err_unreg_proto:
proto_unregister(&mctp_proto);
err_unreg_sock:
diff --git a/net/mctp/device.c b/net/mctp/device.c
index ef2755f82f87..99a3bda8852f 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -6,6 +6,7 @@
* Copyright (c) 2021 Google
*/
+#include <linux/if_arp.h>
#include <linux/if_link.h>
#include <linux/mctp.h>
#include <linux/netdevice.h>
@@ -24,12 +25,25 @@ struct mctp_dump_cb {
size_t a_idx;
};
-/* unlocked: caller must hold rcu_read_lock */
+/* unlocked: caller must hold rcu_read_lock.
+ * Returned mctp_dev has its refcount incremented, or NULL if unset.
+ */
struct mctp_dev *__mctp_dev_get(const struct net_device *dev)
{
- return rcu_dereference(dev->mctp_ptr);
+ struct mctp_dev *mdev = rcu_dereference(dev->mctp_ptr);
+
+ /* RCU guarantees that any mdev is still live.
+ * Zero refcount implies a pending free, return NULL.
+ */
+ if (mdev)
+ if (!refcount_inc_not_zero(&mdev->refs))
+ return NULL;
+ return mdev;
}
+/* Returned mctp_dev does not have refcount incremented. The returned pointer
+ * remains live while rtnl_lock is held, as that prevents mctp_unregister()
+ * from running.
+ */
struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev)
{
return rtnl_dereference(dev->mctp_ptr);
@@ -106,7 +120,7 @@ static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct ifaddrmsg *hdr;
struct mctp_dev *mdev;
int ifindex;
- int idx, rc;
+ int idx = 0, rc;
hdr = nlmsg_data(cb->nlh);
// filter by ifindex if requested
@@ -123,6 +137,7 @@ static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb)
if (mdev) {
rc = mctp_dump_dev_addrinfo(mdev,
skb, cb);
+ mctp_dev_put(mdev);
// Error indicates full buffer, this
// callback will get retried.
if (rc < 0)
@@ -208,7 +223,7 @@ static int mctp_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!mdev)
return -ENODEV;
- if (!mctp_address_ok(addr->s_addr))
+ if (!mctp_address_unicast(addr->s_addr))
return -EINVAL;
/* Prevent duplicates. Under RTNL so don't need to lock for reading */
@@ -297,7 +312,8 @@ void mctp_dev_hold(struct mctp_dev *mdev)
void mctp_dev_put(struct mctp_dev *mdev)
{
- if (refcount_dec_and_test(&mdev->refs)) {
+ if (mdev && refcount_dec_and_test(&mdev->refs)) {
+ kfree(mdev->addrs);
dev_put(mdev->dev);
kfree_rcu(mdev, rcu);
}
@@ -369,6 +385,7 @@ static size_t mctp_get_link_af_size(const struct net_device *dev,
if (!mdev)
return 0;
ret = nla_total_size(4); /* IFLA_MCTP_NET */
+ mctp_dev_put(mdev);
return ret;
}
@@ -412,10 +429,10 @@ static void mctp_unregister(struct net_device *dev)
struct mctp_dev *mdev;
mdev = mctp_dev_get_rtnl(dev);
- if (mctp_known(dev) != (bool)mdev) {
+ if (mdev && !mctp_known(dev)) {
// Sanity check, should match what was set in mctp_register
- netdev_warn(dev, "%s: mdev pointer %d but type (%d) match is %d",
- __func__, (bool)mdev, mctp_known(dev), dev->type);
+ netdev_warn(dev, "%s: BUG mctp_ptr set for unknown type %d",
+ __func__, dev->type);
return;
}
if (!mdev)
@@ -425,7 +442,6 @@ static void mctp_unregister(struct net_device *dev)
mctp_route_remove_dev(mdev);
mctp_neigh_remove_dev(mdev);
- kfree(mdev->addrs);
mctp_dev_put(mdev);
}
@@ -439,7 +455,7 @@ static int mctp_register(struct net_device *dev)
if (mdev) {
if (!mctp_known(dev))
- netdev_warn(dev, "%s: mctp_dev set for unknown type %d",
+ netdev_warn(dev, "%s: BUG mctp_ptr set for unknown type %d",
__func__, dev->type);
return 0;
}
diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c
index 6ad3e33bd4d4..ffa0f9e0983f 100644
--- a/net/mctp/neigh.c
+++ b/net/mctp/neigh.c
@@ -143,7 +143,7 @@ static int mctp_rtm_newneigh(struct sk_buff *skb, struct nlmsghdr *nlh,
}
eid = nla_get_u8(tb[NDA_DST]);
- if (!mctp_address_ok(eid)) {
+ if (!mctp_address_unicast(eid)) {
NL_SET_ERR_MSG(extack, "Invalid neighbour EID");
return -EINVAL;
}
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 8d9f4ff3e285..f9a80b82dc51 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -64,8 +64,7 @@ static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
if (msk->bind_type != type)
continue;
- if (msk->bind_addr != MCTP_ADDR_ANY &&
- msk->bind_addr != mh->dest)
+ if (!mctp_address_matches(msk->bind_addr, mh->dest))
continue;
return msk;
@@ -77,7 +76,7 @@ static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
mctp_eid_t peer, u8 tag)
{
- if (key->local_addr != local)
+ if (!mctp_address_matches(key->local_addr, local))
return false;
if (key->peer_addr != peer)
@@ -204,29 +203,38 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
return rc;
}
-/* We're done with the key; unset valid and remove from lists. There may still
- * be outstanding refs on the key though...
+/* Helper for mctp_route_input().
+ * We're done with the key; unlock and unref the key.
+ * For the usual case of automatic expiry we remove the key from lists.
+ * In the case that manual allocation is set on a key we release the lock
+ * and local ref, reset reassembly, but don't remove from lists.
*/
-static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net,
- unsigned long flags)
- __releases(&key->lock)
+static void __mctp_key_done_in(struct mctp_sk_key *key, struct net *net,
+ unsigned long flags, unsigned long reason)
+__releases(&key->lock)
{
struct sk_buff *skb;
+ trace_mctp_key_release(key, reason);
skb = key->reasm_head;
key->reasm_head = NULL;
- key->reasm_dead = true;
- key->valid = false;
- mctp_dev_release_key(key->dev, key);
- spin_unlock_irqrestore(&key->lock, flags);
- spin_lock_irqsave(&net->mctp.keys_lock, flags);
- hlist_del(&key->hlist);
- hlist_del(&key->sklist);
- spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+ if (!key->manual_alloc) {
+ key->reasm_dead = true;
+ key->valid = false;
+ mctp_dev_release_key(key->dev, key);
+ }
+ spin_unlock_irqrestore(&key->lock, flags);
- /* one unref for the lists */
- mctp_key_unref(key);
+ if (!key->manual_alloc) {
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
+ if (!hlist_unhashed(&key->hlist)) {
+ hlist_del_init(&key->hlist);
+ hlist_del_init(&key->sklist);
+ mctp_key_unref(key);
+ }
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+ }
/* and one for the local reference */
mctp_key_unref(key);
@@ -380,9 +388,8 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
/* we've hit a pending reassembly; not much we
* can do but drop it
*/
- trace_mctp_key_release(key,
- MCTP_TRACE_KEY_REPLIED);
- __mctp_key_unlock_drop(key, net, f);
+ __mctp_key_done_in(key, net, f,
+ MCTP_TRACE_KEY_REPLIED);
key = NULL;
}
rc = 0;
@@ -412,21 +419,21 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* this function.
*/
rc = mctp_key_add(key, msk);
- if (rc)
+ if (rc) {
kfree(key);
+ } else {
+ trace_mctp_key_acquire(key);
- trace_mctp_key_acquire(key);
-
- /* we don't need to release key->lock on exit */
- mctp_key_unref(key);
+ /* we don't need to release key->lock on exit */
+ mctp_key_unref(key);
+ }
key = NULL;
} else {
if (key->reasm_head || key->reasm_dead) {
/* duplicate start? drop everything */
- trace_mctp_key_release(key,
- MCTP_TRACE_KEY_INVALIDATED);
- __mctp_key_unlock_drop(key, net, f);
+ __mctp_key_done_in(key, net, f,
+ MCTP_TRACE_KEY_INVALIDATED);
rc = -EEXIST;
key = NULL;
} else {
@@ -451,8 +458,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
if (!rc && flags & MCTP_HDR_FLAG_EOM) {
sock_queue_rcv_skb(key->sk, key->reasm_head);
key->reasm_head = NULL;
- trace_mctp_key_release(key, MCTP_TRACE_KEY_REPLIED);
- __mctp_key_unlock_drop(key, net, f);
+ __mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED);
key = NULL;
}
@@ -497,6 +503,11 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
if (cb->ifindex) {
/* direct route; use the hwaddr we stashed in sendmsg */
+ if (cb->halen != skb->dev->addr_len) {
+ /* sanity check, sendmsg should have already caught this */
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
daddr = cb->haddr;
} else {
/* If lookup fails let the device handle daddr==NULL */
@@ -506,7 +517,7 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
daddr, skb->dev->dev_addr, skb->len);
- if (rc) {
+ if (rc < 0) {
kfree_skb(skb);
return -EHOSTUNREACH;
}
@@ -580,9 +591,9 @@ static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
/* Allocate a locally-owned tag value for (saddr, daddr), and reserve
* it for the socket msk
*/
-static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
- mctp_eid_t saddr,
- mctp_eid_t daddr, u8 *tagp)
+struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
+ mctp_eid_t daddr, mctp_eid_t saddr,
+ bool manual, u8 *tagp)
{
struct net *net = sock_net(&msk->sk);
struct netns_mctp *mns = &net->mctp;
@@ -616,9 +627,8 @@ static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
if (tmp->tag & MCTP_HDR_FLAG_TO)
continue;
- if (!((tmp->peer_addr == daddr ||
- tmp->peer_addr == MCTP_ADDR_ANY) &&
- tmp->local_addr == saddr))
+ if (!(mctp_address_matches(tmp->peer_addr, daddr) &&
+ mctp_address_matches(tmp->local_addr, saddr)))
continue;
spin_lock(&tmp->lock);
@@ -638,6 +648,7 @@ static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
mctp_reserve_tag(net, key, msk);
trace_mctp_key_acquire(key);
+ key->manual_alloc = manual;
*tagp = key->tag;
}
@@ -651,6 +662,50 @@ static struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
return key;
}
+static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk,
+ mctp_eid_t daddr,
+ u8 req_tag, u8 *tagp)
+{
+ struct net *net = sock_net(&msk->sk);
+ struct netns_mctp *mns = &net->mctp;
+ struct mctp_sk_key *key, *tmp;
+ unsigned long flags;
+
+ req_tag &= ~(MCTP_TAG_PREALLOC | MCTP_TAG_OWNER);
+ key = NULL;
+
+ spin_lock_irqsave(&mns->keys_lock, flags);
+
+ hlist_for_each_entry(tmp, &mns->keys, hlist) {
+ if (tmp->tag != req_tag)
+ continue;
+
+ if (!mctp_address_matches(tmp->peer_addr, daddr))
+ continue;
+
+ if (!tmp->manual_alloc)
+ continue;
+
+ spin_lock(&tmp->lock);
+ if (tmp->valid) {
+ key = tmp;
+ refcount_inc(&key->refs);
+ spin_unlock(&tmp->lock);
+ break;
+ }
+ spin_unlock(&tmp->lock);
+ }
+ spin_unlock_irqrestore(&mns->keys_lock, flags);
+
+ if (!key)
+ return ERR_PTR(-ENOENT);
+
+ if (tagp)
+ *tagp = key->tag;
+
+ return key;
+}
+
/* routing lookups */
static bool mctp_rt_match_eid(struct mctp_route *rt,
unsigned int net, mctp_eid_t eid)
@@ -706,7 +761,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
{
const unsigned int hlen = sizeof(struct mctp_hdr);
struct mctp_hdr *hdr, *hdr2;
- unsigned int pos, size;
+ unsigned int pos, size, headroom;
struct sk_buff *skb2;
int rc;
u8 seq;
@@ -720,6 +775,9 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
return -EMSGSIZE;
}
+ /* keep same headroom as the original skb */
+ headroom = skb_headroom(skb);
+
/* we've got the header */
skb_pull(skb, hlen);
@@ -727,7 +785,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
/* size of message payload */
size = min(mtu - hlen, skb->len - pos);
- skb2 = alloc_skb(MCTP_HEADER_MAXLEN + hlen + size, GFP_KERNEL);
+ skb2 = alloc_skb(headroom + hlen + size, GFP_KERNEL);
if (!skb2) {
rc = -ENOMEM;
break;
@@ -743,7 +801,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
skb_set_owner_w(skb2, skb->sk);
/* establish packet */
- skb_reserve(skb2, MCTP_HEADER_MAXLEN);
+ skb_reserve(skb2, headroom);
skb_reset_network_header(skb2);
skb_put(skb2, hlen + size);
skb2->transport_header = skb2->network_header + hlen;
@@ -785,9 +843,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
{
struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
struct mctp_skb_cb *cb = mctp_cb(skb);
- struct mctp_route tmp_rt;
+ struct mctp_route tmp_rt = {0};
struct mctp_sk_key *key;
- struct net_device *dev;
struct mctp_hdr *hdr;
unsigned long flags;
unsigned int mtu;
@@ -800,12 +857,12 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
if (rt) {
ext_rt = false;
- dev = NULL;
-
if (WARN_ON(!rt->dev))
goto out_release;
} else if (cb->ifindex) {
+ struct net_device *dev;
+
ext_rt = true;
rt = &tmp_rt;
@@ -815,7 +872,6 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
rcu_read_unlock();
return rc;
}
-
rt->dev = __mctp_dev_get(dev);
rcu_read_unlock();
@@ -845,8 +901,14 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
if (rc)
goto out_release;
- if (req_tag & MCTP_HDR_FLAG_TO) {
- key = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
+ if (req_tag & MCTP_TAG_OWNER) {
+ if (req_tag & MCTP_TAG_PREALLOC)
+ key = mctp_lookup_prealloc_tag(msk, daddr,
+ req_tag, &tag);
+ else
+ key = mctp_alloc_local_tag(msk, daddr, saddr,
+ false, &tag);
+
if (IS_ERR(key)) {
rc = PTR_ERR(key);
goto out_release;
@@ -857,7 +919,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
tag |= MCTP_HDR_FLAG_TO;
} else {
key = NULL;
- tag = req_tag;
+ tag = req_tag & MCTP_TAG_MASK;
}
skb->protocol = htons(ETH_P_MCTP);
@@ -890,10 +952,9 @@ out_release:
if (!ext_rt)
mctp_route_release(rt);
- dev_put(dev);
+ mctp_dev_put(tmp_rt.dev);
return rc;
-
}
/* route management */
@@ -905,7 +966,7 @@ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
struct net *net = dev_net(mdev->dev);
struct mctp_route *rt, *ert;
- if (!mctp_address_ok(daddr_start))
+ if (!mctp_address_unicast(daddr_start))
return -EINVAL;
if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
@@ -1035,6 +1096,17 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
goto err_drop;
+ /* source must be valid unicast or null; drop reserved ranges and
+ * broadcast
+ */
+ if (!(mctp_address_unicast(mh->src) || mctp_address_null(mh->src)))
+ goto err_drop;
+
+ /* dest address: as above, but allow broadcast */
+ if (!(mctp_address_unicast(mh->dest) || mctp_address_null(mh->dest) ||
+ mctp_address_broadcast(mh->dest)))
+ goto err_drop;
+
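The predicates are assumed to partition the MCTP EID space (0 is the null EID, 1-7 reserved, 0xff broadcast); a sketch of the expected definitions from include/net/mctp.h:

static inline bool mctp_address_unicast(mctp_eid_t eid)
{
	return eid >= 8 && eid < 255;
}

static inline bool mctp_address_broadcast(mctp_eid_t eid)
{
	return eid == 0xff;
}

static inline bool mctp_address_null(mctp_eid_t eid)
{
	return eid == 0;
}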
/* MCTP drivers must populate halen/haddr */
if (dev->type == ARPHRD_MCTP) {
cb = mctp_cb(skb);
@@ -1056,11 +1128,13 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
rt->output(rt, skb);
mctp_route_release(rt);
+ mctp_dev_put(mdev);
return NET_RX_SUCCESS;
err_drop:
kfree_skb(skb);
+ mctp_dev_put(mdev);
return NET_RX_DROP;
}
@@ -1326,7 +1400,7 @@ int __init mctp_routes_init(void)
return register_pernet_subsys(&mctp_net_ops);
}
-void __exit mctp_routes_exit(void)
+void mctp_routes_exit(void)
{
unregister_pernet_subsys(&mctp_net_ops);
rtnl_unregister(PF_MCTP, RTM_DELROUTE);
diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c
index 86ad15abf897..92ea4158f7fc 100644
--- a/net/mctp/test/route-test.c
+++ b/net/mctp/test/route-test.c
@@ -285,7 +285,7 @@ static void __mctp_route_test_init(struct kunit *test,
struct mctp_test_route **rtp,
struct socket **sockp)
{
- struct sockaddr_mctp addr;
+ struct sockaddr_mctp addr = {0};
struct mctp_test_route *rt;
struct mctp_test_dev *dev;
struct socket *sock;
@@ -352,7 +352,7 @@ static void mctp_test_route_input_sk(struct kunit *test)
if (params->deliver) {
KUNIT_EXPECT_EQ(test, rc, 0);
- skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc);
+ skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2);
KUNIT_EXPECT_EQ(test, skb->len, 1);
@@ -360,8 +360,8 @@ static void mctp_test_route_input_sk(struct kunit *test)
} else {
KUNIT_EXPECT_NE(test, rc, 0);
- skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc);
- KUNIT_EXPECT_PTR_EQ(test, skb2, NULL);
+ skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
+ KUNIT_EXPECT_NULL(test, skb2);
}
__mctp_route_test_fini(test, dev, rt, sock);
@@ -369,14 +369,15 @@ static void mctp_test_route_input_sk(struct kunit *test)
#define FL_S (MCTP_HDR_FLAG_SOM)
#define FL_E (MCTP_HDR_FLAG_EOM)
-#define FL_T (MCTP_HDR_FLAG_TO)
+#define FL_TO (MCTP_HDR_FLAG_TO)
+#define FL_T(t) ((t) & MCTP_HDR_TAG_MASK)
static const struct mctp_route_input_sk_test mctp_route_input_sk_tests[] = {
- { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_T), .type = 0, .deliver = true },
- { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_T), .type = 1, .deliver = false },
+ { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_TO), .type = 0, .deliver = true },
+ { .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_TO), .type = 1, .deliver = false },
{ .hdr = RX_HDR(1, 10, 8, FL_S | FL_E), .type = 0, .deliver = false },
- { .hdr = RX_HDR(1, 10, 8, FL_E | FL_T), .type = 0, .deliver = false },
- { .hdr = RX_HDR(1, 10, 8, FL_T), .type = 0, .deliver = false },
+ { .hdr = RX_HDR(1, 10, 8, FL_E | FL_TO), .type = 0, .deliver = false },
+ { .hdr = RX_HDR(1, 10, 8, FL_TO), .type = 0, .deliver = false },
{ .hdr = RX_HDR(1, 10, 8, 0), .type = 0, .deliver = false },
};
@@ -422,7 +423,7 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test)
rc = mctp_route_input(&rt->rt, skb);
}
- skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc);
+ skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
if (params->rx_len) {
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2);
@@ -430,13 +431,13 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test)
skb_free_datagram(sock->sk, skb2);
} else {
- KUNIT_EXPECT_PTR_EQ(test, skb2, NULL);
+ KUNIT_EXPECT_NULL(test, skb2);
}
__mctp_route_test_fini(test, dev, rt, sock);
}
-#define RX_FRAG(f, s) RX_HDR(1, 10, 8, FL_T | (f) | ((s) << MCTP_HDR_SEQ_SHIFT))
+#define RX_FRAG(f, s) RX_HDR(1, 10, 8, FL_TO | (f) | ((s) << MCTP_HDR_SEQ_SHIFT))
static const struct mctp_route_input_sk_reasm_test mctp_route_input_sk_reasm_tests[] = {
{
@@ -522,12 +523,156 @@ static void mctp_route_input_sk_reasm_to_desc(
KUNIT_ARRAY_PARAM(mctp_route_input_sk_reasm, mctp_route_input_sk_reasm_tests,
mctp_route_input_sk_reasm_to_desc);
+struct mctp_route_input_sk_keys_test {
+ const char *name;
+ mctp_eid_t key_peer_addr;
+ mctp_eid_t key_local_addr;
+ u8 key_tag;
+ struct mctp_hdr hdr;
+ bool deliver;
+};
+
+/* test packet rx in the presence of various key configurations */
+static void mctp_test_route_input_sk_keys(struct kunit *test)
+{
+ const struct mctp_route_input_sk_keys_test *params;
+ struct mctp_test_route *rt;
+ struct sk_buff *skb, *skb2;
+ struct mctp_test_dev *dev;
+ struct mctp_sk_key *key;
+ struct netns_mctp *mns;
+ struct mctp_sock *msk;
+ struct socket *sock;
+ unsigned long flags;
+ int rc;
+ u8 c;
+
+ params = test->param_value;
+
+ dev = mctp_test_create_dev();
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+
+ rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt);
+
+ rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ msk = container_of(sock->sk, struct mctp_sock, sk);
+ mns = &sock_net(sock->sk)->mctp;
+
+ /* set the incoming tag according to test params */
+ key = mctp_key_alloc(msk, params->key_local_addr, params->key_peer_addr,
+ params->key_tag, GFP_KERNEL);
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, key);
+
+ spin_lock_irqsave(&mns->keys_lock, flags);
+ mctp_reserve_tag(&init_net, key, msk);
+ spin_unlock_irqrestore(&mns->keys_lock, flags);
+
+ /* create packet and route */
+ c = 0;
+ skb = mctp_test_create_skb_data(&params->hdr, &c);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
+
+ skb->dev = dev->ndev;
+ __mctp_cb(skb);
+
+ rc = mctp_route_input(&rt->rt, skb);
+
+ /* (potentially) receive message */
+ skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
+
+ if (params->deliver)
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2);
+ else
+ KUNIT_EXPECT_PTR_EQ(test, skb2, NULL);
+
+ if (skb2)
+ skb_free_datagram(sock->sk, skb2);
+
+ mctp_key_unref(key);
+ __mctp_route_test_fini(test, dev, rt, sock);
+}
+
+static const struct mctp_route_input_sk_keys_test mctp_route_input_sk_keys_tests[] = {
+ {
+ .name = "direct match",
+ .key_peer_addr = 9,
+ .key_local_addr = 8,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1)),
+ .deliver = true,
+ },
+ {
+ .name = "flipped src/dest",
+ .key_peer_addr = 8,
+ .key_local_addr = 9,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1)),
+ .deliver = false,
+ },
+ {
+ .name = "peer addr mismatch",
+ .key_peer_addr = 9,
+ .key_local_addr = 8,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_T(1)),
+ .deliver = false,
+ },
+ {
+ .name = "tag value mismatch",
+ .key_peer_addr = 9,
+ .key_local_addr = 8,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(2)),
+ .deliver = false,
+ },
+ {
+ .name = "TO mismatch",
+ .key_peer_addr = 9,
+ .key_local_addr = 8,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1) | FL_TO),
+ .deliver = false,
+ },
+ {
+ .name = "broadcast response",
+ .key_peer_addr = MCTP_ADDR_ANY,
+ .key_local_addr = 8,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 11, 8, FL_S | FL_E | FL_T(1)),
+ .deliver = true,
+ },
+ {
+ .name = "any local match",
+ .key_peer_addr = 12,
+ .key_local_addr = MCTP_ADDR_ANY,
+ .key_tag = 1,
+ .hdr = RX_HDR(1, 12, 8, FL_S | FL_E | FL_T(1)),
+ .deliver = true,
+ },
+};
+
+static void mctp_route_input_sk_keys_to_desc(
+ const struct mctp_route_input_sk_keys_test *t,
+ char *desc)
+{
+ sprintf(desc, "%s", t->name);
+}
+
+KUNIT_ARRAY_PARAM(mctp_route_input_sk_keys, mctp_route_input_sk_keys_tests,
+ mctp_route_input_sk_keys_to_desc);
+
static struct kunit_case mctp_test_cases[] = {
KUNIT_CASE_PARAM(mctp_test_fragment, mctp_frag_gen_params),
KUNIT_CASE_PARAM(mctp_test_rx_input, mctp_rx_input_gen_params),
KUNIT_CASE_PARAM(mctp_test_route_input_sk, mctp_route_input_sk_gen_params),
KUNIT_CASE_PARAM(mctp_test_route_input_sk_reasm,
mctp_route_input_sk_reasm_gen_params),
+ KUNIT_CASE_PARAM(mctp_test_route_input_sk_keys,
+ mctp_route_input_sk_keys_gen_params),
{}
};
diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c
index 7b7918702592..e03ba66bbe18 100644
--- a/net/mctp/test/utils.c
+++ b/net/mctp/test/utils.c
@@ -54,7 +54,6 @@ struct mctp_test_dev *mctp_test_create_dev(void)
rcu_read_lock();
dev->mdev = __mctp_dev_get(ndev);
- mctp_dev_hold(dev->mdev);
rcu_read_unlock();
return dev;
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 48f75a56f4ae..b52afe316dc4 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1079,9 +1079,9 @@ static void mpls_get_stats(struct mpls_dev *mdev,
p = per_cpu_ptr(mdev->stats, i);
do {
- start = u64_stats_fetch_begin(&p->syncp);
+ start = u64_stats_fetch_begin_irq(&p->syncp);
local = p->stats;
- } while (u64_stats_fetch_retry(&p->syncp, start));
+ } while (u64_stats_fetch_retry_irq(&p->syncp, start));
stats->rx_packets += local.rx_packets;
stats->rx_bytes += local.rx_bytes;
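The begin/retry pair above is a seqcount reader loop: the snapshot is retried until no writer interleaved, and the _irq variants additionally keep writers in IRQ context from spinning a 32-bit reader forever. A standalone sketch of the pattern (illustrative only, not the kernel primitives):

#include <stdatomic.h>
#include <stdint.h>

struct pkt_stats { uint64_t rx_packets, rx_bytes; };

struct pkt_stats_sync {
	_Atomic unsigned int seq;	/* odd while a writer is mid-update */
	struct pkt_stats s;
};

static struct pkt_stats stats_snapshot(struct pkt_stats_sync *ss)
{
	struct pkt_stats out;
	unsigned int start;

	do {
		while ((start = atomic_load(&ss->seq)) & 1)
			;	/* writer in progress: spin until even */
		out = ss->s;	/* may be torn; the recheck below catches it */
	} while (atomic_load(&ss->seq) != start);

	return out;
}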
@@ -1527,10 +1527,9 @@ static int mpls_ifdown(struct net_device *dev, int event)
rt->rt_nh_size;
struct mpls_route *orig = rt;
- rt = kmalloc(size, GFP_KERNEL);
+ rt = kmemdup(orig, size, GFP_KERNEL);
if (!rt)
return -ENOMEM;
- memcpy(rt, orig, size);
}
}
@@ -1607,6 +1606,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct mpls_dev *mdev;
unsigned int flags;
+ int err;
if (event == NETDEV_REGISTER) {
mdev = mpls_add_dev(dev);
@@ -1621,7 +1621,6 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
return NOTIFY_OK;
switch (event) {
- int err;
case NETDEV_DOWN:
err = mpls_ifdown(dev, event);
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index e54daceac58b..6e7df47c9584 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -2,7 +2,7 @@
obj-$(CONFIG_MPTCP) += mptcp.o
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
- mib.o pm_netlink.o sockopt.o
+ mib.o pm_netlink.o sockopt.o pm_userspace.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
@@ -10,3 +10,5 @@ obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
mptcp_crypto_test-objs := crypto_test.o
mptcp_token_test-objs := token_test.o
obj-$(CONFIG_MPTCP_KUNIT_TEST) += mptcp_crypto_test.o mptcp_token_test.o
+
+obj-$(CONFIG_BPF_SYSCALL) += bpf.o
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
new file mode 100644
index 000000000000..5a0a84ad94af
--- /dev/null
+++ b/net/mptcp/bpf.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Multipath TCP
+ *
+ * Copyright (c) 2020, Tessares SA.
+ * Copyright (c) 2022, SUSE.
+ *
+ * Author: Nicolas Rybowski <nicolas.rybowski@tessares.net>
+ */
+
+#define pr_fmt(fmt) "MPTCP: " fmt
+
+#include <linux/bpf.h>
+#include "protocol.h"
+
+struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
+{
+ if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk))
+ return mptcp_sk(mptcp_subflow_ctx(sk)->conn);
+
+ return NULL;
+}
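bpf_mptcp_sock_from_subflow() looks like the kernel-side backing for a BPF cast helper; a hedged sockops sketch, assuming it is exposed to programs as bpf_skc_to_mptcp_sock():

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sockops")
int detect_mptcp_subflow(struct bpf_sock_ops *skops)
{
	struct bpf_sock *sk = skops->sk;

	/* helper returns non-NULL only for TCP subflows of an MPTCP socket */
	if (sk && bpf_skc_to_mptcp_sock(sk))
		skops->reply = 1;

	return 1;
}

char _license[] SEC("license") = "GPL";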
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 8b235468c88f..ae20b7d92e28 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -16,6 +16,11 @@
#define MPTCP_SYSCTL_PATH "net/mptcp"
static int mptcp_pernet_id;
+
+#ifdef CONFIG_SYSCTL
+static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX;
+#endif
+
struct mptcp_pernet {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *ctl_table_hdr;
@@ -26,6 +31,7 @@ struct mptcp_pernet {
u8 mptcp_enabled;
u8 checksum_enabled;
u8 allow_join_initial_addr_port;
+ u8 pm_type;
};
static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
@@ -58,6 +64,11 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net)
return mptcp_get_pernet(net)->stale_loss_cnt;
}
+int mptcp_get_pm_type(const struct net *net)
+{
+ return mptcp_get_pernet(net)->pm_type;
+}
+
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
@@ -65,6 +76,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
pernet->checksum_enabled = 0;
pernet->allow_join_initial_addr_port = 1;
pernet->stale_loss_cnt = 4;
+ pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
}
#ifdef CONFIG_SYSCTL
@@ -108,6 +120,14 @@ static struct ctl_table mptcp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_douintvec_minmax,
},
+ {
+ .procname = "pm_type",
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &mptcp_pm_type_max
+ },
{}
};
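The knob lands at net.mptcp.pm_type, clamped to [0, __MPTCP_PM_TYPE_MAX]; 0 is MPTCP_PM_TYPE_KERNEL, and 1 is assumed to select the userspace PM added by this series. A minimal test-program sketch for switching it:

#include <fcntl.h>
#include <unistd.h>

/* write net.mptcp.pm_type: 0 = in-kernel PM, 1 = userspace PM (assumed) */
static int set_mptcp_pm_type(int type)
{
	char c = '0' + type;
	int fd = open("/proc/sys/net/mptcp/pm_type", O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, &c, 1) != 1) {
		close(fd);
		return -1;
	}
	return close(fd);
}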
@@ -128,6 +148,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[2].data = &pernet->checksum_enabled;
table[3].data = &pernet->allow_join_initial_addr_port;
table[4].data = &pernet->stale_loss_cnt;
+ table[5].data = &pernet->pm_type;
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
if (!hdr)
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 3240b72271a7..0dac2863c6e1 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -24,6 +24,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
+ SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX),
SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH),
SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR),
@@ -35,20 +36,30 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR),
SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD),
SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD),
+ SNMP_MIB_ITEM("AddAddrDrop", MPTCP_MIB_ADDADDRDROP),
SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX),
SNMP_MIB_ITEM("MPJoinPortSynAckRx", MPTCP_MIB_JOINPORTSYNACKRX),
SNMP_MIB_ITEM("MPJoinPortAckRx", MPTCP_MIB_JOINPORTACKRX),
SNMP_MIB_ITEM("MismatchPortSynRx", MPTCP_MIB_MISMATCHPORTSYNRX),
SNMP_MIB_ITEM("MismatchPortAckRx", MPTCP_MIB_MISMATCHPORTACKRX),
SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR),
+ SNMP_MIB_ITEM("RmAddrDrop", MPTCP_MIB_RMADDRDROP),
SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
SNMP_MIB_ITEM("MPFailTx", MPTCP_MIB_MPFAILTX),
SNMP_MIB_ITEM("MPFailRx", MPTCP_MIB_MPFAILRX),
+ SNMP_MIB_ITEM("MPFastcloseTx", MPTCP_MIB_MPFASTCLOSETX),
+ SNMP_MIB_ITEM("MPFastcloseRx", MPTCP_MIB_MPFASTCLOSERX),
+ SNMP_MIB_ITEM("MPRstTx", MPTCP_MIB_MPRSTTX),
+ SNMP_MIB_ITEM("MPRstRx", MPTCP_MIB_MPRSTRX),
SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED),
SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE),
SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER),
+ SNMP_MIB_ITEM("SndWndShared", MPTCP_MIB_SNDWNDSHARED),
+ SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED),
+ SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE),
+ SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index ecd3d8b117e0..2be3596374f4 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -17,6 +17,7 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
+ MPTCP_MIB_INFINITEMAPTX, /* Sent an infinite mapping */
MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
MPTCP_MIB_DSSTCPMISMATCH, /* DSS-mapping did not map with TCP's sequence numbers */
MPTCP_MIB_DATACSUMERR, /* The data checksum failed */
@@ -28,20 +29,32 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_ADDADDR, /* Received ADD_ADDR with echo-flag=0 */
MPTCP_MIB_ECHOADD, /* Received ADD_ADDR with echo-flag=1 */
MPTCP_MIB_PORTADD, /* Received ADD_ADDR with a port-number */
+ MPTCP_MIB_ADDADDRDROP, /* Dropped incoming ADD_ADDR */
MPTCP_MIB_JOINPORTSYNRX, /* Received a SYN MP_JOIN with a different port-number */
MPTCP_MIB_JOINPORTSYNACKRX, /* Received a SYNACK MP_JOIN with a different port-number */
MPTCP_MIB_JOINPORTACKRX, /* Received an ACK MP_JOIN with a different port-number */
MPTCP_MIB_MISMATCHPORTSYNRX, /* Received a SYN MP_JOIN with a mismatched port-number */
MPTCP_MIB_MISMATCHPORTACKRX, /* Received an ACK MP_JOIN with a mismatched port-number */
MPTCP_MIB_RMADDR, /* Received RM_ADDR */
+ MPTCP_MIB_RMADDRDROP, /* Dropped incoming RM_ADDR */
MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */
MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
MPTCP_MIB_MPFAILTX, /* Transmit a MP_FAIL */
MPTCP_MIB_MPFAILRX, /* Received a MP_FAIL */
+ MPTCP_MIB_MPFASTCLOSETX, /* Transmit a MP_FASTCLOSE */
+ MPTCP_MIB_MPFASTCLOSERX, /* Received a MP_FASTCLOSE */
+ MPTCP_MIB_MPRSTTX, /* Transmit a MP_RST */
+ MPTCP_MIB_MPRSTRX, /* Received a MP_RST */
MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */
MPTCP_MIB_SUBFLOWSTALE, /* Subflows entered 'stale' status */
MPTCP_MIB_SUBFLOWRECOVER, /* Subflows returned to active status after being stale */
+ MPTCP_MIB_SNDWNDSHARED, /* Subflow snd wnd is overridden by msk's one */
+ MPTCP_MIB_RCVWNDSHARED, /* Subflow rcv wnd is overridden by msk's one */
+ MPTCP_MIB_RCVWNDCONFLICTUPDATE, /* subflow rcv wnd is overridden by msk's one due to
+ * conflict with another subflow while updating msk rcv wnd
+ */
+ MPTCP_MIB_RCVWNDCONFLICT, /* Conflict while updating msk rcv wnd */
__MPTCP_MIB_MAX
};
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index f44125dd6697..8df1bdb647e2 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -66,20 +66,106 @@ out_nosk:
return err;
}
+struct mptcp_diag_ctx {
+ long s_slot;
+ long s_num;
+ unsigned int l_slot;
+ unsigned int l_num;
+};
+
+static void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callback *cb,
+ const struct inet_diag_req_v2 *r,
+ bool net_admin)
+{
+ struct inet_diag_dump_data *cb_data = cb->data;
+ struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
+ struct nlattr *bc = cb_data->inet_diag_nla_bc;
+ struct net *net = sock_net(skb->sk);
+ struct inet_hashinfo *hinfo;
+ int i;
+
+ hinfo = net->ipv4.tcp_death_row.hashinfo;
+
+ for (i = diag_ctx->l_slot; i <= hinfo->lhash2_mask; i++) {
+ struct inet_listen_hashbucket *ilb;
+ struct hlist_nulls_node *node;
+ struct sock *sk;
+ int num = 0;
+
+ ilb = &hinfo->lhash2[i];
+
+ rcu_read_lock();
+ spin_lock(&ilb->lock);
+ sk_nulls_for_each(sk, node, &ilb->nulls_head) {
+ const struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);
+ struct inet_sock *inet = inet_sk(sk);
+ int ret;
+
+ if (num < diag_ctx->l_num)
+ goto next_listen;
+
+ if (!ctx || strcmp(inet_csk(sk)->icsk_ulp_ops->name, "mptcp"))
+ goto next_listen;
+
+ sk = ctx->conn;
+ if (!sk || !net_eq(sock_net(sk), net))
+ goto next_listen;
+
+ if (r->sdiag_family != AF_UNSPEC &&
+ sk->sk_family != r->sdiag_family)
+ goto next_listen;
+
+ if (r->id.idiag_sport != inet->inet_sport &&
+ r->id.idiag_sport)
+ goto next_listen;
+
+ if (!refcount_inc_not_zero(&sk->sk_refcnt))
+ goto next_listen;
+
+ ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin);
+
+ sock_put(sk);
+
+ if (ret < 0) {
+ spin_unlock(&ilb->lock);
+ rcu_read_unlock();
+ diag_ctx->l_slot = i;
+ diag_ctx->l_num = num;
+ return;
+ }
+ diag_ctx->l_num = num + 1;
+ num = 0;
+next_listen:
+ ++num;
+ }
+ spin_unlock(&ilb->lock);
+ rcu_read_unlock();
+
+ cond_resched();
+ diag_ctx->l_num = 0;
+ }
+
+ diag_ctx->l_num = 0;
+ diag_ctx->l_slot = i;
+}
+
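The listener walk is reachable with an ordinary sock_diag dump whose state mask includes TCPF_LISTEN; a hedged userspace sketch of such a request (reply parsing elided):

#include <linux/inet_diag.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
#endif

static int request_mptcp_listener_dump(void)
{
	struct {
		struct nlmsghdr nlh;
		struct inet_diag_req_v2 req;
	} msg = {
		.nlh = {
			.nlmsg_len = sizeof(msg),
			.nlmsg_type = SOCK_DIAG_BY_FAMILY,
			.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
		},
		.req = {
			.sdiag_family = AF_INET,
			.sdiag_protocol = IPPROTO_MPTCP,
			.idiag_states = 1 << 10,	/* TCPF_LISTEN */
		},
	};
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);

	if (fd < 0)
		return -1;
	if (send(fd, &msg, sizeof(msg), 0) < 0) {
		close(fd);
		return -1;
	}
	/* caller then recv()s inet_diag_msg records until NLMSG_DONE */
	return fd;
}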
static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
const struct inet_diag_req_v2 *r)
{
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
+ struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
struct net *net = sock_net(skb->sk);
struct inet_diag_dump_data *cb_data;
struct mptcp_sock *msk;
struct nlattr *bc;
+ BUILD_BUG_ON(sizeof(cb->ctx) < sizeof(*diag_ctx));
+
cb_data = cb->data;
bc = cb_data->inet_diag_nla_bc;
- while ((msk = mptcp_token_iter_next(net, &cb->args[0], &cb->args[1])) !=
- NULL) {
+ while ((msk = mptcp_token_iter_next(net, &diag_ctx->s_slot,
+ &diag_ctx->s_num)) != NULL) {
struct inet_sock *inet = (struct inet_sock *)msk;
struct sock *sk = (struct sock *)msk;
int ret = 0;
@@ -101,11 +187,14 @@ next:
sock_put(sk);
if (ret < 0) {
/* will retry on the same position */
- cb->args[1]--;
+ diag_ctx->s_num--;
break;
}
cond_resched();
}
+
+ if ((r->idiag_states & TCPF_LISTEN) && r->id.idiag_dport == 0)
+ mptcp_diag_dump_listeners(skb, cb, r, net_admin);
}
static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
@@ -116,6 +205,19 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
r->idiag_rqueue = sk_rmem_alloc_get(sk);
r->idiag_wqueue = sk_wmem_alloc_get(sk);
+
+ if (inet_sk_state_load(sk) == TCP_LISTEN) {
+ struct sock *lsk = READ_ONCE(msk->first);
+
+ if (lsk) {
+ /* override with settings from tcp listener,
+ * so Send-Q will show the accept queue.
+ */
+ r->idiag_rqueue = READ_ONCE(lsk->sk_ack_backlog);
+ r->idiag_wqueue = READ_ONCE(lsk->sk_max_ack_backlog);
+ }
+ }
+
if (!info)
return;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 645dd984fef0..30d289044e71 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -107,7 +107,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
ptr += 2;
}
if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) {
- mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+ mp_opt->csum = get_unaligned((__force __sum16 *)ptr);
mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
ptr += 2;
}
@@ -221,7 +221,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) {
mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
- mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+ mp_opt->csum = get_unaligned((__force __sum16 *)ptr);
ptr += 2;
}
@@ -323,6 +323,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->rcvr_key = get_unaligned_be64(ptr);
ptr += 8;
mp_opt->suboptions |= OPTION_MPTCP_FASTCLOSE;
+ pr_debug("MP_FASTCLOSE: recv_key=%llu", mp_opt->rcvr_key);
break;
case MPTCPOPT_RST:
@@ -336,6 +337,8 @@ static void mptcp_parse_option(const struct sk_buff *skb,
flags = *ptr++;
mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT;
mp_opt->reset_reason = *ptr;
+ pr_debug("MP_RST: transient=%u reason=%u",
+ mp_opt->reset_transient, mp_opt->reset_reason);
break;
case MPTCPOPT_MP_FAIL:
@@ -353,8 +356,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
}
}
-void mptcp_get_options(const struct sock *sk,
- const struct sk_buff *skb,
+void mptcp_get_options(const struct sk_buff *skb,
struct mptcp_options_received *mp_opt)
{
const struct tcphdr *th = tcp_hdr(skb);
@@ -651,7 +653,6 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
bool drop_other_suboptions = false;
unsigned int opt_size = *size;
bool echo;
- bool port;
int len;
/* add addr will strip the existing options, be sure to avoid breaking
@@ -660,12 +661,12 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
if (!mptcp_pm_should_add_signal(msk) ||
(opts->suboptions & (OPTION_MPTCP_MPJ_ACK | OPTION_MPTCP_MPC_ACK)) ||
!mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &opts->addr,
- &echo, &port, &drop_other_suboptions))
+ &echo, &drop_other_suboptions))
return false;
if (drop_other_suboptions)
remaining += opt_size;
- len = mptcp_add_addr_len(opts->addr.family, echo, port);
+ len = mptcp_add_addr_len(opts->addr.family, echo, !!opts->addr.port);
if (remaining < len)
return false;
@@ -764,6 +765,7 @@ static noinline bool mptcp_established_options_rst(struct sock *sk, struct sk_bu
opts->suboptions |= OPTION_MPTCP_RST;
opts->reset_transient = subflow->reset_transient;
opts->reset_reason = subflow->reset_reason;
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPRSTTX);
return true;
}
@@ -787,6 +789,7 @@ static bool mptcp_established_options_fastclose(struct sock *sk,
opts->rcvr_key = msk->remote_key;
pr_debug("FASTCLOSE key=%llu", opts->rcvr_key);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX);
return true;
}
@@ -808,6 +811,7 @@ static bool mptcp_established_options_mp_fail(struct sock *sk,
opts->fail_seq = subflow->map_seq;
pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX);
return true;
}
@@ -824,7 +828,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
opts->suboptions = 0;
- if (unlikely(__mptcp_check_fallback(msk)))
+ if (unlikely(__mptcp_check_fallback(msk) && !mptcp_check_infinite_map(skb)))
return false;
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
@@ -928,7 +932,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 &&
TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) &&
- READ_ONCE(msk->pm.server_side))
+ !subflow->request_join)
tcp_send_ack(ssk);
goto fully_established;
}
@@ -963,7 +967,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
goto reset;
subflow->mp_capable = 0;
pr_fallback(msk);
- __mptcp_do_fallback(msk);
+ mptcp_do_fallback(ssk);
return false;
}
@@ -1084,8 +1088,7 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk,
&mp_opt->addr);
pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n",
- msk, (unsigned long long)hmac,
- (unsigned long long)mp_opt->ahmac);
+ msk, hmac, mp_opt->ahmac);
return hmac == mp_opt->ahmac;
}
@@ -1112,7 +1115,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
return true;
}
- mptcp_get_options(sk, skb, &mp_opt);
+ mptcp_get_options(skb, &mp_opt);
/* The subflow can be in close state only if check_fully_established()
* just sent a reset. If so, tell the caller to ignore the current packet.
@@ -1125,12 +1128,13 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
msk->local_key == mp_opt.rcvr_key) {
WRITE_ONCE(msk->rcv_fastclose, true);
mptcp_schedule_work((struct sock *)msk);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSERX);
}
if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) &&
add_addr_hmac_valid(msk, &mp_opt)) {
if (!mp_opt.echo) {
- mptcp_pm_add_addr_received(msk, &mp_opt.addr);
+ mptcp_pm_add_addr_received(sk, &mp_opt.addr);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
} else {
mptcp_pm_add_addr_echoed(msk, &mp_opt.addr);
@@ -1159,6 +1163,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
subflow->reset_seen = 1;
subflow->reset_reason = mp_opt.reset_reason;
subflow->reset_transient = mp_opt.reset_transient;
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPRSTRX);
}
if (!(mp_opt.suboptions & OPTION_MPTCP_DSS))
@@ -1220,23 +1225,65 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
return true;
}
-static void mptcp_set_rwin(const struct tcp_sock *tp)
+static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
{
const struct sock *ssk = (const struct sock *)tp;
- const struct mptcp_subflow_context *subflow;
+ struct mptcp_subflow_context *subflow;
+ u64 ack_seq, rcv_wnd_old, rcv_wnd_new;
struct mptcp_sock *msk;
- u64 ack_seq;
+ u32 new_win;
+ u64 win;
subflow = mptcp_subflow_ctx(ssk);
msk = mptcp_sk(subflow->conn);
- ack_seq = READ_ONCE(msk->ack_seq) + tp->rcv_wnd;
+ ack_seq = READ_ONCE(msk->ack_seq);
+ rcv_wnd_new = ack_seq + tp->rcv_wnd;
+
+ rcv_wnd_old = atomic64_read(&msk->rcv_wnd_sent);
+ if (after64(rcv_wnd_new, rcv_wnd_old)) {
+ u64 rcv_wnd;
+
+ for (;;) {
+ rcv_wnd = atomic64_cmpxchg(&msk->rcv_wnd_sent, rcv_wnd_old, rcv_wnd_new);
+
+ if (rcv_wnd == rcv_wnd_old)
+ break;
+ if (before64(rcv_wnd_new, rcv_wnd)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICTUPDATE);
+ goto raise_win;
+ }
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT);
+ rcv_wnd_old = rcv_wnd;
+ }
+ return;
+ }
+
+ if (rcv_wnd_new != rcv_wnd_old) {
+raise_win:
+ win = rcv_wnd_old - ack_seq;
+ tp->rcv_wnd = min_t(u64, win, U32_MAX);
+ new_win = tp->rcv_wnd;
- if (after64(ack_seq, READ_ONCE(msk->rcv_wnd_sent)))
- WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
+ /* Make sure we do not exceed the maximum possible
+ * scaled window.
+ */
+ if (unlikely(th->syn))
+ new_win = min(new_win, 65535U) << tp->rx_opt.rcv_wscale;
+ if (!tp->rx_opt.rcv_wscale &&
+ READ_ONCE(sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows))
+ new_win = min(new_win, MAX_TCP_WINDOW);
+ else
+ new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
+
+ /* RFC1323 scaling applied */
+ new_win >>= tp->rx_opt.rcv_wscale;
+ th->window = htons(new_win);
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDSHARED);
+ }
}
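The cmpxchg loop above is the usual lock-free "advance a shared maximum" pattern, with the two MIB counters recording who lost the race; its shape in isolation (a sketch that ignores the sequence wraparound after64()/before64() handle):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* try to raise *shared to new_max; false means another CPU already
 * published a value >= new_max (the "conflict" cases counted above) */
static bool advance_shared_max(_Atomic uint64_t *shared, uint64_t new_max)
{
	uint64_t cur = atomic_load(shared);

	while (cur < new_max) {
		if (atomic_compare_exchange_weak(shared, &cur, new_max))
			return true;
		/* cur was reloaded with the racing value; the loop decides */
	}
	return false;
}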
-u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
+__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
{
struct csum_pseudo_header header;
__wsum csum;
@@ -1252,34 +1299,52 @@ u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
header.csum = 0;
csum = csum_partial(&header, sizeof(header), sum);
- return (__force u16)csum_fold(csum);
+ return csum_fold(csum);
}
-static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
+static __sum16 mptcp_make_csum(const struct mptcp_ext *mpext)
{
return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len,
~csum_unfold(mpext->csum));
}
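__mptcp_make_csum() folds the RFC 8684 DSS pseudo-header {data_seq, subflow_seq, data_len, zeroed csum} into the running payload sum. An equivalent standalone computation, as a sketch (the kernel reuses the skb's accumulated __wsum rather than re-summing the payload):

#include <endian.h>
#include <stdint.h>
#include <string.h>

static uint32_t csum16_add(uint32_t sum, const uint8_t *p, size_t len)
{
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)p[i] << 8 | p[i + 1];
	if (len & 1)
		sum += (uint32_t)p[len - 1] << 8;
	return sum;
}

static uint16_t mptcp_dss_csum(uint64_t data_seq, uint32_t subflow_seq,
			       uint16_t data_len, const uint8_t *payload)
{
	/* pseudo-header: data_seq(8) | subflow_seq(4) | data_len(2) | 0(2) */
	uint8_t hdr[16] = { 0 };
	uint64_t ds = htobe64(data_seq);
	uint32_t ss = htobe32(subflow_seq);
	uint16_t dl = htobe16(data_len);
	uint32_t sum;

	memcpy(hdr, &ds, 8);
	memcpy(hdr + 8, &ss, 4);
	memcpy(hdr + 12, &dl, 2);

	sum = csum16_add(0, hdr, sizeof(hdr));
	sum = csum16_add(sum, payload, data_len);
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return ~sum & 0xffff;	/* one's-complement fold, like csum_fold() */
}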
-void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
- struct mptcp_out_options *opts)
+static void put_len_csum(u16 len, __sum16 csum, void *data)
{
- if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) {
- const struct sock *ssk = (const struct sock *)tp;
- struct mptcp_subflow_context *subflow;
+ __sum16 *sumptr = data + 2;
+ __be16 *ptr = data;
- subflow = mptcp_subflow_ctx(ssk);
- subflow->send_mp_fail = 0;
+ put_unaligned_be16(len, ptr);
- *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
- TCPOLEN_MPTCP_FAIL,
- 0, 0);
- put_unaligned_be64(opts->fail_seq, ptr);
- ptr += 2;
- }
+ put_unaligned(csum, sumptr);
+}
- /* DSS, MPC, MPJ, ADD_ADDR, FASTCLOSE and RST are mutually exclusive,
- * see mptcp_established_options*()
+void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
+ struct mptcp_out_options *opts)
+{
+ const struct sock *ssk = (const struct sock *)tp;
+ struct mptcp_subflow_context *subflow;
+
+ /* Which options can be used together?
+ *
+ * X: mutually exclusive
+ * O: often used together
+ * C: can be used together in some cases
+ * P: could be used together but we prefer not to (optimisations)
+ *
+ * Opt: | MPC | MPJ | DSS | ADD | RM | PRIO | FAIL | FC |
+ * ------|------|------|------|------|------|------|------|------|
+ * MPC |------|------|------|------|------|------|------|------|
+ * MPJ | X |------|------|------|------|------|------|------|
+ * DSS | X | X |------|------|------|------|------|------|
+ * ADD | X | X | P |------|------|------|------|------|
+ * RM | C | C | C | P |------|------|------|------|
+ * PRIO | X | C | C | C | C |------|------|------|
+ * FAIL | X | X | C | X | X | X |------|------|
+ * FC | X | X | X | X | X | X | X |------|
+ * RST | X | X | X | X | X | X | O | O |
+ * ------|------|------|------|------|------|------|------|------|
+ *
+ * The same applies in the mptcp_established_options() function.
*/
if (likely(OPTION_MPTCP_DSS & opts->suboptions)) {
struct mptcp_ext *mpext = &opts->ext_copy;
@@ -1328,14 +1393,22 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
put_unaligned_be32(mpext->subflow_seq, ptr);
ptr += 1;
if (opts->csum_reqd) {
- put_unaligned_be32(mpext->data_len << 16 |
- mptcp_make_csum(mpext), ptr);
+ /* data_len == 0 is reserved for the infinite mapping;
+ * in that case the checksum is also set to 0.
+ */
+ put_len_csum(mpext->data_len,
+ (mpext->data_len ? mptcp_make_csum(mpext) : 0),
+ ptr);
} else {
put_unaligned_be32(mpext->data_len << 16 |
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
}
ptr += 1;
}
+
+ /* We might need to add MP_FAIL options in rare cases */
+ if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions))
+ goto mp_fail;
} else if (OPTIONS_MPTCP_MPC & opts->suboptions) {
u8 len, flag = MPTCP_CAP_HMAC_SHA256;
@@ -1376,11 +1449,12 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
goto mp_capable_done;
if (opts->csum_reqd) {
- put_unaligned_be32(opts->data_len << 16 |
- __mptcp_make_csum(opts->data_seq,
- opts->subflow_seq,
- opts->data_len,
- ~csum_unfold(opts->csum)), ptr);
+ put_len_csum(opts->data_len,
+ __mptcp_make_csum(opts->data_seq,
+ opts->subflow_seq,
+ opts->data_len,
+ ~csum_unfold(opts->csum)),
+ ptr);
} else {
put_unaligned_be32(opts->data_len << 16 |
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
@@ -1479,6 +1553,21 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
if (OPTION_MPTCP_RST & opts->suboptions)
goto mp_rst;
return;
+ } else if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) {
+mp_fail:
+ /* MP_FAIL is mutually exclusive with others except RST */
+ subflow = mptcp_subflow_ctx(ssk);
+ subflow->send_mp_fail = 0;
+
+ *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
+ TCPOLEN_MPTCP_FAIL,
+ 0, 0);
+ put_unaligned_be64(opts->fail_seq, ptr);
+ ptr += 2;
+
+ if (OPTION_MPTCP_RST & opts->suboptions)
+ goto mp_rst;
+ return;
} else if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) {
mp_rst:
*ptr++ = mptcp_option(MPTCPOPT_RST,
@@ -1489,15 +1578,15 @@ mp_rst:
}
if (OPTION_MPTCP_PRIO & opts->suboptions) {
- const struct sock *ssk = (const struct sock *)tp;
- struct mptcp_subflow_context *subflow;
-
subflow = mptcp_subflow_ctx(ssk);
subflow->send_mp_prio = 0;
*ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
TCPOLEN_MPTCP_PRIO,
opts->backup, TCPOPT_NOP);
+
+ MPTCP_INC_STATS(sock_net((const struct sock *)tp),
+ MPTCP_MIB_MPPRIOTX);
}
mp_capable_done:
@@ -1522,7 +1611,7 @@ mp_capable_done:
}
if (tp)
- mptcp_set_rwin(tp);
+ mptcp_set_rwin(tp, th);
}
__be32 mptcp_get_reset_option(const struct sk_buff *skb)
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 696b2c4613a7..45e2a48397b9 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -87,6 +87,9 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
unsigned int subflows_max;
int ret = 0;
+ if (mptcp_pm_is_userspace(msk))
+ return mptcp_userspace_pm_active(msk);
+
subflows_max = mptcp_pm_get_subflows_max(msk);
pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows,
@@ -178,14 +181,14 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
struct mptcp_pm_data *pm = &msk->pm;
bool update_subflows;
- update_subflows = (ssk->sk_state == TCP_CLOSE) &&
- (subflow->request_join || subflow->mp_join);
+ update_subflows = (subflow->request_join || subflow->mp_join) &&
+ mptcp_pm_is_kernel(msk);
if (!READ_ONCE(pm->work_pending) && !update_subflows)
return;
spin_lock_bh(&pm->lock);
if (update_subflows)
- pm->subflows--;
+ __mptcp_pm_close_subflow(msk);
/* Even if this subflow is not really established, tell the PM to try
* to pick the next ones, if possible.
@@ -196,30 +199,41 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
spin_unlock_bh(&pm->lock);
}
-void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
+void mptcp_pm_add_addr_received(const struct sock *ssk,
const struct mptcp_addr_info *addr)
{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
struct mptcp_pm_data *pm = &msk->pm;
pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id,
READ_ONCE(pm->accept_addr));
- mptcp_event_addr_announced(msk, addr);
+ mptcp_event_addr_announced(ssk, addr);
spin_lock_bh(&pm->lock);
- if (!READ_ONCE(pm->accept_addr)) {
+ if (mptcp_pm_is_userspace(msk)) {
+ if (mptcp_userspace_pm_active(msk)) {
+ mptcp_pm_announce_addr(msk, addr, true);
+ mptcp_pm_add_addr_send_ack(msk);
+ } else {
+ __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
+ }
+ } else if (!READ_ONCE(pm->accept_addr)) {
mptcp_pm_announce_addr(msk, addr, true);
mptcp_pm_add_addr_send_ack(msk);
} else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
pm->remote = *addr;
+ } else {
+ __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
}
spin_unlock_bh(&pm->lock);
}
void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr)
+ const struct mptcp_addr_info *addr)
{
struct mptcp_pm_data *pm = &msk->pm;
@@ -253,36 +267,67 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
mptcp_event_addr_removed(msk, rm_list->ids[i]);
spin_lock_bh(&pm->lock);
- mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED);
- pm->rm_list_rx = *rm_list;
+ if (mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED))
+ pm->rm_list_rx = *rm_list;
+ else
+ __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_RMADDRDROP);
spin_unlock_bh(&pm->lock);
}
-void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
+void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup)
{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct sock *sk = subflow->conn;
+ struct mptcp_sock *msk;
pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
- subflow->backup = bkup;
+ msk = mptcp_sk(sk);
+ if (subflow->backup != bkup) {
+ subflow->backup = bkup;
+ mptcp_data_lock(sk);
+ if (!sock_owned_by_user(sk))
+ msk->last_snd = NULL;
+ else
+ __set_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags);
+ mptcp_data_unlock(sk);
+ }
- mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
+ mptcp_event(MPTCP_EVENT_SUB_PRIORITY, msk, ssk, GFP_ATOMIC);
}
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
pr_debug("fail_seq=%llu", fail_seq);
+
+ if (!READ_ONCE(msk->allow_infinite_fallback))
+ return;
+
+ if (!subflow->fail_tout) {
+ pr_debug("send MP_FAIL response and infinite map");
+
+ subflow->send_mp_fail = 1;
+ subflow->send_infinite_map = 1;
+ tcp_send_ack(sk);
+ } else {
+ pr_debug("MP_FAIL response received");
+ WRITE_ONCE(subflow->fail_tout, 0);
+ }
}
/* path manager helpers */
-bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb,
+bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
unsigned int opt_size, unsigned int remaining,
struct mptcp_addr_info *addr, bool *echo,
- bool *port, bool *drop_other_suboptions)
+ bool *drop_other_suboptions)
{
int ret = false;
u8 add_addr;
u8 family;
+ bool port;
spin_lock_bh(&msk->pm.lock);
@@ -300,10 +345,10 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb,
}
*echo = mptcp_pm_should_add_signal_echo(msk);
- *port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port);
+ port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port);
family = *echo ? msk->pm.remote.family : msk->pm.local.family;
- if (remaining < mptcp_add_addr_len(family, *echo, *port))
+ if (remaining < mptcp_add_addr_len(family, *echo, port))
goto out_unlock;
if (*echo) {
@@ -377,27 +422,48 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
void mptcp_pm_data_reset(struct mptcp_sock *msk)
{
- msk->pm.add_addr_signaled = 0;
- msk->pm.add_addr_accepted = 0;
- msk->pm.local_addr_used = 0;
- msk->pm.subflows = 0;
- msk->pm.rm_list_tx.nr = 0;
- msk->pm.rm_list_rx.nr = 0;
- WRITE_ONCE(msk->pm.work_pending, false);
- WRITE_ONCE(msk->pm.addr_signal, 0);
- WRITE_ONCE(msk->pm.accept_addr, false);
- WRITE_ONCE(msk->pm.accept_subflow, false);
- WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
- msk->pm.status = 0;
- bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+ u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk));
+ struct mptcp_pm_data *pm = &msk->pm;
- mptcp_pm_nl_data_init(msk);
+ pm->add_addr_signaled = 0;
+ pm->add_addr_accepted = 0;
+ pm->local_addr_used = 0;
+ pm->subflows = 0;
+ pm->rm_list_tx.nr = 0;
+ pm->rm_list_rx.nr = 0;
+ WRITE_ONCE(pm->pm_type, pm_type);
+
+ if (pm_type == MPTCP_PM_TYPE_KERNEL) {
+ bool subflows_allowed = !!mptcp_pm_get_subflows_max(msk);
+
+ /* pm->work_pending must only be set to 'true' when
+ * pm->pm_type is set to MPTCP_PM_TYPE_KERNEL
+ */
+ WRITE_ONCE(pm->work_pending,
+ (!!mptcp_pm_get_local_addr_max(msk) &&
+ subflows_allowed) ||
+ !!mptcp_pm_get_add_addr_signal_max(msk));
+ WRITE_ONCE(pm->accept_addr,
+ !!mptcp_pm_get_add_addr_accept_max(msk) &&
+ subflows_allowed);
+ WRITE_ONCE(pm->accept_subflow, subflows_allowed);
+ } else {
+ WRITE_ONCE(pm->work_pending, 0);
+ WRITE_ONCE(pm->accept_addr, 0);
+ WRITE_ONCE(pm->accept_subflow, 0);
+ }
+
+ WRITE_ONCE(pm->addr_signal, 0);
+ WRITE_ONCE(pm->remote_deny_join_id0, false);
+ pm->status = 0;
+ bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
}
void mptcp_pm_data_init(struct mptcp_sock *msk)
{
spin_lock_init(&msk->pm.lock);
INIT_LIST_HEAD(&msk->pm.anno_list);
+ INIT_LIST_HEAD(&msk->pm.userspace_pm_local_addr_list);
mptcp_pm_data_reset(msk);
}
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 75af1f701e1d..9813ed0fde9b 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -22,14 +22,6 @@ static struct genl_family mptcp_genl_family;
static int pm_nl_pernet_id;
-struct mptcp_pm_addr_entry {
- struct list_head list;
- struct mptcp_addr_info addr;
- u8 flags;
- int ifindex;
- struct socket *lsk;
-};
-
struct mptcp_pm_add_entry {
struct list_head list;
struct mptcp_addr_info addr;
@@ -55,8 +47,19 @@ struct pm_nl_pernet {
#define MPTCP_PM_ADDR_MAX 8
#define ADD_ADDR_RETRANS_MAX 3
-static bool addresses_equal(const struct mptcp_addr_info *a,
- const struct mptcp_addr_info *b, bool use_port)
+static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net)
+{
+ return net_generic(net, pm_nl_pernet_id);
+}
+
+static struct pm_nl_pernet *
+pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk)
+{
+ return pm_nl_get_pernet(sock_net((struct sock *)msk));
+}
+
+bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
+ const struct mptcp_addr_info *b, bool use_port)
{
bool addr_equals = false;
@@ -83,16 +86,6 @@ static bool addresses_equal(const struct mptcp_addr_info *a,
return a->port == b->port;
}
-static bool address_zero(const struct mptcp_addr_info *addr)
-{
- struct mptcp_addr_info zero;
-
- memset(&zero, 0, sizeof(zero));
- zero.family = addr->family;
-
- return addresses_equal(addr, &zero, true);
-}
-
static void local_address(const struct sock_common *skc,
struct mptcp_addr_info *addr)
{
@@ -120,7 +113,7 @@ static void remote_address(const struct sock_common *skc,
}
static bool lookup_subflow_by_saddr(const struct list_head *list,
- struct mptcp_addr_info *saddr)
+ const struct mptcp_addr_info *saddr)
{
struct mptcp_subflow_context *subflow;
struct mptcp_addr_info cur;
@@ -130,7 +123,7 @@ static bool lookup_subflow_by_saddr(const struct list_head *list,
skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow);
local_address(skc, &cur);
- if (addresses_equal(&cur, saddr, saddr->port))
+ if (mptcp_addresses_equal(&cur, saddr, saddr->port))
return true;
}
@@ -138,7 +131,7 @@ static bool lookup_subflow_by_saddr(const struct list_head *list,
}
static bool lookup_subflow_by_daddr(const struct list_head *list,
- struct mptcp_addr_info *daddr)
+ const struct mptcp_addr_info *daddr)
{
struct mptcp_subflow_context *subflow;
struct mptcp_addr_info cur;
@@ -148,7 +141,7 @@ static bool lookup_subflow_by_daddr(const struct list_head *list,
skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow);
remote_address(skc, &cur);
- if (addresses_equal(&cur, daddr, daddr->port))
+ if (mptcp_addresses_equal(&cur, daddr, daddr->port))
return true;
}
@@ -157,10 +150,10 @@ static bool lookup_subflow_by_daddr(const struct list_head *list,
static struct mptcp_pm_addr_entry *
select_local_address(const struct pm_nl_pernet *pernet,
- struct mptcp_sock *msk)
+ const struct mptcp_sock *msk)
{
+ const struct sock *sk = (const struct sock *)msk;
struct mptcp_pm_addr_entry *entry, *ret = NULL;
- struct sock *sk = (struct sock *)msk;
msk_owned_by_me(msk);
@@ -190,7 +183,7 @@ select_local_address(const struct pm_nl_pernet *pernet,
}
static struct mptcp_pm_addr_entry *
-select_signal_address(struct pm_nl_pernet *pernet, struct mptcp_sock *msk)
+select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk)
{
struct mptcp_pm_addr_entry *entry, *ret = NULL;
@@ -214,45 +207,41 @@ select_signal_address(struct pm_nl_pernet *pernet, struct mptcp_sock *msk)
return ret;
}
-unsigned int mptcp_pm_get_add_addr_signal_max(struct mptcp_sock *msk)
+unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk)
{
- struct pm_nl_pernet *pernet;
+ const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
- pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->add_addr_signal_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max);
-unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk)
+unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk)
{
- struct pm_nl_pernet *pernet;
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
- pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->add_addr_accept_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max);
-unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk)
+unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk)
{
- struct pm_nl_pernet *pernet;
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
- pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->subflows_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max);
-unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk)
+unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk)
{
- struct pm_nl_pernet *pernet;
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
- pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->local_addr_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max);
bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk)
{
- struct pm_nl_pernet *pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) ||
(find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap,
@@ -264,15 +253,15 @@ bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk)
}
struct mptcp_pm_add_entry *
-mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr)
+mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
+ const struct mptcp_addr_info *addr)
{
struct mptcp_pm_add_entry *entry;
lockdep_assert_held(&msk->pm.lock);
list_for_each_entry(entry, &msk->pm.anno_list, list) {
- if (addresses_equal(&entry->addr, addr, true))
+ if (mptcp_addresses_equal(&entry->addr, addr, true))
return entry;
}
@@ -289,7 +278,7 @@ bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk)
spin_lock_bh(&msk->pm.lock);
list_for_each_entry(entry, &msk->pm.anno_list, list) {
- if (addresses_equal(&entry->addr, &saddr, true)) {
+ if (mptcp_addresses_equal(&entry->addr, &saddr, true)) {
ret = true;
goto out;
}
@@ -346,7 +335,7 @@ out:
struct mptcp_pm_add_entry *
mptcp_pm_del_add_timer(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr, bool check_id)
+ const struct mptcp_addr_info *addr, bool check_id)
{
struct mptcp_pm_add_entry *entry;
struct sock *sk = (struct sock *)msk;
@@ -363,8 +352,8 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
return entry;
}
-static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *entry)
+bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
+ const struct mptcp_pm_addr_entry *entry)
{
struct mptcp_pm_add_entry *add_entry = NULL;
struct sock *sk = (struct sock *)msk;
@@ -372,8 +361,16 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
lockdep_assert_held(&msk->pm.lock);
- if (mptcp_lookup_anno_list_by_saddr(msk, &entry->addr))
- return false;
+ add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr);
+
+ if (add_entry) {
+ if (mptcp_pm_is_kernel(msk))
+ return false;
+
+ sk_reset_timer(sk, &add_entry->add_timer,
+ jiffies + mptcp_get_add_addr_timeout(net));
+ return true;
+ }
add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC);
if (!add_entry)
@@ -410,13 +407,13 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
}
}
-static bool lookup_address_in_vec(struct mptcp_addr_info *addrs, unsigned int nr,
- struct mptcp_addr_info *addr)
+static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr,
+ const struct mptcp_addr_info *addr)
{
int i;
for (i = 0; i < nr; i++) {
- if (addresses_equal(&addrs[i], addr, addr->port))
+ if (addrs[i].id == addr->id)
return true;
}
@@ -452,7 +449,8 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
mptcp_for_each_subflow(msk, subflow) {
ssk = mptcp_subflow_tcp_sock(subflow);
remote_address((struct sock_common *)ssk, &addrs[i]);
- if (deny_id0 && addresses_equal(&addrs[i], &remote, false))
+ addrs[i].id = subflow->remote_id;
+ if (deny_id0 && !addrs[i].id)
continue;
if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
@@ -466,6 +464,37 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
return i;
}
+static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ bool prio, bool backup)
+{
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ bool slow;
+
+ pr_debug("send ack for %s",
+ prio ? "mp_prio" : (mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr"));
+
+ slow = lock_sock_fast(ssk);
+ if (prio) {
+ if (subflow->backup != backup)
+ msk->last_snd = NULL;
+
+ subflow->send_mp_prio = 1;
+ subflow->backup = backup;
+ subflow->request_bkup = backup;
+ }
+
+ __mptcp_subflow_send_ack(ssk);
+ unlock_sock_fast(ssk, slow);
+}
+
+static void mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ bool prio, bool backup)
+{
+ spin_unlock_bh(&msk->pm.lock);
+ __mptcp_pm_send_ack(msk, subflow, prio, backup);
+ spin_lock_bh(&msk->pm.lock);
+}
+
static struct mptcp_pm_addr_entry *
__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
{
@@ -478,21 +507,19 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
return NULL;
}
-static int
-lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr)
+static struct mptcp_pm_addr_entry *
+__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info,
+ bool lookup_by_id)
{
struct mptcp_pm_addr_entry *entry;
- int ret = -1;
- rcu_read_lock();
list_for_each_entry(entry, &pernet->local_addr_list, list) {
- if (addresses_equal(&entry->addr, addr, entry->addr.port)) {
- ret = entry->addr.id;
- break;
- }
+ if ((!lookup_by_id &&
+ mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) ||
+ (lookup_by_id && entry->addr.id == info->id))
+ return entry;
}
- rcu_read_unlock();
- return ret;
+ return NULL;
}
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
@@ -504,7 +531,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
struct pm_nl_pernet *pernet;
unsigned int subflows_max;
- pernet = net_generic(sock_net(sk), pm_nl_pernet_id);
+ pernet = pm_nl_get_pernet(sock_net(sk));
add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk);
local_addr_max = mptcp_pm_get_local_addr_max(msk);
@@ -512,13 +539,23 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
/* do lazy endpoint usage accounting for the MPC subflows */
if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) {
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(msk->first);
+ struct mptcp_pm_addr_entry *entry;
struct mptcp_addr_info mpc_addr;
- int mpc_id;
+ bool backup = false;
local_address((struct sock_common *)msk->first, &mpc_addr);
- mpc_id = lookup_id_by_addr(pernet, &mpc_addr);
- if (mpc_id >= 0)
- __clear_bit(mpc_id, msk->pm.id_avail_bitmap);
+ rcu_read_lock();
+ entry = __lookup_addr(pernet, &mpc_addr, false);
+ if (entry) {
+ __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap);
+ msk->mpc_endpoint_id = entry->addr.id;
+ backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
+ }
+ rcu_read_unlock();
+
+ if (backup)
+ mptcp_pm_send_ack(msk, subflow, true, backup);
msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED);
}
@@ -532,6 +569,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
if (msk->pm.add_addr_signaled < add_addr_signal_max) {
local = select_signal_address(pernet, msk);
+ /* due to racing events on both ends we can reach here while a
+ * previous add address is still running: if we invoke
+ * mptcp_pm_announce_addr() now, it will fail and the
+ * corresponding id will be marked as used.
+ * Instead, let the PM machinery reschedule us once the
+ * current address announce has completed.
+ */
+ if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL))
+ return;
+
if (local) {
if (mptcp_pm_alloc_anno_list(msk, local)) {
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
@@ -590,7 +637,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
unsigned int subflows_max;
int i = 0;
- pernet = net_generic(sock_net(sk), pm_nl_pernet_id);
+ pernet = pm_nl_get_pernet_from_msk(msk);
subflows_max = mptcp_pm_get_subflows_max(msk);
rcu_read_lock();
@@ -645,15 +692,20 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
msk->pm.add_addr_accepted, add_addr_accept_max,
msk->pm.remote.family);
- if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote))
- goto add_addr_echo;
+ remote = msk->pm.remote;
+ mptcp_pm_announce_addr(msk, &remote, true);
+ mptcp_pm_nl_addr_send_ack(msk);
+
+ if (lookup_subflow_by_daddr(&msk->conn_list, &remote))
+ return;
+
+ /* if no port is provided, use the port of the id 0 (initial) remote address */
+ if (!remote.port)
+ remote.port = sk->sk_dport;
/* connect to the specified remote address, using whatever
* local address the routing configuration will pick.
*/
- remote = msk->pm.remote;
- if (!remote.port)
- remote.port = sk->sk_dport;
nr = fill_local_addresses_vec(msk, addrs);
msk->pm.add_addr_accepted++;
@@ -665,10 +717,6 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
for (i = 0; i < nr; i++)
__mptcp_subflow_connect(sk, &addrs[i], &remote);
spin_lock_bh(&msk->pm.lock);
-
-add_addr_echo:
- mptcp_pm_announce_addr(msk, &msk->pm.remote, true);
- mptcp_pm_nl_addr_send_ack(msk);
}
void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
@@ -683,21 +731,14 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
return;
subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node);
- if (subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-
- spin_unlock_bh(&msk->pm.lock);
- pr_debug("send ack for %s",
- mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr");
-
- mptcp_subflow_send_ack(ssk);
- spin_lock_bh(&msk->pm.lock);
- }
+ if (subflow)
+ mptcp_pm_send_ack(msk, subflow, false, false);
}
-static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr,
- u8 bkup)
+int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addr,
+ struct mptcp_addr_info *rem,
+ u8 bkup)
{
struct mptcp_subflow_context *subflow;
@@ -705,29 +746,30 @@ static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- struct sock *sk = (struct sock *)msk;
- struct mptcp_addr_info local;
+ struct mptcp_addr_info local, remote;
local_address((struct sock_common *)ssk, &local);
- if (!addresses_equal(&local, addr, addr->port))
+ if (!mptcp_addresses_equal(&local, addr, addr->port))
continue;
- subflow->backup = bkup;
- subflow->send_mp_prio = 1;
- subflow->request_bkup = bkup;
- __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIOTX);
-
- spin_unlock_bh(&msk->pm.lock);
- pr_debug("send ack for mp_prio");
- mptcp_subflow_send_ack(ssk);
- spin_lock_bh(&msk->pm.lock);
+ if (rem && rem->family != AF_UNSPEC) {
+ remote_address((struct sock_common *)ssk, &remote);
+ if (!mptcp_addresses_equal(&remote, rem, rem->port))
+ continue;
+ }
+ __mptcp_pm_send_ack(msk, subflow, true, bkup);
return 0;
}
return -EINVAL;
}
+static bool mptcp_local_id_match(const struct mptcp_sock *msk, u8 local_id, u8 id)
+{
+ return local_id == id || (!local_id && msk->mpc_endpoint_id == id);
+}
+
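+/* Editor's note (worked example, not in the patch): the initial MPC subflow
+ * keeps local_id == 0, so a removal targeting the endpoint recorded in
+ * msk->mpc_endpoint_id must match it too:
+ *
+ *   mptcp_local_id_match(msk, 0, msk->mpc_endpoint_id) -> true
+ *   mptcp_local_id_match(msk, 3, 3)                    -> true
+ *   mptcp_local_id_match(msk, 3, 4)                    -> false
+ */
+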
static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list,
enum linux_mptcp_mib_field rm_type)
@@ -751,22 +793,23 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
return;
for (i = 0; i < rm_list->nr; i++) {
+ u8 rm_id = rm_list->ids[i];
bool removed = false;
- list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
+ mptcp_for_each_subflow_safe(msk, subflow, tmp) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
u8 id = subflow->local_id;
- if (rm_type == MPTCP_MIB_RMADDR)
- id = subflow->remote_id;
-
- if (rm_list->ids[i] != id)
+ if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id)
+ continue;
+ if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id))
continue;
- pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u",
+ pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u",
rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow",
- i, rm_list->ids[i], subflow->local_id, subflow->remote_id);
+ i, rm_id, subflow->local_id, subflow->remote_id,
+ msk->mpc_endpoint_id);
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);
@@ -777,10 +820,14 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
removed = true;
__MPTCP_INC_STATS(sock_net(sk), rm_type);
}
- __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap);
+ if (rm_type == MPTCP_MIB_RMSUBFLOW)
+ __set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap);
if (!removed)
continue;
+ if (!mptcp_pm_is_kernel(msk))
+ continue;
+
if (rm_type == MPTCP_MIB_RMADDR) {
msk->pm.add_addr_accepted--;
WRITE_ONCE(msk->pm.accept_addr, true);
@@ -844,10 +891,18 @@ static bool address_use_port(struct mptcp_pm_addr_entry *entry)
MPTCP_PM_ADDR_FLAG_SIGNAL;
}
+/* caller must ensure the RCU grace period has already elapsed */
+static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
+{
+ if (entry->lsk)
+ sock_release(entry->lsk);
+ kfree(entry);
+}
+
static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
struct mptcp_pm_addr_entry *entry)
{
- struct mptcp_pm_addr_entry *cur;
+ struct mptcp_pm_addr_entry *cur, *del_entry = NULL;
unsigned int addr_max;
int ret = -EINVAL;
@@ -865,11 +920,26 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
/* do not insert duplicate addresses; take the port into account
 * only when either endpoint specifies one
 */
+ if (!address_use_port(entry))
+ entry->addr.port = 0;
list_for_each_entry(cur, &pernet->local_addr_list, list) {
- if (addresses_equal(&cur->addr, &entry->addr,
- address_use_port(entry) &&
- address_use_port(cur)))
- goto out;
+ if (mptcp_addresses_equal(&cur->addr, &entry->addr,
+ cur->addr.port || entry->addr.port)) {
+ /* allow replacing the existing endpoint only if such
+ * endpoint is an implicit one and the user-space
+ * did not provide an endpoint id
+ */
+ if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT))
+ goto out;
+ if (entry->addr.id)
+ goto out;
+
+ pernet->addrs--;
+ entry->addr.id = cur->addr.id;
+ list_del_rcu(&cur->list);
+ del_entry = cur;
+ break;
+ }
}
if (!entry->addr.id) {
@@ -900,17 +970,27 @@ find_next:
}
pernet->addrs++;
- list_add_tail_rcu(&entry->list, &pernet->local_addr_list);
+ if (!entry->addr.port)
+ list_add_tail_rcu(&entry->list, &pernet->local_addr_list);
+ else
+ list_add_rcu(&entry->list, &pernet->local_addr_list);
ret = entry->addr.id;
out:
spin_unlock_bh(&pernet->lock);
+
+ /* just replaced an existing entry, free it */
+ if (del_entry) {
+ synchronize_rcu();
+ __mptcp_pm_release_addr_entry(del_entry);
+ }
return ret;
}
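
/* Editor's note: the replace path above follows the classic RCU removal
 * pattern; condensed (names generic, not from the patch):
 *
 *	spin_lock_bh(&pernet->lock);
 *	list_del_rcu(&old->list);		// unpublish the implicit entry
 *	list_add_tail_rcu(&new->list, head);	// publish the replacement
 *	spin_unlock_bh(&pernet->lock);
 *	synchronize_rcu();			// wait out concurrent readers
 *	__mptcp_pm_release_addr_entry(old);	// now safe to free + sock_release
 */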
static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
struct mptcp_pm_addr_entry *entry)
{
+ int addrlen = sizeof(struct sockaddr_in);
struct sockaddr_storage addr;
struct mptcp_sock *msk;
struct socket *ssock;
@@ -935,8 +1015,11 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
}
mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
- err = kernel_bind(ssock, (struct sockaddr *)&addr,
- sizeof(struct sockaddr_in));
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (entry->addr.family == AF_INET6)
+ addrlen = sizeof(struct sockaddr_in6);
+#endif
+ err = kernel_bind(ssock, (struct sockaddr *)&addr, addrlen);
if (err) {
pr_warn("kernel_bind error, err=%d", err);
goto out;
@@ -971,17 +1054,17 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
*/
local_address((struct sock_common *)msk, &msk_local);
local_address((struct sock_common *)skc, &skc_local);
- if (addresses_equal(&msk_local, &skc_local, false))
+ if (mptcp_addresses_equal(&msk_local, &skc_local, false))
return 0;
- if (address_zero(&skc_local))
- return 0;
+ if (mptcp_pm_is_userspace(msk))
+ return mptcp_userspace_pm_get_local_id(msk, &skc_local);
- pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
+ pernet = pm_nl_get_pernet_from_msk(msk);
rcu_read_lock();
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
- if (addresses_equal(&entry->addr, &skc_local, entry->addr.port)) {
+ if (mptcp_addresses_equal(&entry->addr, &skc_local, entry->addr.port)) {
ret = entry->addr.id;
break;
}
@@ -999,7 +1082,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
entry->addr.id = 0;
entry->addr.port = 0;
entry->ifindex = 0;
- entry->flags = 0;
+ entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT;
entry->lsk = NULL;
ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
if (ret < 0)
@@ -1008,18 +1091,6 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
return ret;
}
-void mptcp_pm_nl_data_init(struct mptcp_sock *msk)
-{
- struct mptcp_pm_data *pm = &msk->pm;
- bool subflows;
-
- subflows = !!mptcp_pm_get_subflows_max(msk);
- WRITE_ONCE(pm->work_pending, (!!mptcp_pm_get_local_addr_max(msk) && subflows) ||
- !!mptcp_pm_get_add_addr_signal_max(msk));
- WRITE_ONCE(pm->accept_addr, !!mptcp_pm_get_add_addr_accept_max(msk) && subflows);
- WRITE_ONCE(pm->accept_subflow, subflows);
-}
-
#define MPTCP_PM_CMD_GRP_OFFSET 0
#define MPTCP_PM_EV_GRP_OFFSET 1
@@ -1047,6 +1118,10 @@ static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = {
NLA_POLICY_NESTED(mptcp_pm_addr_policy),
[MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, },
[MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, },
+ [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, },
+ [MPTCP_PM_ATTR_LOC_ID] = { .type = NLA_U8, },
+ [MPTCP_PM_ATTR_ADDR_REMOTE] =
+ NLA_POLICY_NESTED(mptcp_pm_addr_policy),
};
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
@@ -1076,7 +1151,7 @@ void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ss
}
unlock_sock_fast(ssk, slow);
- /* always try to push the pending data regarless of re-injections:
+ /* always try to push the pending data regardless of re-injections:
* we can possibly use backup subflows now, and subflow selection
* is cheap under the msk socket lock
*/
@@ -1095,11 +1170,12 @@ static int mptcp_pm_family_to_addr(int family)
return MPTCP_PM_ADDR_ATTR_ADDR4;
}
-static int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
- bool require_family,
- struct mptcp_pm_addr_entry *entry)
+static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[],
+ const struct nlattr *attr,
+ struct genl_info *info,
+ struct mptcp_addr_info *addr,
+ bool require_family)
{
- struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];
int err, addr_addr;
if (!attr) {
@@ -1113,27 +1189,29 @@ static int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
if (err)
return err;
- memset(entry, 0, sizeof(*entry));
+ if (tb[MPTCP_PM_ADDR_ATTR_ID])
+ addr->id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]);
+
if (!tb[MPTCP_PM_ADDR_ATTR_FAMILY]) {
if (!require_family)
- goto skip_family;
+ return err;
NL_SET_ERR_MSG_ATTR(info->extack, attr,
"missing family");
return -EINVAL;
}
- entry->addr.family = nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_FAMILY]);
- if (entry->addr.family != AF_INET
+ addr->family = nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_FAMILY]);
+ if (addr->family != AF_INET
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- && entry->addr.family != AF_INET6
+ && addr->family != AF_INET6
#endif
) {
NL_SET_ERR_MSG_ATTR(info->extack, attr,
"unknown address family");
return -EINVAL;
}
- addr_addr = mptcp_pm_family_to_addr(entry->addr.family);
+ addr_addr = mptcp_pm_family_to_addr(addr->family);
if (!tb[addr_addr]) {
NL_SET_ERR_MSG_ATTR(info->extack, attr,
"missing address data");
@@ -1141,40 +1219,59 @@ static int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- if (entry->addr.family == AF_INET6)
- entry->addr.addr6 = nla_get_in6_addr(tb[addr_addr]);
+ if (addr->family == AF_INET6)
+ addr->addr6 = nla_get_in6_addr(tb[addr_addr]);
else
#endif
- entry->addr.addr.s_addr = nla_get_in_addr(tb[addr_addr]);
+ addr->addr.s_addr = nla_get_in_addr(tb[addr_addr]);
+
+ if (tb[MPTCP_PM_ADDR_ATTR_PORT])
+ addr->port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
+
+ return err;
+}
+
+int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
+ struct mptcp_addr_info *addr)
+{
+ struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];
+
+ memset(addr, 0, sizeof(*addr));
+
+ return mptcp_pm_parse_pm_addr_attr(tb, attr, info, addr, true);
+}
+
+int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
+ bool require_family,
+ struct mptcp_pm_addr_entry *entry)
+{
+ struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];
+ int err;
+
+ memset(entry, 0, sizeof(*entry));
+
+ err = mptcp_pm_parse_pm_addr_attr(tb, attr, info, &entry->addr, require_family);
+ if (err)
+ return err;
-skip_family:
if (tb[MPTCP_PM_ADDR_ATTR_IF_IDX]) {
u32 val = nla_get_s32(tb[MPTCP_PM_ADDR_ATTR_IF_IDX]);
entry->ifindex = val;
}
- if (tb[MPTCP_PM_ADDR_ATTR_ID])
- entry->addr.id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]);
-
if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
- if (tb[MPTCP_PM_ADDR_ATTR_PORT]) {
- if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
- NL_SET_ERR_MSG_ATTR(info->extack, attr,
- "flags must have signal when using port");
- return -EINVAL;
- }
+ if (tb[MPTCP_PM_ADDR_ATTR_PORT])
entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
- }
return 0;
}
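
/* Editor's note: hedged sketch of the new parser split. Handlers that only
 * need a bare address (the userspace PM commands added below) use
 * mptcp_pm_parse_addr(); endpoint CRUD keeps mptcp_pm_parse_entry() to also
 * pick up ifindex and flags:
 *
 *	struct mptcp_addr_info addr;
 *	int err;
 *
 *	err = mptcp_pm_parse_addr(info->attrs[MPTCP_PM_ATTR_ADDR], info, &addr);
 *	if (err < 0)
 *		return err;	// extack is already set by the parser
 */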
static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info)
{
- return net_generic(genl_info_net(info), pm_nl_pernet_id);
+ return pm_nl_get_pernet(genl_info_net(info));
}
static int mptcp_nl_add_subflow_or_signal_addr(struct net *net)
@@ -1185,7 +1282,8 @@ static int mptcp_nl_add_subflow_or_signal_addr(struct net *net)
while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
struct sock *sk = (struct sock *)msk;
- if (!READ_ONCE(msk->fully_established))
+ if (!READ_ONCE(msk->fully_established) ||
+ mptcp_pm_is_userspace(msk))
goto next;
lock_sock(sk);
@@ -1209,11 +1307,27 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
struct mptcp_pm_addr_entry addr, *entry;
int ret;
- ret = mptcp_pm_parse_addr(attr, info, true, &addr);
+ ret = mptcp_pm_parse_entry(attr, info, true, &addr);
if (ret < 0)
return ret;
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (addr.addr.port && !(addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
+ GENL_SET_ERR_MSG(info, "flags must have signal when using port");
+ return -EINVAL;
+ }
+
+ if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL &&
+ addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
+ GENL_SET_ERR_MSG(info, "flags mustn't have both signal and fullmesh");
+ return -EINVAL;
+ }
+
+ if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) {
+ GENL_SET_ERR_MSG(info, "can't create IMPLICIT endpoint");
+ return -EINVAL;
+ }
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL_ACCOUNT);
if (!entry) {
GENL_SET_ERR_MSG(info, "can't allocate addr");
return -ENOMEM;
@@ -1242,17 +1356,25 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
return 0;
}
-int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
+int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
u8 *flags, int *ifindex)
{
struct mptcp_pm_addr_entry *entry;
+ struct sock *sk = (struct sock *)msk;
+ struct net *net = sock_net(sk);
*flags = 0;
*ifindex = 0;
if (id) {
+ if (mptcp_pm_is_userspace(msk))
+ return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk,
+ id,
+ flags,
+ ifindex);
+
rcu_read_lock();
- entry = __lookup_addr_by_id(net_generic(net, pm_nl_pernet_id), id);
+ entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id);
if (entry) {
*flags = entry->flags;
*ifindex = entry->ifindex;
@@ -1264,7 +1386,7 @@ int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
}
static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr)
+ const struct mptcp_addr_info *addr)
{
struct mptcp_pm_add_entry *entry;
@@ -1279,7 +1401,7 @@ static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
}
static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr,
+ const struct mptcp_addr_info *addr,
bool force)
{
struct mptcp_rm_list list = { .nr = 0 };
@@ -1297,11 +1419,12 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
}
static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
- struct mptcp_addr_info *addr)
+ const struct mptcp_pm_addr_entry *entry)
{
- struct mptcp_sock *msk;
- long s_slot = 0, s_num = 0;
+ const struct mptcp_addr_info *addr = &entry->addr;
struct mptcp_rm_list list = { .nr = 0 };
+ long s_slot = 0, s_num = 0;
+ struct mptcp_sock *msk;
pr_debug("remove_id=%d", addr->id);
@@ -1311,6 +1434,9 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
struct sock *sk = (struct sock *)msk;
bool remove_subflow;
+ if (mptcp_pm_is_userspace(msk))
+ goto next;
+
if (list_empty(&msk->conn_list)) {
mptcp_pm_remove_anno_addr(msk, addr, false);
goto next;
@@ -1318,7 +1444,8 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
lock_sock(sk);
remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr);
- mptcp_pm_remove_anno_addr(msk, addr, remove_subflow);
+ mptcp_pm_remove_anno_addr(msk, addr, remove_subflow &&
+ !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT));
if (remove_subflow)
mptcp_pm_remove_subflow(msk, &list);
release_sock(sk);
@@ -1331,14 +1458,6 @@ next:
return 0;
}
-/* caller must ensure the RCU grace period is already elapsed */
-static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
-{
- if (entry->lsk)
- sock_release(entry->lsk);
- kfree(entry);
-}
-
static int mptcp_nl_remove_id_zero_address(struct net *net,
struct mptcp_addr_info *addr)
{
@@ -1352,11 +1471,11 @@ static int mptcp_nl_remove_id_zero_address(struct net *net,
struct sock *sk = (struct sock *)msk;
struct mptcp_addr_info msk_local;
- if (list_empty(&msk->conn_list))
+ if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk))
goto next;
local_address((struct sock_common *)msk, &msk_local);
- if (!addresses_equal(&msk_local, addr, addr->port))
+ if (!mptcp_addresses_equal(&msk_local, addr, addr->port))
goto next;
lock_sock(sk);
@@ -1382,7 +1501,7 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
unsigned int addr_max;
int ret;
- ret = mptcp_pm_parse_addr(attr, info, false, &addr);
+ ret = mptcp_pm_parse_entry(attr, info, false, &addr);
if (ret < 0)
return ret;
@@ -1415,29 +1534,27 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
__clear_bit(entry->addr.id, pernet->id_bitmap);
spin_unlock_bh(&pernet->lock);
- mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), &entry->addr);
+ mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry);
synchronize_rcu();
__mptcp_pm_release_addr_entry(entry);
return ret;
}
-static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
- struct list_head *rm_list)
+void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
+ struct list_head *rm_list)
{
struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 };
struct mptcp_pm_addr_entry *entry;
list_for_each_entry(entry, rm_list, list) {
if (lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) &&
- alist.nr < MPTCP_RM_IDS_MAX &&
- slist.nr < MPTCP_RM_IDS_MAX) {
- alist.ids[alist.nr++] = entry->addr.id;
+ slist.nr < MPTCP_RM_IDS_MAX)
slist.ids[slist.nr++] = entry->addr.id;
- } else if (remove_anno_list_by_saddr(msk, &entry->addr) &&
- alist.nr < MPTCP_RM_IDS_MAX) {
+
+ if (remove_anno_list_by_saddr(msk, &entry->addr) &&
+ alist.nr < MPTCP_RM_IDS_MAX)
alist.ids[alist.nr++] = entry->addr.id;
- }
}
if (alist.nr) {
@@ -1461,9 +1578,11 @@ static void mptcp_nl_remove_addrs_list(struct net *net,
while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
struct sock *sk = (struct sock *)msk;
- lock_sock(sk);
- mptcp_pm_remove_addrs_and_subflows(msk, rm_list);
- release_sock(sk);
+ if (!mptcp_pm_is_userspace(msk)) {
+ lock_sock(sk);
+ mptcp_pm_remove_addrs_and_subflows(msk, rm_list);
+ release_sock(sk);
+ }
sock_put(sk);
cond_resched();
@@ -1556,7 +1675,7 @@ static int mptcp_nl_cmd_get_addr(struct sk_buff *skb, struct genl_info *info)
void *reply;
int ret;
- ret = mptcp_pm_parse_addr(attr, info, false, &addr);
+ ret = mptcp_pm_parse_entry(attr, info, false, &addr);
if (ret < 0)
return ret;
@@ -1607,7 +1726,7 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg,
void *hdr;
int i;
- pernet = net_generic(net, pm_nl_pernet_id);
+ pernet = pm_nl_get_pernet(net);
spin_lock_bh(&pernet->lock);
for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
@@ -1714,9 +1833,22 @@ fail:
return -EMSGSIZE;
}
-static int mptcp_nl_addr_backup(struct net *net,
- struct mptcp_addr_info *addr,
- u8 bkup)
+static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addr)
+{
+ struct mptcp_rm_list list = { .nr = 0 };
+
+ list.ids[list.nr++] = addr->id;
+
+ spin_lock_bh(&msk->pm.lock);
+ mptcp_pm_nl_rm_subflow_received(msk, &list);
+ mptcp_pm_create_subflow_or_signal_addr(msk);
+ spin_unlock_bh(&msk->pm.lock);
+}
+
+static int mptcp_nl_set_flags(struct net *net,
+ struct mptcp_addr_info *addr,
+ u8 bkup, u8 changed)
{
long s_slot = 0, s_num = 0;
struct mptcp_sock *msk;
@@ -1725,13 +1857,14 @@ static int mptcp_nl_addr_backup(struct net *net,
while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
struct sock *sk = (struct sock *)msk;
- if (list_empty(&msk->conn_list))
+ if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk))
goto next;
lock_sock(sk);
- spin_lock_bh(&msk->pm.lock);
- ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup);
- spin_unlock_bh(&msk->pm.lock);
+ if (changed & MPTCP_PM_ADDR_FLAG_BACKUP)
+ ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, NULL, bkup);
+ if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH)
+ mptcp_pm_nl_fullmesh(msk, addr);
release_sock(sk);
next:
@@ -1745,16 +1878,27 @@ next:
static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
{
struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry;
+ struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, };
+ struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
+ struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
+ u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP |
+ MPTCP_PM_ADDR_FLAG_FULLMESH;
struct net *net = sock_net(skb->sk);
u8 bkup = 0, lookup_by_id = 0;
int ret;
- ret = mptcp_pm_parse_addr(attr, info, false, &addr);
+ ret = mptcp_pm_parse_entry(attr, info, false, &addr);
if (ret < 0)
return ret;
+ if (attr_rem) {
+ ret = mptcp_pm_parse_entry(attr_rem, info, false, &remote);
+ if (ret < 0)
+ return ret;
+ }
+
if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
bkup = 1;
if (addr.addr.family == AF_UNSPEC) {
@@ -1763,18 +1907,28 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
return -EOPNOTSUPP;
}
- list_for_each_entry(entry, &pernet->local_addr_list, list) {
- if ((!lookup_by_id && addresses_equal(&entry->addr, &addr.addr, true)) ||
- (lookup_by_id && entry->addr.id == addr.addr.id)) {
- mptcp_nl_addr_backup(net, &entry->addr, bkup);
-
- if (bkup)
- entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
- else
- entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
- }
+ if (token)
+ return mptcp_userspace_pm_set_flags(sock_net(skb->sk),
+ token, &addr, &remote, bkup);
+
+ spin_lock_bh(&pernet->lock);
+ entry = __lookup_addr(pernet, &addr.addr, lookup_by_id);
+ if (!entry) {
+ spin_unlock_bh(&pernet->lock);
+ return -EINVAL;
+ }
+ if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
+ (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
+ spin_unlock_bh(&pernet->lock);
+ return -EINVAL;
}
+ changed = (addr.flags ^ entry->flags) & mask;
+ entry->flags = (entry->flags & ~mask) | (addr.flags & mask);
+ addr = *entry;
+ spin_unlock_bh(&pernet->lock);
+
+ mptcp_nl_set_flags(net, &addr.addr, bkup, changed);
return 0;
}
@@ -1784,6 +1938,13 @@ static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gf
nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp);
}
+bool mptcp_userspace_pm_active(const struct mptcp_sock *msk)
+{
+ return genl_has_listeners(&mptcp_genl_family,
+ sock_net((const struct sock *)msk),
+ MPTCP_PM_EV_GRP_OFFSET);
+}
+
static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk)
{
const struct inet_sock *issk = inet_sk(ssk);
@@ -1904,6 +2065,9 @@ static int mptcp_event_created(struct sk_buff *skb,
if (err)
return err;
+ if (nla_put_u8(skb, MPTCP_ATTR_SERVER_SIDE, READ_ONCE(msk->pm.server_side)))
+ return -EMSGSIZE;
+
return mptcp_event_add_subflow(skb, ssk);
}
@@ -1938,10 +2102,12 @@ nla_put_failure:
kfree_skb(skb);
}
-void mptcp_event_addr_announced(const struct mptcp_sock *msk,
+void mptcp_event_addr_announced(const struct sock *ssk,
const struct mptcp_addr_info *info)
{
- struct net *net = sock_net((const struct sock *)msk);
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+ struct net *net = sock_net(ssk);
struct nlmsghdr *nlh;
struct sk_buff *skb;
@@ -1963,7 +2129,10 @@ void mptcp_event_addr_announced(const struct mptcp_sock *msk,
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id))
goto nla_put_failure;
- if (nla_put_be16(skb, MPTCP_ATTR_DPORT, info->port))
+ if (nla_put_be16(skb, MPTCP_ATTR_DPORT,
+ info->port == 0 ?
+ inet_sk(ssk)->inet_dport :
+ info->port))
goto nla_put_failure;
switch (info->family) {
@@ -2049,17 +2218,17 @@ static const struct genl_small_ops mptcp_pm_ops[] = {
{
.cmd = MPTCP_PM_CMD_ADD_ADDR,
.doit = mptcp_nl_cmd_add_addr,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
},
{
.cmd = MPTCP_PM_CMD_DEL_ADDR,
.doit = mptcp_nl_cmd_del_addr,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
},
{
.cmd = MPTCP_PM_CMD_FLUSH_ADDRS,
.doit = mptcp_nl_cmd_flush_addrs,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
},
{
.cmd = MPTCP_PM_CMD_GET_ADDR,
@@ -2069,7 +2238,7 @@ static const struct genl_small_ops mptcp_pm_ops[] = {
{
.cmd = MPTCP_PM_CMD_SET_LIMITS,
.doit = mptcp_nl_cmd_set_limits,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
},
{
.cmd = MPTCP_PM_CMD_GET_LIMITS,
@@ -2078,7 +2247,27 @@ static const struct genl_small_ops mptcp_pm_ops[] = {
{
.cmd = MPTCP_PM_CMD_SET_FLAGS,
.doit = mptcp_nl_cmd_set_flags,
- .flags = GENL_ADMIN_PERM,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_ANNOUNCE,
+ .doit = mptcp_nl_cmd_announce,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_REMOVE,
+ .doit = mptcp_nl_cmd_remove,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE,
+ .doit = mptcp_nl_cmd_sf_create,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY,
+ .doit = mptcp_nl_cmd_sf_destroy,
+ .flags = GENL_UNS_ADMIN_PERM,
},
};
@@ -2091,13 +2280,14 @@ static struct genl_family mptcp_genl_family __ro_after_init = {
.module = THIS_MODULE,
.small_ops = mptcp_pm_ops,
.n_small_ops = ARRAY_SIZE(mptcp_pm_ops),
+ .resv_start_op = MPTCP_PM_CMD_SUBFLOW_DESTROY + 1,
.mcgrps = mptcp_pm_mcgrps,
.n_mcgrps = ARRAY_SIZE(mptcp_pm_mcgrps),
};
static int __net_init pm_nl_init_net(struct net *net)
{
- struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);
INIT_LIST_HEAD_RCU(&pernet->local_addr_list);
@@ -2119,7 +2309,7 @@ static void __net_exit pm_nl_exit_net(struct list_head *net_list)
struct net *net;
list_for_each_entry(net, net_list, exit_list) {
- struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);
/* net is removed from namespace list, can't race with
* other modifiers, also netns core already waited for a
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
new file mode 100644
index 000000000000..9e82250cbb70
--- /dev/null
+++ b/net/mptcp/pm_userspace.c
@@ -0,0 +1,454 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Multipath TCP
+ *
+ * Copyright (c) 2022, Intel Corporation.
+ */
+
+#include "protocol.h"
+#include "mib.h"
+
+void mptcp_free_local_addr_list(struct mptcp_sock *msk)
+{
+ struct mptcp_pm_addr_entry *entry, *tmp;
+ struct sock *sk = (struct sock *)msk;
+ LIST_HEAD(free_list);
+
+ if (!mptcp_pm_is_userspace(msk))
+ return;
+
+ spin_lock_bh(&msk->pm.lock);
+ list_splice_init(&msk->pm.userspace_pm_local_addr_list, &free_list);
+ spin_unlock_bh(&msk->pm.lock);
+
+ list_for_each_entry_safe(entry, tmp, &free_list, list) {
+ sock_kfree_s(sk, entry, sizeof(*entry));
+ }
+}
+
+int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
+ struct mptcp_pm_addr_entry *entry)
+{
+ DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+ struct mptcp_pm_addr_entry *match = NULL;
+ struct sock *sk = (struct sock *)msk;
+ struct mptcp_pm_addr_entry *e;
+ bool addr_match = false;
+ bool id_match = false;
+ int ret = -EINVAL;
+
+ bitmap_zero(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+
+ spin_lock_bh(&msk->pm.lock);
+ list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
+ addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true);
+ if (addr_match && entry->addr.id == 0)
+ entry->addr.id = e->addr.id;
+ id_match = (e->addr.id == entry->addr.id);
+ if (addr_match && id_match) {
+ match = e;
+ break;
+ } else if (addr_match || id_match) {
+ break;
+ }
+ __set_bit(e->addr.id, id_bitmap);
+ }
+
+ if (!match && !addr_match && !id_match) {
+ /* Memory for the entry is allocated from the
+ * sock option buffer.
+ */
+ e = sock_kmalloc(sk, sizeof(*e), GFP_ATOMIC);
+ if (!e) {
+ spin_unlock_bh(&msk->pm.lock);
+ return -ENOMEM;
+ }
+
+ *e = *entry;
+ if (!e->addr.id)
+ e->addr.id = find_next_zero_bit(id_bitmap,
+ MPTCP_PM_MAX_ADDR_ID + 1,
+ 1);
+ list_add_tail_rcu(&e->list, &msk->pm.userspace_pm_local_addr_list);
+ ret = e->addr.id;
+ } else if (match) {
+ ret = entry->addr.id;
+ }
+
+ spin_unlock_bh(&msk->pm.lock);
+ return ret;
+}
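+
+/* Editor's note (worked example, not in the patch): with entries for ids
+ * {1, 2, 4} on the list and no address or id collision, the loop above sets
+ * those bits and find_next_zero_bit(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, 1)
+ * hands the new entry id 3; the search starts at 1, so id 0 stays reserved
+ * for the initial subflow.
+ */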
+
+int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
+ unsigned int id,
+ u8 *flags, int *ifindex)
+{
+ struct mptcp_pm_addr_entry *entry, *match = NULL;
+
+ *flags = 0;
+ *ifindex = 0;
+
+ spin_lock_bh(&msk->pm.lock);
+ list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (id == entry->addr.id) {
+ match = entry;
+ break;
+ }
+ }
+ spin_unlock_bh(&msk->pm.lock);
+ if (match) {
+ *flags = match->flags;
+ *ifindex = match->ifindex;
+ }
+
+ return 0;
+}
+
+int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
+ struct mptcp_addr_info *skc)
+{
+ struct mptcp_pm_addr_entry new_entry;
+ __be16 msk_sport = ((struct inet_sock *)
+ inet_sk((struct sock *)msk))->inet_sport;
+
+ memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry));
+ new_entry.addr = *skc;
+ new_entry.addr.id = 0;
+ new_entry.flags = MPTCP_PM_ADDR_FLAG_IMPLICIT;
+
+ if (new_entry.addr.port == msk_sport)
+ new_entry.addr.port = 0;
+
+ return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry);
+}
+
+int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
+ struct nlattr *addr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct mptcp_pm_addr_entry addr_val;
+ struct mptcp_sock *msk;
+ int err = -EINVAL;
+ u32 token_val;
+
+ if (!addr || !token) {
+ GENL_SET_ERR_MSG(info, "missing required inputs");
+ return err;
+ }
+
+ token_val = nla_get_u32(token);
+
+ msk = mptcp_token_get_sock(sock_net(skb->sk), token_val);
+ if (!msk) {
+ NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ return err;
+ }
+
+ if (!mptcp_pm_is_userspace(msk)) {
+ GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
+ goto announce_err;
+ }
+
+ err = mptcp_pm_parse_entry(addr, info, true, &addr_val);
+ if (err < 0) {
+ GENL_SET_ERR_MSG(info, "error parsing local address");
+ goto announce_err;
+ }
+
+ if (addr_val.addr.id == 0 || !(addr_val.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
+ GENL_SET_ERR_MSG(info, "invalid addr id or flags");
+ goto announce_err;
+ }
+
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val);
+ if (err < 0) {
+ GENL_SET_ERR_MSG(info, "did not match address and id");
+ goto announce_err;
+ }
+
+ lock_sock((struct sock *)msk);
+ spin_lock_bh(&msk->pm.lock);
+
+ if (mptcp_pm_alloc_anno_list(msk, &addr_val)) {
+ mptcp_pm_announce_addr(msk, &addr_val.addr, false);
+ mptcp_pm_nl_addr_send_ack(msk);
+ }
+
+ spin_unlock_bh(&msk->pm.lock);
+ release_sock((struct sock *)msk);
+
+ err = 0;
+ announce_err:
+ sock_put((struct sock *)msk);
+ return err;
+}
+
+int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
+ struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID];
+ struct mptcp_pm_addr_entry *match = NULL;
+ struct mptcp_pm_addr_entry *entry;
+ struct mptcp_sock *msk;
+ LIST_HEAD(free_list);
+ int err = -EINVAL;
+ u32 token_val;
+ u8 id_val;
+
+ if (!id || !token) {
+ GENL_SET_ERR_MSG(info, "missing required inputs");
+ return err;
+ }
+
+ id_val = nla_get_u8(id);
+ token_val = nla_get_u32(token);
+
+ msk = mptcp_token_get_sock(sock_net(skb->sk), token_val);
+ if (!msk) {
+ NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ return err;
+ }
+
+ if (!mptcp_pm_is_userspace(msk)) {
+ GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
+ goto remove_err;
+ }
+
+ lock_sock((struct sock *)msk);
+
+ list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (entry->addr.id == id_val) {
+ match = entry;
+ break;
+ }
+ }
+
+ if (!match) {
+ GENL_SET_ERR_MSG(info, "address with specified id not found");
+ release_sock((struct sock *)msk);
+ goto remove_err;
+ }
+
+ list_move(&match->list, &free_list);
+
+ mptcp_pm_remove_addrs_and_subflows(msk, &free_list);
+
+ release_sock((struct sock *)msk);
+
+ list_for_each_entry_safe(match, entry, &free_list, list) {
+ sock_kfree_s((struct sock *)msk, match, sizeof(*match));
+ }
+
+ err = 0;
+ remove_err:
+ sock_put((struct sock *)msk);
+ return err;
+}
+
+int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
+ struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
+ struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct mptcp_addr_info addr_r;
+ struct mptcp_addr_info addr_l;
+ struct mptcp_sock *msk;
+ int err = -EINVAL;
+ struct sock *sk;
+ u32 token_val;
+
+ if (!laddr || !raddr || !token) {
+ GENL_SET_ERR_MSG(info, "missing required inputs");
+ return err;
+ }
+
+ token_val = nla_get_u32(token);
+
+ msk = mptcp_token_get_sock(genl_info_net(info), token_val);
+ if (!msk) {
+ NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ return err;
+ }
+
+ if (!mptcp_pm_is_userspace(msk)) {
+ GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
+ goto create_err;
+ }
+
+ err = mptcp_pm_parse_addr(laddr, info, &addr_l);
+ if (err < 0) {
+ NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr");
+ goto create_err;
+ }
+
+ if (addr_l.id == 0) {
+ NL_SET_ERR_MSG_ATTR(info->extack, laddr, "missing local addr id");
+ goto create_err;
+ }
+
+ err = mptcp_pm_parse_addr(raddr, info, &addr_r);
+ if (err < 0) {
+ NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr");
+ goto create_err;
+ }
+
+ sk = &msk->sk.icsk_inet.sk;
+ lock_sock(sk);
+
+ err = __mptcp_subflow_connect(sk, &addr_l, &addr_r);
+
+ release_sock(sk);
+
+ create_err:
+ sock_put((struct sock *)msk);
+ return err;
+}
+
+static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk,
+ const struct mptcp_addr_info *local,
+ const struct mptcp_addr_info *remote)
+{
+ struct mptcp_subflow_context *subflow;
+
+ if (local->family != remote->family)
+ return NULL;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ const struct inet_sock *issk;
+ struct sock *ssk;
+
+ ssk = mptcp_subflow_tcp_sock(subflow);
+
+ if (local->family != ssk->sk_family)
+ continue;
+
+ issk = inet_sk(ssk);
+
+ switch (ssk->sk_family) {
+ case AF_INET:
+ if (issk->inet_saddr != local->addr.s_addr ||
+ issk->inet_daddr != remote->addr.s_addr)
+ continue;
+ break;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ case AF_INET6: {
+ const struct ipv6_pinfo *pinfo = inet6_sk(ssk);
+
+ if (!ipv6_addr_equal(&local->addr6, &pinfo->saddr) ||
+ !ipv6_addr_equal(&remote->addr6, &ssk->sk_v6_daddr))
+ continue;
+ break;
+ }
+#endif
+ default:
+ continue;
+ }
+
+ if (issk->inet_sport == local->port &&
+ issk->inet_dport == remote->port)
+ return ssk;
+ }
+
+ return NULL;
+}
+
+int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
+ struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
+ struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct mptcp_addr_info addr_l;
+ struct mptcp_addr_info addr_r;
+ struct mptcp_sock *msk;
+ struct sock *sk, *ssk;
+ int err = -EINVAL;
+ u32 token_val;
+
+ if (!laddr || !raddr || !token) {
+ GENL_SET_ERR_MSG(info, "missing required inputs");
+ return err;
+ }
+
+ token_val = nla_get_u32(token);
+
+ msk = mptcp_token_get_sock(genl_info_net(info), token_val);
+ if (!msk) {
+ NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ return err;
+ }
+
+ if (!mptcp_pm_is_userspace(msk)) {
+ GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
+ goto destroy_err;
+ }
+
+ err = mptcp_pm_parse_addr(laddr, info, &addr_l);
+ if (err < 0) {
+ NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr");
+ goto destroy_err;
+ }
+
+ err = mptcp_pm_parse_addr(raddr, info, &addr_r);
+ if (err < 0) {
+ NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr");
+ goto destroy_err;
+ }
+
+ if (addr_l.family != addr_r.family) {
+ GENL_SET_ERR_MSG(info, "address families do not match");
+ goto destroy_err;
+ }
+
+ if (!addr_l.port || !addr_r.port) {
+ GENL_SET_ERR_MSG(info, "missing local or remote port");
+ goto destroy_err;
+ }
+
+ sk = &msk->sk.icsk_inet.sk;
+ lock_sock(sk);
+ ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r);
+ if (ssk) {
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+
+ mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN);
+ mptcp_close_ssk(sk, ssk, subflow);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW);
+ err = 0;
+ } else {
+ err = -ESRCH;
+ }
+ release_sock(sk);
+
+destroy_err:
+ sock_put((struct sock *)msk);
+ return err;
+}
+
+int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
+ struct mptcp_pm_addr_entry *loc,
+ struct mptcp_pm_addr_entry *rem, u8 bkup)
+{
+ struct mptcp_sock *msk;
+ int ret = -EINVAL;
+ u32 token_val;
+
+ token_val = nla_get_u32(token);
+
+ msk = mptcp_token_get_sock(net, token_val);
+ if (!msk)
+ return ret;
+
+ if (!mptcp_pm_is_userspace(msk))
+ goto set_flags_err;
+
+ if (loc->addr.family == AF_UNSPEC ||
+ rem->addr.family == AF_UNSPEC)
+ goto set_flags_err;
+
+ lock_sock((struct sock *)msk);
+ ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc->addr, &rem->addr, bkup);
+ release_sock((struct sock *)msk);
+
+set_flags_err:
+ sock_put((struct sock *)msk);
+ return ret;
+}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index f60f01b14fac..b6dc6e260334 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -117,6 +117,9 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
list_add(&subflow->node, &msk->conn_list);
sock_hold(ssock->sk);
subflow->request_mptcp = 1;
+
+ /* This is the first subflow, always with id 0 */
+ subflow->local_id_valid = 1;
mptcp_sock_graft(msk->first, sk->sk_socket);
return 0;
@@ -147,9 +150,15 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
MPTCP_SKB_CB(from)->map_seq, MPTCP_SKB_CB(to)->map_seq,
to->len, MPTCP_SKB_CB(from)->end_seq);
MPTCP_SKB_CB(to)->end_seq = MPTCP_SKB_CB(from)->end_seq;
- kfree_skb_partial(from, fragstolen);
+
+ /* note the fwd memory can reach a negative value after accounting
+ * for the delta, but the later skb free will restore a
+ * non-negative one
+ */
atomic_add(delta, &sk->sk_rmem_alloc);
mptcp_rmem_charge(sk, delta);
+ kfree_skb_partial(from, fragstolen);
+
return true;
}
@@ -164,8 +173,8 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
static void __mptcp_rmem_reclaim(struct sock *sk, int amount)
{
- amount >>= SK_MEM_QUANTUM_SHIFT;
- mptcp_sk(sk)->rmem_fwd_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
+ amount >>= PAGE_SHIFT;
+ mptcp_sk(sk)->rmem_fwd_alloc -= amount << PAGE_SHIFT;
__sk_mem_reduce_allocated(sk, amount);
}
@@ -178,8 +187,8 @@ static void mptcp_rmem_uncharge(struct sock *sk, int size)
reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
/* see sk_mem_uncharge() for the rationale behind the following schema */
- if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD))
- __mptcp_rmem_reclaim(sk, SK_RECLAIM_CHUNK);
+ if (unlikely(reclaimable >= PAGE_SIZE))
+ __mptcp_rmem_reclaim(sk, reclaimable);
}
static void mptcp_rfree(struct sk_buff *skb)
@@ -213,7 +222,7 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb)
seq = MPTCP_SKB_CB(skb)->map_seq;
end_seq = MPTCP_SKB_CB(skb)->end_seq;
- max_seq = READ_ONCE(msk->rcv_wnd_sent);
+ max_seq = atomic64_read(&msk->rcv_wnd_sent);
pr_debug("msk=%p seq=%llx limit=%llx empty=%d", msk, seq, max_seq,
RB_EMPTY_ROOT(&msk->out_of_order_queue));
@@ -222,7 +231,7 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb)
mptcp_drop(sk, skb);
pr_debug("oow by %lld, rcv_wnd_sent %llu\n",
(unsigned long long)end_seq - (unsigned long long)max_seq,
- (unsigned long long)msk->rcv_wnd_sent);
+ (unsigned long long)atomic64_read(&msk->rcv_wnd_sent));
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_NODSSWINDOW);
return;
}
@@ -320,20 +329,16 @@ static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size)
struct mptcp_sock *msk = mptcp_sk(sk);
int amt, amount;
- if (size < msk->rmem_fwd_alloc)
+ if (size <= msk->rmem_fwd_alloc)
return true;
+ size -= msk->rmem_fwd_alloc;
amt = sk_mem_pages(size);
- amount = amt << SK_MEM_QUANTUM_SHIFT;
- msk->rmem_fwd_alloc += amount;
- if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) {
- if (ssk->sk_forward_alloc < amount) {
- msk->rmem_fwd_alloc -= amount;
- return false;
- }
+ amount = amt << PAGE_SHIFT;
+ if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV))
+ return false;
- ssk->sk_forward_alloc -= amount;
- }
+ msk->rmem_fwd_alloc += amount;
return true;
}
@@ -466,9 +471,12 @@ static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
static void mptcp_set_datafin_timeout(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
+ u32 retransmits;
+
+ retransmits = min_t(u32, icsk->icsk_retransmits,
+ ilog2(TCP_RTO_MAX / TCP_RTO_MIN));
- mptcp_sk(sk)->timer_ival = min(TCP_RTO_MAX,
- TCP_RTO_MIN << icsk->icsk_retransmits);
+ mptcp_sk(sk)->timer_ival = TCP_RTO_MIN << retransmits;
}
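
/* Editor's note (worked example, HZ-independent): TCP_RTO_MIN == 200ms and
 * TCP_RTO_MAX == 120s give TCP_RTO_MAX / TCP_RTO_MIN == 600 and
 * ilog2(600) == 9, so the shift saturates at 9 and the timeout tops out at
 * 200ms << 9 == 102.4s, just below TCP_RTO_MAX. The old
 * "min(TCP_RTO_MAX, TCP_RTO_MIN << icsk_retransmits)" bounded the result
 * but not the shift itself, which is undefined once icsk_retransmits
 * reaches the width of the type.
 */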
static void __mptcp_set_timeout(struct sock *sk, long tout)
@@ -494,19 +502,24 @@ static void mptcp_set_timeout(struct sock *sk)
__mptcp_set_timeout(sk, tout);
}
-static bool tcp_can_send_ack(const struct sock *ssk)
+static inline bool tcp_can_send_ack(const struct sock *ssk)
{
return !((1 << inet_sk_state_load(ssk)) &
(TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE | TCPF_LISTEN));
}
-void mptcp_subflow_send_ack(struct sock *ssk)
+void __mptcp_subflow_send_ack(struct sock *ssk)
+{
+ if (tcp_can_send_ack(ssk))
+ tcp_send_ack(ssk);
+}
+
+static void mptcp_subflow_send_ack(struct sock *ssk)
{
bool slow;
slow = lock_sock_fast(ssk);
- if (tcp_can_send_ack(ssk))
- tcp_send_ack(ssk);
+ __mptcp_subflow_send_ack(ssk);
unlock_sock_fast(ssk, slow);
}
@@ -649,9 +662,9 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
skb = skb_peek(&ssk->sk_receive_queue);
if (!skb) {
- /* if no data is found, a racing workqueue/recvmsg
- * already processed the new data, stop here or we
- * can enter an infinite loop
+ /* With racing move_skbs_to_msk() and __mptcp_move_skbs(),
+ * a different CPU can have already processed the pending
+ * data, stop here or we can enter an infinite loop
*/
if (!moved)
done = true;
@@ -659,9 +672,9 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
}
if (__mptcp_check_fallback(msk)) {
- /* if we are running under the workqueue, TCP could have
- * collapsed skbs between dummy map creation and now
- * be sure to adjust the size
+ /* Under fallback skbs have no MPTCP extension and TCP could
+ * collapse them between the dummy map creation and the
+ * current dequeue. Be sure to adjust the map size.
*/
map_remaining = skb->len;
subflow->map_data_len = skb->len;
@@ -960,25 +973,6 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
df->data_seq + df->data_len == msk->write_seq;
}
-static void __mptcp_mem_reclaim_partial(struct sock *sk)
-{
- int reclaimable = mptcp_sk(sk)->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
-
- lockdep_assert_held_once(&sk->sk_lock.slock);
-
- if (reclaimable > SK_MEM_QUANTUM)
- __mptcp_rmem_reclaim(sk, reclaimable - 1);
-
- sk_mem_reclaim_partial(sk);
-}
-
-static void mptcp_mem_reclaim_partial(struct sock *sk)
-{
- mptcp_data_lock(sk);
- __mptcp_mem_reclaim_partial(sk);
- mptcp_data_unlock(sk);
-}
-
static void dfrag_uncharge(struct sock *sk, int len)
{
sk_mem_uncharge(sk, len);
@@ -998,7 +992,6 @@ static void __mptcp_clean_una(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_data_frag *dtmp, *dfrag;
- bool cleaned = false;
u64 snd_una;
/* on fallback we just need to ignore snd_una, as this is really
@@ -1021,7 +1014,6 @@ static void __mptcp_clean_una(struct sock *sk)
}
dfrag_clear(sk, dfrag);
- cleaned = true;
}
dfrag = mptcp_rtx_head(sk);
@@ -1043,7 +1035,6 @@ static void __mptcp_clean_una(struct sock *sk)
dfrag->already_sent -= delta;
dfrag_uncharge(sk, delta);
- cleaned = true;
}
/* all retransmitted data acked, recovery completed */
@@ -1051,9 +1042,6 @@ static void __mptcp_clean_una(struct sock *sk)
msk->recovery = false;
out:
- if (cleaned && tcp_under_memory_pressure(sk))
- __mptcp_mem_reclaim_partial(sk);
-
if (snd_una == READ_ONCE(msk->snd_nxt) &&
snd_una == READ_ONCE(msk->write_seq)) {
if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
@@ -1135,18 +1123,21 @@ struct mptcp_sendmsg_info {
bool data_lock_held;
};
-static int mptcp_check_allowed_size(struct mptcp_sock *msk, u64 data_seq,
- int avail_size)
+static int mptcp_check_allowed_size(const struct mptcp_sock *msk, struct sock *ssk,
+ u64 data_seq, int avail_size)
{
u64 window_end = mptcp_wnd_end(msk);
+ u64 mptcp_snd_wnd;
if (__mptcp_check_fallback(msk))
return avail_size;
- if (!before64(data_seq + avail_size, window_end)) {
- u64 allowed_size = window_end - data_seq;
+ mptcp_snd_wnd = window_end - data_seq;
+ avail_size = min_t(unsigned int, mptcp_snd_wnd, avail_size);
- return min_t(unsigned int, allowed_size, avail_size);
+ if (unlikely(tcp_sk(ssk)->snd_wnd < mptcp_snd_wnd)) {
+ tcp_sk(ssk)->snd_wnd = min_t(u64, U32_MAX, mptcp_snd_wnd);
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_SNDWNDSHARED);
}
return avail_size;
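
/* Editor's note: the MPTCP-level send window is shared among subflows; when
 * the subflow's cached tcp snd_wnd lags behind (say 4KB while
 * window_end - data_seq is 64KB), it is lifted to the shared value above and
 * the event is counted via MPTCP_MIB_SNDWNDSHARED.
 */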
@@ -1193,6 +1184,7 @@ static struct sk_buff *__mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, g
tcp_skb_entail(ssk, skb);
return skb;
}
+ tcp_skb_tsorted_anchor_cleanup(skb);
kfree_skb(skb);
return NULL;
}
@@ -1201,12 +1193,6 @@ static struct sk_buff *mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, boo
{
gfp_t gfp = data_lock_held ? GFP_ATOMIC : sk->sk_allocation;
- if (unlikely(tcp_under_memory_pressure(sk))) {
- if (data_lock_held)
- __mptcp_mem_reclaim_partial(sk);
- else
- mptcp_mem_reclaim_partial(sk);
- }
return __mptcp_alloc_tx_skb(sk, ssk, gfp);
}
@@ -1222,6 +1208,22 @@ static void mptcp_update_data_checksum(struct sk_buff *skb, int added)
mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset));
}
+static void mptcp_update_infinite_map(struct mptcp_sock *msk,
+ struct sock *ssk,
+ struct mptcp_ext *mpext)
+{
+ if (!mpext)
+ return;
+
+ mpext->infinite_map = 1;
+ mpext->data_len = 0;
+
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX);
+ mptcp_subflow_ctx(ssk)->send_infinite_map = 0;
+ pr_fallback(msk);
+ mptcp_do_fallback(ssk);
+}
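+
+/* Editor's note: per RFC 8684, an "infinite mapping" (DSS data_len == 0)
+ * tells the peer that this mapping covers all data from here on, i.e. the
+ * connection is falling back to plain TCP semantics on the remaining
+ * subflow, matching the mptcp_do_fallback() call above.
+ */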
+
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
struct mptcp_data_frag *dfrag,
struct mptcp_sendmsg_info *info)
@@ -1244,6 +1246,9 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
info->limit > dfrag->data_len))
return 0;
+ if (unlikely(!__tcp_can_send(ssk)))
+ return -EAGAIN;
+
/* compute send limit */
info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
copy = info->size_goal;
@@ -1264,7 +1269,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
i = skb_shinfo(skb)->nr_frags;
can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset);
- if (!can_coalesce && i >= sysctl_max_skb_frags) {
+ if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
tcp_mark_push(tcp_sk(ssk), skb);
goto alloc_skb;
}
@@ -1282,7 +1287,7 @@ alloc_skb:
}
/* Zero window and all data acked? Probe. */
- copy = mptcp_check_allowed_size(msk, data_seq, copy);
+ copy = mptcp_check_allowed_size(msk, ssk, data_seq, copy);
if (copy == 0) {
u64 snd_una = READ_ONCE(msk->snd_una);
@@ -1353,6 +1358,9 @@ alloc_skb:
out:
if (READ_ONCE(msk->csum_enabled))
mptcp_update_data_checksum(skb, copy);
+ if (mptcp_subflow_ctx(ssk)->send_infinite_map)
+ mptcp_update_infinite_map(msk, ssk, mpext);
+ trace_mptcp_sendmsg_frag(mpext);
mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
return copy;
}
@@ -1414,7 +1422,8 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
if (__mptcp_check_fallback(msk)) {
if (!msk->first)
return NULL;
- return sk_stream_memory_free(msk->first) ? msk->first : NULL;
+ return __tcp_can_send(msk->first) &&
+ sk_stream_memory_free(msk->first) ? msk->first : NULL;
}
/* re-use last subflow, if the burst allow that */
@@ -1472,11 +1481,16 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
* to check that subflow has a non empty cwin.
*/
ssk = send_info[SSK_MODE_ACTIVE].ssk;
- if (!ssk || !sk_stream_memory_free(ssk) || !tcp_sk(ssk)->snd_wnd)
+ if (!ssk || !sk_stream_memory_free(ssk))
return NULL;
- burst = min_t(int, MPTCP_SEND_BURST_SIZE, tcp_sk(ssk)->snd_wnd);
+ burst = min_t(int, MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt);
wmem = READ_ONCE(ssk->sk_wmem_queued);
+ if (!burst) {
+ msk->last_snd = NULL;
+ return ssk;
+ }
+
subflow = mptcp_subflow_ctx(ssk);
subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem +
READ_ONCE(ssk->sk_pacing_rate) * burst,
@@ -1530,8 +1544,9 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
struct mptcp_sendmsg_info info = {
.flags = flags,
};
+ bool do_check_data_fin = false;
struct mptcp_data_frag *dfrag;
- int len, copied = 0;
+ int len;
while ((dfrag = mptcp_send_head(sk))) {
info.sent = dfrag->already_sent;
@@ -1560,12 +1575,14 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
if (ret <= 0) {
+ if (ret == -EAGAIN)
+ continue;
mptcp_push_release(ssk, &info);
goto out;
}
+ do_check_data_fin = true;
info.sent += ret;
- copied += ret;
len -= ret;
mptcp_update_post_push(msk, dfrag, ret);
@@ -1581,7 +1598,7 @@ out:
/* ensure the rtx timer is running */
if (!mptcp_timer_pending(sk))
mptcp_reset_timer(sk);
- if (copied)
+ if (do_check_data_fin)
__mptcp_check_send_data_fin(sk);
}
@@ -1656,10 +1673,42 @@ static void mptcp_set_nospace(struct sock *sk)
set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags);
}
+static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
+ size_t len, int *copied_syn)
+{
+ unsigned int saved_flags = msg->msg_flags;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ int ret;
+
+ lock_sock(ssk);
+ msg->msg_flags |= MSG_DONTWAIT;
+ msk->connect_flags = O_NONBLOCK;
+ msk->is_sendmsg = 1;
+ ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL);
+ msk->is_sendmsg = 0;
+ msg->msg_flags = saved_flags;
+ release_sock(ssk);
+
+ /* do the blocking bits of inet_stream_connect outside the ssk socket lock */
+ if (ret == -EINPROGRESS && !(msg->msg_flags & MSG_DONTWAIT)) {
+ ret = __inet_stream_connect(sk->sk_socket, msg->msg_name,
+ msg->msg_namelen, msg->msg_flags, 1);
+
+ /* Keep the same behaviour of plain TCP: zero the copied bytes in
+ * case of any error, except timeout or signal
+ */
+ if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
+ *copied_syn = 0;
+ }
+
+ return ret;
+}
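+
+/* Editor's note: hedged userspace-side sketch of the defer-connect path
+ * above (assumes TCP_FASTOPEN_CONNECT is enabled on the MPTCP socket, added
+ * elsewhere in the same series; daddr/buf are placeholders):
+ *
+ *	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
+ *	int one = 1;
+ *
+ *	setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT, &one, sizeof(one));
+ *	connect(fd, (struct sockaddr *)&daddr, sizeof(daddr)); // returns 0
+ *	send(fd, buf, len, 0); // SYN + data via mptcp_sendmsg_fastopen()
+ */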
+
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct page_frag *pfrag;
+ struct socket *ssock;
size_t copied = 0;
int ret = 0;
long timeo;
@@ -1673,14 +1722,30 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
lock_sock(sk);
+ ssock = __mptcp_nmpc_socket(msk);
+ if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) {
+ int copied_syn = 0;
+
+ ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
+ copied += copied_syn;
+ if (ret == -EINPROGRESS && copied_syn > 0)
+ goto out;
+ else if (ret)
+ goto do_error;
+ }
+
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
ret = sk_stream_wait_connect(sk, &timeo);
if (ret)
- goto out;
+ goto do_error;
}
+ ret = -EPIPE;
+ if (unlikely(sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)))
+ goto do_error;
+
pfrag = sk_page_frag(sk);
while (msg_data_left(msg)) {
@@ -1689,11 +1754,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
bool dfrag_collapsed;
size_t psize, offset;
- if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) {
- ret = -EPIPE;
- goto out;
- }
-
/* reuse tail pfrag, if possible, or carve a new one from the
* page allocator
*/
@@ -1725,7 +1785,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (copy_page_from_iter(dfrag->page, offset, psize,
&msg->msg_iter) != psize) {
ret = -EFAULT;
- goto out;
+ goto do_error;
}
/* data successfully copied into the write queue */
@@ -1757,7 +1817,7 @@ wait_for_memory:
__mptcp_push_pending(sk, msg->msg_flags);
ret = sk_stream_wait_memory(sk, &timeo);
if (ret)
- goto out;
+ goto do_error;
}
if (copied)
@@ -1765,7 +1825,14 @@ wait_for_memory:
out:
release_sock(sk);
- return copied ? : ret;
+ return copied;
+
+do_error:
+ if (copied)
+ goto out;
+
+ copied = sk_stream_error(sk, msg->msg_flags, ret);
+ goto out;
}
static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
@@ -1869,7 +1936,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
if (msk->rcvq_space.copied <= msk->rcvq_space.space)
goto new_measure;
- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
int rcvmem, rcvbuf;
u64 rcvwin, grow;
@@ -1887,7 +1954,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
do_div(rcvwin, advmss);
rcvbuf = min_t(u64, rcvwin * rcvmem,
- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
u32 window_clamp;
@@ -2004,7 +2071,7 @@ static unsigned int mptcp_inq_hint(const struct sock *sk)
}
static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct scm_timestamping_internal tss;
@@ -2022,7 +2089,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
goto out_err;
}
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
len = min_t(size_t, len, INT_MAX);
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
@@ -2268,8 +2335,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
- if (flags & MPTCP_CF_FASTCLOSE)
+ if (flags & MPTCP_CF_FASTCLOSE) {
+ /* be sure to force the tcp_disconnect() path,
+ * to generate the egress reset
+ */
+ ssk->sk_lingertime = 0;
+ sock_set_flag(ssk, SOCK_LINGER);
subflow->send_fastclose = 1;
+ }
need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
if (!dispose_it) {
@@ -2297,6 +2370,11 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
kfree_rcu(subflow, rcu);
} else {
/* otherwise tcp will dispose of the ssk and subflow ctx */
+ if (ssk->sk_state == TCP_LISTEN) {
+ tcp_set_state(ssk, TCP_CLOSE);
+ mptcp_subflow_queue_clean(ssk);
+ inet_csk_listen_stop(ssk);
+ }
__tcp_close(ssk, 0);
/* close acquired an extra ref */
@@ -2342,7 +2420,7 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk)
might_sleep();
- list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
+ mptcp_for_each_subflow_safe(msk, subflow, tmp) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
if (inet_sk_state_load(ssk) != TCP_CLOSE)
@@ -2385,7 +2463,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
mptcp_token_destroy(msk);
- list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
+ mptcp_for_each_subflow_safe(msk, subflow, tmp) {
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
bool slow;
@@ -2397,12 +2475,31 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
unlock_sock_fast(tcp_sk, slow);
}
+ /* Mirror the tcp_reset() error propagation */
+ switch (sk->sk_state) {
+ case TCP_SYN_SENT:
+ sk->sk_err = ECONNREFUSED;
+ break;
+ case TCP_CLOSE_WAIT:
+ sk->sk_err = EPIPE;
+ break;
+ case TCP_CLOSE:
+ return;
+ default:
+ sk->sk_err = ECONNRESET;
+ }
+
inet_sk_state_store(sk, TCP_CLOSE);
sk->sk_shutdown = SHUTDOWN_MASK;
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
- mptcp_close_wake_up(sk);
+ /* the calling mptcp_worker will properly destroy the socket */
+ if (sock_flag(sk, SOCK_DEAD))
+ return;
+
+ sk->sk_state_change(sk);
+ sk_error_report(sk);
}
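/* The switch added above mirrors tcp_reset(): which error a reset
 * surfaces depends on the connection phase. The same mapping as a
 * standalone sketch:
 */
#include <errno.h>

enum phase { PH_SYN_SENT, PH_CLOSE_WAIT, PH_OTHER };

static int reset_errno(enum phase p)
{
	switch (p) {
	case PH_SYN_SENT:
		return ECONNREFUSED;	/* connect() still in flight */
	case PH_CLOSE_WAIT:
		return EPIPE;		/* peer already sent (DATA_)FIN */
	default:
		return ECONNRESET;	/* established and friends */
	}
}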
static void __mptcp_retrans(struct sock *sk)
@@ -2457,6 +2554,7 @@ static void __mptcp_retrans(struct sock *sk)
dfrag->already_sent = max(dfrag->already_sent, info.sent);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
+ WRITE_ONCE(msk->allow_infinite_fallback, false);
}
release_sock(ssk);
@@ -2468,10 +2566,60 @@ reset_timer:
mptcp_reset_timer(sk);
}
+/* schedule the timeout timer for the relevant event: either close timeout
+ * or mp_fail timeout. The close timeout takes precedence over the mp_fail one
+ */
+void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout)
+{
+ struct sock *sk = (struct sock *)msk;
+ unsigned long timeout, close_timeout;
+
+ if (!fail_tout && !sock_flag(sk, SOCK_DEAD))
+ return;
+
+ close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN;
+
+ /* the close timeout takes precedence over the fail one, and here at least one of
+ * them is active
+ */
+ timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout;
+
+ sk_reset_timer(sk, &sk->sk_timer, timeout);
+}
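/* mptcp_reset_timeout() rebases the close deadline from the tcp_jiffies32
 * stamp saved in icsk_mtup.probe_timestamp (repurposed at close time, see
 * the __mptcp_close() cleanup hunk below) onto the sk_timer clock, then
 * lets an armed close timeout always beat a pending MP_FAIL one.
 * Userspace sketch of the arithmetic (names hypothetical):
 */
static unsigned long pick_timeout(int sock_dead, unsigned long close_stamp32,
				  unsigned long now32, unsigned long now,
				  unsigned long timewait_len,
				  unsigned long fail_tout)
{
	/* close_stamp32 - now32 is the (negative) age of the close event */
	unsigned long close_tout = close_stamp32 - now32 + now + timewait_len;

	return sock_dead ? close_tout : fail_tout;
}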
+
+static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
+{
+ struct sock *ssk = msk->first;
+ bool slow;
+
+ if (!ssk)
+ return;
+
+ pr_debug("MP_FAIL doesn't respond, reset the subflow");
+
+ slow = lock_sock_fast(ssk);
+ mptcp_subflow_reset(ssk);
+ WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0);
+ unlock_sock_fast(ssk, slow);
+
+ mptcp_reset_timeout(msk, 0);
+}
+
+static void mptcp_do_fastclose(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow, *tmp;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ mptcp_for_each_subflow_safe(msk, subflow, tmp)
+ __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
+ subflow, MPTCP_CF_FASTCLOSE);
+}
+
static void mptcp_worker(struct work_struct *work)
{
struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
struct sock *sk = &msk->sk.icsk_inet.sk;
+ unsigned long fail_tout;
int state;
lock_sock(sk);
@@ -2495,11 +2643,15 @@ static void mptcp_worker(struct work_struct *work)
* closed, but we need the msk around to reply to incoming DATA_FIN,
* even if it is orphaned and in FIN_WAIT2 state
*/
- if (sock_flag(sk, SOCK_DEAD) &&
- (mptcp_check_close_timeout(sk) || sk->sk_state == TCP_CLOSE)) {
- inet_sk_state_store(sk, TCP_CLOSE);
- __mptcp_destroy_sock(sk);
- goto unlock;
+ if (sock_flag(sk, SOCK_DEAD)) {
+ if (mptcp_check_close_timeout(sk)) {
+ inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_do_fastclose(sk);
+ }
+ if (sk->sk_state == TCP_CLOSE) {
+ __mptcp_destroy_sock(sk);
+ goto unlock;
+ }
}
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
@@ -2508,6 +2660,10 @@ static void mptcp_worker(struct work_struct *work)
if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
__mptcp_retrans(sk);
+ fail_tout = msk->first ? READ_ONCE(mptcp_subflow_ctx(msk->first)->fail_tout) : 0;
+ if (fail_tout && time_after(jiffies, fail_tout))
+ mptcp_mp_fail_no_response(msk);
+
unlock:
release_sock(sk);
sock_put(sk);
@@ -2531,6 +2687,7 @@ static int __mptcp_init_sock(struct sock *sk)
msk->first = NULL;
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
+ WRITE_ONCE(msk->allow_infinite_fallback, true);
msk->recovery = false;
mptcp_pm_data_init(msk);
@@ -2579,8 +2736,8 @@ static int mptcp_init_sock(struct sock *sk)
mptcp_ca_reset(sk);
sk_sockets_allocated_inc(sk);
- sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
- sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
+ sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
+ sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
return 0;
}
@@ -2595,7 +2752,7 @@ static void __mptcp_clear_xmit(struct sock *sk)
dfrag_clear(sk, dfrag);
}
-static void mptcp_cancel_work(struct sock *sk)
+void mptcp_cancel_work(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -2714,30 +2871,16 @@ static void __mptcp_wr_shutdown(struct sock *sk)
static void __mptcp_destroy_sock(struct sock *sk)
{
- struct mptcp_subflow_context *subflow, *tmp;
struct mptcp_sock *msk = mptcp_sk(sk);
- LIST_HEAD(conn_list);
pr_debug("msk=%p", msk);
might_sleep();
- /* join list will be eventually flushed (with rst) at sock lock release time*/
- list_splice_init(&msk->conn_list, &conn_list);
-
- sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
+ mptcp_stop_timer(sk);
sk_stop_timer(sk, &sk->sk_timer);
msk->pm.status = 0;
- /* clears msk->subflow, allowing the following loop to close
- * even the initial subflow
- */
- mptcp_dispose_initial_subflow(msk);
- list_for_each_entry_safe(subflow, tmp, &conn_list, node) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- __mptcp_close_ssk(sk, ssk, subflow, 0);
- }
-
sk->sk_prot->destroy(sk);
WARN_ON_ONCE(msk->rmem_fwd_alloc);
@@ -2749,12 +2892,24 @@ static void __mptcp_destroy_sock(struct sock *sk)
sock_put(sk);
}
-static void mptcp_close(struct sock *sk, long timeout)
+static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
+{
+ /* Concurrent splices from sk_receive_queue into receive_queue will
+ * always show at least one non-empty queue when checked in this order.
+ */
+ if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) &&
+ skb_queue_empty_lockless(&msk->receive_queue))
+ return 0;
+
+ return EPOLLIN | EPOLLRDNORM;
+}
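/* Why mptcp_check_readable() may test both queues locklessly: skbs only
 * migrate from the subflow-fed sk_receive_queue into msk->receive_queue.
 * Checking source first, destination second means a concurrent splice can
 * only make data visible to the second check, never hide it:
 *
 *   reader                          splicer
 *   ------                          -------
 *   sk_receive_queue empty? (yes)   moves skb: sk_receive_queue -> receive_queue
 *   receive_queue empty?    (no)  => EPOLLIN reported, no event lost
 */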
+
+bool __mptcp_close(struct sock *sk, long timeout)
{
struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
bool do_cancel_work = false;
- lock_sock(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
@@ -2762,18 +2917,29 @@ static void mptcp_close(struct sock *sk, long timeout)
goto cleanup;
}
- if (mptcp_close_state(sk))
+ if (mptcp_check_readable(msk)) {
+ /* unread data is still queued on the msk: do the MPTCP equivalent of a TCP reset */
+ inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_do_fastclose(sk);
+ } else if (mptcp_close_state(sk)) {
__mptcp_wr_shutdown(sk);
+ }
sk_stream_wait_close(sk, timeout);
cleanup:
/* orphan all the subflows */
inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
- mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
+ mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast_nested(ssk);
+ /* since the close timeout takes precedence over the fail one,
+ * cancel the latter
+ */
+ if (ssk == msk->first)
+ subflow->fail_tout = 0;
+
sock_orphan(ssk);
unlock_sock_fast(ssk, slow);
}
@@ -2782,14 +2948,25 @@ cleanup:
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
if (mptcp_sk(sk)->token)
- mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
+ mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
if (sk->sk_state == TCP_CLOSE) {
__mptcp_destroy_sock(sk);
do_cancel_work = true;
} else {
- sk_reset_timer(sk, &sk->sk_timer, jiffies + TCP_TIMEWAIT_LEN);
+ mptcp_reset_timeout(msk, 0);
}
+
+ return do_cancel_work;
+}
+
+static void mptcp_close(struct sock *sk, long timeout)
+{
+ bool do_cancel_work;
+
+ lock_sock(sk);
+
+ do_cancel_work = __mptcp_close(sk, timeout);
release_sock(sk);
if (do_cancel_work)
mptcp_cancel_work(sk);
@@ -2797,7 +2974,7 @@ cleanup:
sock_put(sk);
}
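/* __mptcp_close()/mptcp_close() now follow the common locked/unlocked
 * split: the double-underscore variant expects the caller to hold the msk
 * socket lock (mptcp_subflow_queue_clean() below relies on that), while
 * the wrapper takes the lock itself and defers the work that must run
 * unlocked. Generic shape of the idiom (sketch, not kernel code):
 */
#include <pthread.h>
#include <stdbool.h>

struct obj {
	pthread_mutex_t lock;
	bool need_cleanup;
};

static void finish_op(struct obj *o) { (void)o; /* must run unlocked */ }

static bool __do_op(struct obj *o)	/* caller holds o->lock */
{
	return o->need_cleanup;
}

static void do_op(struct obj *o)
{
	bool deferred;

	pthread_mutex_lock(&o->lock);
	deferred = __do_op(o);
	pthread_mutex_unlock(&o->lock);
	if (deferred)
		finish_op(o);
}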
-static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
+void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
const struct ipv6_pinfo *ssk6 = inet6_sk(ssk);
@@ -2822,24 +2999,20 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
static int mptcp_disconnect(struct sock *sk, int flags)
{
- struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
inet_sk_state_store(sk, TCP_CLOSE);
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-
- __mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_FASTCLOSE);
- }
-
- sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
+ mptcp_stop_timer(sk);
sk_stop_timer(sk, &sk->sk_timer);
if (mptcp_sk(sk)->token)
mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
- mptcp_destroy_common(msk);
+ /* msk->subflow is still intact, the following will not free the first
+ * subflow
+ */
+ mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
msk->last_snd = NULL;
WRITE_ONCE(msk->flags, 0);
msk->cb_flags = 0;
@@ -2908,7 +3081,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
ack_seq++;
WRITE_ONCE(msk->ack_seq, ack_seq);
- WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
+ atomic64_set(&msk->rcv_wnd_sent, ack_seq);
}
sock_reset_flag(nsk, SOCK_RCU_FREE);
@@ -2989,12 +3162,17 @@ out:
return newsk;
}
-void mptcp_destroy_common(struct mptcp_sock *msk)
+void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
{
+ struct mptcp_subflow_context *subflow, *tmp;
struct sock *sk = (struct sock *)msk;
__mptcp_clear_xmit(sk);
+ /* join list will be eventually flushed (with rst) at sock lock release time */
+ mptcp_for_each_subflow_safe(msk, subflow, tmp)
+ __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, flags);
+
/* move to sk_receive_queue, sk_stream_kill_queues will purge it */
mptcp_data_lock(sk);
skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue);
@@ -3009,13 +3187,18 @@ void mptcp_destroy_common(struct mptcp_sock *msk)
msk->rmem_fwd_alloc = 0;
mptcp_token_destroy(msk);
mptcp_pm_free_anno_list(msk);
+ mptcp_free_local_addr_list(msk);
}
static void mptcp_destroy(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- mptcp_destroy_common(msk);
+ /* clears msk->subflow, allowing the following to close
+ * even the initial subflow
+ */
+ mptcp_dispose_initial_subflow(msk);
+ mptcp_destroy_common(msk, 0);
sk_sockets_allocated_dec(sk);
}
@@ -3084,15 +3267,19 @@ static void mptcp_release_cb(struct sock *sk)
spin_lock_bh(&sk->sk_lock.slock);
}
- /* be sure to set the current sk state before tacking actions
- * depending on sk_state
- */
- if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags))
- __mptcp_set_connected(sk);
if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
__mptcp_clean_una_wakeup(sk);
- if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
- __mptcp_error_report(sk);
+ if (unlikely(msk->cb_flags)) {
+ /* be sure to set the current sk state before taking actions
+ * depending on sk_state, that is, processing MPTCP_ERROR_REPORT
+ */
+ if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags))
+ __mptcp_set_connected(sk);
+ if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
+ __mptcp_error_report(sk);
+ if (__test_and_clear_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags))
+ msk->last_snd = NULL;
+ }
__mptcp_update_rmem(sk);
}
@@ -3196,9 +3383,9 @@ void mptcp_finish_connect(struct sock *ssk)
WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
WRITE_ONCE(msk->snd_nxt, msk->write_seq);
WRITE_ONCE(msk->ack_seq, ack_seq);
- WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
WRITE_ONCE(msk->can_ack, 1);
WRITE_ONCE(msk->snd_una, msk->write_seq);
+ atomic64_set(&msk->rcv_wnd_sent, ack_seq);
mptcp_pm_new_connection(msk, ssk, 0);
@@ -3229,15 +3416,12 @@ bool mptcp_finish_join(struct sock *ssk)
return false;
}
- if (!msk->pm.server_side)
+ if (!list_empty(&subflow->node))
goto out;
if (!mptcp_pm_allow_new_subflow(msk))
goto err_prohibited;
- if (WARN_ON_ONCE(!list_empty(&subflow->node)))
- goto err_prohibited;
-
/* active connections are already on conn_list.
* If we can't acquire msk socket lock here, let the release callback
* handle it
@@ -3263,6 +3447,7 @@ err_prohibited:
}
subflow->map_seq = READ_ONCE(msk->ack_seq);
+ WRITE_ONCE(msk->allow_infinite_fallback, false);
out:
mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
@@ -3294,6 +3479,17 @@ static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
return 0;
delta = msk->write_seq - v;
+ if (__mptcp_check_fallback(msk) && msk->first) {
+ struct tcp_sock *tp = tcp_sk(msk->first);
+
+ /* the first subflow is disconnected after close - see
+ * __mptcp_close_ssk(). tcp_disconnect() moves the write_seq
+ * so ignore that status, too.
+ */
+ if (!((1 << msk->first->sk_state) &
+ (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE)))
+ delta += READ_ONCE(tp->write_seq) - tp->snd_una;
+ }
if (delta > INT_MAX)
delta = INT_MAX;
@@ -3333,10 +3529,73 @@ static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
return put_user(answ, (int __user *)arg);
}
+static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow)
+{
+ subflow->request_mptcp = 0;
+ __mptcp_do_fallback(msk);
+}
+
+static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ struct socket *ssock;
+ int err = -EINVAL;
+
+ ssock = __mptcp_nmpc_socket(msk);
+ if (!ssock)
+ return -EINVAL;
+
+ mptcp_token_destroy(msk);
+ inet_sk_state_store(sk, TCP_SYN_SENT);
+ subflow = mptcp_subflow_ctx(ssock->sk);
+#ifdef CONFIG_TCP_MD5SIG
+ /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
+ * TCP option space.
+ */
+ if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
+ mptcp_subflow_early_fallback(msk, subflow);
+#endif
+ if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
+ MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
+ mptcp_subflow_early_fallback(msk, subflow);
+ }
+ if (likely(!__mptcp_check_fallback(msk)))
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE);
+
+ /* if reaching here via the fastopen/sendmsg path, the caller already
+ * acquired the subflow socket lock, too.
+ */
+ if (msk->is_sendmsg)
+ err = __inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags, 1);
+ else
+ err = inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags);
+ inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
+
+ /* on successful connect, the msk state will be moved to established by
+ * subflow_finish_connect()
+ */
+ if (unlikely(err && err != -EINPROGRESS)) {
+ inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
+ return err;
+ }
+
+ mptcp_copy_inaddrs(sk, ssock->sk);
+
+ /* non-blocking connect, mptcp-level inet_stream_connect will error out
+ * without changing the socket state, update it here.
+ */
+ if (err == -EINPROGRESS)
+ sk->sk_socket->state = ssock->state;
+ return err;
+}
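/* From user space nothing changes with the dedicated ->connect above: an
 * MPTCP socket is created with IPPROTO_MPTCP and connected like plain
 * TCP. Minimal client sketch (error handling trimmed):
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP	262
#endif

static int mptcp_client(const char *ip, unsigned short port)
{
	struct sockaddr_in dst = {
		.sin_family = AF_INET,
		.sin_port = htons(port),
	};
	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);

	if (fd < 0)
		return -1;
	inet_pton(AF_INET, ip, &dst.sin_addr);
	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}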
+
static struct proto mptcp_prot = {
.name = "MPTCP",
.owner = THIS_MODULE,
.init = mptcp_init_sock,
+ .connect = mptcp_connect,
.disconnect = mptcp_disconnect,
.close = mptcp_close,
.accept = mptcp_accept,
@@ -3353,7 +3612,10 @@ static struct proto mptcp_prot = {
.get_port = mptcp_get_port,
.forward_alloc_get = mptcp_forward_alloc_get,
.sockets_allocated = &mptcp_sockets_allocated,
+
.memory_allocated = &tcp_memory_allocated,
+ .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc,
+
.memory_pressure = &tcp_memory_pressure,
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
@@ -3385,77 +3647,16 @@ unlock:
return err;
}
-static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
- struct mptcp_subflow_context *subflow)
-{
- subflow->request_mptcp = 0;
- __mptcp_do_fallback(msk);
-}
-
static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
- struct mptcp_sock *msk = mptcp_sk(sock->sk);
- struct mptcp_subflow_context *subflow;
- struct socket *ssock;
- int err = -EINVAL;
+ int ret;
lock_sock(sock->sk);
- if (uaddr) {
- if (addr_len < sizeof(uaddr->sa_family))
- goto unlock;
-
- if (uaddr->sa_family == AF_UNSPEC) {
- err = mptcp_disconnect(sock->sk, flags);
- sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
- goto unlock;
- }
- }
-
- if (sock->state != SS_UNCONNECTED && msk->subflow) {
- /* pending connection or invalid state, let existing subflow
- * cope with that
- */
- ssock = msk->subflow;
- goto do_connect;
- }
-
- ssock = __mptcp_nmpc_socket(msk);
- if (!ssock)
- goto unlock;
-
- mptcp_token_destroy(msk);
- inet_sk_state_store(sock->sk, TCP_SYN_SENT);
- subflow = mptcp_subflow_ctx(ssock->sk);
-#ifdef CONFIG_TCP_MD5SIG
- /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
- * TCP option space.
- */
- if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
- mptcp_subflow_early_fallback(msk, subflow);
-#endif
- if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
- MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
- mptcp_subflow_early_fallback(msk, subflow);
- }
- if (likely(!__mptcp_check_fallback(msk)))
- MPTCP_INC_STATS(sock_net(sock->sk), MPTCP_MIB_MPCAPABLEACTIVE);
-
-do_connect:
- err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
- sock->state = ssock->state;
-
- /* on successful connect, the msk state will be moved to established by
- * subflow_finish_connect()
- */
- if (!err || err == -EINPROGRESS)
- mptcp_copy_inaddrs(sock->sk, ssock->sk);
- else
- inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
-
-unlock:
+ mptcp_sk(sock->sk)->connect_flags = flags;
+ ret = __inet_stream_connect(sock, uaddr, addr_len, flags, 0);
release_sock(sock->sk);
- return err;
+ return ret;
}
static int mptcp_listen(struct socket *sock, int backlog)
@@ -3521,7 +3722,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (mptcp_is_fully_established(newsk))
mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL);
- mptcp_copy_inaddrs(newsk, msk->first);
mptcp_rcv_space_init(msk, msk->first);
mptcp_propagate_sndbuf(newsk, msk->first);
@@ -3540,18 +3740,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
return err;
}
-static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
-{
- /* Concurrent splices from sk_receive_queue into receive_queue will
- * always show at least one non-empty queue when checked in this order.
- */
- if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) &&
- skb_queue_empty_lockless(&msk->receive_queue))
- return 0;
-
- return EPOLLIN | EPOLLRDNORM;
-}
-
static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
@@ -3593,13 +3781,16 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
mask |= mptcp_check_readable(msk);
mask |= mptcp_check_writeable(msk);
+ } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
+ /* cf tcp_poll() note about TFO */
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
mask |= EPOLLHUP;
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
- /* This barrier is coupled with smp_wmb() in tcp_reset() */
+ /* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
smp_rmb();
if (sk->sk_err)
mask |= EPOLLERR;
@@ -3684,8 +3875,8 @@ void __init mptcp_proto_init(void)
for_each_possible_cpu(cpu) {
delegated = per_cpu_ptr(&mptcp_delegated_actions, cpu);
INIT_LIST_HEAD(&delegated->head);
- netif_tx_napi_add(&mptcp_napi_dev, &delegated->napi, mptcp_napi_poll,
- NAPI_POLL_WEIGHT);
+ netif_napi_add_tx(&mptcp_napi_dev, &delegated->napi,
+ mptcp_napi_poll);
napi_enable(&delegated->napi);
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0e6b42c76ea0..6a09ab99a12d 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -11,6 +11,7 @@
#include <net/tcp.h>
#include <net/inet_connection_sock.h>
#include <uapi/linux/mptcp.h>
+#include <net/genetlink.h>
#define MPTCP_SUPPORTED_VERSION 1
@@ -82,7 +83,6 @@
/* MPTCP MP_JOIN flags */
#define MPTCPOPT_BACKUP BIT(0)
-#define MPTCPOPT_HMAC_LEN 20
#define MPTCPOPT_THMAC_LEN 8
/* MPTCP MP_CAPABLE flags */
@@ -124,6 +124,7 @@
#define MPTCP_RETRANSMIT 4
#define MPTCP_FLUSH_JOIN_LIST 5
#define MPTCP_CONNECTED 6
+#define MPTCP_RESET_SCHEDULER 7
static inline bool before64(__u64 seq1, __u64 seq2)
{
@@ -182,6 +183,14 @@ enum mptcp_pm_status {
*/
};
+enum mptcp_pm_type {
+ MPTCP_PM_TYPE_KERNEL = 0,
+ MPTCP_PM_TYPE_USERSPACE,
+
+ __MPTCP_PM_TYPE_NR,
+ __MPTCP_PM_TYPE_MAX = __MPTCP_PM_TYPE_NR - 1,
+};
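/* MPTCP_PM_TYPE_KERNEL keeps the existing in-kernel path manager;
 * MPTCP_PM_TYPE_USERSPACE defers address announcement and subflow
 * creation/destruction to a daemon driving the netlink commands declared
 * further down (mptcp_nl_cmd_announce() and friends). The per-netns
 * default is selected via mptcp_get_pm_type() below.
 */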
+
/* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */
#define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1)
@@ -198,6 +207,7 @@ struct mptcp_pm_data {
struct mptcp_addr_info local;
struct mptcp_addr_info remote;
struct list_head anno_list;
+ struct list_head userspace_pm_local_addr_list;
spinlock_t lock; /*protects the whole PM data */
@@ -210,6 +220,7 @@ struct mptcp_pm_data {
u8 add_addr_signaled;
u8 add_addr_accepted;
u8 local_addr_used;
+ u8 pm_type;
u8 subflows;
u8 status;
DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
@@ -217,6 +228,14 @@ struct mptcp_pm_data {
struct mptcp_rm_list rm_list_rx;
};
+struct mptcp_pm_addr_entry {
+ struct list_head list;
+ struct mptcp_addr_info addr;
+ u8 flags;
+ int ifindex;
+ struct socket *lsk;
+};
+
struct mptcp_data_frag {
struct list_head list;
u64 data_seq;
@@ -236,7 +255,7 @@ struct mptcp_sock {
u64 write_seq;
u64 snd_nxt;
u64 ack_seq;
- u64 rcv_wnd_sent;
+ atomic64_t rcv_wnd_sent;
u64 rcv_data_fin_seq;
int rmem_fwd_alloc;
struct sock *last_snd;
@@ -262,9 +281,13 @@ struct mptcp_sock {
bool rcv_fastclose;
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
bool csum_enabled;
+ bool allow_infinite_fallback;
+ u8 mpc_endpoint_id;
u8 recvmsg_inq:1,
cork:1,
- nodelay:1;
+ nodelay:1,
+ is_sendmsg:1;
+ int connect_flags;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
@@ -285,6 +308,7 @@ struct mptcp_sock {
u32 setsockopt_seq;
char ca_name[TCP_CA_NAME_MAX];
+ struct mptcp_sock *dl_next;
};
#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
@@ -292,6 +316,8 @@ struct mptcp_sock {
#define mptcp_for_each_subflow(__msk, __subflow) \
list_for_each_entry(__subflow, &((__msk)->conn_list), node)
+#define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp) \
+ list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node)
static inline void msk_owned_by_me(const struct mptcp_sock *msk)
{
@@ -408,7 +434,7 @@ DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
struct mptcp_subflow_context {
struct list_head node;/* conn_list of subflows */
- char reset_start[0];
+ struct_group(reset,
unsigned long avg_pacing_rate; /* protected by msk socket lock */
u64 local_key;
@@ -439,10 +465,13 @@ struct mptcp_subflow_context {
send_mp_prio : 1,
send_mp_fail : 1,
send_fastclose : 1,
+ send_infinite_map : 1,
rx_eof : 1,
can_ack : 1, /* only after processing the remote key */
disposable : 1, /* ctx can be free at ulp release time */
- stale : 1; /* unable to snd/rcv data, do not use for xmit */
+ stale : 1, /* unable to snd/rcv data, do not use for xmit */
+ local_id_valid : 1, /* local_id is correctly initialized */
+ valid_csum_seen : 1; /* at least one csum validated */
enum mptcp_data_avail data_avail;
u32 remote_nonce;
u64 thmac;
@@ -457,8 +486,9 @@ struct mptcp_subflow_context {
u8 stale_count;
long delegated_status;
+ unsigned long fail_tout;
- char reset_end[0];
+ );
struct list_head delegated_node; /* link into delegated_action, protected by local BH */
@@ -468,9 +498,7 @@ struct mptcp_subflow_context {
struct sock *tcp_sock; /* tcp sk backpointer */
struct sock *conn; /* parent mptcp_sock */
const struct inet_connection_sock_af_ops *icsk_af_ops;
- void (*tcp_data_ready)(struct sock *sk);
void (*tcp_state_change)(struct sock *sk);
- void (*tcp_write_space)(struct sock *sk);
void (*tcp_error_report)(struct sock *sk);
struct rcu_head rcu;
@@ -494,7 +522,7 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
static inline void
mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
{
- memset(subflow->reset_start, 0, subflow->reset_end - subflow->reset_start);
+ memset(&subflow->reset, 0, sizeof(subflow->reset));
subflow->request_mptcp = 1;
}
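/* struct_group(reset, ...) replaces the old reset_start[0]/reset_end[0]
 * marker pair: the members are wrapped in an anonymous union containing
 * both a transparent struct and a named copy, so subflow->local_key and
 * the grouped subflow->reset alias the same storage, and the memset size
 * is computed by the compiler (keeping FORTIFY_SOURCE happy). Standalone
 * illustration of the expansion (sketch):
 */
#include <string.h>

struct ctx {
	int keep_me;		/* outside the wiped region */
	union {
		struct {
			int a;
			long b;
		};		/* members stay directly addressable */
		struct {
			int a;
			long b;
		} reset;	/* ...and get a group name for memset() */
	};
};

static void ctx_reset(struct ctx *c)
{
	memset(&c->reset, 0, sizeof(c->reset));
}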
@@ -572,6 +600,8 @@ unsigned int mptcp_get_add_addr_timeout(const struct net *net);
int mptcp_is_checksum_enabled(const struct net *net);
int mptcp_allow_join_id0(const struct net *net);
unsigned int mptcp_stale_loss_cnt(const struct net *net);
+int mptcp_get_pm_type(const struct net *net);
+void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
@@ -582,10 +612,16 @@ void __init mptcp_subflow_init(void);
void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
-void mptcp_subflow_send_ack(struct sock *ssk);
+void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
+void mptcp_subflow_queue_clean(struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
+bool __mptcp_close(struct sock *sk, long timeout);
+void mptcp_cancel_work(struct sock *sk);
+
+bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
+ const struct mptcp_addr_info *b, bool use_port);
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
@@ -595,16 +631,19 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct sockaddr_storage *addr,
unsigned short family);
-static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
+static inline bool __tcp_can_send(const struct sock *ssk)
{
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ /* only send if our side has not closed yet */
+ return ((1 << inet_sk_state_load(ssk)) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT));
+}
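/* The (1 << state) & MASK test in __tcp_can_send() is the usual one-branch
 * way to ask "is the state in this set"; the TCPF_* constants are the bit
 * forms of the TCP_* states. Standalone sketch of the idiom:
 */
enum st { ST_IDLE, ST_OPEN, ST_DRAIN, ST_DEAD };

#define STF(x)	(1u << (x))

static int can_send(enum st s)
{
	return (1u << s) & (STF(ST_OPEN) | STF(ST_DRAIN));
}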
+static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
+{
/* can't send if JOIN hasn't completed yet (i.e. not yet usable for mptcp) */
if (subflow->request_join && !subflow->fully_established)
return false;
- /* only send if our side has not closed yet */
- return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT));
+ return __tcp_can_send(mptcp_subflow_tcp_sock(subflow));
}
void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow);
@@ -614,27 +653,14 @@ bool mptcp_subflow_active(struct mptcp_subflow_context *subflow);
static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
struct mptcp_subflow_context *ctx)
{
- sk->sk_data_ready = ctx->tcp_data_ready;
+ sk->sk_data_ready = sock_def_readable;
sk->sk_state_change = ctx->tcp_state_change;
- sk->sk_write_space = ctx->tcp_write_space;
+ sk->sk_write_space = sk_stream_write_space;
sk->sk_error_report = ctx->tcp_error_report;
inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
}
-static inline bool mptcp_has_another_subflow(struct sock *ssk)
-{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp;
- struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-
- mptcp_for_each_subflow(msk, tmp) {
- if (tmp != subflow)
- return true;
- }
-
- return false;
-}
-
void __init mptcp_proto_init(void);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
int __init mptcp_proto_v6_init(void);
@@ -643,12 +669,12 @@ int __init mptcp_proto_v6_init(void);
struct sock *mptcp_sk_clone(const struct sock *sk,
const struct mptcp_options_received *mp_opt,
struct request_sock *req);
-void mptcp_get_options(const struct sock *sk,
- const struct sk_buff *skb,
+void mptcp_get_options(const struct sk_buff *skb,
struct mptcp_options_received *mp_opt);
void mptcp_finish_connect(struct sock *sk);
void __mptcp_set_connected(struct sock *sk);
+void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout);
static inline bool mptcp_is_fully_established(struct sock *sk)
{
return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
@@ -701,7 +727,7 @@ static inline void mptcp_write_space(struct sock *sk)
}
}
-void mptcp_destroy_common(struct mptcp_sock *msk);
+void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags);
#define MPTCP_TOKEN_MAX_RETRIES 4
@@ -725,11 +751,16 @@ void mptcp_token_destroy(struct mptcp_sock *msk);
void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
-u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum);
+__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum);
void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
void mptcp_pm_data_reset(struct mptcp_sock *msk);
+int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
+ struct mptcp_addr_info *addr);
+int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
+ bool require_family,
+ struct mptcp_pm_addr_entry *entry);
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
@@ -740,37 +771,60 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk);
bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk);
void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
const struct mptcp_subflow_context *subflow);
-void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
+void mptcp_pm_add_addr_received(const struct sock *ssk,
const struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr);
+ const struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk);
void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk);
void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list);
void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
+int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addr,
+ struct mptcp_addr_info *rem,
+ u8 bkup);
+bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
+ const struct mptcp_pm_addr_entry *entry);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
mptcp_pm_del_add_timer(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr, bool check_id);
+ const struct mptcp_addr_info *addr, bool check_id);
struct mptcp_pm_add_entry *
-mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr);
-int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
+mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
+ const struct mptcp_addr_info *addr);
+int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
+ unsigned int id,
u8 *flags, int *ifindex);
-
+int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
+ unsigned int id,
+ u8 *flags, int *ifindex);
+int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
+ struct mptcp_pm_addr_entry *loc,
+ struct mptcp_pm_addr_entry *rem, u8 bkup);
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr,
bool echo);
int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
+void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
+ struct list_head *rm_list);
+
+int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
+ struct mptcp_pm_addr_entry *entry);
+void mptcp_free_local_addr_list(struct mptcp_sock *msk);
+int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info);
+int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info);
+int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info);
+int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info);
void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
const struct sock *ssk, gfp_t gfp);
-void mptcp_event_addr_announced(const struct mptcp_sock *msk, const struct mptcp_addr_info *info);
+void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info);
void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
+bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{
@@ -793,6 +847,16 @@ static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk)
return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_RM_ADDR_SIGNAL);
}
+static inline bool mptcp_pm_is_userspace(const struct mptcp_sock *msk)
+{
+ return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_USERSPACE;
+}
+
+static inline bool mptcp_pm_is_kernel(const struct mptcp_sock *msk)
+{
+ return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_KERNEL;
+}
+
static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port)
{
u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
@@ -816,24 +880,38 @@ static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list)
return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1;
}
-bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb,
+bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
unsigned int opt_size, unsigned int remaining,
struct mptcp_addr_info *addr, bool *echo,
- bool *port, bool *drop_other_suboptions);
+ bool *drop_other_suboptions);
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_rm_list *rm_list);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
+int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
void __init mptcp_pm_nl_init(void);
-void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
-unsigned int mptcp_pm_get_add_addr_signal_max(struct mptcp_sock *msk);
-unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk);
-unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk);
-unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk);
+unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk);
+unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk);
+unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk);
+unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk);
+
+/* called under PM lock */
+static inline void __mptcp_pm_close_subflow(struct mptcp_sock *msk)
+{
+ if (--msk->pm.subflows < mptcp_pm_get_subflows_max(msk))
+ WRITE_ONCE(msk->pm.accept_subflow, true);
+}
+
+static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk)
+{
+ spin_lock_bh(&msk->pm.lock);
+ __mptcp_pm_close_subflow(msk);
+ spin_unlock_bh(&msk->pm.lock);
+}
void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk);
@@ -867,23 +945,51 @@ static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
}
-static inline void mptcp_do_fallback(struct sock *sk)
+static inline void mptcp_do_fallback(struct sock *ssk)
{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct sock *sk = subflow->conn;
+ struct mptcp_sock *msk;
+ msk = mptcp_sk(sk);
__mptcp_do_fallback(msk);
+ if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) {
+ gfp_t saved_allocation = ssk->sk_allocation;
+
+ /* we are in a atomic (BH) scope, override ssk default for data
+ * fin allocation
+ */
+ ssk->sk_allocation = GFP_ATOMIC;
+ ssk->sk_shutdown |= SEND_SHUTDOWN;
+ tcp_shutdown(ssk, SEND_SHUTDOWN);
+ ssk->sk_allocation = saved_allocation;
+ }
}
#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a)
+static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
+{
+ struct mptcp_ext *mpext;
+
+ mpext = skb ? mptcp_get_ext(skb) : NULL;
+ if (mpext && mpext->infinite_map)
+ return true;
+
+ return false;
+}
+
+static inline bool is_active_ssk(struct mptcp_subflow_context *subflow)
+{
+ return (subflow->request_mptcp || subflow->request_join);
+}
+
static inline bool subflow_simultaneous_connect(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- struct sock *parent = subflow->conn;
return sk->sk_state == TCP_ESTABLISHED &&
- !mptcp_sk(parent)->pm.server_side &&
+ is_active_ssk(subflow) &&
!subflow->conn_finished;
}
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index dacf3cee0027..c7cb68c725b2 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -343,6 +343,8 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
case SO_RCVLOWAT:
case SO_RCVTIMEO_OLD:
case SO_RCVTIMEO_NEW:
+ case SO_SNDTIMEO_OLD:
+ case SO_SNDTIMEO_NEW:
case SO_BUSY_POLL:
case SO_PREFER_BUSY_POLL:
case SO_BUSY_POLL_BUDGET:
@@ -557,6 +559,7 @@ static bool mptcp_supported_sockopt(int level, int optname)
case TCP_NOTSENT_LOWAT:
case TCP_TX_DELAY:
case TCP_INQ:
+ case TCP_FASTOPEN_CONNECT:
return true;
}
@@ -565,7 +568,7 @@ static bool mptcp_supported_sockopt(int level, int optname)
/* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS,
* TCP_REPAIR_WINDOW are not supported, better avoid this mess
*/
- /* TCP_FASTOPEN_KEY, TCP_FASTOPEN TCP_FASTOPEN_CONNECT, TCP_FASTOPEN_NO_COOKIE,
+ /* TCP_FASTOPEN_KEY, TCP_FASTOPEN, TCP_FASTOPEN_NO_COOKIE,
* are not supported; fastopen is currently unsupported
*/
}
@@ -754,6 +757,31 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
return -EOPNOTSUPP;
}
+static int mptcp_setsockopt_sol_tcp_defer(struct mptcp_sock *msk, sockptr_t optval,
+ unsigned int optlen)
+{
+ struct socket *listener;
+
+ listener = __mptcp_nmpc_socket(msk);
+ if (!listener)
+ return 0; /* TCP_DEFER_ACCEPT does not fail */
+
+ return tcp_setsockopt(listener->sk, SOL_TCP, TCP_DEFER_ACCEPT, optval, optlen);
+}
+
+static int mptcp_setsockopt_sol_tcp_fastopen_connect(struct mptcp_sock *msk, sockptr_t optval,
+ unsigned int optlen)
+{
+ struct socket *sock;
+
+ /* Limit to first subflow */
+ sock = __mptcp_nmpc_socket(msk);
+ if (!sock)
+ return -EINVAL;
+
+ return tcp_setsockopt(sock->sk, SOL_TCP, TCP_FASTOPEN_CONNECT, optval, optlen);
+}
+
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -780,6 +808,10 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen);
case TCP_NODELAY:
return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen);
+ case TCP_DEFER_ACCEPT:
+ return mptcp_setsockopt_sol_tcp_defer(msk, optval, optlen);
+ case TCP_FASTOPEN_CONNECT:
+ return mptcp_setsockopt_sol_tcp_fastopen_connect(msk, optval, optlen);
}
return -EOPNOTSUPP;
@@ -851,15 +883,11 @@ out:
void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
{
- struct sock *sk = &msk->sk.icsk_inet.sk;
u32 flags = 0;
- bool slow;
u8 val;
memset(info, 0, sizeof(*info));
- slow = lock_sock_fast(sk);
-
info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
@@ -880,8 +908,6 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
-
- unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
@@ -1146,6 +1172,8 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
case TCP_CONGESTION:
case TCP_INFO:
case TCP_CC_INFO:
+ case TCP_DEFER_ACCEPT:
+ case TCP_FASTOPEN_CONNECT:
return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
optval, optlen);
case TCP_INQ:
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index bea47a1180dc..02a54d59697b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -62,7 +62,9 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
{
return mptcp_is_fully_established((void *)msk) &&
- READ_ONCE(msk->pm.accept_subflow);
+ ((mptcp_pm_is_userspace(msk) &&
+ mptcp_userspace_pm_active(msk)) ||
+ READ_ONCE(msk->pm.accept_subflow));
}
/* validate received token and create truncated hmac and nonce for SYN-ACK */
@@ -153,7 +155,7 @@ static int subflow_check_req(struct request_sock *req,
return -EINVAL;
#endif
- mptcp_get_options(sk_listener, skb, &mp_opt);
+ mptcp_get_options(skb, &mp_opt);
opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
@@ -250,7 +252,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
int err;
subflow_init_req(req, sk_listener);
- mptcp_get_options(sk_listener, skb, &mp_opt);
+ mptcp_get_options(skb, &mp_opt);
opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
@@ -344,9 +346,7 @@ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
thmac = get_unaligned_be64(hmac);
pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
- subflow, subflow->token,
- (unsigned long long)thmac,
- (unsigned long long)subflow->thmac);
+ subflow, subflow->token, thmac, subflow->thmac);
return thmac == subflow->thmac;
}
@@ -410,7 +410,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
- mptcp_get_options(sk, skb, &mp_opt);
+ mptcp_get_options(skb, &mp_opt);
if (subflow->request_mptcp) {
if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
MPTCP_INC_STATS(sock_net(sk),
@@ -443,6 +443,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->backup = mp_opt.backup;
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
+ subflow->remote_id = mp_opt.join_id;
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
subflow, subflow->thmac, subflow->remote_nonce,
subflow->backup);
@@ -483,9 +484,53 @@ do_reset:
mptcp_subflow_reset(sk);
}
+static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id)
+{
+ subflow->local_id = local_id;
+ subflow->local_id_valid = 1;
+}
+
+static int subflow_chk_local_id(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+ int err;
+
+ if (likely(subflow->local_id_valid))
+ return 0;
+
+ err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
+ if (err < 0)
+ return err;
+
+ subflow_set_local_id(subflow, err);
+ return 0;
+}
+
+static int subflow_rebuild_header(struct sock *sk)
+{
+ int err = subflow_chk_local_id(sk);
+
+ if (unlikely(err < 0))
+ return err;
+
+ return inet_sk_rebuild_header(sk);
+}
+
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+static int subflow_v6_rebuild_header(struct sock *sk)
+{
+ int err = subflow_chk_local_id(sk);
+
+ if (unlikely(err < 0))
+ return err;
+
+ return inet6_sk_rebuild_header(sk);
+}
+#endif
+
struct request_sock_ops mptcp_subflow_request_sock_ops;
-EXPORT_SYMBOL_GPL(mptcp_subflow_request_sock_ops);
-static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;
+static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops __ro_after_init;
static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
@@ -506,9 +551,9 @@ drop:
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
-static struct inet_connection_sock_af_ops subflow_v6_specific;
-static struct inet_connection_sock_af_ops subflow_v6m_specific;
+static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops __ro_after_init;
+static struct inet_connection_sock_af_ops subflow_v6_specific __ro_after_init;
+static struct inet_connection_sock_af_ops subflow_v6m_specific __ro_after_init;
static struct proto tcpv6_prot_override;
static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
@@ -557,29 +602,6 @@ static bool subflow_hmac_valid(const struct request_sock *req,
return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}
-static void mptcp_sock_destruct(struct sock *sk)
-{
- /* if new mptcp socket isn't accepted, it is free'd
- * from the tcp listener sockets request queue, linked
- * from req->sk. The tcp socket is released.
- * This calls the ULP release function which will
- * also remove the mptcp socket, via
- * sock_put(ctx->conn).
- *
- * Problem is that the mptcp socket will be in
- * ESTABLISHED state and will not have the SOCK_DEAD flag.
- * Both result in warnings from inet_sock_destruct.
- */
- if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
- sk->sk_state = TCP_CLOSE;
- WARN_ON_ONCE(sk->sk_socket);
- sock_orphan(sk);
- }
-
- mptcp_destroy_common(mptcp_sk(sk));
- inet_sock_destruct(sk);
-}
-
static void mptcp_force_close(struct sock *sk)
{
/* the msk is not yet exposed to user-space */
@@ -663,7 +685,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* reordered MPC will cause fallback, but we don't have other
* options.
*/
- mptcp_get_options(sk, skb, &mp_opt);
+ mptcp_get_options(skb, &mp_opt);
if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
fallback = true;
goto create_child;
@@ -673,7 +695,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
if (!new_msk)
fallback = true;
} else if (subflow_req->mp_join) {
- mptcp_get_options(sk, skb, &mp_opt);
+ mptcp_get_options(skb, &mp_opt);
if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ) ||
!subflow_hmac_valid(req, &mp_opt) ||
!mptcp_can_accept_new_subflow(subflow_req->msk)) {
@@ -701,6 +723,8 @@ create_child:
goto dispose_child;
}
+ if (new_msk)
+ mptcp_copy_inaddrs(new_msk, child);
subflow_drop_ctx(child);
goto out;
}
@@ -722,13 +746,17 @@ create_child:
/* new mpc subflow takes ownership of the newly
* created mptcp socket
*/
- new_msk->sk_destruct = mptcp_sock_destruct;
mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq;
mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1);
mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
ctx->conn = new_msk;
new_msk = NULL;
+ /* set msk addresses early to ensure mptcp_pm_get_local_id()
+ * uses the correct data
+ */
+ mptcp_copy_inaddrs(ctx->conn, child);
+
/* with OoO packets we can reach here without ingress
* mpc option
*/
@@ -790,7 +818,7 @@ dispose_child:
return child;
}
-static struct inet_connection_sock_af_ops subflow_specific;
+static struct inet_connection_sock_af_ops subflow_specific __ro_after_init;
static struct proto tcp_prot_override;
enum mapping_status {
@@ -798,7 +826,8 @@ enum mapping_status {
MAPPING_INVALID,
MAPPING_EMPTY,
MAPPING_DATA_FIN,
- MAPPING_DUMMY
+ MAPPING_DUMMY,
+ MAPPING_BAD_CSUM
};
static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
@@ -846,7 +875,7 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
u32 offset, seq, delta;
- u16 csum;
+ __sum16 csum;
int len;
if (!csum_reqd)
@@ -913,11 +942,10 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
subflow->map_data_csum);
if (unlikely(csum)) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
- subflow->send_mp_fail = 1;
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
- return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
+ return MAPPING_BAD_CSUM;
}
+ subflow->valid_csum_seen = 1;
return MAPPING_OK;
}
@@ -964,7 +992,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
data_len = mpext->data_len;
if (data_len == 0) {
+ pr_debug("infinite mapping received");
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
+ subflow->map_data_len = 0;
return MAPPING_INVALID;
}
@@ -1099,6 +1129,45 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss
}
}
+static bool subflow_can_fallback(struct mptcp_subflow_context *subflow)
+{
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ if (subflow->mp_join)
+ return false;
+ else if (READ_ONCE(msk->csum_enabled))
+ return !subflow->valid_csum_seen;
+ else
+ return !subflow->fully_established;
+}
+
+static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ unsigned long fail_tout;
+
+ /* graceful failure can happen only on the MPC subflow */
+ if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first)))
+ return;
+
+ /* since the close timeout takes precedence over the fail one,
+ * no need to start the latter when the first is already set
+ */
+ if (sock_flag((struct sock *)msk, SOCK_DEAD))
+ return;
+
+ /* we don't need extreme accuracy here, use a zero fail_tout as a special
+ * value meaning no fail timeout at all.
+ */
+ fail_tout = jiffies + TCP_RTO_MAX;
+ if (!fail_tout)
+ fail_tout = 1;
+ WRITE_ONCE(subflow->fail_tout, fail_tout);
+ tcp_send_ack(ssk);
+
+ mptcp_reset_timeout(msk, subflow->fail_tout);
+}
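/* fail_tout == 0 doubles as "no MP_FAIL timeout armed", so a computed
 * deadline that happens to wrap to 0 is nudged to 1 above. The same
 * zero-means-disarmed idiom, standalone (sketch):
 */
static unsigned long arm_deadline(unsigned long now, unsigned long delay)
{
	unsigned long t = now + delay;

	return t ? t : 1;	/* 0 is reserved for "disarmed" */
}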
+
static bool subflow_check_data_avail(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@@ -1107,7 +1176,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
struct sk_buff *skb;
if (!skb_peek(&ssk->sk_receive_queue))
- WRITE_ONCE(subflow->data_avail, 0);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
if (subflow->data_avail)
return true;
@@ -1118,10 +1187,8 @@ static bool subflow_check_data_avail(struct sock *ssk)
status = get_mapping_status(ssk, msk);
trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
- if (unlikely(status == MAPPING_INVALID))
- goto fallback;
-
- if (unlikely(status == MAPPING_DUMMY))
+ if (unlikely(status == MAPPING_INVALID || status == MAPPING_DUMMY ||
+ status == MAPPING_BAD_CSUM))
goto fallback;
if (status != MAPPING_OK)
@@ -1161,35 +1228,42 @@ no_data:
return false;
fallback:
- /* RFC 8684 section 3.7. */
- if (subflow->send_mp_fail) {
- if (mptcp_has_another_subflow(ssk)) {
+ if (!__mptcp_check_fallback(msk)) {
+ /* RFC 8684 section 3.7. */
+ if (status == MAPPING_BAD_CSUM &&
+ (subflow->mp_join || subflow->valid_csum_seen)) {
+ subflow->send_mp_fail = 1;
+
+ if (!READ_ONCE(msk->allow_infinite_fallback)) {
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
+ goto reset;
+ }
+ mptcp_subflow_fail(msk, ssk);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+ return true;
+ }
+
+ if (!subflow_can_fallback(subflow) && subflow->map_data_len) {
+ /* fatal protocol error, close the socket.
+ * subflow_error_report() will introduce the appropriate barriers
+ */
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMPTCP;
+
+reset:
+ ssk->sk_err = EBADMSG;
+ tcp_set_state(ssk, TCP_CLOSE);
while ((skb = skb_peek(&ssk->sk_receive_queue)))
sk_eat_skb(ssk, skb);
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ return false;
}
- ssk->sk_err = EBADMSG;
- tcp_set_state(ssk, TCP_CLOSE);
- subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
- tcp_send_active_reset(ssk, GFP_ATOMIC);
- WRITE_ONCE(subflow->data_avail, 0);
- return true;
- }
- if (subflow->mp_join || subflow->fully_established) {
- /* fatal protocol error, close the socket.
- * subflow_error_report() will introduce the appropriate barriers
- */
- ssk->sk_err = EBADMSG;
- tcp_set_state(ssk, TCP_CLOSE);
- subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMPTCP;
- tcp_send_active_reset(ssk, GFP_ATOMIC);
- WRITE_ONCE(subflow->data_avail, 0);
- return false;
+ mptcp_do_fallback(ssk);
}
- __mptcp_do_fallback(msk);
skb = skb_peek(&ssk->sk_receive_queue);
subflow->map_valid = 1;
subflow->map_seq = READ_ONCE(msk->ack_seq);
@@ -1207,7 +1281,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
if (subflow->map_valid &&
mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
subflow->map_valid = 0;
- WRITE_ONCE(subflow->data_avail, 0);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
pr_debug("Done with mapping: seq=%u data_len=%u",
subflow->map_subflow_seq,
@@ -1311,7 +1385,7 @@ static void subflow_write_space(struct sock *ssk)
mptcp_write_space(sk);
}
-static struct inet_connection_sock_af_ops *
+static const struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -1326,7 +1400,7 @@ void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
- struct inet_connection_sock_af_ops *target;
+ const struct inet_connection_sock_af_ops *target;
target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);
@@ -1380,20 +1454,20 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
struct sockaddr_storage addr;
int remote_id = remote->id;
int local_id = loc->id;
+ int err = -ENOTCONN;
struct socket *sf;
struct sock *ssk;
u32 remote_token;
int addrlen;
int ifindex;
u8 flags;
- int err;
if (!mptcp_is_fully_established(sk))
- return -ENOTCONN;
+ goto err_out;
err = mptcp_subflow_create_socket(sk, &sf);
if (err)
- return err;
+ goto err_out;
ssk = sf->sk;
subflow = mptcp_subflow_ctx(ssk);
@@ -1401,15 +1475,10 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
get_random_bytes(&subflow->local_nonce, sizeof(u32));
} while (!subflow->local_nonce);
- if (!local_id) {
- err = mptcp_pm_get_local_id(msk, (struct sock_common *)ssk);
- if (err < 0)
- goto failed;
+ if (local_id)
+ subflow_set_local_id(subflow, local_id);
- local_id = err;
- }
-
- mptcp_pm_get_flags_and_ifindex_by_id(sock_net(sk), local_id,
+ mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id,
&flags, &ifindex);
subflow->remote_key = msk->remote_key;
subflow->local_key = msk->local_key;
@@ -1432,7 +1501,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
remote_token, local_id, remote_id);
subflow->remote_token = remote_token;
- subflow->local_id = local_id;
subflow->remote_id = remote_id;
subflow->request_join = 1;
subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
@@ -1447,7 +1515,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
/* discard the subflow socket */
mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf));
- return err;
+ WRITE_ONCE(msk->allow_infinite_fallback, false);
+ return 0;
failed_unlink:
list_del(&subflow->node);
@@ -1456,6 +1525,12 @@ failed_unlink:
failed:
subflow->disposable = 1;
sock_release(sf);
+
+err_out:
+ /* we account subflows before creation, and these failures will not
+ * be caught by sk_state_change()
+ */
+ mptcp_pm_close_subflow(msk);
return err;
}
@@ -1542,7 +1617,7 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
/* the newly created socket really belongs to the owning MPTCP master
* socket, even if for additional subflows the allocation is performed
* by a kernel workqueue. Adjust inode references, so that the
- * procfs/diag interaces really show this one belonging to the correct
+ * procfs/diag interfaces really show this one belonging to the correct
* user.
*/
SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
@@ -1631,6 +1706,64 @@ static void subflow_state_change(struct sock *sk)
}
}
+void mptcp_subflow_queue_clean(struct sock *listener_ssk)
+{
+ struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
+ struct mptcp_sock *msk, *next, *head = NULL;
+ struct request_sock *req;
+
+ /* build a list of all unaccepted mptcp sockets */
+ spin_lock_bh(&queue->rskq_lock);
+ for (req = queue->rskq_accept_head; req; req = req->dl_next) {
+ struct mptcp_subflow_context *subflow;
+ struct sock *ssk = req->sk;
+ struct mptcp_sock *msk;
+
+ if (!sk_is_mptcp(ssk))
+ continue;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ if (!subflow || !subflow->conn)
+ continue;
+
+ /* skip if already in list */
+ msk = mptcp_sk(subflow->conn);
+ if (msk->dl_next || msk == head)
+ continue;
+
+ msk->dl_next = head;
+ head = msk;
+ }
+ spin_unlock_bh(&queue->rskq_lock);
+ if (!head)
+ return;
+
+ /* can't acquire the msk socket lock under the subflow one,
+	 * or it will cause an ABBA deadlock
+ */
+ release_sock(listener_ssk);
+
+ for (msk = head; msk; msk = next) {
+ struct sock *sk = (struct sock *)msk;
+ bool slow, do_cancel_work;
+
+ sock_hold(sk);
+ slow = lock_sock_fast_nested(sk);
+ next = msk->dl_next;
+ msk->first = NULL;
+ msk->dl_next = NULL;
+
+ do_cancel_work = __mptcp_close(sk, 0);
+ unlock_sock_fast(sk, slow);
+ if (do_cancel_work)
+ mptcp_cancel_work(sk);
+ sock_put(sk);
+ }
+
+ /* we are still under the listener msk socket lock */
+ lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
+}
+
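
The release/re-lock dance in mptcp_subflow_queue_clean() is the standard way out of an ABBA ordering problem: the msk lock must not be taken while the subflow (ssk) lock is held. A generic sketch of the pattern, with illustrative names only:

#include <linux/mutex.h>

/* caller holds 'a'; we need 'b', but the documented order is b -> a.
 * Drop 'a', take 'b', do the work, then restore the caller's state.
 */
static void abba_safe_section(struct mutex *a, struct mutex *b)
{
	mutex_unlock(a);
	mutex_lock(b);
	/* ... work that requires 'b' ... */
	mutex_unlock(b);
	mutex_lock(a);
}

mptcp_subflow_queue_clean() additionally pins each msk with sock_hold() before dropping the listener lock, so the objects cannot vanish in between.
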
static int subflow_ulp_init(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1657,10 +1790,12 @@ static int subflow_ulp_init(struct sock *sk)
tp->is_mptcp = 1;
ctx->icsk_af_ops = icsk->icsk_af_ops;
icsk->icsk_af_ops = subflow_default_af_ops(sk);
- ctx->tcp_data_ready = sk->sk_data_ready;
ctx->tcp_state_change = sk->sk_state_change;
- ctx->tcp_write_space = sk->sk_write_space;
ctx->tcp_error_report = sk->sk_error_report;
+
+ WARN_ON_ONCE(sk->sk_data_ready != sock_def_readable);
+ WARN_ON_ONCE(sk->sk_write_space != sk_stream_write_space);
+
sk->sk_data_ready = subflow_data_ready;
sk->sk_write_space = subflow_write_space;
sk->sk_state_change = subflow_state_change;
@@ -1715,9 +1850,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->conn_finished = 1;
new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
- new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
new_ctx->tcp_state_change = old_ctx->tcp_state_change;
- new_ctx->tcp_write_space = old_ctx->tcp_write_space;
new_ctx->tcp_error_report = old_ctx->tcp_error_report;
new_ctx->rel_write_seq = 1;
new_ctx->tcp_sock = newsk;
@@ -1731,15 +1864,22 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->token = subflow_req->token;
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->idsn = subflow_req->idsn;
+
+ /* this is the first subflow, id is always 0 */
+ new_ctx->local_id_valid = 1;
} else if (subflow_req->mp_join) {
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1;
new_ctx->fully_established = 1;
new_ctx->backup = subflow_req->backup;
- new_ctx->local_id = subflow_req->local_id;
new_ctx->remote_id = subflow_req->remote_id;
new_ctx->token = subflow_req->token;
new_ctx->thmac = subflow_req->thmac;
+
+ /* the subflow req id is valid, fetched via subflow_check_req()
+ * and subflow_token_join_request()
+ */
+ subflow_set_local_id(new_ctx, subflow_req->local_id);
}
}
@@ -1792,6 +1932,7 @@ void __init mptcp_subflow_init(void)
subflow_specific.conn_request = subflow_v4_conn_request;
subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
subflow_specific.sk_rx_dst_set = subflow_finish_connect;
+ subflow_specific.rebuild_header = subflow_rebuild_header;
tcp_prot_override = tcp_prot;
tcp_prot_override.release_cb = tcp_release_cb_override;
@@ -1804,6 +1945,7 @@ void __init mptcp_subflow_init(void)
subflow_v6_specific.conn_request = subflow_v6_conn_request;
subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;
+ subflow_v6_specific.rebuild_header = subflow_v6_rebuild_header;
subflow_v6m_specific = subflow_v6_specific;
subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
@@ -1811,6 +1953,7 @@ void __init mptcp_subflow_init(void)
subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
subflow_v6m_specific.net_frag_header_len = 0;
+ subflow_v6m_specific.rebuild_header = subflow_rebuild_header;
tcpv6_prot_override = tcpv6_prot;
tcpv6_prot_override.release_cb = tcp_release_cb_override;
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 7121ce2a47c0..80713febfac6 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -608,7 +608,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
bitmap = &ncf->bitmap;
spin_lock_irqsave(&nc->lock, flags);
- index = find_next_bit(bitmap, ncf->n_vids, 0);
+ index = find_first_bit(bitmap, ncf->n_vids);
if (index >= ncf->n_vids) {
spin_unlock_irqrestore(&nc->lock, flags);
return -1;
@@ -667,7 +667,7 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
return -1;
}
- index = find_next_zero_bit(bitmap, ncf->n_vids, 0);
+ index = find_first_zero_bit(bitmap, ncf->n_vids);
if (index < 0 || index >= ncf->n_vids) {
netdev_err(ndp->ndev.dev,
"Channel %u already has all VLAN filters set\n",
@@ -1803,7 +1803,8 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
pdev = to_platform_device(dev->dev.parent);
if (pdev) {
np = pdev->dev.of_node;
- if (np && of_get_property(np, "mlx,multi-host", NULL))
+ if (np && (of_get_property(np, "mellanox,multi-host", NULL) ||
+ of_get_property(np, "mlx,multi-host", NULL)))
ndp->mlx_multi_host = true;
}
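
The bitmap hunks are mechanical: find_next_bit(bitmap, size, 0) and find_first_bit(bitmap, size) return the same index, but the latter states the intent and has a dedicated, potentially faster implementation. As a self-contained check (a sketch, not from the tree):

#include <linux/bitmap.h>

/* both return the index of the first set bit, or >= n_vids if none */
static bool first_bit_equivalent(const unsigned long *bitmap,
				 unsigned long n_vids)
{
	return find_first_bit(bitmap, n_vids) ==
	       find_next_bit(bitmap, n_vids, 0);
}
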
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index c189b4c8a182..d27f4eccce6d 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -768,6 +768,7 @@ static struct genl_family ncsi_genl_family __ro_after_init = {
.module = THIS_MODULE,
.small_ops = ncsi_ops,
.n_small_ops = ARRAY_SIZE(ncsi_ops),
+ .resv_start_op = NCSI_CMD_SET_CHANNEL_MASK + 1,
};
static int __init ncsi_init_netlink(void)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index ddc54b6d18ee..4b8d04640ff3 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -144,7 +144,6 @@ config NF_CONNTRACK_ZONES
config NF_CONNTRACK_PROCFS
bool "Supply CT list in procfs (OBSOLETE)"
- default y
depends on PROC_FS
help
This option enables for the list of known conntrack entries
@@ -734,6 +733,14 @@ config NF_FLOW_TABLE
To compile it as a module, choose M here.
+config NF_FLOW_TABLE_PROCFS
+ bool "Supply flow table statistics in procfs"
+ depends on NF_FLOW_TABLE
+ depends on PROC_FS
+ help
+	  This option exposes the flow table offload statistics in
+	  procfs under net/netfilter/nf_flowtable.
+
config NETFILTER_XTABLES
tristate "Netfilter Xtables support (required for ip_tables)"
default m if NETFILTER_ADVANCED=n
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index a135b1a46014..0f060d100880 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -14,6 +14,11 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
+ifeq ($(CONFIG_NF_CONNTRACK),m)
+nf_conntrack-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_conntrack_bpf.o
+else ifeq ($(CONFIG_NF_CONNTRACK),y)
+nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o
+endif
obj-$(CONFIG_NETFILTER) = netfilter.o
@@ -55,6 +60,12 @@ obj-$(CONFIG_NF_NAT) += nf_nat.o
nf_nat-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
nf_nat-$(CONFIG_NF_NAT_MASQUERADE) += nf_nat_masquerade.o
+ifeq ($(CONFIG_NF_NAT),m)
+nf_nat-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_nat_bpf.o
+else ifeq ($(CONFIG_NF_NAT),y)
+nf_nat-$(CONFIG_DEBUG_INFO_BTF) += nf_nat_bpf.o
+endif
+
# NAT helpers
obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o
@@ -123,6 +134,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
nf_flow_table_offload.o
+nf_flow_table-$(CONFIG_NF_FLOW_TABLE_PROCFS) += nf_flow_table_procfs.o
obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 354cb472f386..5a6705a0e4ec 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -58,7 +58,7 @@ static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
if (num == 0)
return NULL;
- e = kvzalloc(alloc, GFP_KERNEL);
+ e = kvzalloc(alloc, GFP_KERNEL_ACCOUNT);
if (e)
e->num_hook_entries = num;
return e;
@@ -300,12 +300,6 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum))
return NULL;
return net->nf.hooks_ipv6 + hooknum;
-#if IS_ENABLED(CONFIG_DECNET)
- case NFPROTO_DECNET:
- if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= hooknum))
- return NULL;
- return net->nf.hooks_decnet + hooknum;
-#endif
default:
WARN_ON_ONCE(1);
return NULL;
@@ -428,14 +422,15 @@ static int __nf_register_net_hook(struct net *net, int pf,
p = nf_entry_dereference(*pp);
new_hooks = nf_hook_entries_grow(p, reg);
- if (!IS_ERR(new_hooks))
+ if (!IS_ERR(new_hooks)) {
+ hooks_validate(new_hooks);
rcu_assign_pointer(*pp, new_hooks);
+ }
mutex_unlock(&nf_hook_mutex);
if (IS_ERR(new_hooks))
return PTR_ERR(new_hooks);
- hooks_validate(new_hooks);
#ifdef CONFIG_NETFILTER_INGRESS
if (nf_ingress_hook(reg, pf))
net_inc_ingress_queue();
@@ -621,7 +616,8 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
case NF_ACCEPT:
break;
case NF_DROP:
- kfree_skb(skb);
+ kfree_skb_reason(skb,
+ SKB_DROP_REASON_NETFILTER_DROP);
ret = NF_DROP_GETERR(verdict);
if (ret == 0)
ret = -EPERM;
@@ -748,10 +744,6 @@ static int __net_init netfilter_net_init(struct net *net)
#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
__netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge));
#endif
-#if IS_ENABLED(CONFIG_DECNET)
- __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet));
-#endif
-
#ifdef CONFIG_PROC_FS
net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
net->proc_net);
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 16ae92054baa..e7ba5b6dd2b7 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -353,7 +353,7 @@ ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
if (unlikely(!c))
return;
- strlcpy(c->str, ext->comment, len + 1);
+ strscpy(c->str, ext->comment, len + 1);
set->ext_size += sizeof(*c) + strlen(c->str) + 1;
rcu_assign_pointer(comment->c, c);
}
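
The strlcpy() to strscpy() conversions here (and below in ip_vs_ctl.c) keep behavior for these callers; the differences are the return convention and that strscpy() never walks the whole source string. A hedged sketch of how a caller can use the new return value (copy_name() and its message are illustrative):

#include <linux/errno.h>
#include <linux/printk.h>
#include <linux/string.h>

static void copy_name(char *dst, const char *src, size_t sz)
{
	/* strscpy() NUL-terminates and returns the bytes copied, or
	 * -E2BIG on truncation; strlcpy() returned strlen(src), which
	 * forced a full read of a possibly oversized source.
	 */
	ssize_t n = strscpy(dst, src, sz);

	if (n == -E2BIG)
		pr_debug("name truncated\n");
}
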
@@ -1072,7 +1072,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info,
if (!set)
return -ENOMEM;
spin_lock_init(&set->lock);
- strlcpy(set->name, name, IPSET_MAXNAMELEN);
+ strscpy(set->name, name, IPSET_MAXNAMELEN);
set->family = family;
set->revision = revision;
@@ -1719,11 +1719,13 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
skb2 = nlmsg_new(payload, GFP_KERNEL);
if (!skb2)
return -ENOMEM;
- rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
+ rep = nlmsg_put(skb2, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
errmsg = nlmsg_data(rep);
errmsg->error = ret;
- memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
+ unsafe_memcpy(&errmsg->msg, nlh, nlh->nlmsg_len,
+ /* Bounds checked by the skb layer. */);
+
cmdattr = (void *)&errmsg->msg + min_len;
ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr,
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 6e391308431d..3adc291d9ce1 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -42,31 +42,8 @@
#define AHASH_MAX_SIZE (6 * AHASH_INIT_SIZE)
/* Max number of elements in the array block when tuned */
#define AHASH_MAX_TUNED 64
-
#define AHASH_MAX(h) ((h)->bucketsize)
-/* Max number of elements can be tuned */
-#ifdef IP_SET_HASH_WITH_MULTI
-static u8
-tune_bucketsize(u8 curr, u32 multi)
-{
- u32 n;
-
- if (multi < curr)
- return curr;
-
- n = curr + AHASH_INIT_SIZE;
- /* Currently, at listing one hash bucket must fit into a message.
- * Therefore we have a hard limit here.
- */
- return n > curr && n <= AHASH_MAX_TUNED ? n : curr;
-}
-#define TUNE_BUCKETSIZE(h, multi) \
- ((h)->bucketsize = tune_bucketsize((h)->bucketsize, multi))
-#else
-#define TUNE_BUCKETSIZE(h, multi)
-#endif
-
/* A hash bucket */
struct hbucket {
struct rcu_head rcu; /* for call_rcu */
@@ -936,7 +913,12 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
goto set_full;
/* Create a new slot */
if (n->pos >= n->size) {
- TUNE_BUCKETSIZE(h, multi);
+#ifdef IP_SET_HASH_WITH_MULTI
+ if (h->bucketsize >= AHASH_MAX_TUNED)
+ goto set_full;
+ else if (h->bucketsize < multi)
+ h->bucketsize += AHASH_INIT_SIZE;
+#endif
if (n->size >= AHASH_MAX(h)) {
/* Trigger rehashing */
mtype_data_next(&h->next, d);
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index f9b16f2b2219..fdacbc3c15be 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -599,13 +599,19 @@ static const struct seq_operations ip_vs_app_seq_ops = {
int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
{
INIT_LIST_HEAD(&ipvs->app_list);
- proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops,
- sizeof(struct seq_net_private));
+#ifdef CONFIG_PROC_FS
+ if (!proc_create_net("ip_vs_app", 0, ipvs->net->proc_net,
+ &ip_vs_app_seq_ops,
+ sizeof(struct seq_net_private)))
+ return -ENOMEM;
+#endif
return 0;
}
void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
{
unregister_ip_vs_app(ipvs, NULL /* all */);
+#ifdef CONFIG_PROC_FS
remove_proc_entry("ip_vs_app", ipvs->net->proc_net);
+#endif
}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 2c467c422dc6..13534e02346c 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1265,8 +1265,8 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
* The drop rate array needs tuning for real environments.
* Called from timer bh only => no locking
*/
- static const char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
- static char todrop_counter[9] = {0};
+ static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+ static signed char todrop_counter[9] = {0};
int i;
/* if the conn entry hasn't lasted for 60 seconds, don't drop it.
@@ -1308,7 +1308,7 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
* Randomly scan 1/32 of the whole table every second
*/
for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) {
- unsigned int hash = prandom_u32() & ip_vs_conn_tab_mask;
+ unsigned int hash = get_random_u32() & ip_vs_conn_tab_mask;
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
if (cp->ipvs != ipvs)
@@ -1447,20 +1447,36 @@ int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs)
{
atomic_set(&ipvs->conn_count, 0);
- proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net,
- &ip_vs_conn_seq_ops, sizeof(struct ip_vs_iter_state));
- proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net,
- &ip_vs_conn_sync_seq_ops,
- sizeof(struct ip_vs_iter_state));
+#ifdef CONFIG_PROC_FS
+ if (!proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net,
+ &ip_vs_conn_seq_ops,
+ sizeof(struct ip_vs_iter_state)))
+ goto err_conn;
+
+ if (!proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net,
+ &ip_vs_conn_sync_seq_ops,
+ sizeof(struct ip_vs_iter_state)))
+ goto err_conn_sync;
+#endif
+
return 0;
+
+#ifdef CONFIG_PROC_FS
+err_conn_sync:
+ remove_proc_entry("ip_vs_conn", ipvs->net->proc_net);
+err_conn:
+ return -ENOMEM;
+#endif
}
void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs)
{
/* flush all the connection entries first */
ip_vs_conn_flush(ipvs);
+#ifdef CONFIG_PROC_FS
remove_proc_entry("ip_vs_conn", ipvs->net->proc_net);
remove_proc_entry("ip_vs_conn_sync", ipvs->net->proc_net);
+#endif
}
int __init ip_vs_conn_init(void)
@@ -1495,7 +1511,7 @@ int __init ip_vs_conn_init(void)
pr_info("Connection hash table configured "
"(size=%d, memory=%ldKbytes)\n",
ip_vs_conn_tab_size,
- (long)(ip_vs_conn_tab_size*sizeof(struct list_head))/1024);
+ (long)(ip_vs_conn_tab_size*sizeof(*ip_vs_conn_tab))/1024);
IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n",
sizeof(struct ip_vs_conn));
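
The ip_vs_*_net_init() conversions follow the usual goto-unwind shape: each later proc_create_net() failure removes everything registered before it, with the labels under the same CONFIG_PROC_FS guard as the registrations. A condensed sketch with hypothetical entry names:

#include <linux/errno.h>
#include <linux/proc_fs.h>
#include <linux/seq_file_net.h>
#include <net/net_namespace.h>

static const struct seq_operations first_ops, second_ops; /* stubs */

static int __net_init example_proc_init(struct net *net)
{
	if (!proc_create_net("first", 0, net->proc_net, &first_ops,
			     sizeof(struct seq_net_private)))
		goto err_first;
	if (!proc_create_net("second", 0, net->proc_net, &second_ops,
			     sizeof(struct seq_net_private)))
		goto err_second;
	return 0;

err_second:
	remove_proc_entry("first", net->proc_net); /* unwind in reverse */
err_first:
	return -ENOMEM;
}
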
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 7f645328b47f..988222fff9f0 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1767,8 +1767,6 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs)
#ifdef CONFIG_SYSCTL
-static int three = 3;
-
static int
proc_do_defense_mode(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
@@ -1977,7 +1975,7 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &three,
+ .extra2 = SYSCTL_THREE,
},
{
.procname = "nat_icmp_send",
@@ -2613,7 +2611,7 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
dst->addr = src->addr.ip;
dst->port = src->port;
dst->fwmark = src->fwmark;
- strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name));
+ strscpy(dst->sched_name, sched_name, sizeof(dst->sched_name));
dst->flags = src->flags;
dst->timeout = src->timeout / HZ;
dst->netmask = src->netmask;
@@ -2807,13 +2805,13 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
mutex_lock(&ipvs->sync_mutex);
if (ipvs->sync_state & IP_VS_STATE_MASTER) {
d[0].state = IP_VS_STATE_MASTER;
- strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn,
+ strscpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn,
sizeof(d[0].mcast_ifn));
d[0].syncid = ipvs->mcfg.syncid;
}
if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
d[1].state = IP_VS_STATE_BACKUP;
- strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn,
+ strscpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn,
sizeof(d[1].mcast_ifn));
d[1].syncid = ipvs->bcfg.syncid;
}
@@ -3563,7 +3561,7 @@ static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
return -EINVAL;
- strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
+ strscpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
sizeof(c.mcast_ifn));
c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]);
@@ -4007,6 +4005,7 @@ static struct genl_family ip_vs_genl_family __ro_after_init = {
.module = THIS_MODULE,
.small_ops = ip_vs_genl_ops,
.n_small_ops = ARRAY_SIZE(ip_vs_genl_ops),
+ .resv_start_op = IPVS_CMD_FLUSH + 1,
};
static int __init ip_vs_genl_register(void)
diff --git a/net/netfilter/ipvs/ip_vs_mh.c b/net/netfilter/ipvs/ip_vs_mh.c
index da0280cec506..e3d7f5c879ce 100644
--- a/net/netfilter/ipvs/ip_vs_mh.c
+++ b/net/netfilter/ipvs/ip_vs_mh.c
@@ -174,8 +174,7 @@ static int ip_vs_mh_populate(struct ip_vs_mh_state *s,
return 0;
}
- table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE),
- sizeof(unsigned long), GFP_KERNEL);
+ table = bitmap_zalloc(IP_VS_MH_TAB_SIZE, GFP_KERNEL);
if (!table)
return -ENOMEM;
@@ -227,7 +226,7 @@ static int ip_vs_mh_populate(struct ip_vs_mh_state *s,
}
out:
- kfree(table);
+ bitmap_free(table);
return 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 9d43277b8b4f..a56fd0b5a430 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1280,12 +1280,12 @@ static void set_sock_size(struct sock *sk, int mode, int val)
lock_sock(sk);
if (mode) {
val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
- sysctl_wmem_max);
+ READ_ONCE(sysctl_wmem_max));
sk->sk_sndbuf = val * 2;
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
} else {
val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
- sysctl_rmem_max);
+ READ_ONCE(sysctl_rmem_max));
sk->sk_rcvbuf = val * 2;
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
}
diff --git a/net/netfilter/ipvs/ip_vs_twos.c b/net/netfilter/ipvs/ip_vs_twos.c
index acb55d8393ef..f2579fc9c75b 100644
--- a/net/netfilter/ipvs/ip_vs_twos.c
+++ b/net/netfilter/ipvs/ip_vs_twos.c
@@ -71,8 +71,8 @@ static struct ip_vs_dest *ip_vs_twos_schedule(struct ip_vs_service *svc,
* from 0 to total_weight
*/
total_weight += 1;
- rweight1 = prandom_u32() % total_weight;
- rweight2 = prandom_u32() % total_weight;
+ rweight1 = prandom_u32_max(total_weight);
+ rweight2 = prandom_u32_max(total_weight);
/* Pick two weighted servers */
list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
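
prandom_u32_max(n) produces a value in [0, n) with a widening multiply and shift instead of the '%' above, avoiding the division. A sketch of the underlying trick (matching the prandom.h helper as I understand it):

#include <linux/prandom.h>
#include <linux/types.h>

/* scale a full 32-bit random value into [0, ep_ro) */
static inline u32 bounded_random(u32 ep_ro)
{
	return (u32)(((u64)prandom_u32() * ep_ro) >> 32);
}

For non-power-of-two bounds this is very slightly non-uniform, which is acceptable for load-balancing picks like the two-choices scheduler here.
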
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index d2e5a8f644b8..029171379884 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -610,7 +610,7 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
nf_reset_ct(skb);
skb_forward_csum(skb);
if (skb->dev)
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
}
return ret;
}
@@ -652,7 +652,7 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
if (!local) {
skb_forward_csum(skb);
if (skb->dev)
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output);
} else
@@ -674,7 +674,7 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
ip_vs_drop_early_demux_sk(skb);
skb_forward_csum(skb);
if (skb->dev)
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output);
} else
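
skb_clear_tstamp() differs from the open-coded skb->tstamp = 0 in one case: a timestamp carrying a mono delivery (EDT) time set by the stack must survive forwarding, while an RX timestamp must not leak into the TX path as a bogus departure time. A sketch consistent with the helper's purpose (assumption: the mono_delivery_time bit flags such timestamps):

#include <linux/skbuff.h>

static inline void clear_forwarding_tstamp(struct sk_buff *skb)
{
	if (skb->mono_delivery_time)	/* EDT value: keep it */
		return;

	skb->tstamp = 0;		/* stale RX timestamp: drop it */
}
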
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 82f36beb2e76..5d8ed6c90b7e 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -132,6 +132,9 @@ static int __nf_conncount_add(struct net *net,
struct nf_conn *found_ct;
unsigned int collect = 0;
+ if (time_is_after_eq_jiffies((unsigned long)list->last_gc))
+ goto add_new_node;
+
/* check the saved connections */
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
if (collect > CONNCOUNT_GC_MAX_NODES)
@@ -177,6 +180,7 @@ static int __nf_conncount_add(struct net *net,
nf_ct_put(found_ct);
}
+add_new_node:
if (WARN_ON_ONCE(list->count > INT_MAX))
return -EOVERFLOW;
@@ -190,6 +194,7 @@ static int __nf_conncount_add(struct net *net,
conn->jiffies32 = (u32)jiffies;
list_add_tail(&conn->node, &list->head);
list->count++;
+ list->last_gc = (u32)jiffies;
return 0;
}
@@ -214,6 +219,7 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
spin_lock_init(&list->list_lock);
INIT_LIST_HEAD(&list->head);
list->count = 0;
+ list->last_gc = (u32)jiffies;
}
EXPORT_SYMBOL_GPL(nf_conncount_list_init);
@@ -227,6 +233,10 @@ bool nf_conncount_gc_list(struct net *net,
unsigned int collected = 0;
bool ret = false;
+ /* don't bother if we just did GC */
+ if (time_is_after_eq_jiffies((unsigned long)READ_ONCE(list->last_gc)))
+ return false;
+
/* don't bother if other cpu is already doing GC */
if (!spin_trylock(&list->list_lock))
return false;
@@ -258,6 +268,7 @@ bool nf_conncount_gc_list(struct net *net,
if (!list->count)
ret = true;
+ list->last_gc = (u32)jiffies;
spin_unlock(&list->list_lock);
return ret;
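
The new last_gc field makes repeated scans within one jiffy a no-op on both the add and gc paths. time_is_after_eq_jiffies(t) is true while jiffies has not advanced past t, so the check reads as "a scan already ran in this tick". In isolation:

#include <linux/jiffies.h>
#include <linux/types.h>

static bool gc_ran_this_jiffy(u32 last_gc)
{
	/* true while jiffies <= last_gc (compared in 32 bits),
	 * i.e. another scan right now would find nothing new
	 */
	return time_is_after_eq_jiffies((unsigned long)last_gc);
}
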
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 91bc8df3e4b0..385a5f458aba 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -22,26 +22,7 @@ static bool nf_ct_acct __read_mostly;
module_param_named(acct, nf_ct_acct, bool, 0644);
MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting.");
-static const struct nf_ct_ext_type acct_extend = {
- .len = sizeof(struct nf_conn_acct),
- .align = __alignof__(struct nf_conn_acct),
- .id = NF_CT_EXT_ACCT,
-};
-
void nf_conntrack_acct_pernet_init(struct net *net)
{
net->ct.sysctl_acct = nf_ct_acct;
}
-
-int nf_conntrack_acct_init(void)
-{
- int ret = nf_ct_extend_register(&acct_extend);
- if (ret < 0)
- pr_err("Unable to register extension\n");
- return ret;
-}
-
-void nf_conntrack_acct_fini(void)
-{
- nf_ct_extend_unregister(&acct_extend);
-}
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
new file mode 100644
index 000000000000..8639e7efd0e2
--- /dev/null
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -0,0 +1,513 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable Conntrack Helpers for XDP and TC-BPF hooks
+ *
+ * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
+ * allowed to break compatibility for these functions since the interface they
+ * are exposed through to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf_verifier.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/filter.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/btf_ids.h>
+#include <linux/net_namespace.h>
+#include <net/netfilter/nf_conntrack_bpf.h>
+#include <net/netfilter/nf_conntrack_core.h>
+
+/* bpf_ct_opts - Options for CT lookup helpers
+ *
+ * Members:
+ * @netns_id - Specify the network namespace for lookup
+ * Values:
+ * BPF_F_CURRENT_NETNS (-1)
+ * Use namespace associated with ctx (xdp_md, __sk_buff)
+ * [0, S32_MAX]
+ * Network Namespace ID
+ * @error - Out parameter, set for any errors encountered
+ * Values:
+ * -EINVAL - Passed NULL for bpf_tuple pointer
+ * -EINVAL - opts->reserved is not 0
+ * -EINVAL - netns_id is less than -1
+ * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
+ * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
+ * -ENONET - No network namespace found for netns_id
+ * -ENOENT - Conntrack lookup could not find entry for tuple
+ * -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
+ * or sizeof(tuple->ipv6)
+ * @l4proto - Layer 4 protocol
+ * Values:
+ * IPPROTO_TCP, IPPROTO_UDP
+ * @dir - Connection tracking tuple direction
+ * @reserved - Reserved member, will be reused for more options in the future
+ * Values:
+ * 0
+ */
+struct bpf_ct_opts {
+ s32 netns_id;
+ s32 error;
+ u8 l4proto;
+ u8 dir;
+ u8 reserved[2];
+};
+
+enum {
+ NF_BPF_CT_OPTS_SZ = 12,
+};
+
+static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple_len, u8 protonum, u8 dir,
+ struct nf_conntrack_tuple *tuple)
+{
+ union nf_inet_addr *src = dir ? &tuple->dst.u3 : &tuple->src.u3;
+ union nf_inet_addr *dst = dir ? &tuple->src.u3 : &tuple->dst.u3;
+ union nf_conntrack_man_proto *sport = dir ? (void *)&tuple->dst.u
+ : &tuple->src.u;
+ union nf_conntrack_man_proto *dport = dir ? &tuple->src.u
+ : (void *)&tuple->dst.u;
+
+ if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
+ return -EPROTO;
+
+ memset(tuple, 0, sizeof(*tuple));
+
+ switch (tuple_len) {
+ case sizeof(bpf_tuple->ipv4):
+ tuple->src.l3num = AF_INET;
+ src->ip = bpf_tuple->ipv4.saddr;
+ sport->tcp.port = bpf_tuple->ipv4.sport;
+ dst->ip = bpf_tuple->ipv4.daddr;
+ dport->tcp.port = bpf_tuple->ipv4.dport;
+ break;
+ case sizeof(bpf_tuple->ipv6):
+ tuple->src.l3num = AF_INET6;
+ memcpy(src->ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
+ sport->tcp.port = bpf_tuple->ipv6.sport;
+ memcpy(dst->ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
+ dport->tcp.port = bpf_tuple->ipv6.dport;
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
+ tuple->dst.protonum = protonum;
+ tuple->dst.dir = dir;
+
+ return 0;
+}
+
+static struct nf_conn *
+__bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple_len, struct bpf_ct_opts *opts, u32 opts_len,
+ u32 timeout)
+{
+ struct nf_conntrack_tuple otuple, rtuple;
+ struct nf_conn *ct;
+ int err;
+
+ if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+ opts_len != NF_BPF_CT_OPTS_SZ)
+ return ERR_PTR(-EINVAL);
+
+ if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
+ return ERR_PTR(-EINVAL);
+
+ err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
+ IP_CT_DIR_ORIGINAL, &otuple);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
+ IP_CT_DIR_REPLY, &rtuple);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ if (opts->netns_id >= 0) {
+ net = get_net_ns_by_id(net, opts->netns_id);
+ if (unlikely(!net))
+ return ERR_PTR(-ENONET);
+ }
+
+ ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
+ GFP_ATOMIC);
+ if (IS_ERR(ct))
+ goto out;
+
+ memset(&ct->proto, 0, sizeof(ct->proto));
+ __nf_ct_set_timeout(ct, timeout * HZ);
+
+out:
+ if (opts->netns_id >= 0)
+ put_net(net);
+
+ return ct;
+}
+
+static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
+ struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple_len, struct bpf_ct_opts *opts,
+ u32 opts_len)
+{
+ struct nf_conntrack_tuple_hash *hash;
+ struct nf_conntrack_tuple tuple;
+ struct nf_conn *ct;
+ int err;
+
+ if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
+ opts_len != NF_BPF_CT_OPTS_SZ)
+ return ERR_PTR(-EINVAL);
+ if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
+ return ERR_PTR(-EPROTO);
+ if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
+ return ERR_PTR(-EINVAL);
+
+ err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
+ IP_CT_DIR_ORIGINAL, &tuple);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ if (opts->netns_id >= 0) {
+ net = get_net_ns_by_id(net, opts->netns_id);
+ if (unlikely(!net))
+ return ERR_PTR(-ENONET);
+ }
+
+ hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
+ if (opts->netns_id >= 0)
+ put_net(net);
+ if (!hash)
+ return ERR_PTR(-ENOENT);
+
+ ct = nf_ct_tuplehash_to_ctrack(hash);
+ opts->dir = NF_CT_DIRECTION(hash);
+
+ return ct;
+}
+
+BTF_ID_LIST(btf_nf_conn_ids)
+BTF_ID(struct, nf_conn)
+BTF_ID(struct, nf_conn___init)
+
+/* Check writes into `struct nf_conn` */
+static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
+ const struct btf *btf,
+ const struct btf_type *t, int off,
+ int size, enum bpf_access_type atype,
+ u32 *next_btf_id,
+ enum bpf_type_flag *flag)
+{
+ const struct btf_type *ncit;
+ const struct btf_type *nct;
+ size_t end;
+
+ ncit = btf_type_by_id(btf, btf_nf_conn_ids[1]);
+ nct = btf_type_by_id(btf, btf_nf_conn_ids[0]);
+
+ if (t != nct && t != ncit) {
+ bpf_log(log, "only read is supported\n");
+ return -EACCES;
+ }
+
+ /* `struct nf_conn` and `struct nf_conn___init` have the same layout
+ * so we are safe to simply merge offset checks here
+ */
+ switch (off) {
+#if defined(CONFIG_NF_CONNTRACK_MARK)
+ case offsetof(struct nf_conn, mark):
+ end = offsetofend(struct nf_conn, mark);
+ break;
+#endif
+ default:
+ bpf_log(log, "no write support to nf_conn at off %d\n", off);
+ return -EACCES;
+ }
+
+ if (off + size > end) {
+ bpf_log(log,
+ "write access at off %d with size %d beyond the member of nf_conn ended at %zu\n",
+ off, size, end);
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in nf_conntrack BTF");
+
+/* bpf_xdp_ct_alloc - Allocate a new CT entry
+ *
+ * Parameters:
+ * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program
+ * Cannot be NULL
+ * @bpf_tuple - Pointer to memory representing the tuple to look up
+ * Cannot be NULL
+ * @tuple__sz - Length of the tuple structure
+ * Must be one of sizeof(bpf_tuple->ipv4) or
+ * sizeof(bpf_tuple->ipv6)
+ * @opts - Additional options for allocation (documented above)
+ * Cannot be NULL
+ * @opts__sz - Length of the bpf_ct_opts structure
+ * Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn___init *
+bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+ struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
+ struct nf_conn *nfct;
+
+ nfct = __bpf_nf_ct_alloc_entry(dev_net(ctx->rxq->dev), bpf_tuple, tuple__sz,
+ opts, opts__sz, 10);
+ if (IS_ERR(nfct)) {
+ if (opts)
+ opts->error = PTR_ERR(nfct);
+ return NULL;
+ }
+
+ return (struct nf_conn___init *)nfct;
+}
+
+/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
+ * reference to it
+ *
+ * Parameters:
+ * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program
+ * Cannot be NULL
+ * @bpf_tuple - Pointer to memory representing the tuple to look up
+ * Cannot be NULL
+ * @tuple__sz - Length of the tuple structure
+ * Must be one of sizeof(bpf_tuple->ipv4) or
+ * sizeof(bpf_tuple->ipv6)
+ * @opts - Additional options for lookup (documented above)
+ * Cannot be NULL
+ * @opts__sz - Length of the bpf_ct_opts structure
+ * Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn *
+bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+ struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
+ struct net *caller_net;
+ struct nf_conn *nfct;
+
+ caller_net = dev_net(ctx->rxq->dev);
+ nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
+ if (IS_ERR(nfct)) {
+ if (opts)
+ opts->error = PTR_ERR(nfct);
+ return NULL;
+ }
+ return nfct;
+}
+
+/* bpf_skb_ct_alloc - Allocate a new CT entry
+ *
+ * Parameters:
+ * @skb_ctx - Pointer to ctx (__sk_buff) in TC program
+ * Cannot be NULL
+ * @bpf_tuple - Pointer to memory representing the tuple to look up
+ * Cannot be NULL
+ * @tuple__sz - Length of the tuple structure
+ * Must be one of sizeof(bpf_tuple->ipv4) or
+ * sizeof(bpf_tuple->ipv6)
+ * @opts - Additional options for allocation (documented above)
+ * Cannot be NULL
+ * @opts__sz - Length of the bpf_ct_opts structure
+ * Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn___init *
+bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+ struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+ struct nf_conn *nfct;
+ struct net *net;
+
+ net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
+ nfct = __bpf_nf_ct_alloc_entry(net, bpf_tuple, tuple__sz, opts, opts__sz, 10);
+ if (IS_ERR(nfct)) {
+ if (opts)
+ opts->error = PTR_ERR(nfct);
+ return NULL;
+ }
+
+ return (struct nf_conn___init *)nfct;
+}
+
+/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
+ * reference to it
+ *
+ * Parameters:
+ * @skb_ctx - Pointer to ctx (__sk_buff) in TC program
+ * Cannot be NULL
+ * @bpf_tuple - Pointer to memory representing the tuple to look up
+ * Cannot be NULL
+ * @tuple__sz - Length of the tuple structure
+ * Must be one of sizeof(bpf_tuple->ipv4) or
+ * sizeof(bpf_tuple->ipv6)
+ * @opts - Additional options for lookup (documented above)
+ * Cannot be NULL
+ * @opts__sz - Length of the bpf_ct_opts structure
+ * Must be NF_BPF_CT_OPTS_SZ (12)
+ */
+struct nf_conn *
+bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
+ u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
+{
+ struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+ struct net *caller_net;
+ struct nf_conn *nfct;
+
+ caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
+ nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
+ if (IS_ERR(nfct)) {
+ if (opts)
+ opts->error = PTR_ERR(nfct);
+ return NULL;
+ }
+ return nfct;
+}
+
+/* bpf_ct_insert_entry - Add the provided entry into a CT map
+ *
+ * This must be invoked for referenced PTR_TO_BTF_ID.
+ *
+ * @nfct - Pointer to referenced nf_conn___init object, obtained
+ * using bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
+ */
+struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
+{
+ struct nf_conn *nfct = (struct nf_conn *)nfct_i;
+ int err;
+
+ nfct->status |= IPS_CONFIRMED;
+ err = nf_conntrack_hash_check_insert(nfct);
+ if (err < 0) {
+ nf_conntrack_free(nfct);
+ return NULL;
+ }
+ return nfct;
+}
+
+/* bpf_ct_release - Release acquired nf_conn object
+ *
+ * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
+ * the program if any references remain in the program in all of the explored
+ * states.
+ *
+ * Parameters:
+ * @nf_conn - Pointer to referenced nf_conn object, obtained using
+ * bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
+ */
+void bpf_ct_release(struct nf_conn *nfct)
+{
+ if (!nfct)
+ return;
+ nf_ct_put(nfct);
+}
+
+/* bpf_ct_set_timeout - Set timeout of allocated nf_conn
+ *
+ * Sets the default timeout of newly allocated nf_conn before insertion.
+ * This helper must be invoked for refcounted pointer to nf_conn___init.
+ *
+ * Parameters:
+ * @nfct - Pointer to referenced nf_conn object, obtained using
+ * bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
+ * @timeout - Timeout in msecs.
+ */
+void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
+{
+ __nf_ct_set_timeout((struct nf_conn *)nfct, msecs_to_jiffies(timeout));
+}
+
+/* bpf_ct_change_timeout - Change timeout of inserted nf_conn
+ *
+ * Change the timeout associated with the inserted or looked-up nf_conn.
+ * This helper must be invoked for refcounted pointer to nf_conn.
+ *
+ * Parameters:
+ * @nfct - Pointer to referenced nf_conn object, obtained using
+ * bpf_ct_insert_entry, bpf_xdp_ct_lookup, or bpf_skb_ct_lookup.
+ * @timeout - New timeout in msecs.
+ */
+int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
+{
+ return __nf_ct_change_timeout(nfct, msecs_to_jiffies(timeout));
+}
+
+/* bpf_ct_set_status - Set status field of allocated nf_conn
+ *
+ * Set the status field of the newly allocated nf_conn before insertion.
+ * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn___init.
+ *
+ * Parameters:
+ * @nfct - Pointer to referenced nf_conn object, obtained using
+ * bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
+ * @status - New status value.
+ */
+int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
+{
+ return nf_ct_change_status_common((struct nf_conn *)nfct, status);
+}
+
+/* bpf_ct_change_status - Change status of inserted nf_conn
+ *
+ * Change the status field of the provided connection tracking entry.
+ * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn.
+ *
+ * Parameters:
+ * @nfct - Pointer to referenced nf_conn object, obtained using
+ * bpf_ct_insert_entry, bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
+ * @status - New status value.
+ */
+int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
+{
+ return nf_ct_change_status_common(nfct, status);
+}
+
+__diag_pop()
+
+BTF_SET8_START(nf_ct_kfunc_set)
+BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_skb_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_ct_insert_entry, KF_ACQUIRE | KF_RET_NULL | KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS)
+BTF_SET8_END(nf_ct_kfunc_set)
+
+static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &nf_ct_kfunc_set,
+};
+
+int register_nf_conntrack_bpf(void)
+{
+ int ret;
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set);
+ if (!ret) {
+ mutex_lock(&nf_conn_btf_access_lock);
+ nfct_btf_struct_access = _nf_conntrack_btf_struct_access;
+ mutex_unlock(&nf_conn_btf_access_lock);
+ }
+
+ return ret;
+}
+
+void cleanup_nf_conntrack_bpf(void)
+{
+ mutex_lock(&nf_conn_btf_access_lock);
+ nfct_btf_struct_access = NULL;
+ mutex_unlock(&nf_conn_btf_access_lock);
+}
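
For orientation, a hypothetical BPF-side consumer of these kfuncs could look like the sketch below; the ___local struct mirrors the kernel's bpf_ct_opts (the selftests use the same suffix convention), and everything else follows the signatures defined above:

// SPDX-License-Identifier: GPL-2.0
/* hypothetical consumer, selftests-style; not part of this series */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

#define BPF_F_CURRENT_NETNS (-1)
#define IPPROTO_TCP 6

/* local mirror of the kernel's bpf_ct_opts (CO-RE ___local convention) */
struct bpf_ct_opts___local {
	s32 netns_id;
	s32 error;
	u8 l4proto;
	u8 dir;
	u8 reserved[2];
};

extern struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx,
					 struct bpf_sock_tuple *bpf_tuple,
					 u32 tuple__sz,
					 struct bpf_ct_opts___local *opts,
					 u32 opts__sz) __ksym;
extern void bpf_ct_release(struct nf_conn *ct) __ksym;

SEC("xdp")
int ct_lookup_example(struct xdp_md *ctx)
{
	struct bpf_ct_opts___local opts = {
		.netns_id = BPF_F_CURRENT_NETNS,
		.l4proto = IPPROTO_TCP,
	};
	struct bpf_sock_tuple tup = {};
	struct nf_conn *ct;

	/* a real program would fill tup.ipv4 from the parsed headers */
	ct = bpf_xdp_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts,
			       sizeof(opts));
	if (!ct)
		return XDP_PASS;	/* opts.error holds the errno */

	bpf_ct_release(ct);		/* verifier enforces the release */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
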
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index 1ba6becc3079..9fb9b8031298 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -20,6 +20,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
unsigned int timeout)
{
+ const struct nf_conntrack_helper *helper;
struct nf_conntrack_expect *exp;
struct iphdr *iph = ip_hdr(skb);
struct rtable *rt = skb_rtable(skb);
@@ -58,7 +59,10 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
goto out;
exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
- exp->tuple.src.u.udp.port = help->helper->tuple.src.u.udp.port;
+
+ helper = rcu_dereference(help->helper);
+ if (helper)
+ exp->tuple.src.u.udp.port = helper->tuple.src.u.udp.port;
exp->mask.src.u3.ip = mask;
exp->mask.src.u.udp.port = htons(0xFFFF);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 894a325d39f2..f97bda06d2a9 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -34,10 +34,10 @@
#include <linux/rculist_nulls.h>
#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_acct.h>
@@ -47,7 +47,6 @@
#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
-#include <net/netfilter/nf_conntrack_act_ct.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netns/hash.h>
@@ -67,6 +66,9 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash);
struct conntrack_gc_work {
struct delayed_work dwork;
u32 next_bucket;
+ u32 avg_timeout;
+ u32 count;
+ u32 start_time;
bool exiting;
bool early_drop;
};
@@ -78,8 +80,21 @@ static __read_mostly bool nf_conntrack_locks_all;
/* serialize hash resizes and nf_ct_iterate_cleanup */
static DEFINE_MUTEX(nf_conntrack_mutex);
-#define GC_SCAN_INTERVAL (120u * HZ)
+#define GC_SCAN_INTERVAL_MAX (60ul * HZ)
+#define GC_SCAN_INTERVAL_MIN (1ul * HZ)
+
+/* clamp timeouts to this value (TCP unacked) */
+#define GC_SCAN_INTERVAL_CLAMP (300ul * HZ)
+
+/* Initial bias: pretend we have 100 entries at the upper bound so we don't
+ * wake up often just because we have three entries with a 1s timeout while
+ * still allowing non-idle machines to wake up more often when needed.
+ */
+#define GC_SCAN_INITIAL_COUNT 100
+#define GC_SCAN_INTERVAL_INIT GC_SCAN_INTERVAL_MAX
+
#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
+#define GC_SCAN_EXPIRED_MAX (64000u / HZ)
#define MIN_CHAINLEN 8u
#define MAX_CHAINLEN (32u - MIN_CHAINLEN)
@@ -317,20 +332,18 @@ nf_ct_get_tuple(const struct sk_buff *skb,
return gre_pkt_to_tuple(skb, dataoff, net, tuple);
#endif
case IPPROTO_TCP:
- case IPPROTO_UDP: /* fallthrough */
- return nf_ct_get_tuple_ports(skb, dataoff, tuple);
+ case IPPROTO_UDP:
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
case IPPROTO_UDPLITE:
- return nf_ct_get_tuple_ports(skb, dataoff, tuple);
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
case IPPROTO_SCTP:
- return nf_ct_get_tuple_ports(skb, dataoff, tuple);
#endif
#ifdef CONFIG_NF_CT_PROTO_DCCP
case IPPROTO_DCCP:
- return nf_ct_get_tuple_ports(skb, dataoff, tuple);
#endif
+ /* fallthrough */
+ return nf_ct_get_tuple_ports(skb, dataoff, tuple);
default:
break;
}
@@ -513,50 +526,6 @@ clean_from_lists(struct nf_conn *ct)
nf_ct_remove_expectations(ct);
}
-/* must be called with local_bh_disable */
-static void nf_ct_add_to_dying_list(struct nf_conn *ct)
-{
- struct ct_pcpu *pcpu;
-
- /* add this conntrack to the (per cpu) dying list */
- ct->cpu = smp_processor_id();
- pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
-
- spin_lock(&pcpu->lock);
- hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
- &pcpu->dying);
- spin_unlock(&pcpu->lock);
-}
-
-/* must be called with local_bh_disable */
-static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct)
-{
- struct ct_pcpu *pcpu;
-
- /* add this conntrack to the (per cpu) unconfirmed list */
- ct->cpu = smp_processor_id();
- pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
-
- spin_lock(&pcpu->lock);
- hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
- &pcpu->unconfirmed);
- spin_unlock(&pcpu->lock);
-}
-
-/* must be called with local_bh_disable */
-static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
-{
- struct ct_pcpu *pcpu;
-
- /* We overload first tuple to link into unconfirmed or dying list.*/
- pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
-
- spin_lock(&pcpu->lock);
- BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
- hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
- spin_unlock(&pcpu->lock);
-}
-
#define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK)
/* Released via nf_ct_destroy() */
@@ -594,7 +563,7 @@ EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
void nf_ct_tmpl_free(struct nf_conn *tmpl)
{
- nf_ct_ext_destroy(tmpl);
+ kfree(tmpl->ext);
if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK)
kfree((char *)tmpl - tmpl->proto.tmpl_padto);
@@ -628,7 +597,6 @@ void nf_ct_destroy(struct nf_conntrack *nfct)
if (unlikely(nf_ct_protonum(ct) == IPPROTO_GRE))
destroy_gre_conntrack(ct);
- local_bh_disable();
/* Expectations will have been removed in clean_from_lists,
* except TFTP can create an expectation on the first packet,
* before connection is in the list, so we need to clean here,
@@ -636,10 +604,6 @@ void nf_ct_destroy(struct nf_conntrack *nfct)
*/
nf_ct_remove_expectations(ct);
- nf_ct_del_from_dying_or_unconfirmed_list(ct);
-
- local_bh_enable();
-
if (ct->master)
nf_ct_put(ct->master);
@@ -648,15 +612,12 @@ void nf_ct_destroy(struct nf_conntrack *nfct)
}
EXPORT_SYMBOL(nf_ct_destroy);
-static void nf_ct_delete_from_lists(struct nf_conn *ct)
+static void __nf_ct_delete_from_lists(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
unsigned int hash, reply_hash;
unsigned int sequence;
- nf_ct_helper_destroy(ct);
-
- local_bh_disable();
do {
sequence = read_seqcount_begin(&nf_conntrack_generation);
hash = hash_conntrack(net,
@@ -669,12 +630,30 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
clean_from_lists(ct);
nf_conntrack_double_unlock(hash, reply_hash);
+}
- nf_ct_add_to_dying_list(ct);
+static void nf_ct_delete_from_lists(struct nf_conn *ct)
+{
+ nf_ct_helper_destroy(ct);
+ local_bh_disable();
+
+ __nf_ct_delete_from_lists(ct);
local_bh_enable();
}
+static void nf_ct_add_to_ecache_list(struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ struct nf_conntrack_net *cnet = nf_ct_pernet(nf_ct_net(ct));
+
+ spin_lock(&cnet->ecache.dying_lock);
+ hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+ &cnet->ecache.dying_list);
+ spin_unlock(&cnet->ecache.dying_lock);
+#endif
+}
+
bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
{
struct nf_conn_tstamp *tstamp;
@@ -697,7 +676,12 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
/* destroy event was not delivered. nf_ct_put will
* be done by event cache worker on redelivery.
*/
- nf_ct_delete_from_lists(ct);
+ nf_ct_helper_destroy(ct);
+ local_bh_disable();
+ __nf_ct_delete_from_lists(ct);
+ nf_ct_add_to_ecache_list(ct);
+ local_bh_enable();
+
nf_conntrack_ecache_work(nf_ct_net(ct), NFCT_ECACHE_DESTROY_FAIL);
return false;
}
@@ -746,6 +730,9 @@ static void nf_ct_gc_expired(struct nf_conn *ct)
if (!refcount_inc_not_zero(&ct->ct_general.use))
return;
+ /* load ->status after refcount increase */
+ smp_acquire__after_ctrl_dep();
+
if (nf_ct_should_gc(ct))
nf_ct_kill(ct);
@@ -812,6 +799,9 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
*/
ct = nf_ct_tuplehash_to_ctrack(h);
if (likely(refcount_inc_not_zero(&ct->ct_general.use))) {
+ /* re-check key after refcount */
+ smp_acquire__after_ctrl_dep();
+
if (likely(nf_ct_key_equal(h, tuple, zone, net)))
goto found;
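
The smp_acquire__after_ctrl_dep() calls added in this file all serve the SLAB_TYPESAFE_BY_RCU lookup pattern: an object found under RCU may have been freed and reused, so after a successful refcount_inc_not_zero() the loads of the key (and status bits) must be ordered after the increment, and the key re-checked. Schematically, with an illustrative object type:

#include <asm/barrier.h>
#include <linux/refcount.h>
#include <linux/types.h>

struct obj {
	refcount_t ref;
	u32 key;
};

static void obj_put(struct obj *o)
{
	if (refcount_dec_and_test(&o->ref)) {
		/* actual kfree_rcu()/free path elided in this sketch */
	}
}

/* validate an object found under RCU in a SLAB_TYPESAFE_BY_RCU cache */
static bool obj_get_checked(struct obj *o, u32 key)
{
	if (!refcount_inc_not_zero(&o->ref))
		return false;		/* concurrently freed: skip it */

	/* order the ->key load after the successful refcount increment */
	smp_acquire__after_ctrl_dep();

	if (o->key != key) {		/* slab reused the memory meanwhile */
		obj_put(o);
		return false;
	}
	return true;
}
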
@@ -858,6 +848,33 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
&nf_conntrack_hash[reply_hash]);
}
+static bool nf_ct_ext_valid_pre(const struct nf_ct_ext *ext)
+{
+ /* if ext->gen_id is not equal to nf_conntrack_ext_genid, some extensions
+	 * may contain stale pointers to, e.g., a helper that has been removed.
+ *
+ * The helper can't clear this because the nf_conn object isn't in
+ * any hash and synchronize_rcu() isn't enough because associated skb
+ * might sit in a queue.
+ */
+ return !ext || ext->gen_id == atomic_read(&nf_conntrack_ext_genid);
+}
+
+static bool nf_ct_ext_valid_post(struct nf_ct_ext *ext)
+{
+ if (!ext)
+ return true;
+
+ if (ext->gen_id != atomic_read(&nf_conntrack_ext_genid))
+ return false;
+
+ /* inserted into conntrack table, nf_ct_iterate_cleanup()
+ * will find it. Disable nf_ct_ext_find() id check.
+ */
+ WRITE_ONCE(ext->gen_id, 0);
+ return true;
+}
+
int
nf_conntrack_hash_check_insert(struct nf_conn *ct)
{
@@ -873,6 +890,11 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
zone = nf_ct_zone(ct);
+ if (!nf_ct_ext_valid_pre(ct->ext)) {
+ NF_CT_STAT_INC(net, insert_failed);
+ return -ETIMEDOUT;
+ }
+
local_bh_disable();
do {
sequence = read_seqcount_begin(&nf_conntrack_generation);
@@ -913,6 +935,13 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert);
local_bh_enable();
+
+ if (!nf_ct_ext_valid_post(ct->ext)) {
+ nf_ct_kill(ct);
+ NF_CT_STAT_INC(net, drop);
+ return -ETIMEDOUT;
+ }
+
return 0;
chaintoolong:
NF_CT_STAT_INC(net, chaintoolong);
@@ -960,7 +989,6 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct)
struct nf_conn_tstamp *tstamp;
refcount_inc(&ct->ct_general.use);
- ct->status |= IPS_CONFIRMED;
/* set conntrack timestamp, if enabled. */
tstamp = nf_conn_tstamp_find(ct);
@@ -989,7 +1017,6 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb,
nf_conntrack_get(&ct->ct_general);
nf_ct_acct_merge(ct, ctinfo, loser_ct);
- nf_ct_add_to_dying_list(loser_ct);
nf_ct_put(loser_ct);
nf_ct_set(skb, ct, ctinfo);
@@ -1122,7 +1149,6 @@ nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h,
return ret;
drop:
- nf_ct_add_to_dying_list(loser_ct);
NF_CT_STAT_INC(net, drop);
NF_CT_STAT_INC(net, insert_failed);
return NF_DROP;
@@ -1183,16 +1209,20 @@ __nf_conntrack_confirm(struct sk_buff *skb)
return NF_DROP;
}
+ if (!nf_ct_ext_valid_pre(ct->ext)) {
+ NF_CT_STAT_INC(net, insert_failed);
+ goto dying;
+ }
+
pr_debug("Confirming conntrack %p\n", ct);
/* We have to check the DYING flag after unlink to prevent
* a race against nf_ct_get_next_corpse() possibly called from
* user context, else we insert an already 'dead' hash, blocking
* further use of that particular connection -JM.
*/
- nf_ct_del_from_dying_or_unconfirmed_list(ct);
+ ct->status |= IPS_CONFIRMED;
if (unlikely(nf_ct_is_dying(ct))) {
- nf_ct_add_to_dying_list(ct);
NF_CT_STAT_INC(net, insert_failed);
goto dying;
}
@@ -1216,7 +1246,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
goto out;
if (chainlen++ > max_chainlen) {
chaintoolong:
- nf_ct_add_to_dying_list(ct);
NF_CT_STAT_INC(net, chaintoolong);
NF_CT_STAT_INC(net, insert_failed);
ret = NF_DROP;
@@ -1240,6 +1269,16 @@ chaintoolong:
nf_conntrack_double_unlock(hash, reply_hash);
local_bh_enable();
+	/* ext area is still valid (rcu read lock is held, but will go
+	 * out of scope soon); we need to remove this conntrack again.
+ */
+ if (!nf_ct_ext_valid_post(ct->ext)) {
+ nf_ct_kill(ct);
+ NF_CT_STAT_INC(net, drop);
+ return NF_DROP;
+ }
+
help = nfct_help(ct);
if (help && help->helper)
nf_conntrack_event_cache(IPCT_HELPER, ct);
@@ -1355,6 +1394,9 @@ static unsigned int early_drop_list(struct net *net,
if (!refcount_inc_not_zero(&tmp->ct_general.use))
continue;
+ /* load ->ct_net and ->status after refcount increase */
+ smp_acquire__after_ctrl_dep();
+
/* kill only if still in same netns -- might have moved due to
* SLAB_TYPESAFE_BY_RCU rules.
*
@@ -1421,16 +1463,31 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
static void gc_worker(struct work_struct *work)
{
- unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION;
unsigned int i, hashsz, nf_conntrack_max95 = 0;
- unsigned long next_run = GC_SCAN_INTERVAL;
+ u32 end_time, start_time = nfct_time_stamp;
struct conntrack_gc_work *gc_work;
+ unsigned int expired_count = 0;
+ unsigned long next_run;
+ s32 delta_time;
+ long count;
+
gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
i = gc_work->next_bucket;
if (gc_work->early_drop)
nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
+ if (i == 0) {
+ gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
+ gc_work->count = GC_SCAN_INITIAL_COUNT;
+ gc_work->start_time = start_time;
+ }
+
+ next_run = gc_work->avg_timeout;
+ count = gc_work->count;
+
+ end_time = start_time + GC_SCAN_MAX_DURATION;
+
do {
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
@@ -1448,6 +1505,7 @@ static void gc_worker(struct work_struct *work)
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
struct nf_conntrack_net *cnet;
struct net *net;
+ long expires;
tmp = nf_ct_tuplehash_to_ctrack(h);
@@ -1456,11 +1514,30 @@ static void gc_worker(struct work_struct *work)
continue;
}
+ if (expired_count > GC_SCAN_EXPIRED_MAX) {
+ rcu_read_unlock();
+
+ gc_work->next_bucket = i;
+ gc_work->avg_timeout = next_run;
+ gc_work->count = count;
+
+ delta_time = nfct_time_stamp - gc_work->start_time;
+
+ /* re-sched immediately if total cycle time is exceeded */
+ next_run = delta_time < (s32)GC_SCAN_INTERVAL_MAX;
+ goto early_exit;
+ }
+
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
+ expired_count++;
continue;
}
+ expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
+ expires = (expires - (long)next_run) / ++count;
+ next_run += expires;
+
if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
continue;
@@ -1473,13 +1550,18 @@ static void gc_worker(struct work_struct *work)
if (!refcount_inc_not_zero(&tmp->ct_general.use))
continue;
+ /* load ->status after refcount increase */
+ smp_acquire__after_ctrl_dep();
+
if (gc_worker_skip_ct(tmp)) {
nf_ct_put(tmp);
continue;
}
- if (gc_worker_can_early_drop(tmp))
+ if (gc_worker_can_early_drop(tmp)) {
nf_ct_kill(tmp);
+ expired_count++;
+ }
nf_ct_put(tmp);
}
@@ -1492,33 +1574,39 @@ static void gc_worker(struct work_struct *work)
cond_resched();
i++;
- if (time_after(jiffies, end_time) && i < hashsz) {
+ delta_time = nfct_time_stamp - end_time;
+ if (delta_time > 0 && i < hashsz) {
+ gc_work->avg_timeout = next_run;
+ gc_work->count = count;
gc_work->next_bucket = i;
next_run = 0;
- break;
+ goto early_exit;
}
} while (i < hashsz);
+ gc_work->next_bucket = 0;
+
+ next_run = clamp(next_run, GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_MAX);
+
+ delta_time = max_t(s32, nfct_time_stamp - gc_work->start_time, 1);
+ if (next_run > (unsigned long)delta_time)
+ next_run -= delta_time;
+ else
+ next_run = 1;
+
+early_exit:
if (gc_work->exiting)
return;
- /*
- * Eviction will normally happen from the packet path, and not
- * from this gc worker.
- *
- * This worker is only here to reap expired entries when system went
- * idle after a busy period.
- */
- if (next_run) {
+ if (next_run)
gc_work->early_drop = false;
- gc_work->next_bucket = 0;
- }
+
queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
}
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
- INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
+ INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
gc_work->exiting = false;
}
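
The avg_timeout/count pair in gc_worker() implements an incremental mean: with n samples folded in and current mean m, adding x via m += (x - m) / (n + 1) keeps m equal to the running average of the clamped expiry times, seeded by the 100-entries-at-GC_SCAN_INTERVAL_MAX bias. Reduced to its core:

/* running mean without storing samples; mirrors the
 * 'expires = (expires - next_run) / ++count; next_run += expires'
 * arithmetic above (integer division, so small drift is expected)
 */
static void fold_sample(long *mean, long *count, long sample)
{
	*mean += (sample - *mean) / ++(*count);
}

With the 100 x 60s seed, a handful of 1s timeouts barely moves the mean, which is exactly the intended damping.
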
@@ -1597,7 +1685,17 @@ void nf_conntrack_free(struct nf_conn *ct)
*/
WARN_ON(refcount_read(&ct->ct_general.use) != 0);
- nf_ct_ext_destroy(ct);
+ if (ct->status & IPS_SRC_NAT_DONE) {
+ const struct nf_nat_hook *nat_hook;
+
+ rcu_read_lock();
+ nat_hook = rcu_dereference(nf_nat_hook);
+ if (nat_hook)
+ nat_hook->remove_nat_bysrc(ct);
+ rcu_read_unlock();
+ }
+
+ kfree(ct->ext);
kmem_cache_free(nf_conntrack_cachep, ct);
cnet = nf_ct_pernet(net);
@@ -1618,7 +1716,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
struct nf_conn *ct;
struct nf_conn_help *help;
struct nf_conntrack_tuple repl_tuple;
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache;
+#endif
struct nf_conntrack_expect *exp = NULL;
const struct nf_conntrack_zone *zone;
struct nf_conn_timeout *timeout_ext;
@@ -1651,15 +1751,21 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
nf_ct_labels_ext_add(ct);
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
- nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
- ecache ? ecache->expmask : 0,
- GFP_ATOMIC);
- local_bh_disable();
+ if ((ecache || net->ct.sysctl_events) &&
+ !nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
+ ecache ? ecache->expmask : 0,
+ GFP_ATOMIC)) {
+ nf_conntrack_free(ct);
+ return ERR_PTR(-ENOMEM);
+ }
+#endif
+
cnet = nf_ct_pernet(net);
if (cnet->expect_count) {
- spin_lock(&nf_conntrack_expect_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
exp = nf_ct_find_expectation(net, zone, tuple);
if (exp) {
pr_debug("expectation arrives ct=%p exp=%p\n",
@@ -1682,16 +1788,23 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
#endif
NF_CT_STAT_INC(net, expect_new);
}
- spin_unlock(&nf_conntrack_expect_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
- if (!exp)
+ if (!exp && tmpl)
__nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
- /* Now it is inserted into the unconfirmed list, set refcount to 1. */
- refcount_set(&ct->ct_general.use, 1);
- nf_ct_add_to_unconfirmed_list(ct);
+ /* Another CPU might have obtained a pointer to this object before it
+ * was released. Because refcount is 0, refcount_inc_not_zero() will
+ * fail.
+ *
+ * After refcount_set(1) it will succeed; ensure that zeroing of
+ * ct->status and the correct ct->net pointer are visible; otherwise
+ * another core might observe the CONFIRMED bit, which means the entry
+ * is valid and in the hash table, but it's not (anymore).
+ */
+ smp_wmb();
- local_bh_enable();
+ /* Now it is going to be associated with an sk_buff, set refcount to 1. */
+ refcount_set(&ct->ct_general.use, 1);
if (exp) {
if (exp->expectfn)
@@ -1748,9 +1861,6 @@ resolve_normal_ct(struct nf_conn *tmpl,
return 0;
if (IS_ERR(h))
return PTR_ERR(h);
-
- ct = nf_ct_tuplehash_to_ctrack(h);
- ct->local_origin = state->hook == NF_INET_LOCAL_OUT;
}
ct = nf_ct_tuplehash_to_ctrack(h);
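
The smp_wmb()/refcount_set() pair in the init_conntrack() hunk above publishes a fully initialised entry; readers pair it with refcount_inc_not_zero() followed by smp_acquire__after_ctrl_dep(), as in the gc_worker() hunk earlier. A self-contained sketch of the same discipline using C11 atomics in place of the kernel primitives (hypothetical object; only the ordering mirrors the patch):

	/* Userspace sketch of the publish/observe pattern described above. */
	#include <stdatomic.h>
	#include <stdbool.h>

	struct obj {
		unsigned long status;	/* payload: must be visible before refs */
		atomic_uint refs;	/* 0 means "not yet published" */
	};

	static void publish(struct obj *o)
	{
		o->status = 0;
		/* Release ordering: all prior stores become visible once refs
		 * reads as 1 -- the counterpart of smp_wmb() + refcount_set().
		 */
		atomic_store_explicit(&o->refs, 1, memory_order_release);
	}

	static bool try_get(struct obj *o)
	{
		unsigned int old = atomic_load_explicit(&o->refs,
							memory_order_relaxed);
		do {
			if (old == 0)
				return false;	/* refcount_inc_not_zero() fails */
		} while (!atomic_compare_exchange_weak_explicit(&o->refs,
								&old, old + 1,
								memory_order_acquire,
								memory_order_relaxed));
		/* The acquire above plays the role of
		 * smp_acquire__after_ctrl_dep(): reads of ->status from here
		 * on see the published values.
		 */
		return true;
	}
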
@@ -1924,15 +2034,17 @@ repeat:
pr_debug("nf_conntrack_in: Can't track with proto module\n");
nf_ct_put(ct);
skb->_nfct = 0;
- NF_CT_STAT_INC_ATOMIC(state->net, invalid);
- if (ret == -NF_DROP)
- NF_CT_STAT_INC_ATOMIC(state->net, drop);
/* Special case: TCP tracker reports an attempt to reopen a
* closed/aborted connection. We have to go back and create a
* fresh conntrack.
*/
if (ret == -NF_REPEAT)
goto repeat;
+
+ NF_CT_STAT_INC_ATOMIC(state->net, invalid);
+ if (ret == -NF_DROP)
+ NF_CT_STAT_INC_ATOMIC(state->net, drop);
+
ret = -ret;
goto out;
}
@@ -1964,10 +2076,6 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
if (ct->master || (help && !hlist_empty(&help->expectations)))
return;
-
- rcu_read_lock();
- __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC);
- rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
@@ -2260,7 +2368,7 @@ static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
/* Bring out ya dead! */
static struct nf_conn *
get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
- void *data, unsigned int *bucket)
+ const struct nf_ct_iter_data *iter_data, unsigned int *bucket)
{
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
@@ -2291,7 +2399,12 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
* tuple while iterating.
*/
ct = nf_ct_tuplehash_to_ctrack(h);
- if (iter(ct, data))
+
+ if (iter_data->net &&
+ !net_eq(iter_data->net, nf_ct_net(ct)))
+ continue;
+
+ if (iter(ct, iter_data->data))
goto found;
}
spin_unlock(lockp);
@@ -2308,7 +2421,7 @@ found:
}
static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
- void *data, u32 portid, int report)
+ const struct nf_ct_iter_data *iter_data)
{
unsigned int bucket = 0;
struct nf_conn *ct;
@@ -2316,91 +2429,28 @@ static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
might_sleep();
mutex_lock(&nf_conntrack_mutex);
- while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
+ while ((ct = get_next_corpse(iter, iter_data, &bucket)) != NULL) {
/* Time to push up daises... */
- nf_ct_delete(ct, portid, report);
+ nf_ct_delete(ct, iter_data->portid, iter_data->report);
nf_ct_put(ct);
cond_resched();
}
mutex_unlock(&nf_conntrack_mutex);
}
-struct iter_data {
- int (*iter)(struct nf_conn *i, void *data);
- void *data;
- struct net *net;
-};
-
-static int iter_net_only(struct nf_conn *i, void *data)
-{
- struct iter_data *d = data;
-
- if (!net_eq(d->net, nf_ct_net(i)))
- return 0;
-
- return d->iter(i, d->data);
-}
-
-static void
-__nf_ct_unconfirmed_destroy(struct net *net)
-{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct nf_conntrack_tuple_hash *h;
- struct hlist_nulls_node *n;
- struct ct_pcpu *pcpu;
-
- pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
-
- spin_lock_bh(&pcpu->lock);
- hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) {
- struct nf_conn *ct;
-
- ct = nf_ct_tuplehash_to_ctrack(h);
-
- /* we cannot call iter() on unconfirmed list, the
- * owning cpu can reallocate ct->ext at any time.
- */
- set_bit(IPS_DYING_BIT, &ct->status);
- }
- spin_unlock_bh(&pcpu->lock);
- cond_resched();
- }
-}
-
-void nf_ct_unconfirmed_destroy(struct net *net)
+void nf_ct_iterate_cleanup_net(int (*iter)(struct nf_conn *i, void *data),
+ const struct nf_ct_iter_data *iter_data)
{
+ struct net *net = iter_data->net;
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
might_sleep();
- if (atomic_read(&cnet->count) > 0) {
- __nf_ct_unconfirmed_destroy(net);
- nf_queue_nf_hook_drop(net);
- synchronize_net();
- }
-}
-EXPORT_SYMBOL_GPL(nf_ct_unconfirmed_destroy);
-
-void nf_ct_iterate_cleanup_net(struct net *net,
- int (*iter)(struct nf_conn *i, void *data),
- void *data, u32 portid, int report)
-{
- struct nf_conntrack_net *cnet = nf_ct_pernet(net);
- struct iter_data d;
-
- might_sleep();
-
if (atomic_read(&cnet->count) == 0)
return;
- d.iter = iter;
- d.data = data;
- d.net = net;
-
- nf_ct_iterate_cleanup(iter_net_only, &d, portid, report);
+ nf_ct_iterate_cleanup(iter, iter_data);
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net);
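
With the reworked signature above, callers pass one struct nf_ct_iter_data instead of separate net/portid/report arguments, and per-net filtering happens inside get_next_corpse(). A caller sketch under that assumption (the iterator and its policy are hypothetical):

	/* Hypothetical caller of the reworked iteration API shown above. */
	static int kill_assured(struct nf_conn *ct, void *data)
	{
		/* flush only entries with the ASSURED bit; data unused here */
		return test_bit(IPS_ASSURED_BIT, &ct->status);
	}

	static void flush_assured(struct net *net)
	{
		struct nf_ct_iter_data iter_data = {
			.net	= net,	/* filtering is done in get_next_corpse() */
			.portid	= 0,
			.report	= 0,
		};

		nf_ct_iterate_cleanup_net(kill_assured, &iter_data);
	}
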
@@ -2418,6 +2468,7 @@ EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net);
void
nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
{
+ struct nf_ct_iter_data iter_data = {};
struct net *net;
down_read(&net_rwsem);
@@ -2426,35 +2477,46 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
if (atomic_read(&cnet->count) == 0)
continue;
- __nf_ct_unconfirmed_destroy(net);
nf_queue_nf_hook_drop(net);
}
up_read(&net_rwsem);
/* Need to wait for netns cleanup worker to finish, if its
* running -- it might have deleted a net namespace from
- * the global list, so our __nf_ct_unconfirmed_destroy() might
- * not have affected all namespaces.
+ * the global list, so hook drop above might not have
+ * affected all namespaces.
*/
net_ns_barrier();
- /* a conntrack could have been unlinked from unconfirmed list
- * before we grabbed pcpu lock in __nf_ct_unconfirmed_destroy().
+ /* An skb with an unconfirmed conntrack could have been reinjected just
+ * before we called nf_queue_nf_hook_drop().
+ *
+ * This makes sure it's inserted into the conntrack table.
*/
synchronize_net();
- nf_ct_iterate_cleanup(iter, data, 0, 0);
+ nf_ct_ext_bump_genid();
+ iter_data.data = data;
+ nf_ct_iterate_cleanup(iter, &iter_data);
+
+ /* Another CPU might be in an RCU read-side section that still sees
+ * an RCU-protected pointer cleared in the iter callback, or one
+ * hidden via nf_ct_ext_bump_genid() above.
+ *
+ * Wait until those are done.
+ */
+ synchronize_rcu();
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_destroy);
static int kill_all(struct nf_conn *i, void *data)
{
- return net_eq(nf_ct_net(i), data);
+ return 1;
}
void nf_conntrack_cleanup_start(void)
{
+ cleanup_nf_conntrack_bpf();
conntrack_gc_work.exiting = true;
}
@@ -2465,13 +2527,7 @@ void nf_conntrack_cleanup_end(void)
kvfree(nf_conntrack_hash);
nf_conntrack_proto_fini();
- nf_conntrack_seqadj_fini();
- nf_conntrack_labels_fini();
nf_conntrack_helper_fini();
- nf_conntrack_timeout_fini();
- nf_conntrack_ecache_fini();
- nf_conntrack_tstamp_fini();
- nf_conntrack_acct_fini();
nf_conntrack_expect_fini();
kmem_cache_destroy(nf_conntrack_cachep);
@@ -2491,8 +2547,9 @@ void nf_conntrack_cleanup_net(struct net *net)
void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
{
- int busy;
+ struct nf_ct_iter_data iter_data = {};
struct net *net;
+ int busy;
/*
* This makes sure all current packets have passed through
@@ -2505,7 +2562,8 @@ i_see_dead_people:
list_for_each_entry(net, net_exit_list, exit_list) {
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
- nf_ct_iterate_cleanup(kill_all, net, 0, 0);
+ iter_data.net = net;
+ nf_ct_iterate_cleanup_net(kill_all, &iter_data);
if (atomic_read(&cnet->count) != 0)
busy = 1;
}
@@ -2518,7 +2576,6 @@ i_see_dead_people:
nf_conntrack_ecache_pernet_fini(net);
nf_conntrack_expect_pernet_fini(net);
free_percpu(net->ct.stat);
- free_percpu(net->ct.pcpu_lists);
}
}
@@ -2626,39 +2683,6 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
return nf_conntrack_hash_resize(hashsize);
}
-static __always_inline unsigned int total_extension_size(void)
-{
- /* remember to add new extensions below */
- BUILD_BUG_ON(NF_CT_EXT_NUM > 10);
-
- return sizeof(struct nf_ct_ext) +
- sizeof(struct nf_conn_help)
-#if IS_ENABLED(CONFIG_NF_NAT)
- + sizeof(struct nf_conn_nat)
-#endif
- + sizeof(struct nf_conn_seqadj)
- + sizeof(struct nf_conn_acct)
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
- + sizeof(struct nf_conntrack_ecache)
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
- + sizeof(struct nf_conn_tstamp)
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
- + sizeof(struct nf_conn_timeout)
-#endif
-#ifdef CONFIG_NF_CONNTRACK_LABELS
- + sizeof(struct nf_conn_labels)
-#endif
-#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
- + sizeof(struct nf_conn_synproxy)
-#endif
-#if IS_ENABLED(CONFIG_NET_ACT_CT)
- + sizeof(struct nf_conn_act_ct_ext)
-#endif
- ;
-};
-
int nf_conntrack_init_start(void)
{
unsigned long nr_pages = totalram_pages();
@@ -2666,9 +2690,6 @@ int nf_conntrack_init_start(void)
int ret = -ENOMEM;
int i;
- /* struct nf_ct_ext uses u8 to store offsets/size */
- BUILD_BUG_ON(total_extension_size() > 255u);
-
seqcount_spinlock_init(&nf_conntrack_generation,
&nf_conntrack_locks_all_lock);
@@ -2713,34 +2734,10 @@ int nf_conntrack_init_start(void)
if (ret < 0)
goto err_expect;
- ret = nf_conntrack_acct_init();
- if (ret < 0)
- goto err_acct;
-
- ret = nf_conntrack_tstamp_init();
- if (ret < 0)
- goto err_tstamp;
-
- ret = nf_conntrack_ecache_init();
- if (ret < 0)
- goto err_ecache;
-
- ret = nf_conntrack_timeout_init();
- if (ret < 0)
- goto err_timeout;
-
ret = nf_conntrack_helper_init();
if (ret < 0)
goto err_helper;
- ret = nf_conntrack_labels_init();
- if (ret < 0)
- goto err_labels;
-
- ret = nf_conntrack_seqadj_init();
- if (ret < 0)
- goto err_seqadj;
-
ret = nf_conntrack_proto_init();
if (ret < 0)
goto err_proto;
@@ -2748,23 +2745,18 @@ int nf_conntrack_init_start(void)
conntrack_gc_work_init(&conntrack_gc_work);
queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
+ ret = register_nf_conntrack_bpf();
+ if (ret < 0)
+ goto err_kfunc;
+
return 0;
+err_kfunc:
+ cancel_delayed_work_sync(&conntrack_gc_work.dwork);
+ nf_conntrack_proto_fini();
err_proto:
- nf_conntrack_seqadj_fini();
-err_seqadj:
- nf_conntrack_labels_fini();
-err_labels:
nf_conntrack_helper_fini();
err_helper:
- nf_conntrack_timeout_fini();
-err_timeout:
- nf_conntrack_ecache_fini();
-err_ecache:
- nf_conntrack_tstamp_fini();
-err_tstamp:
- nf_conntrack_acct_fini();
-err_acct:
nf_conntrack_expect_fini();
err_expect:
kmem_cache_destroy(nf_conntrack_cachep);
@@ -2789,33 +2781,19 @@ void nf_conntrack_init_end(void)
* We need to use special "null" values, not used in hash table
*/
#define UNCONFIRMED_NULLS_VAL ((1<<30)+0)
-#define DYING_NULLS_VAL ((1<<30)+1)
int nf_conntrack_init_net(struct net *net)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
int ret = -ENOMEM;
- int cpu;
BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER);
BUILD_BUG_ON_NOT_POWER_OF_2(CONNTRACK_LOCKS);
atomic_set(&cnet->count, 0);
- net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
- if (!net->ct.pcpu_lists)
- goto err_stat;
-
- for_each_possible_cpu(cpu) {
- struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
-
- spin_lock_init(&pcpu->lock);
- INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL);
- INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL);
- }
-
net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
if (!net->ct.stat)
- goto err_pcpu_lists;
+ return ret;
ret = nf_conntrack_expect_pernet_init(net);
if (ret < 0)
@@ -2824,15 +2802,67 @@ int nf_conntrack_init_net(struct net *net)
nf_conntrack_acct_pernet_init(net);
nf_conntrack_tstamp_pernet_init(net);
nf_conntrack_ecache_pernet_init(net);
- nf_conntrack_helper_pernet_init(net);
nf_conntrack_proto_pernet_init(net);
return 0;
err_expect:
free_percpu(net->ct.stat);
-err_pcpu_lists:
- free_percpu(net->ct.pcpu_lists);
-err_stat:
return ret;
}
+
+/* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */
+
+int __nf_ct_change_timeout(struct nf_conn *ct, u64 timeout)
+{
+ if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
+ return -EPERM;
+
+ __nf_ct_set_timeout(ct, timeout);
+
+ if (test_bit(IPS_DYING_BIT, &ct->status))
+ return -ETIME;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__nf_ct_change_timeout);
+
+void __nf_ct_change_status(struct nf_conn *ct, unsigned long on, unsigned long off)
+{
+ unsigned int bit;
+
+ /* Ignore these unchangeable bits */
+ on &= ~IPS_UNCHANGEABLE_MASK;
+ off &= ~IPS_UNCHANGEABLE_MASK;
+
+ for (bit = 0; bit < __IPS_MAX_BIT; bit++) {
+ if (on & (1 << bit))
+ set_bit(bit, &ct->status);
+ else if (off & (1 << bit))
+ clear_bit(bit, &ct->status);
+ }
+}
+EXPORT_SYMBOL_GPL(__nf_ct_change_status);
+
+int nf_ct_change_status_common(struct nf_conn *ct, unsigned int status)
+{
+ unsigned long d;
+
+ d = ct->status ^ status;
+
+ if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
+ /* unchangeable */
+ return -EBUSY;
+
+ if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
+ /* SEEN_REPLY bit can only be set */
+ return -EBUSY;
+
+ if (d & IPS_ASSURED && !(status & IPS_ASSURED))
+ /* ASSURED bit can only be set */
+ return -EBUSY;
+
+ __nf_ct_change_status(ct, status, 0);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_change_status_common);
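
To make the XOR test in nf_ct_change_status_common() concrete, here is a worked example with illustrative flag values:

	/* Worked example for nf_ct_change_status_common():
	 *
	 *   ct->status = IPS_CONFIRMED | IPS_SEEN_REPLY
	 *   status     = IPS_CONFIRMED | IPS_SEEN_REPLY | IPS_ASSURED
	 *
	 *   d = ct->status ^ status = IPS_ASSURED
	 *
	 * d contains no unchangeable bit, and IPS_ASSURED is being set
	 * rather than cleared, so the request is accepted and
	 * __nf_ct_change_status() turns the bit on. Requesting a status
	 * without IPS_SEEN_REPLY instead would make d contain
	 * IPS_SEEN_REPLY with the bit clear in status, and the function
	 * would return -EBUSY.
	 */
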
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 41768ff19464..8698b3424646 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -16,7 +16,6 @@
#include <linux/vmalloc.h>
#include <linux/stddef.h>
#include <linux/err.h>
-#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
@@ -29,8 +28,9 @@
static DEFINE_MUTEX(nf_ct_ecache_mutex);
-#define ECACHE_RETRY_WAIT (HZ/10)
-#define ECACHE_STACK_ALLOC (256 / sizeof(void *))
+#define DYING_NULLS_VAL ((1 << 30) + 1)
+#define ECACHE_MAX_JIFFIES msecs_to_jiffies(10)
+#define ECACHE_RETRY_JIFFIES msecs_to_jiffies(10)
enum retry_state {
STATE_CONGESTED,
@@ -38,106 +38,100 @@ enum retry_state {
STATE_DONE,
};
-static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
+struct nf_conntrack_net_ecache *nf_conn_pernet_ecache(const struct net *net)
{
- struct nf_conn *refs[ECACHE_STACK_ALLOC];
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+
+ return &cnet->ecache;
+}
+#if IS_MODULE(CONFIG_NF_CT_NETLINK)
+EXPORT_SYMBOL_GPL(nf_conn_pernet_ecache);
+#endif
+
+static enum retry_state ecache_work_evict_list(struct nf_conntrack_net *cnet)
+{
+ unsigned long stop = jiffies + ECACHE_MAX_JIFFIES;
+ struct hlist_nulls_head evicted_list;
enum retry_state ret = STATE_DONE;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
- unsigned int evicted = 0;
+ unsigned int sent;
+
+ INIT_HLIST_NULLS_HEAD(&evicted_list, DYING_NULLS_VAL);
- spin_lock(&pcpu->lock);
+next:
+ sent = 0;
+ spin_lock_bh(&cnet->ecache.dying_lock);
- hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
+ hlist_nulls_for_each_entry_safe(h, n, &cnet->ecache.dying_list, hnnode) {
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
- struct nf_conntrack_ecache *e;
-
- if (!nf_ct_is_confirmed(ct))
- continue;
-
- /* This ecache access is safe because the ct is on the
- * pcpu dying list and we hold the spinlock -- the entry
- * cannot be free'd until after the lock is released.
- *
- * This is true even if ct has a refcount of 0: the
- * cpu that is about to free the entry must remove it
- * from the dying list and needs the lock to do so.
- */
- e = nf_ct_ecache_find(ct);
- if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL)
- continue;
- /* ct is in NFCT_ECACHE_DESTROY_FAIL state, this means
- * the worker owns this entry: the ct will remain valid
- * until the worker puts its ct reference.
+ /* The worker owns all entries; ct remains valid until the nf_ct_put
+ * in the loop below.
*/
if (nf_conntrack_event(IPCT_DESTROY, ct)) {
ret = STATE_CONGESTED;
break;
}
- e->state = NFCT_ECACHE_DESTROY_SENT;
- refs[evicted] = ct;
+ hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+ hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, &evicted_list);
- if (++evicted >= ARRAY_SIZE(refs)) {
+ if (time_after(jiffies, stop)) {
ret = STATE_RESTART;
break;
}
+
+ if (sent++ > 16) {
+ spin_unlock_bh(&cnet->ecache.dying_lock);
+ cond_resched();
+ goto next;
+ }
}
- spin_unlock(&pcpu->lock);
+ spin_unlock_bh(&cnet->ecache.dying_lock);
- /* can't _put while holding lock */
- while (evicted)
- nf_ct_put(refs[--evicted]);
+ hlist_nulls_for_each_entry_safe(h, n, &evicted_list, hnnode) {
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+ hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);
+ nf_ct_put(ct);
+
+ cond_resched();
+ }
return ret;
}
static void ecache_work(struct work_struct *work)
{
- struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache_dwork.work);
- struct netns_ct *ctnet = cnet->ct_net;
- int cpu, delay = -1;
- struct ct_pcpu *pcpu;
-
- local_bh_disable();
-
- for_each_possible_cpu(cpu) {
- enum retry_state ret;
-
- pcpu = per_cpu_ptr(ctnet->pcpu_lists, cpu);
-
- ret = ecache_work_evict_list(pcpu);
-
- switch (ret) {
- case STATE_CONGESTED:
- delay = ECACHE_RETRY_WAIT;
- goto out;
- case STATE_RESTART:
- delay = 0;
- break;
- case STATE_DONE:
- break;
- }
+ struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache.dwork.work);
+ int ret, delay = -1;
+
+ ret = ecache_work_evict_list(cnet);
+ switch (ret) {
+ case STATE_CONGESTED:
+ delay = ECACHE_RETRY_JIFFIES;
+ break;
+ case STATE_RESTART:
+ delay = 0;
+ break;
+ case STATE_DONE:
+ break;
}
- out:
- local_bh_enable();
-
- ctnet->ecache_dwork_pending = delay > 0;
if (delay >= 0)
- schedule_delayed_work(&cnet->ecache_dwork, delay);
+ schedule_delayed_work(&cnet->ecache.dwork, delay);
}
static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
- const unsigned int events,
- const unsigned long missed,
+ const u32 events,
+ const u32 missed,
const struct nf_ct_event *item)
{
- struct nf_conn *ct = item->ct;
struct net *net = nf_ct_net(item->ct);
struct nf_ct_event_notifier *notify;
+ u32 old, want;
int ret;
if (!((events | missed) & e->ctmask))
@@ -157,12 +151,13 @@ static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
if (likely(ret >= 0 && missed == 0))
return 0;
- spin_lock_bh(&ct->lock);
- if (ret < 0)
- e->missed |= events;
- else
- e->missed &= ~missed;
- spin_unlock_bh(&ct->lock);
+ do {
+ old = READ_ONCE(e->missed);
+ if (ret < 0)
+ want = old | events;
+ else
+ want = old & ~missed;
+ } while (cmpxchg(&e->missed, old, want) != old);
return ret;
}
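
The cmpxchg() loop above replaces the ct->lock critical section for updating e->missed: it recomputes the desired value until the compare-and-swap observes an unmodified word. An equivalent userspace sketch with C11 atomics (field and helper names are illustrative):

	/* Userspace sketch of the lockless e->missed update shown above. */
	#include <stdatomic.h>

	static void update_missed(atomic_uint *missed, unsigned int events,
				  unsigned int handled, int ret)
	{
		unsigned int old, want;

		old = atomic_load_explicit(missed, memory_order_relaxed);
		do {
			if (ret < 0)
				want = old | events;	/* delivery failed: remember */
			else
				want = old & ~handled;	/* redelivered: clear backlog */
			/* on CAS failure, old is reloaded and want recomputed */
		} while (!atomic_compare_exchange_weak_explicit(missed, &old, want,
								memory_order_relaxed,
								memory_order_relaxed));
	}
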
@@ -172,7 +167,7 @@ int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
{
struct nf_conntrack_ecache *e;
struct nf_ct_event item;
- unsigned long missed;
+ unsigned int missed;
int ret;
if (!nf_ct_is_confirmed(ct))
@@ -198,7 +193,6 @@ int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
*/
if (e->portid == 0 && portid != 0)
e->portid = portid;
- e->state = NFCT_ECACHE_DESTROY_FAIL;
}
return ret;
@@ -211,7 +205,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
struct nf_conntrack_ecache *e;
struct nf_ct_event item;
- unsigned long events;
+ unsigned int events;
if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
return;
@@ -292,52 +286,73 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
if (state == NFCT_ECACHE_DESTROY_FAIL &&
- !delayed_work_pending(&cnet->ecache_dwork)) {
- schedule_delayed_work(&cnet->ecache_dwork, HZ);
+ !delayed_work_pending(&cnet->ecache.dwork)) {
+ schedule_delayed_work(&cnet->ecache.dwork, HZ);
net->ct.ecache_dwork_pending = true;
} else if (state == NFCT_ECACHE_DESTROY_SENT) {
- net->ct.ecache_dwork_pending = false;
- mod_delayed_work(system_wq, &cnet->ecache_dwork, 0);
+ if (!hlist_nulls_empty(&cnet->ecache.dying_list))
+ mod_delayed_work(system_wq, &cnet->ecache.dwork, 0);
+ else
+ net->ct.ecache_dwork_pending = false;
}
}
-#define NF_CT_EVENTS_DEFAULT 1
-static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
+bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
+{
+ struct net *net = nf_ct_net(ct);
+ struct nf_conntrack_ecache *e;
-static const struct nf_ct_ext_type event_extend = {
- .len = sizeof(struct nf_conntrack_ecache),
- .align = __alignof__(struct nf_conntrack_ecache),
- .id = NF_CT_EXT_ECACHE,
-};
+ switch (net->ct.sysctl_events) {
+ case 0:
+ /* assignment via template / ruleset? ignore sysctl. */
+ if (ctmask || expmask)
+ break;
+ return true;
+ case 2: /* autodetect: no event listener, don't allocate extension. */
+ if (!READ_ONCE(net->ct.ctnetlink_has_listener))
+ return true;
+ fallthrough;
+ case 1:
+ /* always allocate an extension. */
+ if (!ctmask && !expmask) {
+ ctmask = ~0;
+ expmask = ~0;
+ }
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return true;
+ }
-void nf_conntrack_ecache_pernet_init(struct net *net)
-{
- struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+ e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp);
+ if (e) {
+ e->ctmask = ctmask;
+ e->expmask = expmask;
+ }
- net->ct.sysctl_events = nf_ct_events;
- cnet->ct_net = &net->ct;
- INIT_DELAYED_WORK(&cnet->ecache_dwork, ecache_work);
+ return e != NULL;
}
+EXPORT_SYMBOL_GPL(nf_ct_ecache_ext_add);
-void nf_conntrack_ecache_pernet_fini(struct net *net)
+#define NF_CT_EVENTS_DEFAULT 2
+static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
+
+void nf_conntrack_ecache_pernet_init(struct net *net)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
- cancel_delayed_work_sync(&cnet->ecache_dwork);
-}
-
-int nf_conntrack_ecache_init(void)
-{
- int ret = nf_ct_extend_register(&event_extend);
- if (ret < 0)
- pr_err("Unable to register event extension\n");
+ net->ct.sysctl_events = nf_ct_events;
- BUILD_BUG_ON(__IPCT_MAX >= 16); /* ctmask, missed use u16 */
+ INIT_DELAYED_WORK(&cnet->ecache.dwork, ecache_work);
+ INIT_HLIST_NULLS_HEAD(&cnet->ecache.dying_list, DYING_NULLS_VAL);
+ spin_lock_init(&cnet->ecache.dying_lock);
- return ret;
+ BUILD_BUG_ON(__IPCT_MAX >= 16); /* e->ctmask is u16 */
}
-void nf_conntrack_ecache_fini(void)
+void nf_conntrack_ecache_pernet_fini(struct net *net)
{
- nf_ct_extend_unregister(&event_extend);
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+
+ cancel_delayed_work_sync(&cnet->ecache.dwork);
}
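
For reference, the three net.netfilter.nf_conntrack_events modes handled by the nf_ct_ecache_ext_add() switch above can be summarised as follows (mode 2 is the new default set a few hunks up):

	/* Summary of the sysctl_events modes in nf_ct_ecache_ext_add():
	 *
	 *   0 - events off; the extension is only added when a template or
	 *       ruleset explicitly requested masks (ctmask/expmask nonzero).
	 *   1 - events on; the extension is always added, and empty masks
	 *       are widened to ~0.
	 *   2 - autodetect; behaves like 1 once ctnetlink_has_listener is
	 *       set, and skips the allocation while nobody listens.
	 */
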
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 3dbe2329c3f1..0b513f7bf9f3 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -13,40 +13,92 @@
#include <linux/skbuff.h>
#include <net/netfilter/nf_conntrack_extend.h>
-static struct nf_ct_ext_type __rcu *nf_ct_ext_types[NF_CT_EXT_NUM];
-static DEFINE_MUTEX(nf_ct_ext_type_mutex);
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_act_ct.h>
+#include <net/netfilter/nf_nat.h>
+
#define NF_CT_EXT_PREALLOC 128u /* conntrack events are on by default */
-void nf_ct_ext_destroy(struct nf_conn *ct)
+atomic_t nf_conntrack_ext_genid __read_mostly = ATOMIC_INIT(1);
+
+static const u8 nf_ct_ext_type_len[NF_CT_EXT_NUM] = {
+ [NF_CT_EXT_HELPER] = sizeof(struct nf_conn_help),
+#if IS_ENABLED(CONFIG_NF_NAT)
+ [NF_CT_EXT_NAT] = sizeof(struct nf_conn_nat),
+#endif
+ [NF_CT_EXT_SEQADJ] = sizeof(struct nf_conn_seqadj),
+ [NF_CT_EXT_ACCT] = sizeof(struct nf_conn_acct),
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ [NF_CT_EXT_ECACHE] = sizeof(struct nf_conntrack_ecache),
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ [NF_CT_EXT_TSTAMP] = sizeof(struct nf_conn_tstamp),
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ [NF_CT_EXT_TIMEOUT] = sizeof(struct nf_conn_timeout),
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ [NF_CT_EXT_LABELS] = sizeof(struct nf_conn_labels),
+#endif
+#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
+ [NF_CT_EXT_SYNPROXY] = sizeof(struct nf_conn_synproxy),
+#endif
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+ [NF_CT_EXT_ACT_CT] = sizeof(struct nf_conn_act_ct_ext),
+#endif
+};
+
+static __always_inline unsigned int total_extension_size(void)
{
- unsigned int i;
- struct nf_ct_ext_type *t;
-
- for (i = 0; i < NF_CT_EXT_NUM; i++) {
- rcu_read_lock();
- t = rcu_dereference(nf_ct_ext_types[i]);
-
- /* Here the nf_ct_ext_type might have been unregisterd.
- * I.e., it has responsible to cleanup private
- * area in all conntracks when it is unregisterd.
- */
- if (t && t->destroy)
- t->destroy(ct);
- rcu_read_unlock();
- }
-
- kfree(ct->ext);
+ /* remember to add new extensions below */
+ BUILD_BUG_ON(NF_CT_EXT_NUM > 10);
+
+ return sizeof(struct nf_ct_ext) +
+ sizeof(struct nf_conn_help)
+#if IS_ENABLED(CONFIG_NF_NAT)
+ + sizeof(struct nf_conn_nat)
+#endif
+ + sizeof(struct nf_conn_seqadj)
+ + sizeof(struct nf_conn_acct)
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ + sizeof(struct nf_conntrack_ecache)
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ + sizeof(struct nf_conn_tstamp)
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ + sizeof(struct nf_conn_timeout)
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ + sizeof(struct nf_conn_labels)
+#endif
+#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
+ + sizeof(struct nf_conn_synproxy)
+#endif
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+ + sizeof(struct nf_conn_act_ct_ext)
+#endif
+ ;
}
void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
{
unsigned int newlen, newoff, oldlen, alloc;
- struct nf_ct_ext_type *t;
struct nf_ct_ext *new;
/* Conntrack must not be confirmed to avoid races on reallocation. */
WARN_ON(nf_ct_is_confirmed(ct));
+ /* struct nf_ct_ext uses u8 to store offsets/size */
+ BUILD_BUG_ON(total_extension_size() > 255u);
if (ct->ext) {
const struct nf_ct_ext *old = ct->ext;
@@ -58,24 +110,18 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
oldlen = sizeof(*new);
}
- rcu_read_lock();
- t = rcu_dereference(nf_ct_ext_types[id]);
- if (!t) {
- rcu_read_unlock();
- return NULL;
- }
-
- newoff = ALIGN(oldlen, t->align);
- newlen = newoff + t->len;
- rcu_read_unlock();
+ newoff = ALIGN(oldlen, __alignof__(struct nf_ct_ext));
+ newlen = newoff + nf_ct_ext_type_len[id];
alloc = max(newlen, NF_CT_EXT_PREALLOC);
new = krealloc(ct->ext, alloc, gfp);
if (!new)
return NULL;
- if (!ct->ext)
+ if (!ct->ext) {
memset(new->offset, 0, sizeof(new->offset));
+ new->gen_id = atomic_read(&nf_conntrack_ext_genid);
+ }
new->offset[id] = newoff;
new->len = newlen;
@@ -86,30 +132,28 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
}
EXPORT_SYMBOL(nf_ct_ext_add);
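
Since all extensions now live in a single krealloc()'d blob, nf_ct_ext_add() aligns the current length and records a per-id u8 offset. An illustrative layout after two additions (ids and sizes chosen for the example):

	/* Illustrative layout after two nf_ct_ext_add() calls:
	 *
	 *   ct->ext --> +---------------------+ offset 0
	 *               | struct nf_ct_ext    |  header: offset[], len, gen_id
	 *               +---------------------+ offset[NF_CT_EXT_HELPER]
	 *               | struct nf_conn_help |
	 *               +---------------------+ offset[NF_CT_EXT_ACCT] (ALIGNed)
	 *               | struct nf_conn_acct |
	 *               +---------------------+ <- new->len
	 *
	 * Each added id extends the same allocation; that is why the
	 * BUILD_BUG_ON above caps the total at 255 bytes (u8 offsets).
	 */
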
-/* This MUST be called in process context. */
-int nf_ct_extend_register(const struct nf_ct_ext_type *type)
+/* Use nf_ct_ext_find wrapper. This is only useful for unconfirmed entries. */
+void *__nf_ct_ext_find(const struct nf_ct_ext *ext, u8 id)
{
- int ret = 0;
+ unsigned int gen_id = atomic_read(&nf_conntrack_ext_genid);
+ unsigned int this_id = READ_ONCE(ext->gen_id);
- mutex_lock(&nf_ct_ext_type_mutex);
- if (nf_ct_ext_types[type->id]) {
- ret = -EBUSY;
- goto out;
- }
+ if (!__nf_ct_ext_exist(ext, id))
+ return NULL;
- rcu_assign_pointer(nf_ct_ext_types[type->id], type);
-out:
- mutex_unlock(&nf_ct_ext_type_mutex);
- return ret;
+ if (this_id == 0 || ext->gen_id == gen_id)
+ return (void *)ext + ext->offset[id];
+
+ return NULL;
}
-EXPORT_SYMBOL_GPL(nf_ct_extend_register);
+EXPORT_SYMBOL(__nf_ct_ext_find);
-/* This MUST be called in process context. */
-void nf_ct_extend_unregister(const struct nf_ct_ext_type *type)
+void nf_ct_ext_bump_genid(void)
{
- mutex_lock(&nf_ct_ext_type_mutex);
- RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL);
- mutex_unlock(&nf_ct_ext_type_mutex);
- synchronize_rcu();
+ unsigned int value = atomic_inc_return(&nf_conntrack_ext_genid);
+
+ if (value == UINT_MAX)
+ atomic_set(&nf_conntrack_ext_genid, 1);
+
+ msleep(HZ);
}
-EXPORT_SYMBOL_GPL(nf_ct_extend_unregister);
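
With the extension types now static, unregister-time destruction is replaced by the generation id: bumping it makes __nf_ct_ext_find() treat extensions allocated under an older generation as absent. The implied lifecycle, sketched as a comment (step 4 is an assumption about code outside this excerpt):

	/* Generation-id lifecycle implied by the two functions above:
	 *
	 *   1. entry allocated: ext->gen_id = nf_conntrack_ext_genid (e.g. 5)
	 *   2. nf_ct_iterate_destroy() calls nf_ct_ext_bump_genid(): genid = 6
	 *   3. __nf_ct_ext_find() on the old entry: this_id (5) != genid (6)
	 *      and this_id != 0, so the extension is reported as absent
	 *   4. entries store gen_id 0 once confirmed and then always match,
	 *      since only unconfirmed entries need the hiding -- this step
	 *      is an assumption; the hunk clearing gen_id is not shown here.
	 */
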
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index a414274338cf..617f744a2e3a 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -33,10 +33,6 @@ MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
MODULE_DESCRIPTION("ftp connection tracking helper");
MODULE_ALIAS("ip_conntrack_ftp");
MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
-
-/* This is slow, but it's simple. --RR */
-static char *ftp_buffer;
-
static DEFINE_SPINLOCK(nf_ftp_lock);
#define MAX_PORTS 8
@@ -398,6 +394,9 @@ static int help(struct sk_buff *skb,
return NF_ACCEPT;
}
+ if (unlikely(skb_linearize(skb)))
+ return NF_DROP;
+
th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
if (th == NULL)
return NF_ACCEPT;
@@ -411,12 +410,9 @@ static int help(struct sk_buff *skb,
}
datalen = skb->len - dataoff;
+ /* seqadj (nat) uses ct->lock internally, nf_nat_ftp would cause deadlock */
spin_lock_bh(&nf_ftp_lock);
- fb_ptr = skb_header_pointer(skb, dataoff, datalen, ftp_buffer);
- if (!fb_ptr) {
- spin_unlock_bh(&nf_ftp_lock);
- return NF_ACCEPT;
- }
+ fb_ptr = skb->data + dataoff;
ends_in_nl = (fb_ptr[datalen - 1] == '\n');
seq = ntohl(th->seq) + datalen;
@@ -571,7 +567,6 @@ static const struct nf_conntrack_expect_policy ftp_exp_policy = {
static void __exit nf_conntrack_ftp_fini(void)
{
nf_conntrack_helpers_unregister(ftp, ports_c * 2);
- kfree(ftp_buffer);
}
static int __init nf_conntrack_ftp_init(void)
@@ -580,10 +575,6 @@ static int __init nf_conntrack_ftp_init(void)
NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_ftp_master));
- ftp_buffer = kmalloc(65536, GFP_KERNEL);
- if (!ftp_buffer)
- return -ENOMEM;
-
if (ports_c == 0)
ports[ports_c++] = FTP_PORT;
@@ -603,7 +594,6 @@ static int __init nf_conntrack_ftp_init(void)
ret = nf_conntrack_helpers_register(ftp, ports_c * 2);
if (ret < 0) {
pr_err("failed to register helpers\n");
- kfree(ftp_buffer);
return ret;
}
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 2eb31ffb3d14..5a9bce24f3c3 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -34,6 +34,8 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_conntrack_h323.h>
+#define H323_MAX_SIZE 65535
+
/* Parameters */
static unsigned int default_rrq_ttl __read_mostly = 300;
module_param(default_rrq_ttl, uint, 0600);
@@ -49,64 +51,8 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "
"if both endpoints are on different sides "
"(determined by routing information)");
-/* Hooks for NAT */
-int (*set_h245_addr_hook) (struct sk_buff *skb, unsigned int protoff,
- unsigned char **data, int dataoff,
- H245_TransportAddress *taddr,
- union nf_inet_addr *addr, __be16 port)
- __read_mostly;
-int (*set_h225_addr_hook) (struct sk_buff *skb, unsigned int protoff,
- unsigned char **data, int dataoff,
- TransportAddress *taddr,
- union nf_inet_addr *addr, __be16 port)
- __read_mostly;
-int (*set_sig_addr_hook) (struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int protoff, unsigned char **data,
- TransportAddress *taddr, int count) __read_mostly;
-int (*set_ras_addr_hook) (struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int protoff, unsigned char **data,
- TransportAddress *taddr, int count) __read_mostly;
-int (*nat_rtp_rtcp_hook) (struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int protoff,
- unsigned char **data, int dataoff,
- H245_TransportAddress *taddr,
- __be16 port, __be16 rtp_port,
- struct nf_conntrack_expect *rtp_exp,
- struct nf_conntrack_expect *rtcp_exp) __read_mostly;
-int (*nat_t120_hook) (struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int protoff,
- unsigned char **data, int dataoff,
- H245_TransportAddress *taddr, __be16 port,
- struct nf_conntrack_expect *exp) __read_mostly;
-int (*nat_h245_hook) (struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int protoff,
- unsigned char **data, int dataoff,
- TransportAddress *taddr, __be16 port,
- struct nf_conntrack_expect *exp) __read_mostly;
-int (*nat_callforwarding_hook) (struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int protoff,
- unsigned char **data, int dataoff,
- TransportAddress *taddr, __be16 port,
- struct nf_conntrack_expect *exp) __read_mostly;
-int (*nat_q931_hook) (struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int protoff,
- unsigned char **data, TransportAddress *taddr, int idx,
- __be16 port, struct nf_conntrack_expect *exp)
- __read_mostly;
+const struct nfct_h323_nat_hooks __rcu *nfct_h323_nat_hook __read_mostly;
+EXPORT_SYMBOL_GPL(nfct_h323_nat_hook);
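
The per-function hook pointers removed above are folded into one RCU-managed ops structure. A sketch of how the NAT side would publish it, assuming struct nfct_h323_nat_hooks carries the callbacks referenced later in this file (callback bodies and error handling elided):

	/* Sketch of the NAT module publishing the combined ops struct. */
	static const struct nfct_h323_nat_hooks nathooks = {
		.set_h245_addr		= set_h245_addr,
		.set_h225_addr		= set_h225_addr,
		.set_sig_addr		= set_sig_addr,
		.set_ras_addr		= set_ras_addr,
		.nat_rtp_rtcp		= nat_rtp_rtcp,
		.nat_t120		= nat_t120,
		.nat_h245		= nat_h245,
		.nat_callforwarding	= nat_callforwarding,
		.nat_q931		= nat_q931,
	};

	static int __init nat_h323_init(void)
	{
		RCU_INIT_POINTER(nfct_h323_nat_hook, &nathooks);
		return 0;
	}

	static void __exit nat_h323_fini(void)
	{
		RCU_INIT_POINTER(nfct_h323_nat_hook, NULL);
		synchronize_rcu();	/* wait for in-flight readers */
	}
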
static DEFINE_SPINLOCK(nf_h323_lock);
static char *h323_buffer;
@@ -142,6 +88,9 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
if (tcpdatalen <= 0) /* No TCP data */
goto clear_out;
+ if (tcpdatalen > H323_MAX_SIZE)
+ tcpdatalen = H323_MAX_SIZE;
+
if (*data == NULL) { /* first TPKT */
/* Get first TPKT pointer */
tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen,
@@ -259,6 +208,7 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr)
{
+ const struct nfct_h323_nat_hooks *nathook;
int dir = CTINFO2DIR(ctinfo);
int ret = 0;
__be16 port;
@@ -266,7 +216,6 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
union nf_inet_addr addr;
struct nf_conntrack_expect *rtp_exp;
struct nf_conntrack_expect *rtcp_exp;
- typeof(nat_rtp_rtcp_hook) nat_rtp_rtcp;
/* Read RTP or RTCP address */
if (!get_h245_addr(ct, *data, taddr, &addr, &port) ||
@@ -296,15 +245,16 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.dst.u3,
IPPROTO_UDP, NULL, &rtcp_port);
+ nathook = rcu_dereference(nfct_h323_nat_hook);
if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
&ct->tuplehash[!dir].tuple.dst.u3,
sizeof(ct->tuplehash[dir].tuple.src.u3)) &&
- (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook)) &&
+ nathook &&
nf_ct_l3num(ct) == NFPROTO_IPV4 &&
ct->status & IPS_NAT_MASK) {
/* NAT needed */
- ret = nat_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff,
- taddr, port, rtp_port, rtp_exp, rtcp_exp);
+ ret = nathook->nat_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff,
+ taddr, port, rtp_port, rtp_exp, rtcp_exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(rtp_exp, 0) == 0) {
if (nf_ct_expect_related(rtcp_exp, 0) == 0) {
@@ -333,12 +283,12 @@ static int expect_t120(struct sk_buff *skb,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr)
{
+ const struct nfct_h323_nat_hooks *nathook;
int dir = CTINFO2DIR(ctinfo);
int ret = 0;
__be16 port;
union nf_inet_addr addr;
struct nf_conntrack_expect *exp;
- typeof(nat_t120_hook) nat_t120;
/* Read T.120 address */
if (!get_h245_addr(ct, *data, taddr, &addr, &port) ||
@@ -355,15 +305,16 @@ static int expect_t120(struct sk_buff *skb,
IPPROTO_TCP, NULL, &port);
exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple channels */
+ nathook = rcu_dereference(nfct_h323_nat_hook);
if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
&ct->tuplehash[!dir].tuple.dst.u3,
sizeof(ct->tuplehash[dir].tuple.src.u3)) &&
- (nat_t120 = rcu_dereference(nat_t120_hook)) &&
+ nathook &&
nf_ct_l3num(ct) == NFPROTO_IPV4 &&
ct->status & IPS_NAT_MASK) {
/* NAT needed */
- ret = nat_t120(skb, ct, ctinfo, protoff, data, dataoff, taddr,
- port, exp);
+ ret = nathook->nat_t120(skb, ct, ctinfo, protoff, data,
+ dataoff, taddr, port, exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_h323: expect T.120 ");
@@ -664,18 +615,19 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data,
return 1;
}
+EXPORT_SYMBOL_GPL(get_h225_addr);
static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff, unsigned char **data, int dataoff,
TransportAddress *taddr)
{
+ const struct nfct_h323_nat_hooks *nathook;
int dir = CTINFO2DIR(ctinfo);
int ret = 0;
__be16 port;
union nf_inet_addr addr;
struct nf_conntrack_expect *exp;
- typeof(nat_h245_hook) nat_h245;
/* Read h245Address */
if (!get_h225_addr(ct, *data, taddr, &addr, &port) ||
@@ -692,15 +644,16 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
IPPROTO_TCP, NULL, &port);
exp->helper = &nf_conntrack_helper_h245;
+ nathook = rcu_dereference(nfct_h323_nat_hook);
if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
&ct->tuplehash[!dir].tuple.dst.u3,
sizeof(ct->tuplehash[dir].tuple.src.u3)) &&
- (nat_h245 = rcu_dereference(nat_h245_hook)) &&
+ nathook &&
nf_ct_l3num(ct) == NFPROTO_IPV4 &&
ct->status & IPS_NAT_MASK) {
/* NAT needed */
- ret = nat_h245(skb, ct, ctinfo, protoff, data, dataoff, taddr,
- port, exp);
+ ret = nathook->nat_h245(skb, ct, ctinfo, protoff, data,
+ dataoff, taddr, port, exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_q931: expect H.245 ");
@@ -785,13 +738,13 @@ static int expect_callforwarding(struct sk_buff *skb,
unsigned char **data, int dataoff,
TransportAddress *taddr)
{
+ const struct nfct_h323_nat_hooks *nathook;
int dir = CTINFO2DIR(ctinfo);
int ret = 0;
__be16 port;
union nf_inet_addr addr;
struct nf_conntrack_expect *exp;
struct net *net = nf_ct_net(ct);
- typeof(nat_callforwarding_hook) nat_callforwarding;
/* Read alternativeAddress */
if (!get_h225_addr(ct, *data, taddr, &addr, &port) || port == 0)
@@ -815,16 +768,17 @@ static int expect_callforwarding(struct sk_buff *skb,
IPPROTO_TCP, NULL, &port);
exp->helper = nf_conntrack_helper_q931;
+ nathook = rcu_dereference(nfct_h323_nat_hook);
if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
&ct->tuplehash[!dir].tuple.dst.u3,
sizeof(ct->tuplehash[dir].tuple.src.u3)) &&
- (nat_callforwarding = rcu_dereference(nat_callforwarding_hook)) &&
+ nathook &&
nf_ct_l3num(ct) == NFPROTO_IPV4 &&
ct->status & IPS_NAT_MASK) {
/* Need NAT */
- ret = nat_callforwarding(skb, ct, ctinfo,
- protoff, data, dataoff,
- taddr, port, exp);
+ ret = nathook->nat_callforwarding(skb, ct, ctinfo,
+ protoff, data, dataoff,
+ taddr, port, exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_q931: expect Call Forwarding ");
@@ -844,12 +798,12 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
unsigned char **data, int dataoff,
Setup_UUIE *setup)
{
+ const struct nfct_h323_nat_hooks *nathook;
int dir = CTINFO2DIR(ctinfo);
int ret;
int i;
__be16 port;
union nf_inet_addr addr;
- typeof(set_h225_addr_hook) set_h225_addr;
pr_debug("nf_ct_q931: Setup\n");
@@ -860,9 +814,9 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
return -1;
}
- set_h225_addr = rcu_dereference(set_h225_addr_hook);
+ nathook = rcu_dereference(nfct_h323_nat_hook);
if ((setup->options & eSetup_UUIE_destCallSignalAddress) &&
- (set_h225_addr) && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
ct->status & IPS_NAT_MASK &&
get_h225_addr(ct, *data, &setup->destCallSignalAddress,
&addr, &port) &&
@@ -870,16 +824,16 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
pr_debug("nf_ct_q931: set destCallSignalAddress %pI6:%hu->%pI6:%hu\n",
&addr, ntohs(port), &ct->tuplehash[!dir].tuple.src.u3,
ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
- ret = set_h225_addr(skb, protoff, data, dataoff,
- &setup->destCallSignalAddress,
- &ct->tuplehash[!dir].tuple.src.u3,
- ct->tuplehash[!dir].tuple.src.u.tcp.port);
+ ret = nathook->set_h225_addr(skb, protoff, data, dataoff,
+ &setup->destCallSignalAddress,
+ &ct->tuplehash[!dir].tuple.src.u3,
+ ct->tuplehash[!dir].tuple.src.u.tcp.port);
if (ret < 0)
return -1;
}
if ((setup->options & eSetup_UUIE_sourceCallSignalAddress) &&
- (set_h225_addr) && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
ct->status & IPS_NAT_MASK &&
get_h225_addr(ct, *data, &setup->sourceCallSignalAddress,
&addr, &port) &&
@@ -887,10 +841,10 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
pr_debug("nf_ct_q931: set sourceCallSignalAddress %pI6:%hu->%pI6:%hu\n",
&addr, ntohs(port), &ct->tuplehash[!dir].tuple.dst.u3,
ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
- ret = set_h225_addr(skb, protoff, data, dataoff,
- &setup->sourceCallSignalAddress,
- &ct->tuplehash[!dir].tuple.dst.u3,
- ct->tuplehash[!dir].tuple.dst.u.tcp.port);
+ ret = nathook->set_h225_addr(skb, protoff, data, dataoff,
+ &setup->sourceCallSignalAddress,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ ct->tuplehash[!dir].tuple.dst.u.tcp.port);
if (ret < 0)
return -1;
}
@@ -1220,6 +1174,9 @@ static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff,
if (dataoff >= skb->len)
return NULL;
*datalen = skb->len - dataoff;
+ if (*datalen > H323_MAX_SIZE)
+ *datalen = H323_MAX_SIZE;
+
return skb_header_pointer(skb, dataoff, *datalen, h323_buffer);
}
@@ -1249,13 +1206,13 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
TransportAddress *taddr, int count)
{
struct nf_ct_h323_master *info = nfct_help_data(ct);
+ const struct nfct_h323_nat_hooks *nathook;
int dir = CTINFO2DIR(ctinfo);
int ret = 0;
int i;
__be16 port;
union nf_inet_addr addr;
struct nf_conntrack_expect *exp;
- typeof(nat_q931_hook) nat_q931;
/* Look for the first related address */
for (i = 0; i < count; i++) {
@@ -1279,11 +1236,11 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
exp->helper = nf_conntrack_helper_q931;
exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */
- nat_q931 = rcu_dereference(nat_q931_hook);
- if (nat_q931 && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
ct->status & IPS_NAT_MASK) { /* Need NAT */
- ret = nat_q931(skb, ct, ctinfo, protoff, data,
- taddr, i, port, exp);
+ ret = nathook->nat_q931(skb, ct, ctinfo, protoff, data,
+ taddr, i, port, exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
@@ -1305,15 +1262,15 @@ static int process_grq(struct sk_buff *skb, struct nf_conn *ct,
unsigned int protoff,
unsigned char **data, GatekeeperRequest *grq)
{
- typeof(set_ras_addr_hook) set_ras_addr;
+ const struct nfct_h323_nat_hooks *nathook;
pr_debug("nf_ct_ras: GRQ\n");
- set_ras_addr = rcu_dereference(set_ras_addr_hook);
- if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
ct->status & IPS_NAT_MASK) /* NATed */
- return set_ras_addr(skb, ct, ctinfo, protoff, data,
- &grq->rasAddress, 1);
+ return nathook->set_ras_addr(skb, ct, ctinfo, protoff, data,
+ &grq->rasAddress, 1);
return 0;
}
@@ -1367,8 +1324,8 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
unsigned char **data, RegistrationRequest *rrq)
{
struct nf_ct_h323_master *info = nfct_help_data(ct);
+ const struct nfct_h323_nat_hooks *nathook;
int ret;
- typeof(set_ras_addr_hook) set_ras_addr;
pr_debug("nf_ct_ras: RRQ\n");
@@ -1378,12 +1335,12 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
if (ret < 0)
return -1;
- set_ras_addr = rcu_dereference(set_ras_addr_hook);
- if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
ct->status & IPS_NAT_MASK) {
- ret = set_ras_addr(skb, ct, ctinfo, protoff, data,
- rrq->rasAddress.item,
- rrq->rasAddress.count);
+ ret = nathook->set_ras_addr(skb, ct, ctinfo, protoff, data,
+ rrq->rasAddress.item,
+ rrq->rasAddress.count);
if (ret < 0)
return -1;
}
@@ -1403,19 +1360,19 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
unsigned char **data, RegistrationConfirm *rcf)
{
struct nf_ct_h323_master *info = nfct_help_data(ct);
+ const struct nfct_h323_nat_hooks *nathook;
int dir = CTINFO2DIR(ctinfo);
int ret;
struct nf_conntrack_expect *exp;
- typeof(set_sig_addr_hook) set_sig_addr;
pr_debug("nf_ct_ras: RCF\n");
- set_sig_addr = rcu_dereference(set_sig_addr_hook);
- if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
ct->status & IPS_NAT_MASK) {
- ret = set_sig_addr(skb, ct, ctinfo, protoff, data,
- rcf->callSignalAddress.item,
- rcf->callSignalAddress.count);
+ ret = nathook->set_sig_addr(skb, ct, ctinfo, protoff, data,
+ rcf->callSignalAddress.item,
+ rcf->callSignalAddress.count);
if (ret < 0)
return -1;
}
@@ -1454,18 +1411,18 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct,
unsigned char **data, UnregistrationRequest *urq)
{
struct nf_ct_h323_master *info = nfct_help_data(ct);
+ const struct nfct_h323_nat_hooks *nathook;
int dir = CTINFO2DIR(ctinfo);
int ret;
- typeof(set_sig_addr_hook) set_sig_addr;
pr_debug("nf_ct_ras: URQ\n");
- set_sig_addr = rcu_dereference(set_sig_addr_hook);
- if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
ct->status & IPS_NAT_MASK) {
- ret = set_sig_addr(skb, ct, ctinfo, protoff, data,
- urq->callSignalAddress.item,
- urq->callSignalAddress.count);
+ ret = nathook->set_sig_addr(skb, ct, ctinfo, protoff, data,
+ urq->callSignalAddress.item,
+ urq->callSignalAddress.count);
if (ret < 0)
return -1;
}
@@ -1487,39 +1444,42 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
unsigned char **data, AdmissionRequest *arq)
{
const struct nf_ct_h323_master *info = nfct_help_data(ct);
+ const struct nfct_h323_nat_hooks *nathook;
int dir = CTINFO2DIR(ctinfo);
__be16 port;
union nf_inet_addr addr;
- typeof(set_h225_addr_hook) set_h225_addr;
pr_debug("nf_ct_ras: ARQ\n");
- set_h225_addr = rcu_dereference(set_h225_addr_hook);
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (!nathook)
+ return 0;
+
if ((arq->options & eAdmissionRequest_destCallSignalAddress) &&
get_h225_addr(ct, *data, &arq->destCallSignalAddress,
&addr, &port) &&
!memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) &&
port == info->sig_port[dir] &&
nf_ct_l3num(ct) == NFPROTO_IPV4 &&
- set_h225_addr && ct->status & IPS_NAT_MASK) {
+ ct->status & IPS_NAT_MASK) {
/* Answering ARQ */
- return set_h225_addr(skb, protoff, data, 0,
- &arq->destCallSignalAddress,
- &ct->tuplehash[!dir].tuple.dst.u3,
- info->sig_port[!dir]);
+ return nathook->set_h225_addr(skb, protoff, data, 0,
+ &arq->destCallSignalAddress,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ info->sig_port[!dir]);
}
if ((arq->options & eAdmissionRequest_srcCallSignalAddress) &&
get_h225_addr(ct, *data, &arq->srcCallSignalAddress,
&addr, &port) &&
!memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) &&
- set_h225_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ nf_ct_l3num(ct) == NFPROTO_IPV4 &&
ct->status & IPS_NAT_MASK) {
/* Calling ARQ */
- return set_h225_addr(skb, protoff, data, 0,
- &arq->srcCallSignalAddress,
- &ct->tuplehash[!dir].tuple.dst.u3,
- port);
+ return nathook->set_h225_addr(skb, protoff, data, 0,
+ &arq->srcCallSignalAddress,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ port);
}
return 0;
@@ -1535,7 +1495,6 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
__be16 port;
union nf_inet_addr addr;
struct nf_conntrack_expect *exp;
- typeof(set_sig_addr_hook) set_sig_addr;
pr_debug("nf_ct_ras: ACF\n");
@@ -1544,12 +1503,15 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
return 0;
if (!memcmp(&addr, &ct->tuplehash[dir].tuple.dst.u3, sizeof(addr))) {
+ const struct nfct_h323_nat_hooks *nathook;
+
/* Answering ACF */
- set_sig_addr = rcu_dereference(set_sig_addr_hook);
- if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
ct->status & IPS_NAT_MASK)
- return set_sig_addr(skb, ct, ctinfo, protoff, data,
- &acf->destCallSignalAddress, 1);
+ return nathook->set_sig_addr(skb, ct, ctinfo, protoff,
+ data,
+ &acf->destCallSignalAddress, 1);
return 0;
}
@@ -1578,15 +1540,15 @@ static int process_lrq(struct sk_buff *skb, struct nf_conn *ct,
unsigned int protoff,
unsigned char **data, LocationRequest *lrq)
{
- typeof(set_ras_addr_hook) set_ras_addr;
+ const struct nfct_h323_nat_hooks *nathook;
pr_debug("nf_ct_ras: LRQ\n");
- set_ras_addr = rcu_dereference(set_ras_addr_hook);
- if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
ct->status & IPS_NAT_MASK)
- return set_ras_addr(skb, ct, ctinfo, protoff, data,
- &lrq->replyAddress, 1);
+ return nathook->set_ras_addr(skb, ct, ctinfo, protoff, data,
+ &lrq->replyAddress, 1);
return 0;
}
@@ -1634,27 +1596,22 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
unsigned int protoff,
unsigned char **data, InfoRequestResponse *irr)
{
+ const struct nfct_h323_nat_hooks *nathook;
int ret;
- typeof(set_ras_addr_hook) set_ras_addr;
- typeof(set_sig_addr_hook) set_sig_addr;
pr_debug("nf_ct_ras: IRR\n");
- set_ras_addr = rcu_dereference(set_ras_addr_hook);
- if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ nathook = rcu_dereference(nfct_h323_nat_hook);
+ if (nathook && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
ct->status & IPS_NAT_MASK) {
- ret = set_ras_addr(skb, ct, ctinfo, protoff, data,
- &irr->rasAddress, 1);
+ ret = nathook->set_ras_addr(skb, ct, ctinfo, protoff, data,
+ &irr->rasAddress, 1);
if (ret < 0)
return -1;
- }
- set_sig_addr = rcu_dereference(set_sig_addr_hook);
- if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
- ct->status & IPS_NAT_MASK) {
- ret = set_sig_addr(skb, ct, ctinfo, protoff, data,
- irr->callSignalAddress.item,
- irr->callSignalAddress.count);
+ ret = nathook->set_sig_addr(skb, ct, ctinfo, protoff, data,
+ irr->callSignalAddress.item,
+ irr->callSignalAddress.count);
if (ret < 0)
return -1;
}
@@ -1821,7 +1778,7 @@ static int __init nf_conntrack_h323_init(void)
NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_h323_master));
- h323_buffer = kmalloc(65536, GFP_KERNEL);
+ h323_buffer = kmalloc(H323_MAX_SIZE + 1, GFP_KERNEL);
if (!h323_buffer)
return -ENOMEM;
ret = h323_helper_init();
@@ -1837,17 +1794,6 @@ err1:
module_init(nf_conntrack_h323_init);
module_exit(nf_conntrack_h323_fini);
-EXPORT_SYMBOL_GPL(get_h225_addr);
-EXPORT_SYMBOL_GPL(set_h245_addr_hook);
-EXPORT_SYMBOL_GPL(set_h225_addr_hook);
-EXPORT_SYMBOL_GPL(set_sig_addr_hook);
-EXPORT_SYMBOL_GPL(set_ras_addr_hook);
-EXPORT_SYMBOL_GPL(nat_rtp_rtcp_hook);
-EXPORT_SYMBOL_GPL(nat_t120_hook);
-EXPORT_SYMBOL_GPL(nat_h245_hook);
-EXPORT_SYMBOL_GPL(nat_callforwarding_hook);
-EXPORT_SYMBOL_GPL(nat_q931_hook);
-
MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
MODULE_DESCRIPTION("H.323 connection tracking helper");
MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index ae4488a13c70..ff737a76052e 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -35,11 +35,6 @@ unsigned int nf_ct_helper_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_helper_hsize);
static unsigned int nf_ct_helper_count __read_mostly;
-static bool nf_ct_auto_assign_helper __read_mostly = false;
-module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644);
-MODULE_PARM_DESC(nf_conntrack_helper,
- "Enable automatic conntrack helper assignment (default 0)");
-
static DEFINE_MUTEX(nf_ct_nat_helpers_mutex);
static struct list_head nf_ct_nat_helpers __read_mostly;
@@ -51,24 +46,6 @@ static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple)
(__force __u16)tuple->src.u.all) % nf_ct_helper_hsize;
}
-static struct nf_conntrack_helper *
-__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
-{
- struct nf_conntrack_helper *helper;
- struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) };
- unsigned int h;
-
- if (!nf_ct_helper_count)
- return NULL;
-
- h = helper_hash(tuple);
- hlist_for_each_entry_rcu(helper, &nf_ct_helper_hash[h], hnode) {
- if (nf_ct_tuple_src_mask_cmp(tuple, &helper->tuple, &mask))
- return helper;
- }
- return NULL;
-}
-
struct nf_conntrack_helper *
__nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum)
{
@@ -165,7 +142,7 @@ nf_nat_helper_try_module_get(const char *name, u16 l3num, u8 protonum)
if (!nat) {
snprintf(mod_name, sizeof(mod_name), "%s", h->nat_mod_name);
rcu_read_unlock();
- request_module(mod_name);
+ request_module("%s", mod_name);
rcu_read_lock();
nat = nf_conntrack_nat_helper_find(mod_name);
@@ -209,33 +186,11 @@ nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
}
EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add);
-static struct nf_conntrack_helper *
-nf_ct_lookup_helper(struct nf_conn *ct, struct net *net)
-{
- struct nf_conntrack_net *cnet = nf_ct_pernet(net);
-
- if (!cnet->sysctl_auto_assign_helper) {
- if (cnet->auto_assign_helper_warned)
- return NULL;
- if (!__nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple))
- return NULL;
- pr_info("nf_conntrack: default automatic helper assignment "
- "has been turned off for security reasons and CT-based "
- "firewall rule not found. Use the iptables CT target "
- "to attach helpers instead.\n");
- cnet->auto_assign_helper_warned = true;
- return NULL;
- }
-
- return __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-}
-
int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
gfp_t flags)
{
struct nf_conntrack_helper *helper = NULL;
struct nf_conn_help *help;
- struct net *net = nf_ct_net(ct);
/* We already got a helper explicitly attached. The function
* nf_conntrack_alter_reply - in case NAT is in use - asks for looking
@@ -246,23 +201,21 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
if (test_bit(IPS_HELPER_BIT, &ct->status))
return 0;
- if (tmpl != NULL) {
- help = nfct_help(tmpl);
- if (help != NULL) {
- helper = help->helper;
- set_bit(IPS_HELPER_BIT, &ct->status);
- }
+ if (WARN_ON_ONCE(!tmpl))
+ return 0;
+
+ help = nfct_help(tmpl);
+ if (help != NULL) {
+ helper = rcu_dereference(help->helper);
+ set_bit(IPS_HELPER_BIT, &ct->status);
}
help = nfct_help(ct);
if (helper == NULL) {
- helper = nf_ct_lookup_helper(ct, net);
- if (helper == NULL) {
- if (help)
- RCU_INIT_POINTER(help->helper, NULL);
- return 0;
- }
+ if (help)
+ RCU_INIT_POINTER(help->helper, NULL);
+ return 0;
}
if (help == NULL) {
@@ -468,11 +421,6 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
nf_ct_expect_iterate_destroy(expect_iter_me, NULL);
nf_ct_iterate_destroy(unhelp, me);
-
- /* Maybe someone has gotten the helper already when unhelp above.
- * So need to wait it.
- */
- synchronize_rcu();
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
@@ -550,43 +498,19 @@ void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat)
}
EXPORT_SYMBOL_GPL(nf_nat_helper_unregister);
-static const struct nf_ct_ext_type helper_extend = {
- .len = sizeof(struct nf_conn_help),
- .align = __alignof__(struct nf_conn_help),
- .id = NF_CT_EXT_HELPER,
-};
-
-void nf_conntrack_helper_pernet_init(struct net *net)
-{
- struct nf_conntrack_net *cnet = nf_ct_pernet(net);
-
- cnet->sysctl_auto_assign_helper = nf_ct_auto_assign_helper;
-}
-
int nf_conntrack_helper_init(void)
{
- int ret;
nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
nf_ct_helper_hash =
nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
if (!nf_ct_helper_hash)
return -ENOMEM;
- ret = nf_ct_extend_register(&helper_extend);
- if (ret < 0) {
- pr_err("nf_ct_helper: Unable to register helper extension.\n");
- goto out_extend;
- }
-
INIT_LIST_HEAD(&nf_ct_nat_helpers);
return 0;
-out_extend:
- kvfree(nf_ct_helper_hash);
- return ret;
}
void nf_conntrack_helper_fini(void)
{
- nf_ct_extend_unregister(&helper_extend);
kvfree(nf_ct_helper_hash);
}
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 08ee4e760a3d..5703846bea3b 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -39,6 +39,7 @@ unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb,
EXPORT_SYMBOL_GPL(nf_nat_irc_hook);
#define HELPER_NAME "irc"
+#define MAX_SEARCH_SIZE 4095
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
@@ -121,6 +122,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
int i, ret = NF_ACCEPT;
char *addr_beg_p, *addr_end_p;
typeof(nf_nat_irc_hook) nf_nat_irc;
+ unsigned int datalen;
/* If packet is coming from IRC server */
if (dir == IP_CT_DIR_REPLY)
@@ -140,8 +142,12 @@ static int help(struct sk_buff *skb, unsigned int protoff,
if (dataoff >= skb->len)
return NF_ACCEPT;
+ datalen = skb->len - dataoff;
+ if (datalen > MAX_SEARCH_SIZE)
+ datalen = MAX_SEARCH_SIZE;
+
spin_lock_bh(&irc_buffer_lock);
- ib_ptr = skb_header_pointer(skb, dataoff, skb->len - dataoff,
+ ib_ptr = skb_header_pointer(skb, dataoff, datalen,
irc_buffer);
if (!ib_ptr) {
spin_unlock_bh(&irc_buffer_lock);
@@ -149,17 +155,39 @@ static int help(struct sk_buff *skb, unsigned int protoff,
}
data = ib_ptr;
- data_limit = ib_ptr + skb->len - dataoff;
+ data_limit = ib_ptr + datalen;
+
+ /* Skip any whitespace */
+ while (data < data_limit - 10) {
+ if (*data == ' ' || *data == '\r' || *data == '\n')
+ data++;
+ else
+ break;
+ }
- /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
- * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
- while (data < data_limit - (19 + MINMATCHLEN)) {
- if (memcmp(data, "\1DCC ", 5)) {
+ /* strlen("PRIVMSG x ")=10 */
+ if (data < data_limit - 10) {
+ if (strncasecmp("PRIVMSG ", data, 8))
+ goto out;
+ data += 8;
+ }
+
+ /* strlen(" :\1DCC SENT t AAAAAAAA P\1\n")=26
+ * 7+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=26
+ */
+ while (data < data_limit - (21 + MINMATCHLEN)) {
+ /* Find first " :", the start of message */
+ if (memcmp(data, " :", 2)) {
data++;
continue;
}
+ data += 2;
+
+ /* then check only that place for the DCC command */
+ if (memcmp(data, "\1DCC ", 5))
+ goto out;
data += 5;
- /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
+ /* we have at least (21+MINMATCHLEN)-(2+5) bytes valid data left */
iph = ip_hdr(skb);
pr_debug("DCC found in master %pI4:%u %pI4:%u\n",
@@ -175,7 +203,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
pr_debug("DCC %s detected\n", dccprotos[i]);
/* we have at least
- * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid
+ * (21+MINMATCHLEN)-7-dccprotos[i].matchlen bytes valid
* data left (== 14/13 bytes) */
if (parse_dcc(data, data_limit, &dcc_ip,
&dcc_port, &addr_beg_p, &addr_end_p)) {
@@ -188,8 +216,9 @@ static int help(struct sk_buff *skb, unsigned int protoff,
/* dcc_ip can be the internal OR external (NAT'ed) IP */
tuple = &ct->tuplehash[dir].tuple;
- if (tuple->src.u3.ip != dcc_ip &&
- tuple->dst.u3.ip != dcc_ip) {
+ if ((tuple->src.u3.ip != dcc_ip &&
+ ct->tuplehash[!dir].tuple.dst.u3.ip != dcc_ip) ||
+ dcc_port == 0) {
net_warn_ratelimited("Forged DCC command from %pI4: %pI4:%u\n",
&tuple->src.u3.ip,
&dcc_ip, dcc_port);
@@ -251,7 +280,7 @@ static int __init nf_conntrack_irc_init(void)
irc_exp_policy.max_expected = max_dcc_channels;
irc_exp_policy.timeout = dcc_timeout;
- irc_buffer = kmalloc(65536, GFP_KERNEL);
+ irc_buffer = kmalloc(MAX_SEARCH_SIZE + 1, GFP_KERNEL);
if (!irc_buffer)
return -ENOMEM;
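
The hunks above cap the scanned payload at MAX_SEARCH_SIZE and anchor the DCC scan at the start of the message body instead of substring-searching the whole packet. A condensed sketch of the new matching order, assuming data already points past leading whitespace (find_dcc_start is a hypothetical helper):

    /* Sketch: 1) require a "PRIVMSG " prefix, 2) locate the first
     * " :" (start of the message text), 3) accept only if "\1DCC "
     * sits exactly there -- no full-payload substring search.
     */
    static const char *find_dcc_start(const char *data, const char *limit)
    {
        if (limit - data < 10 || strncasecmp(data, "PRIVMSG ", 8))
            return NULL;
        data += 8;

        while (limit - data >= 2 + 5) {
            if (memcmp(data, " :", 2) == 0)
                return memcmp(data + 2, "\1DCC ", 5) ? NULL : data + 7;
            data++;
        }
        return NULL;
    }

Since at most MAX_SEARCH_SIZE + 1 bytes are ever copied, the shared bounce buffer shrinks from 64 KiB to 4 KiB as well.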
diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c
index 522792556632..6e70e137a0a6 100644
--- a/net/netfilter/nf_conntrack_labels.c
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -67,6 +67,8 @@ int nf_connlabels_get(struct net *net, unsigned int bits)
net->ct.labels_used++;
spin_unlock(&nf_connlabels_lock);
+ BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX);
+
return 0;
}
EXPORT_SYMBOL_GPL(nf_connlabels_get);
@@ -78,21 +80,3 @@ void nf_connlabels_put(struct net *net)
spin_unlock(&nf_connlabels_lock);
}
EXPORT_SYMBOL_GPL(nf_connlabels_put);
-
-static const struct nf_ct_ext_type labels_extend = {
- .len = sizeof(struct nf_conn_labels),
- .align = __alignof__(struct nf_conn_labels),
- .id = NF_CT_EXT_LABELS,
-};
-
-int nf_conntrack_labels_init(void)
-{
- BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX);
-
- return nf_ct_extend_register(&labels_extend);
-}
-
-void nf_conntrack_labels_fini(void)
-{
- nf_ct_extend_unregister(&labels_extend);
-}
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 7f19ee259609..55415f011943 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -20,13 +20,14 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
+#define HELPER_NAME "netbios-ns"
#define NMBD_PORT 137
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ip_conntrack_netbios_ns");
-MODULE_ALIAS_NFCT_HELPER("netbios_ns");
+MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
static unsigned int timeout __read_mostly = 3;
module_param(timeout, uint, 0400);
@@ -44,7 +45,7 @@ static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff,
}
static struct nf_conntrack_helper helper __read_mostly = {
- .name = "netbios-ns",
+ .name = HELPER_NAME,
.tuple.src.l3num = NFPROTO_IPV4,
.tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT),
.tuple.dst.protonum = IPPROTO_UDP,
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index ac438370f94a..7562b215b932 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -58,6 +58,12 @@
MODULE_LICENSE("GPL");
+struct ctnetlink_list_dump_ctx {
+ struct nf_conn *last;
+ unsigned int cpu;
+ bool done;
+};
+
static int ctnetlink_dump_tuples_proto(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l4proto *l4proto)
@@ -1197,6 +1203,7 @@ restart:
hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
if (nf_ct_is_expired(ct)) {
+ /* need to defer nf_ct_kill() until lock is released */
if (i < ARRAY_SIZE(nf_ct_evict) &&
refcount_inc_not_zero(&ct->ct_general.use))
nf_ct_evict[i++] = ct;
@@ -1553,6 +1560,11 @@ static int ctnetlink_flush_conntrack(struct net *net,
u32 portid, int report, u8 family)
{
struct ctnetlink_filter *filter = NULL;
+ struct nf_ct_iter_data iter = {
+ .net = net,
+ .portid = portid,
+ .report = report,
+ };
if (ctnetlink_needs_filter(family, cda)) {
if (cda[CTA_FILTER])
@@ -1561,10 +1573,11 @@ static int ctnetlink_flush_conntrack(struct net *net,
filter = ctnetlink_alloc_filter(cda, family);
if (IS_ERR(filter))
return PTR_ERR(filter);
+
+ iter.data = filter;
}
- nf_ct_iterate_cleanup_net(net, ctnetlink_flush_iterate, filter,
- portid, report);
+ nf_ct_iterate_cleanup_net(ctnetlink_flush_iterate, &iter);
kfree(filter);
return 0;
@@ -1694,87 +1707,109 @@ static int ctnetlink_get_conntrack(struct sk_buff *skb,
static int ctnetlink_done_list(struct netlink_callback *cb)
{
- if (cb->args[1])
- nf_ct_put((struct nf_conn *)cb->args[1]);
+ struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx;
+
+ if (ctx->last)
+ nf_ct_put(ctx->last);
+
return 0;
}
-static int
-ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying)
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static int ctnetlink_dump_one_entry(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct nf_conn *ct,
+ bool dying)
{
- struct nf_conn *ct, *last;
- struct nf_conntrack_tuple_hash *h;
- struct hlist_nulls_node *n;
+ struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx;
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
- u_int8_t l3proto = nfmsg->nfgen_family;
+ u8 l3proto = nfmsg->nfgen_family;
int res;
- int cpu;
- struct hlist_nulls_head *list;
- struct net *net = sock_net(skb->sk);
- if (cb->args[2])
+ if (l3proto && nf_ct_l3num(ct) != l3proto)
return 0;
- last = (struct nf_conn *)cb->args[1];
-
- for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) {
- struct ct_pcpu *pcpu;
+ if (ctx->last) {
+ if (ct != ctx->last)
+ return 0;
- if (!cpu_possible(cpu))
- continue;
+ ctx->last = NULL;
+ }
- pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
- spin_lock_bh(&pcpu->lock);
- list = dying ? &pcpu->dying : &pcpu->unconfirmed;
-restart:
- hlist_nulls_for_each_entry(h, n, list, hnnode) {
- ct = nf_ct_tuplehash_to_ctrack(h);
- if (l3proto && nf_ct_l3num(ct) != l3proto)
- continue;
- if (cb->args[1]) {
- if (ct != last)
- continue;
- cb->args[1] = 0;
- }
+ /* We can't dump extension info for the unconfirmed
+ * list because unconfirmed conntracks can have
+ * ct->ext reallocated (and thus freed).
+ *
+ * In the dying list case ct->ext can't be free'd
+ * until after we drop pcpu->lock.
+ */
+ res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+ ct, dying, 0);
+ if (res < 0) {
+ if (!refcount_inc_not_zero(&ct->ct_general.use))
+ return 0;
- /* We can't dump extension info for the unconfirmed
- * list because unconfirmed conntracks can have
- * ct->ext reallocated (and thus freed).
- *
- * In the dying list case ct->ext can't be free'd
- * until after we drop pcpu->lock.
- */
- res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
- ct, dying, 0);
- if (res < 0) {
- if (!refcount_inc_not_zero(&ct->ct_general.use))
- continue;
- cb->args[0] = cpu;
- cb->args[1] = (unsigned long)ct;
- spin_unlock_bh(&pcpu->lock);
- goto out;
- }
- }
- if (cb->args[1]) {
- cb->args[1] = 0;
- goto restart;
- }
- spin_unlock_bh(&pcpu->lock);
+ ctx->last = ct;
}
- cb->args[2] = 1;
-out:
- if (last)
- nf_ct_put(last);
- return skb->len;
+ return res;
+}
+#endif
+
+static int
+ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return 0;
}
static int
ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
{
- return ctnetlink_dump_list(skb, cb, true);
+ struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx;
+ struct nf_conn *last = ctx->last;
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ const struct net *net = sock_net(skb->sk);
+ struct nf_conntrack_net_ecache *ecache_net;
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_node *n;
+#endif
+
+ if (ctx->done)
+ return 0;
+
+ ctx->last = NULL;
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ ecache_net = nf_conn_pernet_ecache(net);
+ spin_lock_bh(&ecache_net->dying_lock);
+
+ hlist_nulls_for_each_entry(h, n, &ecache_net->dying_list, hnnode) {
+ struct nf_conn *ct;
+ int res;
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (last && last != ct)
+ continue;
+
+ res = ctnetlink_dump_one_entry(skb, cb, ct, true);
+ if (res < 0) {
+ spin_unlock_bh(&ecache_net->dying_lock);
+ nf_ct_put(last);
+ return skb->len;
+ }
+
+ nf_ct_put(last);
+ last = NULL;
+ }
+
+ spin_unlock_bh(&ecache_net->dying_lock);
+#endif
+ ctx->done = true;
+ nf_ct_put(last);
+
+ return skb->len;
}
static int ctnetlink_get_ct_dying(struct sk_buff *skb,
@@ -1792,12 +1827,6 @@ static int ctnetlink_get_ct_dying(struct sk_buff *skb,
return -EOPNOTSUPP;
}
-static int
-ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
-{
- return ctnetlink_dump_list(skb, cb, false);
-}
-
static int ctnetlink_get_ct_unconfirmed(struct sk_buff *skb,
const struct nfnl_info *info,
const struct nlattr * const cda[])
@@ -1862,45 +1891,10 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
}
#endif
-static void
-__ctnetlink_change_status(struct nf_conn *ct, unsigned long on,
- unsigned long off)
-{
- unsigned int bit;
-
- /* Ignore these unchangable bits */
- on &= ~IPS_UNCHANGEABLE_MASK;
- off &= ~IPS_UNCHANGEABLE_MASK;
-
- for (bit = 0; bit < __IPS_MAX_BIT; bit++) {
- if (on & (1 << bit))
- set_bit(bit, &ct->status);
- else if (off & (1 << bit))
- clear_bit(bit, &ct->status);
- }
-}
-
static int
ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
{
- unsigned long d;
- unsigned int status = ntohl(nla_get_be32(cda[CTA_STATUS]));
- d = ct->status ^ status;
-
- if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
- /* unchangeable */
- return -EBUSY;
-
- if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
- /* SEEN_REPLY bit can only be set */
- return -EBUSY;
-
- if (d & IPS_ASSURED && !(status & IPS_ASSURED))
- /* ASSURED bit can only be set */
- return -EBUSY;
-
- __ctnetlink_change_status(ct, status, 0);
- return 0;
+ return nf_ct_change_status_common(ct, ntohl(nla_get_be32(cda[CTA_STATUS])));
}
static int
@@ -1976,7 +1970,7 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
}
if (help) {
- if (help->helper == helper) {
+ if (rcu_access_pointer(help->helper) == helper) {
/* update private helper data if allowed. */
if (helper->from_nlattr)
helper->from_nlattr(helpinfo, ct);
@@ -1995,16 +1989,7 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
static int ctnetlink_change_timeout(struct nf_conn *ct,
const struct nlattr * const cda[])
{
- u64 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
-
- if (timeout > INT_MAX)
- timeout = INT_MAX;
- WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
-
- if (test_bit(IPS_DYING_BIT, &ct->status))
- return -ETIME;
-
- return 0;
+ return __nf_ct_change_timeout(ct, (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ);
}
#if defined(CONFIG_NF_CONNTRACK_MARK)
@@ -2264,9 +2249,7 @@ ctnetlink_create_conntrack(struct net *net,
goto err1;
timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
- if (timeout > INT_MAX)
- timeout = INT_MAX;
- ct->timeout = (u32)timeout + nfct_time_stamp;
+ __nf_ct_set_timeout(ct, timeout);
rcu_read_lock();
if (cda[CTA_HELP]) {
@@ -2311,14 +2294,10 @@ ctnetlink_create_conntrack(struct net *net,
if (helper->from_nlattr)
helper->from_nlattr(helpinfo, ct);
- /* not in hash table yet so not strictly necessary */
+ /* disable helper auto-assignment for this entry */
+ ct->status |= IPS_HELPER;
RCU_INIT_POINTER(help->helper, helper);
}
- } else {
- /* try an implicit helper assignation */
- err = __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC);
- if (err < 0)
- goto err2;
}
err = ctnetlink_setup_nat(ct, cda);
@@ -2807,7 +2786,7 @@ ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[])
* unchangeable bits but do not error out. Also user programs
* are allowed to clear the bits that they are allowed to change.
*/
- __ctnetlink_change_status(ct, status, ~status);
+ __nf_ct_change_status(ct, status, ~status);
return 0;
}
@@ -3383,12 +3362,17 @@ static int ctnetlink_get_expect(struct sk_buff *skb,
static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data)
{
+ struct nf_conntrack_helper *helper;
const struct nf_conn_help *m_help;
const char *name = data;
m_help = nfct_help(exp->master);
- return strcmp(m_help->helper->name, name) == 0;
+ helper = rcu_dereference(m_help->helper);
+ if (!helper)
+ return false;
+
+ return strcmp(helper->name, name) == 0;
}
static bool expect_iter_all(struct nf_conntrack_expect *exp, void *data)
@@ -3877,6 +3861,8 @@ static int __init ctnetlink_init(void)
{
int ret;
+ BUILD_BUG_ON(sizeof(struct ctnetlink_list_dump_ctx) > sizeof_field(struct netlink_callback, ctx));
+
ret = nfnetlink_subsys_register(&ctnl_subsys);
if (ret < 0) {
pr_err("ctnetlink_init: cannot register with nfnetlink.\n");
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 7d5708b92138..4c679638df06 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -45,30 +45,8 @@ MODULE_ALIAS_NFCT_HELPER("pptp");
static DEFINE_SPINLOCK(nf_pptp_lock);
-int
-(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb,
- struct nf_conn *ct, enum ip_conntrack_info ctinfo,
- unsigned int protoff, struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_outbound);
-
-int
-(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb,
- struct nf_conn *ct, enum ip_conntrack_info ctinfo,
- unsigned int protoff, struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_inbound);
-
-void
-(*nf_nat_pptp_hook_exp_gre)(struct nf_conntrack_expect *expect_orig,
- struct nf_conntrack_expect *expect_reply)
- __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_exp_gre);
-
-void
-(*nf_nat_pptp_hook_expectfn)(struct nf_conn *ct,
- struct nf_conntrack_expect *exp) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn);
+const struct nf_nat_pptp_hook __rcu *nf_nat_pptp_hook;
+EXPORT_SYMBOL_GPL(nf_nat_pptp_hook);
#if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
/* PptpControlMessageType names */
@@ -111,8 +89,8 @@ EXPORT_SYMBOL(pptp_msg_name);
static void pptp_expectfn(struct nf_conn *ct,
struct nf_conntrack_expect *exp)
{
+ const struct nf_nat_pptp_hook *hook;
struct net *net = nf_ct_net(ct);
- typeof(nf_nat_pptp_hook_expectfn) nf_nat_pptp_expectfn;
pr_debug("increasing timeouts\n");
/* increase timeout of GRE data channel conntrack entry */
@@ -122,9 +100,9 @@ static void pptp_expectfn(struct nf_conn *ct,
/* Can you see how rusty this code is, compared with the pre-2.6.11
* one? That's what happened to my shiny newnat of 2002 ;( -HW */
- nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn);
- if (nf_nat_pptp_expectfn && ct->master->status & IPS_NAT_MASK)
- nf_nat_pptp_expectfn(ct, exp);
+ hook = rcu_dereference(nf_nat_pptp_hook);
+ if (hook && ct->master->status & IPS_NAT_MASK)
+ hook->expectfn(ct, exp);
else {
struct nf_conntrack_tuple inv_t;
struct nf_conntrack_expect *exp_other;
@@ -209,9 +187,9 @@ static void pptp_destroy_siblings(struct nf_conn *ct)
static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid)
{
struct nf_conntrack_expect *exp_orig, *exp_reply;
+ const struct nf_nat_pptp_hook *hook;
enum ip_conntrack_dir dir;
int ret = 1;
- typeof(nf_nat_pptp_hook_exp_gre) nf_nat_pptp_exp_gre;
exp_orig = nf_ct_expect_alloc(ct);
if (exp_orig == NULL)
@@ -239,9 +217,9 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid)
IPPROTO_GRE, &callid, &peer_callid);
exp_reply->expectfn = pptp_expectfn;
- nf_nat_pptp_exp_gre = rcu_dereference(nf_nat_pptp_hook_exp_gre);
- if (nf_nat_pptp_exp_gre && ct->status & IPS_NAT_MASK)
- nf_nat_pptp_exp_gre(exp_orig, exp_reply);
+ hook = rcu_dereference(nf_nat_pptp_hook);
+ if (hook && ct->status & IPS_NAT_MASK)
+ hook->exp_gre(exp_orig, exp_reply);
if (nf_ct_expect_related(exp_orig, 0) != 0)
goto out_put_both;
if (nf_ct_expect_related(exp_reply, 0) != 0)
@@ -279,9 +257,9 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
enum ip_conntrack_info ctinfo)
{
struct nf_ct_pptp_master *info = nfct_help_data(ct);
+ const struct nf_nat_pptp_hook *hook;
u_int16_t msg;
__be16 cid = 0, pcid = 0;
- typeof(nf_nat_pptp_hook_inbound) nf_nat_pptp_inbound;
msg = ntohs(ctlh->messageType);
pr_debug("inbound control message %s\n", pptp_msg_name(msg));
@@ -383,10 +361,9 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
goto invalid;
}
- nf_nat_pptp_inbound = rcu_dereference(nf_nat_pptp_hook_inbound);
- if (nf_nat_pptp_inbound && ct->status & IPS_NAT_MASK)
- return nf_nat_pptp_inbound(skb, ct, ctinfo,
- protoff, ctlh, pptpReq);
+ hook = rcu_dereference(nf_nat_pptp_hook);
+ if (hook && ct->status & IPS_NAT_MASK)
+ return hook->inbound(skb, ct, ctinfo, protoff, ctlh, pptpReq);
return NF_ACCEPT;
invalid:
@@ -407,9 +384,9 @@ pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff,
enum ip_conntrack_info ctinfo)
{
struct nf_ct_pptp_master *info = nfct_help_data(ct);
+ const struct nf_nat_pptp_hook *hook;
u_int16_t msg;
__be16 cid = 0, pcid = 0;
- typeof(nf_nat_pptp_hook_outbound) nf_nat_pptp_outbound;
msg = ntohs(ctlh->messageType);
pr_debug("outbound control message %s\n", pptp_msg_name(msg));
@@ -479,10 +456,9 @@ pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff,
goto invalid;
}
- nf_nat_pptp_outbound = rcu_dereference(nf_nat_pptp_hook_outbound);
- if (nf_nat_pptp_outbound && ct->status & IPS_NAT_MASK)
- return nf_nat_pptp_outbound(skb, ct, ctinfo,
- protoff, ctlh, pptpReq);
+ hook = rcu_dereference(nf_nat_pptp_hook);
+ if (hook && ct->status & IPS_NAT_MASK)
+ return hook->outbound(skb, ct, ctinfo, protoff, ctlh, pptpReq);
return NF_ACCEPT;
invalid:
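
The four standalone NAT hook pointers collapse into a single RCU-managed struct, so each packet needs one rcu_dereference() instead of one per callback. The struct layout below is a sketch inferred from the removed declarations and the new hook->outbound / hook->inbound / hook->exp_gre / hook->expectfn call sites:

    struct nf_nat_pptp_hook {
        int  (*outbound)(struct sk_buff *skb, struct nf_conn *ct,
                         enum ip_conntrack_info ctinfo, unsigned int protoff,
                         struct PptpControlHeader *ctlh,
                         union pptp_ctrl_union *pptpReq);
        int  (*inbound)(struct sk_buff *skb, struct nf_conn *ct,
                        enum ip_conntrack_info ctinfo, unsigned int protoff,
                        struct PptpControlHeader *ctlh,
                        union pptp_ctrl_union *pptpReq);
        void (*exp_gre)(struct nf_conntrack_expect *exp_orig,
                        struct nf_conntrack_expect *exp_reply);
        void (*expectfn)(struct nf_conn *ct,
                         struct nf_conntrack_expect *exp);
    };

The NAT module publishes the table with rcu_assign_pointer() and clears it on unload, so callers observe either all four callbacks or none.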
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index d1f2d3c8d2b1..895b09cbd7cf 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -538,9 +538,13 @@ retry:
out_unlock:
mutex_unlock(&nf_ct_proto_mutex);
- if (fixup_needed)
- nf_ct_iterate_cleanup_net(net, nf_ct_tcp_fixup,
- (void *)(unsigned long)nfproto, 0, 0);
+ if (fixup_needed) {
+ struct nf_ct_iter_data iter_data = {
+ .net = net,
+ .data = (void *)(unsigned long)nfproto,
+ };
+ nf_ct_iterate_cleanup_net(nf_ct_tcp_fixup, &iter_data);
+ }
return err;
}
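
The nf_ct_iterate_cleanup_net() signature change recurs in several files in this series: net, portid and report now travel inside struct nf_ct_iter_data rather than as trailing arguments. The conversion is mechanical; a sketch of the caller shape:

    /* Sketch: unset fields (.portid, .report) default to 0, exactly
     * what the old call passed positionally.
     */
    struct nf_ct_iter_data iter = {
        .net  = net,
        .data = (void *)(unsigned long)nfproto,
    };

    nf_ct_iterate_cleanup_net(nf_ct_tcp_fixup, &iter);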
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 2394238d01c9..5a936334b517 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -489,6 +489,15 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
pr_debug("Setting vtag %x for dir %d\n",
ih->init_tag, !dir);
ct->proto.sctp.vtag[!dir] = ih->init_tag;
+
+ /* don't renew timeout on init retransmit so
+ * port reuse by client or NAT middlebox cannot
+ * keep entry alive indefinitely (incl. nat info).
+ */
+ if (new_state == SCTP_CONNTRACK_CLOSED &&
+ old_state == SCTP_CONNTRACK_CLOSED &&
+ nf_ct_is_confirmed(ct))
+ ignore = true;
}
ct->proto.sctp.state = new_state;
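
A confirmed entry where both the old and the new evaluation still say CLOSED can only mean a retransmitted INIT, e.g. from a client or NAT middlebox reusing the port; setting ignore accepts the packet but skips the timeout refresh so the stale entry ages out. The predicate, isolated into a hypothetical helper:

    /* Sketch: "ignore" means accept without renewing ct->timeout. */
    static bool sctp_init_is_retransmit(const struct nf_conn *ct,
                                        enum sctp_conntrack old_state,
                                        enum sctp_conntrack new_state)
    {
        return new_state == SCTP_CONNTRACK_CLOSED &&
               old_state == SCTP_CONNTRACK_CLOSED &&
               nf_ct_is_confirmed(ct);
    }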
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index af5115e127cf..656631083177 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -47,6 +47,12 @@ static const char *const tcp_conntrack_names[] = {
"SYN_SENT2",
};
+enum nf_ct_tcp_action {
+ NFCT_TCP_IGNORE,
+ NFCT_TCP_INVALID,
+ NFCT_TCP_ACCEPT,
+};
+
#define SECS * HZ
#define MINS * 60 SECS
#define HOURS * 60 MINS
@@ -341,8 +347,8 @@ static void tcp_options(const struct sk_buff *skb,
if (!ptr)
return;
- state->td_scale =
- state->flags = 0;
+ state->td_scale = 0;
+ state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL;
while (length > 0) {
int opcode=*ptr++;
@@ -446,24 +452,71 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
}
}
-static bool tcp_in_window(struct nf_conn *ct,
- enum ip_conntrack_dir dir,
- unsigned int index,
- const struct sk_buff *skb,
- unsigned int dataoff,
- const struct tcphdr *tcph,
- const struct nf_hook_state *hook_state)
+static void tcp_init_sender(struct ip_ct_tcp_state *sender,
+ struct ip_ct_tcp_state *receiver,
+ const struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct tcphdr *tcph,
+ u32 end, u32 win)
+{
+ /* SYN-ACK in reply to a SYN
+ * or SYN from reply direction in simultaneous open.
+ */
+ sender->td_end =
+ sender->td_maxend = end;
+ sender->td_maxwin = (win == 0 ? 1 : win);
+
+ tcp_options(skb, dataoff, tcph, sender);
+ /* RFC 1323:
+ * Both sides must send the Window Scale option
+ * to enable window scaling in either direction.
+ */
+ if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
+ receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) {
+ sender->td_scale = 0;
+ receiver->td_scale = 0;
+ }
+}
+
+__printf(6, 7)
+static enum nf_ct_tcp_action nf_tcp_log_invalid(const struct sk_buff *skb,
+ const struct nf_conn *ct,
+ const struct nf_hook_state *state,
+ const struct ip_ct_tcp_state *sender,
+ enum nf_ct_tcp_action ret,
+ const char *fmt, ...)
+{
+ const struct nf_tcp_net *tn = nf_tcp_pernet(nf_ct_net(ct));
+ struct va_format vaf;
+ va_list args;
+ bool be_liberal;
+
+ be_liberal = sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || tn->tcp_be_liberal;
+ if (be_liberal)
+ return NFCT_TCP_ACCEPT;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ nf_ct_l4proto_log_invalid(skb, ct, state, "%pV", &vaf);
+ va_end(args);
+
+ return ret;
+}
+
+static enum nf_ct_tcp_action
+tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
+ unsigned int index, const struct sk_buff *skb,
+ unsigned int dataoff, const struct tcphdr *tcph,
+ const struct nf_hook_state *hook_state)
{
struct ip_ct_tcp *state = &ct->proto.tcp;
- struct net *net = nf_ct_net(ct);
- struct nf_tcp_net *tn = nf_tcp_pernet(net);
struct ip_ct_tcp_state *sender = &state->seen[dir];
struct ip_ct_tcp_state *receiver = &state->seen[!dir];
- const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
__u32 seq, ack, sack, end, win, swin;
- u16 win_raw;
+ bool in_recv_win, seq_ok;
s32 receiver_offset;
- bool res, in_recv_win;
+ u16 win_raw;
/*
* Get the required data from the packet.
@@ -482,44 +535,17 @@ static bool tcp_in_window(struct nf_conn *ct,
ack -= receiver_offset;
sack -= receiver_offset;
- pr_debug("tcp_in_window: START\n");
- pr_debug("tcp_in_window: ");
- nf_ct_dump_tuple(tuple);
- pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
- seq, ack, receiver_offset, sack, receiver_offset, win, end);
- pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
-
if (sender->td_maxwin == 0) {
/*
* Initialize sender data.
*/
if (tcph->syn) {
- /*
- * SYN-ACK in reply to a SYN
- * or SYN from reply direction in simultaneous open.
- */
- sender->td_end =
- sender->td_maxend = end;
- sender->td_maxwin = (win == 0 ? 1 : win);
-
- tcp_options(skb, dataoff, tcph, sender);
- /*
- * RFC 1323:
- * Both sides must send the Window Scale option
- * to enable window scaling in either direction.
- */
- if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
- && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
- sender->td_scale =
- receiver->td_scale = 0;
+ tcp_init_sender(sender, receiver,
+ skb, dataoff, tcph,
+ end, win);
if (!tcph->ack)
/* Simultaneous open */
- return true;
+ return NFCT_TCP_ACCEPT;
} else {
/*
* We are in the middle of a connection,
@@ -545,21 +571,24 @@ static bool tcp_in_window(struct nf_conn *ct,
}
}
- } else if (((state->state == TCP_CONNTRACK_SYN_SENT
- && dir == IP_CT_DIR_ORIGINAL)
- || (state->state == TCP_CONNTRACK_SYN_RECV
- && dir == IP_CT_DIR_REPLY))
- && after(end, sender->td_end)) {
+ } else if (tcph->syn &&
+ after(end, sender->td_end) &&
+ (state->state == TCP_CONNTRACK_SYN_SENT ||
+ state->state == TCP_CONNTRACK_SYN_RECV)) {
/*
* RFC 793: "if a TCP is reinitialized ... then it need
* not wait at all; it must only be sure to use sequence
* numbers larger than those recently used."
+ *
+ * Re-init state for this direction, just like for the first
+ * syn(-ack) reply, it might differ in seq, ack or tcp options.
*/
- sender->td_end =
- sender->td_maxend = end;
- sender->td_maxwin = (win == 0 ? 1 : win);
+ tcp_init_sender(sender, receiver,
+ skb, dataoff, tcph,
+ end, win);
- tcp_options(skb, dataoff, tcph, sender);
+ if (dir == IP_CT_DIR_REPLY && !tcph->ack)
+ return NFCT_TCP_ACCEPT;
}
if (!(tcph->ack)) {
@@ -583,113 +612,166 @@ static bool tcp_in_window(struct nf_conn *ct,
*/
seq = end = sender->td_end;
- pr_debug("tcp_in_window: ");
- nf_ct_dump_tuple(tuple);
- pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
- seq, ack, receiver_offset, sack, receiver_offset, win, end);
- pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
+ seq_ok = before(seq, sender->td_maxend + 1);
+ if (!seq_ok) {
+ u32 overshot = end - sender->td_maxend + 1;
+ bool ack_ok;
+
+ ack_ok = after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1);
+ in_recv_win = receiver->td_maxwin &&
+ after(end, sender->td_end - receiver->td_maxwin - 1);
+
+ if (in_recv_win &&
+ ack_ok &&
+ overshot <= receiver->td_maxwin &&
+ before(sack, receiver->td_end + 1)) {
+ /* Work around TCPs that send more bytes than allowed by
+ * the receive window.
+ *
+ * If the (marked as invalid) packet is allowed to pass by
+ * the ruleset and the peer acks this data, then it's possible
+ * that all future packets will trigger the 'ACK is over upper
+ * bound' check.
+ *
+ * Thus, if only the sequence check fails, update td_end anyway so
+ * that a later ACK for this data can update the internal state.
+ */
+ sender->td_end = end;
+ sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
+
+ return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_IGNORE,
+ "%u bytes more than expected", overshot);
+ }
+
+ return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_INVALID,
+ "SEQ is over upper bound %u (over the window of the receiver)",
+ sender->td_maxend + 1);
+ }
+
+ if (!before(sack, receiver->td_end + 1))
+ return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_INVALID,
+ "ACK is over upper bound %u (ACKed data not seen yet)",
+ receiver->td_end + 1);
/* Is the ending sequence in the receive window (if available)? */
in_recv_win = !receiver->td_maxwin ||
after(end, sender->td_end - receiver->td_maxwin - 1);
+ if (!in_recv_win)
+ return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_IGNORE,
+ "SEQ is under lower bound %u (already ACKed data retransmitted)",
+ sender->td_end - receiver->td_maxwin - 1);
+ if (!after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1))
+ return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_IGNORE,
+ "ignored ACK under lower bound %u (possible overly delayed)",
+ receiver->td_end - MAXACKWINDOW(sender) - 1);
+
+ /* Take into account window scaling (RFC 1323). */
+ if (!tcph->syn)
+ win <<= sender->td_scale;
+
+ /* Update sender data. */
+ swin = win + (sack - ack);
+ if (sender->td_maxwin < swin)
+ sender->td_maxwin = swin;
+ if (after(end, sender->td_end)) {
+ sender->td_end = end;
+ sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
+ }
+ if (tcph->ack) {
+ if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
+ sender->td_maxack = ack;
+ sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
+ } else if (after(ack, sender->td_maxack)) {
+ sender->td_maxack = ack;
+ }
+ }
- pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
- before(seq, sender->td_maxend + 1),
- (in_recv_win ? 1 : 0),
- before(sack, receiver->td_end + 1),
- after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
+ /* Update receiver data. */
+ if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
+ receiver->td_maxwin += end - sender->td_maxend;
+ if (after(sack + win, receiver->td_maxend - 1)) {
+ receiver->td_maxend = sack + win;
+ if (win == 0)
+ receiver->td_maxend++;
+ }
+ if (ack == receiver->td_end)
+ receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
+
+ /* Check retransmissions. */
+ if (index == TCP_ACK_SET) {
+ if (state->last_dir == dir &&
+ state->last_seq == seq &&
+ state->last_ack == ack &&
+ state->last_end == end &&
+ state->last_win == win_raw) {
+ state->retrans++;
+ } else {
+ state->last_dir = dir;
+ state->last_seq = seq;
+ state->last_ack = ack;
+ state->last_end = end;
+ state->last_win = win_raw;
+ state->retrans = 0;
+ }
+ }
- if (before(seq, sender->td_maxend + 1) &&
- in_recv_win &&
- before(sack, receiver->td_end + 1) &&
- after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
- /*
- * Take into account window scaling (RFC 1323).
- */
- if (!tcph->syn)
- win <<= sender->td_scale;
+ return NFCT_TCP_ACCEPT;
+}
- /*
- * Update sender data.
- */
- swin = win + (sack - ack);
- if (sender->td_maxwin < swin)
- sender->td_maxwin = swin;
- if (after(end, sender->td_end)) {
- sender->td_end = end;
- sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
- }
- if (tcph->ack) {
- if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
- sender->td_maxack = ack;
- sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
- } else if (after(ack, sender->td_maxack))
- sender->td_maxack = ack;
- }
+static void __cold nf_tcp_handle_invalid(struct nf_conn *ct,
+ enum ip_conntrack_dir dir,
+ int index,
+ const struct sk_buff *skb,
+ const struct nf_hook_state *hook_state)
+{
+ const unsigned int *timeouts;
+ const struct nf_tcp_net *tn;
+ unsigned int timeout;
+ u32 expires;
- /*
- * Update receiver data.
- */
- if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
- receiver->td_maxwin += end - sender->td_maxend;
- if (after(sack + win, receiver->td_maxend - 1)) {
- receiver->td_maxend = sack + win;
- if (win == 0)
- receiver->td_maxend++;
- }
- if (ack == receiver->td_end)
- receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
+ if (!test_bit(IPS_ASSURED_BIT, &ct->status) ||
+ test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
+ return;
- /*
- * Check retransmissions.
- */
- if (index == TCP_ACK_SET) {
- if (state->last_dir == dir
- && state->last_seq == seq
- && state->last_ack == ack
- && state->last_end == end
- && state->last_win == win_raw)
- state->retrans++;
- else {
- state->last_dir = dir;
- state->last_seq = seq;
- state->last_ack = ack;
- state->last_end = end;
- state->last_win = win_raw;
- state->retrans = 0;
- }
- }
- res = true;
- } else {
- res = false;
- if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
- tn->tcp_be_liberal)
- res = true;
- if (!res) {
- nf_ct_l4proto_log_invalid(skb, ct, hook_state,
- "%s",
- before(seq, sender->td_maxend + 1) ?
- in_recv_win ?
- before(sack, receiver->td_end + 1) ?
- after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
- : "ACK is under the lower bound (possible overly delayed ACK)"
- : "ACK is over the upper bound (ACKed data not seen yet)"
- : "SEQ is under the lower bound (already ACKed data retransmitted)"
- : "SEQ is over the upper bound (over the window of the receiver)");
- }
+ /* We don't want to have connections hanging around in ESTABLISHED
+ * state for a long time 'just because' conntrack deemed a FIN/RST
+ * out-of-window.
+ *
+ * Shrink the timeout just like when there is unacked data.
+ * This speeds up eviction of 'dead' connections where the
+ * connection and conntracks internal state are out of sync.
+ */
+ switch (index) {
+ case TCP_RST_SET:
+ case TCP_FIN_SET:
+ break;
+ default:
+ return;
}
- pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
- "receiver end=%u maxend=%u maxwin=%u\n",
- res, sender->td_end, sender->td_maxend, sender->td_maxwin,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
+ if (ct->proto.tcp.last_dir != dir &&
+ (ct->proto.tcp.last_index == TCP_FIN_SET ||
+ ct->proto.tcp.last_index == TCP_RST_SET)) {
+ expires = nf_ct_expires(ct);
+ if (expires < 120 * HZ)
+ return;
+
+ tn = nf_tcp_pernet(nf_ct_net(ct));
+ timeouts = nf_ct_timeout_lookup(ct);
+ if (!timeouts)
+ timeouts = tn->timeouts;
+
+ timeout = READ_ONCE(timeouts[TCP_CONNTRACK_UNACK]);
+ if (expires > timeout) {
+ nf_ct_l4proto_log_invalid(skb, ct, hook_state,
+ "packet (index %d, dir %d) response for index %d lower timeout to %u",
+ index, dir, ct->proto.tcp.last_index, timeout);
- return res;
+ WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
+ }
+ } else {
+ ct->proto.tcp.last_index = index;
+ ct->proto.tcp.last_dir = dir;
+ }
}
/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
@@ -758,8 +840,6 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
enum tcp_conntrack new_state;
struct net *net = nf_ct_net(ct);
const struct nf_tcp_net *tn = nf_tcp_pernet(net);
- const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
- const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
/* Don't need lock here: this conntrack not in circulation yet */
new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
@@ -812,14 +892,6 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
/* tcp_packet will set them */
ct->proto.tcp.last_index = TCP_NONE_SET;
-
- pr_debug("%s: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- __func__,
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
return true;
}
@@ -839,6 +911,16 @@ static bool tcp_can_early_drop(const struct nf_conn *ct)
return false;
}
+static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state)
+{
+ state->td_end = 0;
+ state->td_maxend = 0;
+ state->td_maxwin = 0;
+ state->td_maxack = 0;
+ state->td_scale = 0;
+ state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL;
+}
+
/* Returns verdict for packet, or -1 for invalid. */
int nf_conntrack_tcp_packet(struct nf_conn *ct,
struct sk_buff *skb,
@@ -851,6 +933,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
struct nf_conntrack_tuple *tuple;
enum tcp_conntrack new_state, old_state;
unsigned int index, *timeouts;
+ enum nf_ct_tcp_action res;
enum ip_conntrack_dir dir;
const struct tcphdr *th;
struct tcphdr _tcph;
@@ -945,8 +1028,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
ct->proto.tcp.last_flags;
- memset(&ct->proto.tcp.seen[dir], 0,
- sizeof(struct ip_ct_tcp_state));
+ nf_ct_tcp_state_reset(&ct->proto.tcp.seen[dir]);
break;
}
ct->proto.tcp.last_index = index;
@@ -1009,10 +1091,11 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
}
/* Invalid packet */
- pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
- dir, get_conntrack_index(th), old_state);
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, state, "invalid state");
+ nf_ct_l4proto_log_invalid(skb, ct, state,
+ "packet (index %d) in dir %d invalid, state %s",
+ index, dir,
+ tcp_conntrack_names[old_state]);
return -NF_ACCEPT;
case TCP_CONNTRACK_TIME_WAIT:
/* RFC5961 compliance cause stack to send "challenge-ACK"
@@ -1116,10 +1199,18 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
break;
}
- if (!tcp_in_window(ct, dir, index,
- skb, dataoff, th, state)) {
+ res = tcp_in_window(ct, dir, index,
+ skb, dataoff, th, state);
+ switch (res) {
+ case NFCT_TCP_IGNORE:
+ spin_unlock_bh(&ct->lock);
+ return NF_ACCEPT;
+ case NFCT_TCP_INVALID:
+ nf_tcp_handle_invalid(ct, dir, index, skb, state);
spin_unlock_bh(&ct->lock);
return -NF_ACCEPT;
+ case NFCT_TCP_ACCEPT:
+ break;
}
in_window:
/* From now on we have got in-window packets */
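
tcp_in_window() now returns a three-valued verdict instead of a bool, letting the caller distinguish packets that should pass without advancing state from genuinely invalid ones. A sketch of the caller-side contract (handle_window_verdict is a hypothetical name; the real caller also drops ct->lock and calls nf_tcp_handle_invalid() on the INVALID path):

    static int handle_window_verdict(enum nf_ct_tcp_action res)
    {
        switch (res) {
        case NFCT_TCP_IGNORE:
            return NF_ACCEPT;   /* pass, but don't advance state */
        case NFCT_TCP_INVALID:
            return -NF_ACCEPT;  /* packet is invalid for this flow */
        case NFCT_TCP_ACCEPT:
            break;              /* continue normal state tracking */
        }
        return NF_ACCEPT;
    }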
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index fcb33b1d5456..13dc421fc4f5 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -34,10 +34,6 @@ MODULE_AUTHOR("Michal Schmidt <mschmidt@redhat.com>");
MODULE_DESCRIPTION("SANE connection tracking helper");
MODULE_ALIAS_NFCT_HELPER(HELPER_NAME);
-static char *sane_buffer;
-
-static DEFINE_SPINLOCK(nf_sane_lock);
-
#define MAX_PORTS 8
static u_int16_t ports[MAX_PORTS];
static unsigned int ports_c;
@@ -67,14 +63,16 @@ static int help(struct sk_buff *skb,
unsigned int dataoff, datalen;
const struct tcphdr *th;
struct tcphdr _tcph;
- void *sb_ptr;
int ret = NF_ACCEPT;
int dir = CTINFO2DIR(ctinfo);
struct nf_ct_sane_master *ct_sane_info = nfct_help_data(ct);
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple *tuple;
- struct sane_request *req;
struct sane_reply_net_start *reply;
+ union {
+ struct sane_request req;
+ struct sane_reply_net_start repl;
+ } buf;
/* Until there's been traffic both ways, don't look in packets. */
if (ctinfo != IP_CT_ESTABLISHED &&
@@ -92,59 +90,62 @@ static int help(struct sk_buff *skb,
return NF_ACCEPT;
datalen = skb->len - dataoff;
-
- spin_lock_bh(&nf_sane_lock);
- sb_ptr = skb_header_pointer(skb, dataoff, datalen, sane_buffer);
- if (!sb_ptr) {
- spin_unlock_bh(&nf_sane_lock);
- return NF_ACCEPT;
- }
-
if (dir == IP_CT_DIR_ORIGINAL) {
+ const struct sane_request *req;
+
if (datalen != sizeof(struct sane_request))
- goto out;
+ return NF_ACCEPT;
+
+ req = skb_header_pointer(skb, dataoff, datalen, &buf.req);
+ if (!req)
+ return NF_ACCEPT;
- req = sb_ptr;
if (req->RPC_code != htonl(SANE_NET_START)) {
/* Not an interesting command */
- ct_sane_info->state = SANE_STATE_NORMAL;
- goto out;
+ WRITE_ONCE(ct_sane_info->state, SANE_STATE_NORMAL);
+ return NF_ACCEPT;
}
/* We're interested in the next reply */
- ct_sane_info->state = SANE_STATE_START_REQUESTED;
- goto out;
+ WRITE_ONCE(ct_sane_info->state, SANE_STATE_START_REQUESTED);
+ return NF_ACCEPT;
}
+ /* IP_CT_DIR_REPLY */
+
/* Is it a reply to an uninteresting command? */
- if (ct_sane_info->state != SANE_STATE_START_REQUESTED)
- goto out;
+ if (READ_ONCE(ct_sane_info->state) != SANE_STATE_START_REQUESTED)
+ return NF_ACCEPT;
/* It's a reply to SANE_NET_START. */
- ct_sane_info->state = SANE_STATE_NORMAL;
+ WRITE_ONCE(ct_sane_info->state, SANE_STATE_NORMAL);
if (datalen < sizeof(struct sane_reply_net_start)) {
pr_debug("NET_START reply too short\n");
- goto out;
+ return NF_ACCEPT;
}
- reply = sb_ptr;
+ datalen = sizeof(struct sane_reply_net_start);
+
+ reply = skb_header_pointer(skb, dataoff, datalen, &buf.repl);
+ if (!reply)
+ return NF_ACCEPT;
+
if (reply->status != htonl(SANE_STATUS_SUCCESS)) {
/* saned refused the command */
pr_debug("unsuccessful SANE_STATUS = %u\n",
ntohl(reply->status));
- goto out;
+ return NF_ACCEPT;
}
/* Invalid saned reply? Ignore it. */
if (reply->zero != 0)
- goto out;
+ return NF_ACCEPT;
exp = nf_ct_expect_alloc(ct);
if (exp == NULL) {
nf_ct_helper_log(skb, ct, "cannot alloc expectation");
- ret = NF_DROP;
- goto out;
+ return NF_DROP;
}
tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
@@ -162,9 +163,6 @@ static int help(struct sk_buff *skb,
}
nf_ct_expect_put(exp);
-
-out:
- spin_unlock_bh(&nf_sane_lock);
return ret;
}
@@ -178,7 +176,6 @@ static const struct nf_conntrack_expect_policy sane_exp_policy = {
static void __exit nf_conntrack_sane_fini(void)
{
nf_conntrack_helpers_unregister(sane, ports_c * 2);
- kfree(sane_buffer);
}
static int __init nf_conntrack_sane_init(void)
@@ -187,10 +184,6 @@ static int __init nf_conntrack_sane_init(void)
NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_sane_master));
- sane_buffer = kmalloc(65536, GFP_KERNEL);
- if (!sane_buffer)
- return -ENOMEM;
-
if (ports_c == 0)
ports[ports_c++] = SANE_PORT;
@@ -210,7 +203,6 @@ static int __init nf_conntrack_sane_init(void)
ret = nf_conntrack_helpers_register(sane, ports_c * 2);
if (ret < 0) {
pr_err("failed to register helpers\n");
- kfree(sane_buffer);
return ret;
}
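
The shared 64 KiB sane_buffer and its spinlock disappear because the helper only ever needs one of two small fixed-size structures, which skb_header_pointer() can copy into an on-stack union on demand. A condensed sketch of the request-side path:

    /* Sketch: skb_header_pointer() returns a pointer into the skb
     * when the bytes are linear, otherwise it copies into &buf --
     * either way, no shared state and no locking is required.
     */
    union {
        struct sane_request req;
        struct sane_reply_net_start repl;
    } buf;
    const struct sane_request *req;

    if (datalen != sizeof(buf.req))
        return NF_ACCEPT;
    req = skb_header_pointer(skb, dataoff, sizeof(buf.req), &buf.req);
    if (!req)
        return NF_ACCEPT;

The WRITE_ONCE()/READ_ONCE() pairs on ct_sane_info->state replace the ordering the lock used to provide.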
diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
index 3066449f8bd8..7ab2b25b57bc 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -232,19 +232,3 @@ s32 nf_ct_seq_offset(const struct nf_conn *ct,
this_way->offset_after : this_way->offset_before;
}
EXPORT_SYMBOL_GPL(nf_ct_seq_offset);
-
-static const struct nf_ct_ext_type nf_ct_seqadj_extend = {
- .len = sizeof(struct nf_conn_seqadj),
- .align = __alignof__(struct nf_conn_seqadj),
- .id = NF_CT_EXT_SEQADJ,
-};
-
-int nf_conntrack_seqadj_init(void)
-{
- return nf_ct_extend_register(&nf_ct_seqadj_extend);
-}
-
-void nf_conntrack_seqadj_fini(void)
-{
- nf_ct_extend_unregister(&nf_ct_seqadj_extend);
-}
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index b83dc9bf0a5d..77f5e82d8e3f 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -60,7 +60,7 @@ module_param(sip_external_media, int, 0600);
MODULE_PARM_DESC(sip_external_media, "Expect Media streams between external "
"endpoints (default 0)");
-const struct nf_nat_sip_hooks *nf_nat_sip_hooks;
+const struct nf_nat_sip_hooks __rcu *nf_nat_sip_hooks;
EXPORT_SYMBOL_GPL(nf_nat_sip_hooks);
static int string_len(const struct nf_conn *ct, const char *dptr,
@@ -477,7 +477,7 @@ static int ct_sip_walk_headers(const struct nf_conn *ct, const char *dptr,
return ret;
if (ret == 0)
break;
- dataoff += *matchoff;
+ dataoff = *matchoff;
}
*in_header = 0;
}
@@ -489,7 +489,7 @@ static int ct_sip_walk_headers(const struct nf_conn *ct, const char *dptr,
break;
if (ret == 0)
return ret;
- dataoff += *matchoff;
+ dataoff = *matchoff;
}
if (in_header)
@@ -1229,6 +1229,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
struct nf_conntrack_expect *exp;
union nf_inet_addr *saddr, daddr;
const struct nf_nat_sip_hooks *hooks;
+ struct nf_conntrack_helper *helper;
__be16 port;
u8 proto;
unsigned int expires = 0;
@@ -1289,10 +1290,14 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
if (sip_direct_signalling)
saddr = &ct->tuplehash[!dir].tuple.src.u3;
+ helper = rcu_dereference(nfct_help(ct)->helper);
+ if (!helper)
+ return NF_DROP;
+
nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct),
saddr, &daddr, proto, NULL, &port);
exp->timeout.expires = sip_timeout * HZ;
- exp->helper = nfct_help(ct)->helper;
+ exp->helper = helper;
exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE;
hooks = rcu_dereference(nf_nat_sip_hooks);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 3e1afd10a9b6..4ffe84c5a82c 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -306,6 +306,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
if (unlikely(!refcount_inc_not_zero(&ct->ct_general.use)))
return 0;
+ /* load ->status after refcount increase */
+ smp_acquire__after_ctrl_dep();
+
if (nf_ct_should_gc(ct)) {
nf_ct_kill(ct);
goto release;
@@ -558,7 +561,6 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_LOG_INVALID,
NF_SYSCTL_CT_EXPECT_MAX,
NF_SYSCTL_CT_ACCT,
- NF_SYSCTL_CT_HELPER,
#ifdef CONFIG_NF_CONNTRACK_EVENTS
NF_SYSCTL_CT_EVENTS,
#endif
@@ -677,14 +679,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
- [NF_SYSCTL_CT_HELPER] = {
- .procname = "nf_conntrack_helper",
- .maxlen = sizeof(u8),
- .mode = 0644,
- .proc_handler = proc_dou8vec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
#ifdef CONFIG_NF_CONNTRACK_EVENTS
[NF_SYSCTL_CT_EVENTS] = {
.procname = "nf_conntrack_events",
@@ -693,7 +687,7 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
+ .extra2 = SYSCTL_TWO,
},
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
@@ -823,7 +817,7 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
-#if IS_ENABLED(CONFIG_NFT_FLOW_OFFLOAD)
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
.procname = "nf_flowtable_udp_timeout",
.maxlen = sizeof(unsigned int),
@@ -1097,7 +1091,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum;
table[NF_SYSCTL_CT_LOG_INVALID].data = &net->ct.sysctl_log_invalid;
table[NF_SYSCTL_CT_ACCT].data = &net->ct.sysctl_acct;
- table[NF_SYSCTL_CT_HELPER].data = &cnet->sysctl_auto_assign_helper;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
table[NF_SYSCTL_CT_EVENTS].data = &net->ct.sysctl_events;
#endif
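
The barrier added to ct_seq_show() turns the successful refcount increment into an acquire operation, so the subsequent status and timeout reads cannot be speculated before the reference is actually held. The pattern, in isolation:

    /* Sketch: refcount_inc_not_zero() only implies a control
     * dependency; smp_acquire__after_ctrl_dep() upgrades it so that
     * later loads (e.g. of ct->status in nf_ct_should_gc()) are
     * ordered after the increment.
     */
    if (unlikely(!refcount_inc_not_zero(&ct->ct_general.use)))
        return 0;
    smp_acquire__after_ctrl_dep();

    if (nf_ct_should_gc(ct))
        nf_ct_kill(ct);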
diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c
index 14387e0b8008..0cc584d3dbb1 100644
--- a/net/netfilter/nf_conntrack_timeout.c
+++ b/net/netfilter/nf_conntrack_timeout.c
@@ -22,19 +22,21 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_timeout.h>
-struct nf_ct_timeout *
-(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_ct_timeout_find_get_hook);
-
-void (*nf_ct_timeout_put_hook)(struct nf_ct_timeout *timeout) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_ct_timeout_put_hook);
+const struct nf_ct_timeout_hooks __rcu *nf_ct_timeout_hook __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ct_timeout_hook);
static int untimeout(struct nf_conn *ct, void *timeout)
{
struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct);
- if (timeout_ext && (!timeout || timeout_ext->timeout == timeout))
- RCU_INIT_POINTER(timeout_ext->timeout, NULL);
+ if (timeout_ext) {
+ const struct nf_ct_timeout *t;
+
+ t = rcu_access_pointer(timeout_ext->timeout);
+
+ if (!timeout || t == timeout)
+ RCU_INIT_POINTER(timeout_ext->timeout, NULL);
+ }
/* We are not intended to delete this conntrack. */
return 0;
@@ -42,37 +44,41 @@ static int untimeout(struct nf_conn *ct, void *timeout)
void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout)
{
- nf_ct_iterate_cleanup_net(net, untimeout, timeout, 0, 0);
+ struct nf_ct_iter_data iter_data = {
+ .net = net,
+ .data = timeout,
+ };
+
+ nf_ct_iterate_cleanup_net(untimeout, &iter_data);
}
EXPORT_SYMBOL_GPL(nf_ct_untimeout);
static void __nf_ct_timeout_put(struct nf_ct_timeout *timeout)
{
- typeof(nf_ct_timeout_put_hook) timeout_put;
+ const struct nf_ct_timeout_hooks *h = rcu_dereference(nf_ct_timeout_hook);
- timeout_put = rcu_dereference(nf_ct_timeout_put_hook);
- if (timeout_put)
- timeout_put(timeout);
+ if (h)
+ h->timeout_put(timeout);
}
int nf_ct_set_timeout(struct net *net, struct nf_conn *ct,
u8 l3num, u8 l4num, const char *timeout_name)
{
- typeof(nf_ct_timeout_find_get_hook) timeout_find_get;
+ const struct nf_ct_timeout_hooks *h;
struct nf_ct_timeout *timeout;
struct nf_conn_timeout *timeout_ext;
const char *errmsg = NULL;
int ret = 0;
rcu_read_lock();
- timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook);
- if (!timeout_find_get) {
+ h = rcu_dereference(nf_ct_timeout_hook);
+ if (!h) {
ret = -ENOENT;
errmsg = "Timeout policy base is empty";
goto out;
}
- timeout = timeout_find_get(net, timeout_name);
+ timeout = h->timeout_find_get(net, timeout_name);
if (!timeout) {
ret = -ENOENT;
pr_info_ratelimited("No such timeout policy \"%s\"\n",
@@ -119,37 +125,22 @@ EXPORT_SYMBOL_GPL(nf_ct_set_timeout);
void nf_ct_destroy_timeout(struct nf_conn *ct)
{
struct nf_conn_timeout *timeout_ext;
- typeof(nf_ct_timeout_put_hook) timeout_put;
+ const struct nf_ct_timeout_hooks *h;
rcu_read_lock();
- timeout_put = rcu_dereference(nf_ct_timeout_put_hook);
+ h = rcu_dereference(nf_ct_timeout_hook);
- if (timeout_put) {
+ if (h) {
timeout_ext = nf_ct_timeout_find(ct);
if (timeout_ext) {
- timeout_put(timeout_ext->timeout);
+ struct nf_ct_timeout *t;
+
+ t = rcu_dereference(timeout_ext->timeout);
+ if (t)
+ h->timeout_put(t);
RCU_INIT_POINTER(timeout_ext->timeout, NULL);
}
}
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_ct_destroy_timeout);
-
-static const struct nf_ct_ext_type timeout_extend = {
- .len = sizeof(struct nf_conn_timeout),
- .align = __alignof__(struct nf_conn_timeout),
- .id = NF_CT_EXT_TIMEOUT,
-};
-
-int nf_conntrack_timeout_init(void)
-{
- int ret = nf_ct_extend_register(&timeout_extend);
- if (ret < 0)
- pr_err("nf_ct_timeout: Unable to register timeout extension.\n");
- return ret;
-}
-
-void nf_conntrack_timeout_fini(void)
-{
- nf_ct_extend_unregister(&timeout_extend);
-}
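
As with the PPTP hooks earlier in this series, the two cttimeout function pointers merge into one struct behind a single RCU pointer. A sketch of its shape, inferred from the h->timeout_find_get and h->timeout_put call sites above:

    struct nf_ct_timeout_hooks {
        struct nf_ct_timeout *(*timeout_find_get)(struct net *net,
                                                  const char *name);
        void (*timeout_put)(struct nf_ct_timeout *timeout);
    };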
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c
index f656d393fa92..9e43a0a59e73 100644
--- a/net/netfilter/nf_conntrack_timestamp.c
+++ b/net/netfilter/nf_conntrack_timestamp.c
@@ -19,27 +19,7 @@ static bool nf_ct_tstamp __read_mostly;
module_param_named(tstamp, nf_ct_tstamp, bool, 0644);
MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping.");
-static const struct nf_ct_ext_type tstamp_extend = {
- .len = sizeof(struct nf_conn_tstamp),
- .align = __alignof__(struct nf_conn_tstamp),
- .id = NF_CT_EXT_TSTAMP,
-};
-
void nf_conntrack_tstamp_pernet_init(struct net *net)
{
net->ct.sysctl_tstamp = nf_ct_tstamp;
}
-
-int nf_conntrack_tstamp_init(void)
-{
- int ret;
- ret = nf_ct_extend_register(&tstamp_extend);
- if (ret < 0)
- pr_err("Unable to register extension\n");
- return ret;
-}
-
-void nf_conntrack_tstamp_fini(void)
-{
- nf_ct_extend_unregister(&tstamp_extend);
-}
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index a579e59ee5c5..a8e2425e43b0 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -13,14 +13,31 @@
#include <net/netfilter/nf_tables_offload.h>
#include <net/netfilter/nf_dup_netdev.h>
-static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev)
+#define NF_RECURSION_LIMIT 2
+
+static DEFINE_PER_CPU(u8, nf_dup_skb_recursion);
+
+static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev,
+ enum nf_dev_hooks hook)
{
- if (skb_mac_header_was_set(skb))
+ if (__this_cpu_read(nf_dup_skb_recursion) > NF_RECURSION_LIMIT)
+ goto err;
+
+ if (hook == NF_NETDEV_INGRESS && skb_mac_header_was_set(skb)) {
+ if (skb_cow_head(skb, skb->mac_len))
+ goto err;
+
skb_push(skb, skb->mac_len);
+ }
skb->dev = dev;
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
+ __this_cpu_inc(nf_dup_skb_recursion);
dev_queue_xmit(skb);
+ __this_cpu_dec(nf_dup_skb_recursion);
+ return;
+err:
+ kfree_skb(skb);
}
void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif)
@@ -33,7 +50,7 @@ void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif)
return;
}
- nf_do_netdev_egress(pkt->skb, dev);
+ nf_do_netdev_egress(pkt->skb, dev, nft_hook(pkt));
}
EXPORT_SYMBOL_GPL(nf_fwd_netdev_egress);
@@ -48,7 +65,7 @@ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif)
skb = skb_clone(pkt->skb, GFP_ATOMIC);
if (skb)
- nf_do_netdev_egress(skb, dev);
+ nf_do_netdev_egress(skb, dev, nft_hook(pkt));
}
EXPORT_SYMBOL_GPL(nf_dup_netdev_egress);
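
Duplicated packets re-injected with dev_queue_xmit() can re-enter the same dup/fwd rule and loop; the per-CPU counter bounds that recursion and drops the clone past the limit. The guard, in isolation (xmit_guarded is a hypothetical wrapper):

    #define NF_RECURSION_LIMIT 2

    static DEFINE_PER_CPU(u8, nf_dup_skb_recursion);

    static void xmit_guarded(struct sk_buff *skb)
    {
        /* Safe without preemption tricks: the counter is only
         * touched from hook context on the local CPU.
         */
        if (__this_cpu_read(nf_dup_skb_recursion) > NF_RECURSION_LIMIT) {
            kfree_skb(skb);
            return;
        }
        __this_cpu_inc(nf_dup_skb_recursion);
        dev_queue_xmit(skb);
        __this_cpu_dec(nf_dup_skb_recursion);
    }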
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index b90eca7a2f22..81c26a96c30b 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -39,22 +39,28 @@ flow_offload_fill_dir(struct flow_offload *flow,
ft->l3proto = ctt->src.l3num;
ft->l4proto = ctt->dst.protonum;
- ft->src_port = ctt->src.u.tcp.port;
- ft->dst_port = ctt->dst.u.tcp.port;
+
+ switch (ctt->dst.protonum) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ ft->src_port = ctt->src.u.tcp.port;
+ ft->dst_port = ctt->dst.u.tcp.port;
+ break;
+ }
}
struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
{
struct flow_offload *flow;
- if (unlikely(nf_ct_is_dying(ct) ||
- !refcount_inc_not_zero(&ct->ct_general.use)))
+ if (unlikely(nf_ct_is_dying(ct)))
return NULL;
flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
if (!flow)
- goto err_ct_refcnt;
+ return NULL;
+ refcount_inc(&ct->ct_general.use);
flow->ct = ct;
flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
@@ -66,11 +72,6 @@ struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
__set_bit(NF_FLOW_DNAT, &flow->flags);
return flow;
-
-err_ct_refcnt:
- nf_ct_put(ct);
-
- return NULL;
}
EXPORT_SYMBOL_GPL(flow_offload_alloc);
@@ -173,12 +174,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_init);
static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
{
- tcp->state = TCP_CONNTRACK_ESTABLISHED;
tcp->seen[0].td_maxwin = 0;
tcp->seen[1].td_maxwin = 0;
}
-static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
+static void flow_offload_fixup_ct(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
int l4num = nf_ct_protonum(ct);
@@ -187,7 +187,9 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
if (l4num == IPPROTO_TCP) {
struct nf_tcp_net *tn = nf_tcp_pernet(net);
- timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
+ flow_offload_fixup_tcp(&ct->proto.tcp);
+
+ timeout = tn->timeouts[ct->proto.tcp.state];
timeout -= tn->offload_timeout;
} else if (l4num == IPPROTO_UDP) {
struct nf_udp_net *tn = nf_udp_pernet(net);
@@ -205,18 +207,6 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
}
-static void flow_offload_fixup_ct_state(struct nf_conn *ct)
-{
- if (nf_ct_protonum(ct) == IPPROTO_TCP)
- flow_offload_fixup_tcp(&ct->proto.tcp);
-}
-
-static void flow_offload_fixup_ct(struct nf_conn *ct)
-{
- flow_offload_fixup_ct_state(ct);
- flow_offload_fixup_ct_timeout(ct);
-}
-
static void flow_offload_route_release(struct flow_offload *flow)
{
nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
@@ -329,8 +319,10 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
u32 timeout;
timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
- if (READ_ONCE(flow->timeout) != timeout)
+ if (timeout - READ_ONCE(flow->timeout) > HZ)
WRITE_ONCE(flow->timeout, timeout);
+ else
+ return;
if (likely(!nf_flowtable_hw_offload(flow_table)))
return;
@@ -353,22 +345,14 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
nf_flow_offload_rhash_params);
-
- clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
-
- if (nf_flow_has_expired(flow))
- flow_offload_fixup_ct(flow->ct);
- else
- flow_offload_fixup_ct_timeout(flow->ct);
-
flow_offload_free(flow);
}
void flow_offload_teardown(struct flow_offload *flow)
{
+ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
set_bit(NF_FLOW_TEARDOWN, &flow->flags);
-
- flow_offload_fixup_ct_state(flow->ct);
+ flow_offload_fixup_ct(flow->ct);
}
EXPORT_SYMBOL_GPL(flow_offload_teardown);
@@ -399,7 +383,8 @@ EXPORT_SYMBOL_GPL(flow_offload_lookup);
static int
nf_flow_table_iterate(struct nf_flowtable *flow_table,
- void (*iter)(struct flow_offload *flow, void *data),
+ void (*iter)(struct nf_flowtable *flowtable,
+ struct flow_offload *flow, void *data),
void *data)
{
struct flow_offload_tuple_rhash *tuplehash;
@@ -423,7 +408,7 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
- iter(flow, data);
+ iter(flow_table, flow, data);
}
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);
@@ -431,34 +416,12 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
return err;
}
-static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
-{
- struct dst_entry *dst;
-
- if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
- tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
- dst = tuple->dst_cache;
- if (!dst_check(dst, tuple->dst_cookie))
- return true;
- }
-
- return false;
-}
-
-static bool nf_flow_has_stale_dst(struct flow_offload *flow)
-{
- return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
- flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
-}
-
-static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
+static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
+ struct flow_offload *flow, void *data)
{
- struct nf_flowtable *flow_table = data;
-
if (nf_flow_has_expired(flow) ||
- nf_ct_is_dying(flow->ct) ||
- nf_flow_has_stale_dst(flow))
- set_bit(NF_FLOW_TEARDOWN, &flow->flags);
+ nf_ct_is_dying(flow->ct))
+ flow_offload_teardown(flow);
if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
if (test_bit(NF_FLOW_HW, &flow->flags)) {
@@ -474,12 +437,17 @@ static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
}
}
+void nf_flow_table_gc_run(struct nf_flowtable *flow_table)
+{
+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
+}
+
static void nf_flow_offload_work_gc(struct work_struct *work)
{
struct nf_flowtable *flow_table;
flow_table = container_of(work, struct nf_flowtable, gc_work.work);
- nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
+ nf_flow_table_gc_run(flow_table);
queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
@@ -595,7 +563,8 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
}
EXPORT_SYMBOL_GPL(nf_flow_table_init);
-static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
+static void nf_flow_table_do_cleanup(struct nf_flowtable *flow_table,
+ struct flow_offload *flow, void *data)
{
struct net_device *dev = data;
@@ -636,24 +605,83 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
mutex_unlock(&flowtable_lock);
cancel_delayed_work_sync(&flow_table->gc_work);
- nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
- nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
nf_flow_table_offload_flush(flow_table);
- if (nf_flowtable_hw_offload(flow_table))
- nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step,
- flow_table);
+ /* ... no more pending work after this stage ... */
+ nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
+ nf_flow_table_gc_run(flow_table);
+ nf_flow_table_offload_flush_cleanup(flow_table);
rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);
+static int nf_flow_table_init_net(struct net *net)
+{
+ net->ft.stat = alloc_percpu(struct nf_flow_table_stat);
+ return net->ft.stat ? 0 : -ENOMEM;
+}
+
+static void nf_flow_table_fini_net(struct net *net)
+{
+ free_percpu(net->ft.stat);
+}
+
+static int nf_flow_table_pernet_init(struct net *net)
+{
+ int ret;
+
+ ret = nf_flow_table_init_net(net);
+ if (ret < 0)
+ return ret;
+
+ ret = nf_flow_table_init_proc(net);
+ if (ret < 0)
+ goto out_proc;
+
+ return 0;
+
+out_proc:
+ nf_flow_table_fini_net(net);
+ return ret;
+}
+
+static void nf_flow_table_pernet_exit(struct list_head *net_exit_list)
+{
+ struct net *net;
+
+ list_for_each_entry(net, net_exit_list, exit_list) {
+ nf_flow_table_fini_proc(net);
+ nf_flow_table_fini_net(net);
+ }
+}
+
+static struct pernet_operations nf_flow_table_net_ops = {
+ .init = nf_flow_table_pernet_init,
+ .exit_batch = nf_flow_table_pernet_exit,
+};
+
static int __init nf_flow_table_module_init(void)
{
- return nf_flow_table_offload_init();
+ int ret;
+
+ ret = register_pernet_subsys(&nf_flow_table_net_ops);
+ if (ret < 0)
+ return ret;
+
+ ret = nf_flow_table_offload_init();
+ if (ret)
+ goto out_offload;
+
+ return 0;
+
+out_offload:
+ unregister_pernet_subsys(&nf_flow_table_net_ops);
+ return ret;
}
static void __exit nf_flow_table_module_exit(void)
{
nf_flow_table_offload_exit();
+ unregister_pernet_subsys(&nf_flow_table_net_ops);
}
module_init(nf_flow_table_module_init);
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 5c57ade6bd05..0ccabf3fa6aa 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -6,12 +6,29 @@
#include <linux/rhashtable.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
+#include <linux/if_vlan.h>
static unsigned int
nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
+ struct vlan_ethhdr *veth;
+ __be16 proto;
+
switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+ proto = veth->h_vlan_encapsulated_proto;
+ break;
+ case htons(ETH_P_PPP_SES):
+ proto = nf_flow_pppoe_proto(skb);
+ break;
+ default:
+ proto = skb->protocol;
+ break;
+ }
+
+ switch (proto) {
case htons(ETH_P_IP):
return nf_flow_offload_ip_hook(priv, skb, state);
case htons(ETH_P_IPV6):
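Previously this hook dispatched on skb->protocol alone, so traffic reaching an inet flowtable inside a VLAN or PPPoE encapsulation never matched a flow. It now peels one level of encapsulation to recover the inner ethertype before choosing the IPv4 or IPv6 handler. nf_flow_pppoe_proto() is shared with the IP fast path for this; its private copy is deleted from nf_flow_table_ip.c below.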
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 889cf88d3dba..b350fe9d00b0 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -8,8 +8,6 @@
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
-#include <linux/if_pppox.h>
-#include <linux/ppp_defs.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
@@ -172,6 +170,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
struct flow_ports *ports;
unsigned int thoff;
struct iphdr *iph;
+ u8 ipproto;
if (!pskb_may_pull(skb, sizeof(*iph) + offset))
return -1;
@@ -185,13 +184,19 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
thoff += offset;
- switch (iph->protocol) {
+ ipproto = iph->protocol;
+ switch (ipproto) {
case IPPROTO_TCP:
*hdrsize = sizeof(struct tcphdr);
break;
case IPPROTO_UDP:
*hdrsize = sizeof(struct udphdr);
break;
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ case IPPROTO_GRE:
+ *hdrsize = sizeof(struct gre_base_hdr);
+ break;
+#endif
default:
return -1;
}
@@ -202,15 +207,29 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
if (!pskb_may_pull(skb, thoff + *hdrsize))
return -1;
+ switch (ipproto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ break;
+ case IPPROTO_GRE: {
+ struct gre_base_hdr *greh;
+
+ greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
+ if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
+ return -1;
+ break;
+ }
+ }
+
iph = (struct iphdr *)(skb_network_header(skb) + offset);
- ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v4.s_addr = iph->saddr;
tuple->dst_v4.s_addr = iph->daddr;
- tuple->src_port = ports->source;
- tuple->dst_port = ports->dest;
tuple->l3proto = AF_INET;
- tuple->l4proto = iph->protocol;
+ tuple->l4proto = ipproto;
tuple->iifidx = dev->ifindex;
nf_flow_tuple_encap(skb, tuple);
@@ -229,6 +248,15 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
return true;
}
+static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple)
+{
+ if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH &&
+ tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM)
+ return true;
+
+ return dst_check(tuple->dst_cache, tuple->dst_cookie);
+}
+
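This check replaces the gc-time stale-dst scan removed from nf_flow_table_core.c above (flow_offload_stale_dst()/nf_flow_has_stale_dst()). Validating the cached dst against its cookie on the packet path means a flow whose route has changed is torn down by the very next packet, which then falls back to the classic forwarding path, instead of lingering until the next garbage-collection pass. Transmit types without a cached dst trivially pass.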
static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
const struct nf_hook_state *state,
struct dst_entry *dst)
@@ -239,22 +267,6 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
return NF_STOLEN;
}
-static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
-{
- __be16 proto;
-
- proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
- sizeof(struct pppoe_hdr)));
- switch (proto) {
- case htons(PPP_IP):
- return htons(ETH_P_IP);
- case htons(PPP_IPV6):
- return htons(ETH_P_IPV6);
- }
-
- return 0;
-}
-
static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
u32 *offset)
{
@@ -364,6 +376,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
return NF_ACCEPT;
+ if (!nf_flow_dst_check(&tuplehash->tuple)) {
+ flow_offload_teardown(flow);
+ return NF_ACCEPT;
+ }
+
if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP;
@@ -376,7 +393,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
nf_flow_nat_ip(flow, skb, thoff, dir, iph);
ip_decrease_ttl(iph);
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
@@ -521,6 +538,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
struct flow_ports *ports;
struct ipv6hdr *ip6h;
unsigned int thoff;
+ u8 nexthdr;
thoff = sizeof(*ip6h) + offset;
if (!pskb_may_pull(skb, thoff))
@@ -528,13 +546,19 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
- switch (ip6h->nexthdr) {
+ nexthdr = ip6h->nexthdr;
+ switch (nexthdr) {
case IPPROTO_TCP:
*hdrsize = sizeof(struct tcphdr);
break;
case IPPROTO_UDP:
*hdrsize = sizeof(struct udphdr);
break;
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ case IPPROTO_GRE:
+ *hdrsize = sizeof(struct gre_base_hdr);
+ break;
+#endif
default:
return -1;
}
@@ -545,15 +569,29 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
if (!pskb_may_pull(skb, thoff + *hdrsize))
return -1;
+ switch (nexthdr) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ break;
+ case IPPROTO_GRE: {
+ struct gre_base_hdr *greh;
+
+ greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
+ if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
+ return -1;
+ break;
+ }
+ }
+
ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
- ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v6 = ip6h->saddr;
tuple->dst_v6 = ip6h->daddr;
- tuple->src_port = ports->source;
- tuple->dst_port = ports->dest;
tuple->l3proto = AF_INET6;
- tuple->l4proto = ip6h->nexthdr;
+ tuple->l4proto = nexthdr;
tuple->iifidx = dev->ifindex;
nf_flow_tuple_encap(skb, tuple);
@@ -600,6 +638,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
return NF_ACCEPT;
+ if (!nf_flow_dst_check(&tuplehash->tuple)) {
+ flow_offload_teardown(flow);
+ return NF_ACCEPT;
+ }
+
if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP;
@@ -611,7 +654,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
nf_flow_nat_ipv6(flow, skb, dir, ip6h);
ip6h->hop_limit--;
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
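Both tuple extractors now admit IPPROTO_GRE, but only version 0: the GRE_VERSION_0 check rejects GREv1, which is PPTP signalling with a call ID and stays on the slow path. GREv0 carries no port numbers, so the tuple's src_port/dst_port stay zero, which is why flow_offload_fill_dir() in nf_flow_table_core.c and nf_flow_rule_match() in nf_flow_table_offload.c switch on the L4 protocol before touching the port fields.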
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index b561e0a44a45..b04645ced89b 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -20,7 +20,6 @@ static struct workqueue_struct *nf_flow_offload_stats_wq;
struct flow_offload_work {
struct list_head list;
enum flow_cls_command cmd;
- int priority;
struct nf_flowtable *flowtable;
struct flow_offload *flow;
struct work_struct work;
@@ -110,7 +109,11 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
nf_flow_rule_lwt_match(match, tun_info);
}
- key->meta.ingress_ifindex = tuple->iifidx;
+ if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_TC)
+ key->meta.ingress_ifindex = tuple->tc.iifidx;
+ else
+ key->meta.ingress_ifindex = tuple->iifidx;
+
mask->meta.ingress_ifindex = 0xffffffff;
if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
@@ -170,6 +173,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
break;
case IPPROTO_UDP:
+ case IPPROTO_GRE:
break;
default:
return -EOPNOTSUPP;
@@ -178,15 +182,22 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
key->basic.ip_proto = tuple->l4proto;
mask->basic.ip_proto = 0xff;
- key->tp.src = tuple->src_port;
- mask->tp.src = 0xffff;
- key->tp.dst = tuple->dst_port;
- mask->tp.dst = 0xffff;
-
match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
BIT(FLOW_DISSECTOR_KEY_CONTROL) |
- BIT(FLOW_DISSECTOR_KEY_BASIC) |
- BIT(FLOW_DISSECTOR_KEY_PORTS);
+ BIT(FLOW_DISSECTOR_KEY_BASIC);
+
+ switch (tuple->l4proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ key->tp.src = tuple->src_port;
+ mask->tp.src = 0xffff;
+ key->tp.dst = tuple->dst_port;
+ mask->tp.dst = 0xffff;
+
+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_PORTS);
+ break;
+ }
+
return 0;
}
@@ -862,7 +873,8 @@ static int flow_offload_tuple_add(struct flow_offload_work *offload,
enum flow_offload_tuple_dir dir)
{
return nf_flow_offload_tuple(offload->flowtable, offload->flow,
- flow_rule, dir, offload->priority,
+ flow_rule, dir,
+ offload->flowtable->priority,
FLOW_CLS_REPLACE, NULL,
&offload->flowtable->flow_block.cb_list);
}
@@ -871,7 +883,8 @@ static void flow_offload_tuple_del(struct flow_offload_work *offload,
enum flow_offload_tuple_dir dir)
{
nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
- offload->priority, FLOW_CLS_DESTROY, NULL,
+ offload->flowtable->priority,
+ FLOW_CLS_DESTROY, NULL,
&offload->flowtable->flow_block.cb_list);
}
@@ -922,7 +935,8 @@ static void flow_offload_tuple_stats(struct flow_offload_work *offload,
struct flow_stats *stats)
{
nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
- offload->priority, FLOW_CLS_STATS, stats,
+ offload->flowtable->priority,
+ FLOW_CLS_STATS, stats,
&offload->flowtable->flow_block.cb_list);
}
@@ -953,17 +967,22 @@ static void flow_offload_work_stats(struct flow_offload_work *offload)
static void flow_offload_work_handler(struct work_struct *work)
{
struct flow_offload_work *offload;
+ struct net *net;
offload = container_of(work, struct flow_offload_work, work);
+ net = read_pnet(&offload->flowtable->net);
switch (offload->cmd) {
case FLOW_CLS_REPLACE:
flow_offload_work_add(offload);
+ NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_add);
break;
case FLOW_CLS_DESTROY:
flow_offload_work_del(offload);
+ NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_del);
break;
case FLOW_CLS_STATS:
flow_offload_work_stats(offload);
+ NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_stats);
break;
default:
WARN_ON_ONCE(1);
@@ -975,12 +994,18 @@ static void flow_offload_work_handler(struct work_struct *work)
static void flow_offload_queue_work(struct flow_offload_work *offload)
{
- if (offload->cmd == FLOW_CLS_REPLACE)
+ struct net *net = read_pnet(&offload->flowtable->net);
+
+ if (offload->cmd == FLOW_CLS_REPLACE) {
+ NF_FLOW_TABLE_STAT_INC(net, count_wq_add);
queue_work(nf_flow_offload_add_wq, &offload->work);
- else if (offload->cmd == FLOW_CLS_DESTROY)
+ } else if (offload->cmd == FLOW_CLS_DESTROY) {
+ NF_FLOW_TABLE_STAT_INC(net, count_wq_del);
queue_work(nf_flow_offload_del_wq, &offload->work);
- else
+ } else {
+ NF_FLOW_TABLE_STAT_INC(net, count_wq_stats);
queue_work(nf_flow_offload_stats_wq, &offload->work);
+ }
}
static struct flow_offload_work *
@@ -1000,7 +1025,6 @@ nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
offload->cmd = cmd;
offload->flow = flow;
- offload->priority = flowtable->priority;
offload->flowtable = flowtable;
INIT_WORK(&offload->work, flow_offload_work_handler);
@@ -1050,6 +1074,14 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable,
flow_offload_queue_work(offload);
}
+void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable)
+{
+ if (nf_flowtable_hw_offload(flowtable)) {
+ flush_workqueue(nf_flow_offload_del_wq);
+ nf_flow_table_gc_run(flowtable);
+ }
+}
+
void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
{
if (nf_flowtable_hw_offload(flowtable)) {
diff --git a/net/netfilter/nf_flow_table_procfs.c b/net/netfilter/nf_flow_table_procfs.c
new file mode 100644
index 000000000000..159b033a43e6
--- /dev/null
+++ b/net/netfilter/nf_flow_table_procfs.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <net/netfilter/nf_flow_table.h>
+
+static void *nf_flow_table_cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct net *net = seq_file_net(seq);
+ int cpu;
+
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
+
+ for (cpu = *pos - 1; cpu < nr_cpu_ids; ++cpu) {
+ if (!cpu_possible(cpu))
+ continue;
+ *pos = cpu + 1;
+ return per_cpu_ptr(net->ft.stat, cpu);
+ }
+
+ return NULL;
+}
+
+static void *nf_flow_table_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct net *net = seq_file_net(seq);
+ int cpu;
+
+ for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
+ if (!cpu_possible(cpu))
+ continue;
+ *pos = cpu + 1;
+ return per_cpu_ptr(net->ft.stat, cpu);
+ }
+ (*pos)++;
+ return NULL;
+}
+
+static void nf_flow_table_cpu_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static int nf_flow_table_cpu_seq_show(struct seq_file *seq, void *v)
+{
+ const struct nf_flow_table_stat *st = v;
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq, "wq_add wq_del wq_stats\n");
+ return 0;
+ }
+
+ seq_printf(seq, "%8d %8d %8d\n",
+ st->count_wq_add,
+ st->count_wq_del,
+ st->count_wq_stats
+ );
+ return 0;
+}
+
+static const struct seq_operations nf_flow_table_cpu_seq_ops = {
+ .start = nf_flow_table_cpu_seq_start,
+ .next = nf_flow_table_cpu_seq_next,
+ .stop = nf_flow_table_cpu_seq_stop,
+ .show = nf_flow_table_cpu_seq_show,
+};
+
+int nf_flow_table_init_proc(struct net *net)
+{
+ struct proc_dir_entry *pde;
+
+ pde = proc_create_net("nf_flowtable", 0444, net->proc_net_stat,
+ &nf_flow_table_cpu_seq_ops,
+ sizeof(struct seq_net_private));
+ return pde ? 0 : -ENOMEM;
+}
+
+void nf_flow_table_fini_proc(struct net *net)
+{
+ remove_proc_entry("nf_flowtable", net->proc_net_stat);
+}
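The counters behind this file are incremented in flow_offload_queue_work() and decremented in flow_offload_work_handler() (see the nf_flow_table_offload.c hunks above), so each column reports work items currently queued, one row per possible CPU under the "wq_add wq_del wq_stats" header. The per-cpu structure itself lives in the flowtable header rather than in this diff; presumably it looks like:

	/* Presumed layout (include/net/netfilter/nf_flow_table.h, not part
	 * of this diff) backing /proc/net/stat/nf_flowtable.
	 */
	struct nf_flow_table_stat {
		unsigned int count_wq_add;	/* queued FLOW_CLS_REPLACE work */
		unsigned int count_wq_del;	/* queued FLOW_CLS_DESTROY work */
		unsigned int count_wq_stats;	/* queued FLOW_CLS_STATS work */
	};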
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index edee7fa944c1..8a29290149bd 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -443,9 +443,9 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
mutex_lock(&nf_log_mutex);
logger = nft_log_dereference(net->nf.nf_loggers[tindex]);
if (!logger)
- strlcpy(buf, "NONE", sizeof(buf));
+ strscpy(buf, "NONE", sizeof(buf));
else
- strlcpy(buf, logger->name, sizeof(buf));
+ strscpy(buf, logger->name, sizeof(buf));
mutex_unlock(&nf_log_mutex);
r = proc_dostring(&tmp, write, buffer, lenp, ppos);
}
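strscpy() is the preferred replacement because it guarantees NUL-termination and reports truncation instead of hiding it: it returns the number of bytes copied, or -E2BIG when the source did not fit, whereas strlcpy() returned strlen(src) and so could read past the end of an untrusted source. A minimal usage sketch with the buffer from this function (the pr_warn() is illustrative; the kernel code ignores truncation here):

	char buf[NFLOGGER_NAME_LEN];
	ssize_t n;

	n = strscpy(buf, logger->name, sizeof(buf));
	if (n == -E2BIG)
		pr_warn("nf_log: logger name truncated\n"); /* buf still NUL-terminated */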
diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
index 13234641cdb3..cb894f0d63e9 100644
--- a/net/netfilter/nf_log_syslog.c
+++ b/net/netfilter/nf_log_syslog.c
@@ -40,6 +40,12 @@ struct arppayload {
unsigned char ip_dst[4];
};
+/* Guard against containers flooding syslog. */
+static bool nf_log_allowed(const struct net *net)
+{
+ return net_eq(net, &init_net) || sysctl_nf_log_all_netns;
+}
+
static void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb)
{
u16 vid;
@@ -61,7 +67,7 @@ dump_arp_packet(struct nf_log_buf *m,
unsigned int logflags;
struct arphdr _arph;
- ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
+ ah = skb_header_pointer(skb, nhoff, sizeof(_arph), &_arph);
if (!ah) {
nf_log_buf_add(m, "TRUNCATED");
return;
@@ -90,7 +96,7 @@ dump_arp_packet(struct nf_log_buf *m,
ah->ar_pln != sizeof(__be32))
return;
- ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp);
+ ap = skb_header_pointer(skb, nhoff + sizeof(_arph), sizeof(_arpp), &_arpp);
if (!ap) {
nf_log_buf_add(m, " INCOMPLETE [%zu bytes]",
skb->len - sizeof(_arph));
@@ -133,8 +139,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf,
{
struct nf_log_buf *m;
- /* FIXME: Disabled from containers until syslog ns is supported */
- if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
+ if (!nf_log_allowed(net))
return;
m = nf_log_buf_open();
@@ -144,7 +149,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf,
nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
prefix);
- dump_arp_packet(m, loginfo, skb, 0);
+ dump_arp_packet(m, loginfo, skb, skb_network_offset(skb));
nf_log_buf_close(m);
}
@@ -766,9 +771,9 @@ dump_ipv6_packet(struct net *net, struct nf_log_buf *m,
nf_log_buf_add(m, "MARK=0x%x ", skb->mark);
}
-static void dump_ipv4_mac_header(struct nf_log_buf *m,
- const struct nf_loginfo *info,
- const struct sk_buff *skb)
+static void dump_mac_header(struct nf_log_buf *m,
+ const struct nf_loginfo *info,
+ const struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
unsigned int logflags = 0;
@@ -798,9 +803,26 @@ fallback:
const unsigned char *p = skb_mac_header(skb);
unsigned int i;
- nf_log_buf_add(m, "%02x", *p++);
- for (i = 1; i < dev->hard_header_len; i++, p++)
- nf_log_buf_add(m, ":%02x", *p);
+ if (dev->type == ARPHRD_SIT) {
+ p -= ETH_HLEN;
+
+ if (p < skb->head)
+ p = NULL;
+ }
+
+ if (p) {
+ nf_log_buf_add(m, "%02x", *p++);
+ for (i = 1; i < dev->hard_header_len; i++)
+ nf_log_buf_add(m, ":%02x", *p++);
+ }
+
+ if (dev->type == ARPHRD_SIT) {
+ const struct iphdr *iph =
+ (struct iphdr *)skb_mac_header(skb);
+
+ nf_log_buf_add(m, " TUNNEL=%pI4->%pI4", &iph->saddr,
+ &iph->daddr);
+ }
}
nf_log_buf_add(m, " ");
}
@@ -814,8 +836,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
{
struct nf_log_buf *m;
- /* FIXME: Disabled from containers until syslog ns is supported */
- if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
+ if (!nf_log_allowed(net))
return;
m = nf_log_buf_open();
@@ -827,9 +848,9 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
out, loginfo, prefix);
if (in)
- dump_ipv4_mac_header(m, loginfo, skb);
+ dump_mac_header(m, loginfo, skb);
- dump_ipv4_packet(net, m, loginfo, skb, 0);
+ dump_ipv4_packet(net, m, loginfo, skb, skb_network_offset(skb));
nf_log_buf_close(m);
}
@@ -841,64 +862,6 @@ static struct nf_logger nf_ip_logger __read_mostly = {
.me = THIS_MODULE,
};
-static void dump_ipv6_mac_header(struct nf_log_buf *m,
- const struct nf_loginfo *info,
- const struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- unsigned int logflags = 0;
-
- if (info->type == NF_LOG_TYPE_LOG)
- logflags = info->u.log.logflags;
-
- if (!(logflags & NF_LOG_MACDECODE))
- goto fallback;
-
- switch (dev->type) {
- case ARPHRD_ETHER:
- nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
- eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
- nf_log_dump_vlan(m, skb);
- nf_log_buf_add(m, "MACPROTO=%04x ",
- ntohs(eth_hdr(skb)->h_proto));
- return;
- default:
- break;
- }
-
-fallback:
- nf_log_buf_add(m, "MAC=");
- if (dev->hard_header_len &&
- skb->mac_header != skb->network_header) {
- const unsigned char *p = skb_mac_header(skb);
- unsigned int len = dev->hard_header_len;
- unsigned int i;
-
- if (dev->type == ARPHRD_SIT) {
- p -= ETH_HLEN;
-
- if (p < skb->head)
- p = NULL;
- }
-
- if (p) {
- nf_log_buf_add(m, "%02x", *p++);
- for (i = 1; i < len; i++)
- nf_log_buf_add(m, ":%02x", *p++);
- }
- nf_log_buf_add(m, " ");
-
- if (dev->type == ARPHRD_SIT) {
- const struct iphdr *iph =
- (struct iphdr *)skb_mac_header(skb);
- nf_log_buf_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr,
- &iph->daddr);
- }
- } else {
- nf_log_buf_add(m, " ");
- }
-}
-
static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
unsigned int hooknum, const struct sk_buff *skb,
const struct net_device *in,
@@ -908,8 +871,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
{
struct nf_log_buf *m;
- /* FIXME: Disabled from containers until syslog ns is supported */
- if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
+ if (!nf_log_allowed(net))
return;
m = nf_log_buf_open();
@@ -921,7 +883,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
loginfo, prefix);
if (in)
- dump_ipv6_mac_header(m, loginfo, skb);
+ dump_mac_header(m, loginfo, skb);
dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1);
@@ -935,6 +897,32 @@ static struct nf_logger nf_ip6_logger __read_mostly = {
.me = THIS_MODULE,
};
+static void nf_log_unknown_packet(struct net *net, u_int8_t pf,
+ unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ struct nf_log_buf *m;
+
+ if (!nf_log_allowed(net))
+ return;
+
+ m = nf_log_buf_open();
+
+ if (!loginfo)
+ loginfo = &default_loginfo;
+
+ nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
+ prefix);
+
+ dump_mac_header(m, loginfo, skb);
+
+ nf_log_buf_close(m);
+}
+
static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
@@ -954,6 +942,10 @@ static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
case htons(ETH_P_RARP):
nf_log_arp_packet(net, pf, hooknum, skb, in, out, loginfo, prefix);
break;
+ default:
+ nf_log_unknown_packet(net, pf, hooknum, skb,
+ in, out, loginfo, prefix);
+ break;
}
}
diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c
index 3bc7e0854efe..98deef6cde69 100644
--- a/net/netfilter/nf_nat_amanda.c
+++ b/net/netfilter/nf_nat_amanda.c
@@ -44,19 +44,7 @@ static unsigned int help(struct sk_buff *skb,
exp->expectfn = nf_nat_follow_master;
/* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
- int res;
-
- exp->tuple.dst.u.tcp.port = htons(port);
- res = nf_ct_expect_related(exp, 0);
- if (res == 0)
- break;
- else if (res != -EBUSY) {
- port = 0;
- break;
- }
- }
-
+ port = nf_nat_exp_find_port(exp, ntohs(exp->saved_proto.tcp.port));
if (port == 0) {
nf_ct_helper_log(skb, exp->master, "all ports in use");
return NF_DROP;
diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c
new file mode 100644
index 000000000000..0fa5a0bbb0ff
--- /dev/null
+++ b/net/netfilter/nf_nat_bpf.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable NAT Helpers for XDP and TC-BPF hooks
+ *
+ * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
+ * allowed to break compatibility for these functions since the interface they
+ * are exposed through to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf.h>
+#include <linux/btf_ids.h>
+#include <net/netfilter/nf_conntrack_bpf.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_nat.h>
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in nf_nat BTF");
+
+/* bpf_ct_set_nat_info - Set source or destination nat address
+ *
+ * Set source or destination nat address of the newly allocated
+ * nf_conn before insertion. This must be invoked for referenced
+ * PTR_TO_BTF_ID to nf_conn___init.
+ *
+ * Parameters:
+ * @nfct - Pointer to referenced nf_conn object, obtained using
+ * bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
+ * @addr - Nat source/destination address
+ * @port - Nat source/destination port. A non-positive value is
+ * interpreted as a request to select a random port.
+ * @manip - NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST
+ */
+int bpf_ct_set_nat_info(struct nf_conn___init *nfct,
+ union nf_inet_addr *addr, int port,
+ enum nf_nat_manip_type manip)
+{
+ struct nf_conn *ct = (struct nf_conn *)nfct;
+ u16 proto = nf_ct_l3num(ct);
+ struct nf_nat_range2 range;
+
+ if (proto != NFPROTO_IPV4 && proto != NFPROTO_IPV6)
+ return -EINVAL;
+
+ memset(&range, 0, sizeof(struct nf_nat_range2));
+ range.flags = NF_NAT_RANGE_MAP_IPS;
+ range.min_addr = *addr;
+ range.max_addr = range.min_addr;
+ if (port > 0) {
+ range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+ range.min_proto.all = cpu_to_be16(port);
+ range.max_proto.all = range.min_proto.all;
+ }
+
+ return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0;
+}
+
+__diag_pop()
+
+BTF_SET8_START(nf_nat_kfunc_set)
+BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS)
+BTF_SET8_END(nf_nat_kfunc_set)
+
+static const struct btf_kfunc_id_set nf_bpf_nat_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &nf_nat_kfunc_set,
+};
+
+int register_nf_nat_bpf(void)
+{
+ int ret;
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP,
+ &nf_bpf_nat_kfunc_set);
+ if (ret)
+ return ret;
+
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS,
+ &nf_bpf_nat_kfunc_set);
+}
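On the BPF side the kfunc is resolved through BTF at program load time, so there is no UAPI header; a program-side declaration would look roughly like this (hypothetical snippet, the __ksym attribute comes from bpf_helpers.h):

	/* Hypothetical declaration in a SCHED_CLS or XDP BPF program. */
	extern int bpf_ct_set_nat_info(struct nf_conn___init *nfct,
				       union nf_inet_addr *addr, int port,
				       enum nf_nat_manip_type manip) __ksym;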
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 2d06a66899b2..e29e4ccb5c5a 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -16,7 +16,7 @@
#include <linux/siphash.h>
#include <linux/rtnetlink.h>
-#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
@@ -468,7 +468,7 @@ find_free_id:
if (range->flags & NF_NAT_RANGE_PROTO_OFFSET)
off = (ntohs(*keyptr) - ntohs(range->base_proto.all));
else
- off = prandom_u32();
+ off = get_random_u16();
attempts = range_size;
if (attempts > max_attempts)
@@ -490,42 +490,10 @@ another_round:
if (attempts >= range_size || attempts < 16)
return;
attempts /= 2;
- off = prandom_u32();
+ off = get_random_u16();
goto another_round;
}
-static bool tuple_force_port_remap(const struct nf_conntrack_tuple *tuple)
-{
- u16 sp, dp;
-
- switch (tuple->dst.protonum) {
- case IPPROTO_TCP:
- sp = ntohs(tuple->src.u.tcp.port);
- dp = ntohs(tuple->dst.u.tcp.port);
- break;
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE:
- sp = ntohs(tuple->src.u.udp.port);
- dp = ntohs(tuple->dst.u.udp.port);
- break;
- default:
- return false;
- }
-
- /* IANA: System port range: 1-1023,
- * user port range: 1024-49151,
- * private port range: 49152-65535.
- *
- * Linux default ephemeral port range is 32768-60999.
- *
- * Enforce port remapping if sport is significantly lower
- * than dport to prevent NAT port shadowing, i.e.
- * accidental match of 'new' inbound connection vs.
- * existing outbound one.
- */
- return sp < 16384 && dp >= 32768;
-}
-
/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
* we change the source to map into the range. For NF_INET_PRE_ROUTING
* and NF_INET_LOCAL_OUT, we change the destination to map into the
@@ -539,17 +507,11 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
{
- bool random_port = range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL;
const struct nf_conntrack_zone *zone;
struct net *net = nf_ct_net(ct);
zone = nf_ct_zone(ct);
- if (maniptype == NF_NAT_MANIP_SRC &&
- !random_port &&
- !ct->local_origin)
- random_port = tuple_force_port_remap(orig_tuple);
-
/* 1) If this srcip/proto/src-proto-part is currently mapped,
* and that same mapping gives a unique tuple within the given
* range, use that.
@@ -558,7 +520,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
* So far, we don't do local source mappings, so multiple
* manips not an issue.
*/
- if (maniptype == NF_NAT_MANIP_SRC && !random_port) {
+ if (maniptype == NF_NAT_MANIP_SRC &&
+ !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
/* try the original tuple first */
if (in_range(orig_tuple, range)) {
if (!nf_nat_used_tuple(orig_tuple, ct)) {
@@ -582,7 +545,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
*/
/* Only bother mapping if it's not already in range and unique */
- if (!random_port) {
+ if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
l4proto_in_range(tuple, maniptype,
@@ -838,7 +801,7 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data)
return i->status & IPS_NAT_MASK ? 1 : 0;
}
-static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
+static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
unsigned int h;
@@ -860,7 +823,7 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
* will delete entry from already-freed table.
*/
if (test_and_clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status))
- __nf_nat_cleanup_conntrack(ct);
+ nf_nat_cleanup_conntrack(ct);
/* don't delete conntrack. Although that would make things a lot
* simpler, we'd end up flushing all conntracks on nat rmmod.
@@ -868,20 +831,6 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
return 0;
}
-/* No one using conntrack by the time this called. */
-static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
-{
- if (ct->status & IPS_SRC_NAT_DONE)
- __nf_nat_cleanup_conntrack(ct);
-}
-
-static struct nf_ct_ext_type nat_extend __read_mostly = {
- .len = sizeof(struct nf_conn_nat),
- .align = __alignof__(struct nf_conn_nat),
- .destroy = nf_nat_cleanup_conntrack,
- .id = NF_CT_EXT_NAT,
-};
-
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
@@ -1173,6 +1122,7 @@ static const struct nf_nat_hook nat_hook = {
.decode_session = __nf_nat_decode_session,
#endif
.manip_pkt = nf_nat_manip_pkt,
+ .remove_nat_bysrc = nf_nat_cleanup_conntrack,
};
static int __init nf_nat_init(void)
@@ -1188,19 +1138,11 @@ static int __init nf_nat_init(void)
if (!nf_nat_bysource)
return -ENOMEM;
- ret = nf_ct_extend_register(&nat_extend);
- if (ret < 0) {
- kvfree(nf_nat_bysource);
- pr_err("Unable to register extension\n");
- return ret;
- }
-
for (i = 0; i < CONNTRACK_LOCKS; i++)
spin_lock_init(&nf_nat_locks[i]);
ret = register_pernet_subsys(&nat_net_ops);
if (ret < 0) {
- nf_ct_extend_unregister(&nat_extend);
kvfree(nf_nat_bysource);
return ret;
}
@@ -1210,7 +1152,16 @@ static int __init nf_nat_init(void)
WARN_ON(nf_nat_hook != NULL);
RCU_INIT_POINTER(nf_nat_hook, &nat_hook);
- return 0;
+ ret = register_nf_nat_bpf();
+ if (ret < 0) {
+ RCU_INIT_POINTER(nf_nat_hook, NULL);
+ nf_ct_helper_expectfn_unregister(&follow_master_nat);
+ synchronize_net();
+ unregister_pernet_subsys(&nat_net_ops);
+ kvfree(nf_nat_bysource);
+ }
+
+ return ret;
}
static void __exit nf_nat_cleanup(void)
@@ -1219,7 +1170,6 @@ static void __exit nf_nat_cleanup(void)
nf_ct_iterate_destroy(nf_nat_proto_clean, &clean);
- nf_ct_extend_unregister(&nat_extend);
nf_ct_helper_expectfn_unregister(&follow_master_nat);
RCU_INIT_POINTER(nf_nat_hook, NULL);
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index aace6768a64e..c92a436d9c48 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -86,22 +86,9 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
* this one. */
exp->expectfn = nf_nat_follow_master;
- /* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
- int ret;
-
- exp->tuple.dst.u.tcp.port = htons(port);
- ret = nf_ct_expect_related(exp, 0);
- if (ret == 0)
- break;
- else if (ret != -EBUSY) {
- port = 0;
- break;
- }
- }
-
+ port = nf_nat_exp_find_port(exp, ntohs(exp->saved_proto.tcp.port));
if (port == 0) {
- nf_ct_helper_log(skb, ct, "all ports in use");
+ nf_ct_helper_log(skb, exp->master, "all ports in use");
return NF_DROP;
}
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index a263505455fc..a95a25196943 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -198,3 +198,34 @@ void nf_nat_follow_master(struct nf_conn *ct,
nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
}
EXPORT_SYMBOL(nf_nat_follow_master);
+
+u16 nf_nat_exp_find_port(struct nf_conntrack_expect *exp, u16 port)
+{
+ static const unsigned int max_attempts = 128;
+ int range, attempts_left;
+ u16 min = port;
+
+ range = USHRT_MAX - port;
+ attempts_left = range;
+
+ if (attempts_left > max_attempts)
+ attempts_left = max_attempts;
+
+ /* Try to get same port: if not, try to change it. */
+ for (;;) {
+ int res;
+
+ exp->tuple.dst.u.tcp.port = htons(port);
+ res = nf_ct_expect_related(exp, 0);
+ if (res == 0)
+ return port;
+
+ if (res != -EBUSY || (--attempts_left < 0))
+ break;
+
+ port = min + prandom_u32_max(range);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_nat_exp_find_port);
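This helper consolidates the try-same-port loop that was open-coded in the amanda, ftp, irc and sip NAT helpers (see their hunks in this series). The policy changes slightly: instead of scanning linearly from the preferred port up to 65535, it tries the preferred port once and then makes at most 128 random picks from [port, USHRT_MAX). Caller-side it keeps the old contract, where 0 means exhaustion:

	/* Typical caller, as converted in the helpers above. */
	port = nf_nat_exp_find_port(exp, ntohs(exp->saved_proto.tcp.port));
	if (port == 0) {
		nf_ct_helper_log(skb, exp->master, "all ports in use");
		return NF_DROP;
	}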
diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
index c691ab8d234c..19c4fcc60c50 100644
--- a/net/netfilter/nf_nat_irc.c
+++ b/net/netfilter/nf_nat_irc.c
@@ -48,20 +48,8 @@ static unsigned int help(struct sk_buff *skb,
exp->dir = IP_CT_DIR_REPLY;
exp->expectfn = nf_nat_follow_master;
- /* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
- int ret;
-
- exp->tuple.dst.u.tcp.port = htons(port);
- ret = nf_ct_expect_related(exp, 0);
- if (ret == 0)
- break;
- else if (ret != -EBUSY) {
- port = 0;
- break;
- }
- }
-
+ port = nf_nat_exp_find_port(exp,
+ ntohs(exp->saved_proto.tcp.port));
if (port == 0) {
nf_ct_helper_log(skb, ct, "all ports in use");
return NF_DROP;
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index e32fac374608..1a506b0c6511 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -77,11 +77,14 @@ EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
static void iterate_cleanup_work(struct work_struct *work)
{
+ struct nf_ct_iter_data iter_data = {};
struct masq_dev_work *w;
w = container_of(work, struct masq_dev_work, work);
- nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0);
+ iter_data.net = w->net;
+ iter_data.data = (void *)w;
+ nf_ct_iterate_cleanup_net(w->iter, &iter_data);
put_net_track(w->net, &w->ns_tracker);
kfree(w);
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index f0a735e86851..cf4aeb299bde 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -410,19 +410,7 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
exp->dir = !dir;
exp->expectfn = nf_nat_sip_expected;
- for (; port != 0; port++) {
- int ret;
-
- exp->tuple.dst.u.udp.port = htons(port);
- ret = nf_ct_expect_related(exp, NF_CT_EXP_F_SKIP_MASTER);
- if (ret == 0)
- break;
- else if (ret != -EBUSY) {
- port = 0;
- break;
- }
- }
-
+ port = nf_nat_exp_find_port(exp, port);
if (port == 0) {
nf_ct_helper_log(skb, ct, "all ports in use for SIP");
return NF_DROP;
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 6d12afabfe8a..63d1516816b1 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -46,6 +46,15 @@ void nf_unregister_queue_handler(void)
}
EXPORT_SYMBOL(nf_unregister_queue_handler);
+static void nf_queue_sock_put(struct sock *sk)
+{
+#ifdef CONFIG_INET
+ sock_gen_put(sk);
+#else
+ sock_put(sk);
+#endif
+}
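The wrapper is needed because nf_queue can end up holding the last reference on request or time-wait sockets, which are not full socks: with CONFIG_INET, sock_gen_put() inspects sk_state and releases TCP_NEW_SYN_RECV and TCP_TIME_WAIT minisockets through their own destructors, whereas plain sock_put() is only correct for full sockets.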
+
static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
{
struct nf_hook_state *state = &entry->state;
@@ -54,7 +63,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
dev_put(state->in);
dev_put(state->out);
if (state->sk)
- sock_put(state->sk);
+ nf_queue_sock_put(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
dev_put(entry->physin);
@@ -87,19 +96,21 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry)
}
/* Bump dev refs so they don't vanish while packet is out */
-void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
+bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
{
struct nf_hook_state *state = &entry->state;
+ if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt))
+ return false;
+
dev_hold(state->in);
dev_hold(state->out);
- if (state->sk)
- sock_hold(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
dev_hold(entry->physin);
dev_hold(entry->physout);
#endif
+ return true;
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
@@ -169,6 +180,18 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
break;
}
+ if (skb_sk_is_prefetched(skb)) {
+ struct sock *sk = skb->sk;
+
+ if (!sk_is_refcounted(sk)) {
+ if (!refcount_inc_not_zero(&sk->sk_refcnt))
+ return -ENOTCONN;
+
+ /* drop refcount on skb_orphan */
+ skb->destructor = sock_edemux;
+ }
+ }
+
entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC);
if (!entry)
return -ENOMEM;
@@ -187,7 +210,10 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
__nf_queue_entry_init_physdevs(entry);
- nf_queue_entry_get_refs(entry);
+ if (!nf_queue_entry_get_refs(entry)) {
+ kfree(entry);
+ return -ENOTCONN;
+ }
switch (entry->state.pf) {
case AF_INET:
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 2dfc5dae0656..16915f8eef2b 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -236,12 +236,6 @@ synproxy_tstamp_adjust(struct sk_buff *skb, unsigned int protoff,
return 1;
}
-static struct nf_ct_ext_type nf_ct_synproxy_extend __read_mostly = {
- .len = sizeof(struct nf_conn_synproxy),
- .align = __alignof__(struct nf_conn_synproxy),
- .id = NF_CT_EXT_SYNPROXY,
-};
-
#ifdef CONFIG_PROC_FS
static void *synproxy_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
@@ -387,28 +381,12 @@ static struct pernet_operations synproxy_net_ops = {
static int __init synproxy_core_init(void)
{
- int err;
-
- err = nf_ct_extend_register(&nf_ct_synproxy_extend);
- if (err < 0)
- goto err1;
-
- err = register_pernet_subsys(&synproxy_net_ops);
- if (err < 0)
- goto err2;
-
- return 0;
-
-err2:
- nf_ct_extend_unregister(&nf_ct_synproxy_extend);
-err1:
- return err;
+ return register_pernet_subsys(&synproxy_net_ops);
}
static void __exit synproxy_core_exit(void)
{
unregister_pernet_subsys(&synproxy_net_ops);
- nf_ct_extend_unregister(&nf_ct_synproxy_extend);
}
module_init(synproxy_core_init);
@@ -427,7 +405,7 @@ synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr,
iph->tos = 0;
iph->id = 0;
iph->frag_off = htons(IP_DF);
- iph->ttl = net->ipv4.sysctl_ip_default_ttl;
+ iph->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
iph->protocol = IPPROTO_TCP;
iph->check = 0;
iph->saddr = saddr;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 77938b1042f3..e7152d599d73 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -32,7 +32,6 @@ static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
static LIST_HEAD(nf_tables_destroy_list);
static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
-static u64 table_handle;
enum {
NFT_VALIDATE_SKIP = 0,
@@ -153,6 +152,7 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
if (trans == NULL)
return NULL;
+ INIT_LIST_HEAD(&trans->list);
trans->msg_type = msg_type;
trans->ctx = *ctx;
@@ -222,12 +222,18 @@ err_register:
}
static void nft_netdev_unregister_hooks(struct net *net,
- struct list_head *hook_list)
+ struct list_head *hook_list,
+ bool release_netdev)
{
- struct nft_hook *hook;
+ struct nft_hook *hook, *next;
- list_for_each_entry(hook, hook_list, list)
+ list_for_each_entry_safe(hook, next, hook_list, list) {
nf_unregister_net_hook(net, &hook->ops);
+ if (release_netdev) {
+ list_del(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
+ }
}
static int nf_tables_register_hook(struct net *net,
@@ -253,9 +259,10 @@ static int nf_tables_register_hook(struct net *net,
return nf_register_net_hook(net, &basechain->ops);
}
-static void nf_tables_unregister_hook(struct net *net,
- const struct nft_table *table,
- struct nft_chain *chain)
+static void __nf_tables_unregister_hook(struct net *net,
+ const struct nft_table *table,
+ struct nft_chain *chain,
+ bool release_netdev)
{
struct nft_base_chain *basechain;
const struct nf_hook_ops *ops;
@@ -270,11 +277,19 @@ static void nf_tables_unregister_hook(struct net *net,
return basechain->type->ops_unregister(net, ops);
if (nft_base_chain_netdev(table->family, basechain->ops.hooknum))
- nft_netdev_unregister_hooks(net, &basechain->hook_list);
+ nft_netdev_unregister_hooks(net, &basechain->hook_list,
+ release_netdev);
else
nf_unregister_net_hook(net, &basechain->ops);
}
+static void nf_tables_unregister_hook(struct net *net,
+ const struct nft_table *table,
+ struct nft_chain *chain)
+{
+ return __nf_tables_unregister_hook(net, table, chain, false);
+}
+
static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -529,6 +544,7 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
if (msg_type == NFT_MSG_NEWFLOWTABLE)
nft_activate_next(ctx->net, flowtable);
+ INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
nft_trans_flowtable(trans) = flowtable;
nft_trans_commit_list_add_tail(ctx->net, trans);
@@ -550,6 +566,58 @@ static int nft_delflowtable(struct nft_ctx *ctx,
return err;
}
+static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg)
+{
+ int i;
+
+ for (i = track->regs[dreg].num_reg; i > 0; i--)
+ __nft_reg_track_cancel(track, dreg - i);
+}
+
+static void __nft_reg_track_update(struct nft_regs_track *track,
+ const struct nft_expr *expr,
+ u8 dreg, u8 num_reg)
+{
+ track->regs[dreg].selector = expr;
+ track->regs[dreg].bitwise = NULL;
+ track->regs[dreg].num_reg = num_reg;
+}
+
+void nft_reg_track_update(struct nft_regs_track *track,
+ const struct nft_expr *expr, u8 dreg, u8 len)
+{
+ unsigned int regcount;
+ int i;
+
+ __nft_reg_track_clobber(track, dreg);
+
+ regcount = DIV_ROUND_UP(len, NFT_REG32_SIZE);
+ for (i = 0; i < regcount; i++, dreg++)
+ __nft_reg_track_update(track, expr, dreg, i);
+}
+EXPORT_SYMBOL_GPL(nft_reg_track_update);
+
+void nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg, u8 len)
+{
+ unsigned int regcount;
+ int i;
+
+ __nft_reg_track_clobber(track, dreg);
+
+ regcount = DIV_ROUND_UP(len, NFT_REG32_SIZE);
+ for (i = 0; i < regcount; i++, dreg++)
+ __nft_reg_track_cancel(track, dreg);
+}
+EXPORT_SYMBOL_GPL(nft_reg_track_cancel);
+
+void __nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg)
+{
+ track->regs[dreg].selector = NULL;
+ track->regs[dreg].bitwise = NULL;
+ track->regs[dreg].num_reg = 0;
+}
+EXPORT_SYMBOL_GPL(__nft_reg_track_cancel);
+
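num_reg records how far a 32-bit register sits from the start of the selector that owns it, so a later partial overwrite can cancel the whole span via __nft_reg_track_clobber(). A worked example, assuming NFT_REG32_SIZE is 4 bytes (expr and other_expr are placeholder expression pointers):

	/* A 16-byte selector stored at register 4 spans regs 4..7. */
	nft_reg_track_update(track, expr, 4, 16);
	/* track->regs[4..7].selector == expr, .num_reg == 0, 1, 2, 3 */

	/* A later 4-byte store to reg 6 first cancels regs 4 and 5
	 * (walking back num_reg == 2 slots), then claims reg 6 itself.
	 */
	nft_reg_track_update(track, other_expr, 6, 4);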
/*
* Tables
*/
@@ -674,7 +742,7 @@ __printf(2, 3) int nft_request_module(struct net *net, const char *fmt,
return -ENOMEM;
req->done = false;
- strlcpy(req->module, module_name, MODULE_NAME_LEN);
+ strscpy(req->module, module_name, MODULE_NAME_LEN);
list_add_tail(&req->list, &nft_net->module_list);
return -EAGAIN;
@@ -820,7 +888,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = nft_net->base_seq;
+ cb->seq = READ_ONCE(nft_net->base_seq);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -1072,6 +1140,30 @@ static int nft_objname_hash_cmp(struct rhashtable_compare_arg *arg,
return strcmp(obj->key.name, k->name);
}
+static bool nft_supported_family(u8 family)
+{
+ return false
+#ifdef CONFIG_NF_TABLES_INET
+ || family == NFPROTO_INET
+#endif
+#ifdef CONFIG_NF_TABLES_IPV4
+ || family == NFPROTO_IPV4
+#endif
+#ifdef CONFIG_NF_TABLES_ARP
+ || family == NFPROTO_ARP
+#endif
+#ifdef CONFIG_NF_TABLES_NETDEV
+ || family == NFPROTO_NETDEV
+#endif
+#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE)
+ || family == NFPROTO_BRIDGE
+#endif
+#ifdef CONFIG_NF_TABLES_IPV6
+ || family == NFPROTO_IPV6
+#endif
+ ;
+}
+
static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
@@ -1086,6 +1178,9 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
u32 flags = 0;
int err;
+ if (!nft_supported_family(family))
+ return -EOPNOTSUPP;
+
lockdep_assert_held(&nft_net->commit_mutex);
attr = nla[NFTA_TABLE_NAME];
table = nft_table_lookup(net, attr, family, genmask,
@@ -1113,16 +1208,16 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
}
err = -ENOMEM;
- table = kzalloc(sizeof(*table), GFP_KERNEL);
+ table = kzalloc(sizeof(*table), GFP_KERNEL_ACCOUNT);
if (table == NULL)
goto err_kzalloc;
- table->name = nla_strdup(attr, GFP_KERNEL);
+ table->name = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
if (table->name == NULL)
goto err_strdup;
if (nla[NFTA_TABLE_USERDATA]) {
- table->udata = nla_memdup(nla[NFTA_TABLE_USERDATA], GFP_KERNEL);
+ table->udata = nla_memdup(nla[NFTA_TABLE_USERDATA], GFP_KERNEL_ACCOUNT);
if (table->udata == NULL)
goto err_table_udata;
@@ -1139,7 +1234,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
INIT_LIST_HEAD(&table->flowtables);
table->family = family;
table->flags = flags;
- table->handle = ++table_handle;
+ table->handle = ++nft_net->table_handle;
if (table->flags & NFT_TABLE_F_OWNER)
table->nlpid = NETLINK_CB(skb).portid;
@@ -1609,7 +1704,7 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = nft_net->base_seq;
+ cb->seq = READ_ONCE(nft_net->base_seq);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -1803,7 +1898,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
struct nft_hook *hook;
int err;
- hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL);
+ hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT);
if (!hook) {
err = -ENOMEM;
goto err_hook_alloc;
@@ -1820,7 +1915,6 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
goto err_hook_dev;
}
hook->ops.dev = dev;
- hook->inactive = false;
return hook;
@@ -2011,7 +2105,6 @@ static void nft_last_rule(struct nft_rule_blob *blob, const void *ptr)
prule = (struct nft_rule_dp *)ptr;
prule->is_last = 1;
- ptr += offsetof(struct nft_rule_dp, data);
/* blob size does not include the trailer rule */
}
@@ -2027,7 +2120,7 @@ static struct nft_rule_blob *nf_tables_chain_alloc_rules(unsigned int size)
if (size > INT_MAX)
return NULL;
- blob = kvmalloc(size, GFP_KERNEL);
+ blob = kvmalloc(size, GFP_KERNEL_ACCOUNT);
if (!blob)
return NULL;
@@ -2073,8 +2166,10 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
chain->flags |= NFT_CHAIN_BASE | flags;
basechain->policy = NF_ACCEPT;
if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
- nft_chain_offload_priority(basechain) < 0)
+ !nft_chain_offload_support(basechain)) {
+ list_splice_init(&basechain->hook_list, &hook->list);
return -EOPNOTSUPP;
+ }
flow_block_init(&basechain->flow_block);
@@ -2104,7 +2199,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
const struct nlattr * const *nla = ctx->nla;
struct nft_table *table = ctx->table;
struct nft_base_chain *basechain;
- struct nft_stats __percpu *stats;
struct net *net = ctx->net;
char name[NFT_NAME_MAXLEN];
struct nft_rule_blob *blob;
@@ -2117,6 +2211,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
return -EOVERFLOW;
if (nla[NFTA_CHAIN_HOOK]) {
+ struct nft_stats __percpu *stats = NULL;
struct nft_chain_hook hook;
if (flags & NFT_CHAIN_BINDING)
@@ -2127,7 +2222,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (err < 0)
return err;
- basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
+ basechain = kzalloc(sizeof(*basechain), GFP_KERNEL_ACCOUNT);
if (basechain == NULL) {
nft_chain_release_hook(&hook);
return -ENOMEM;
@@ -2142,22 +2237,24 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
return PTR_ERR(stats);
}
rcu_assign_pointer(basechain->stats, stats);
- static_branch_inc(&nft_counters_enabled);
}
err = nft_basechain_init(basechain, family, &hook, flags);
if (err < 0) {
nft_chain_release_hook(&hook);
kfree(basechain);
+ free_percpu(stats);
return err;
}
+ if (stats)
+ static_branch_inc(&nft_counters_enabled);
} else {
if (flags & NFT_CHAIN_BASE)
return -EINVAL;
if (flags & NFT_CHAIN_HW_OFFLOAD)
return -EOPNOTSUPP;
- chain = kzalloc(sizeof(*chain), GFP_KERNEL);
+ chain = kzalloc(sizeof(*chain), GFP_KERNEL_ACCOUNT);
if (chain == NULL)
return -ENOMEM;
@@ -2170,7 +2267,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
chain->table = table;
if (nla[NFTA_CHAIN_NAME]) {
- chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
+ chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL_ACCOUNT);
} else {
if (!(flags & NFT_CHAIN_BINDING)) {
err = -EINVAL;
@@ -2178,7 +2275,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
}
snprintf(name, sizeof(name), "__chain%llu", ++chain_id);
- chain->name = kstrdup(name, GFP_KERNEL);
+ chain->name = kstrdup(name, GFP_KERNEL_ACCOUNT);
}
if (!chain->name) {
@@ -2187,7 +2284,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
}
if (nla[NFTA_CHAIN_USERDATA]) {
- chain->udata = nla_memdup(nla[NFTA_CHAIN_USERDATA], GFP_KERNEL);
+ chain->udata = nla_memdup(nla[NFTA_CHAIN_USERDATA], GFP_KERNEL_ACCOUNT);
if (chain->udata == NULL) {
err = -ENOMEM;
goto err_destroy_chain;
@@ -2350,7 +2447,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
char *name;
err = -ENOMEM;
- name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
+ name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL_ACCOUNT);
if (!name)
goto err;
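The GFP_KERNEL to GFP_KERNEL_ACCOUNT conversions throughout this file (tables, chains, rules, set names, hooks, expressions, userdata) charge these unbounded, user-controlled allocations to the creating task's memory cgroup, so a container assembling a huge ruleset exhausts its own memcg limit rather than host memory.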
@@ -2379,6 +2476,7 @@ err:
}
static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
+ const struct nft_table *table,
const struct nlattr *nla)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -2389,6 +2487,7 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
struct nft_chain *chain = trans->ctx.chain;
if (trans->msg_type == NFT_MSG_NEWCHAIN &&
+ chain->table == table &&
id == nft_trans_chain_id(trans))
return chain;
}
@@ -2478,6 +2577,9 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
if (chain != NULL) {
+ if (chain->flags & NFT_CHAIN_BINDING)
+ return -EINVAL;
+
if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
@@ -2795,27 +2897,31 @@ static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
err = nf_tables_expr_parse(ctx, nla, &expr_info);
if (err < 0)
- goto err1;
+ goto err_expr_parse;
+
+ err = -EOPNOTSUPP;
+ if (!(expr_info.ops->type->flags & NFT_EXPR_STATEFUL))
+ goto err_expr_stateful;
err = -ENOMEM;
- expr = kzalloc(expr_info.ops->size, GFP_KERNEL);
+ expr = kzalloc(expr_info.ops->size, GFP_KERNEL_ACCOUNT);
if (expr == NULL)
- goto err2;
+ goto err_expr_stateful;
err = nf_tables_newexpr(ctx, &expr_info, expr);
if (err < 0)
- goto err3;
+ goto err_expr_new;
return expr;
-err3:
+err_expr_new:
kfree(expr);
-err2:
+err_expr_stateful:
owner = expr_info.ops->type->owner;
if (expr_info.ops->type->release_ops)
expr_info.ops->type->release_ops(expr_info.ops);
module_put(owner);
-err1:
+err_expr_parse:
return ERR_PTR(err);
}
@@ -3049,7 +3155,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = nft_net->base_seq;
+ cb->seq = READ_ONCE(nft_net->base_seq);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -3243,6 +3349,8 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
if (err < 0)
return err;
}
+
+ cond_resched();
}
return 0;
@@ -3272,6 +3380,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
}
static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
+ const struct nft_chain *chain,
const struct nlattr *nla);
#define NFT_RULE_MAXEXPRS 128
@@ -3318,7 +3427,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
return -EOPNOTSUPP;
} else if (nla[NFTA_RULE_CHAIN_ID]) {
- chain = nft_chain_lookup_byid(net, nla[NFTA_RULE_CHAIN_ID]);
+ chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID]);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN_ID]);
return PTR_ERR(chain);
@@ -3360,7 +3469,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
return PTR_ERR(old_rule);
}
} else if (nla[NFTA_RULE_POSITION_ID]) {
- old_rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_POSITION_ID]);
+ old_rule = nft_rule_lookup_byid(net, chain, nla[NFTA_RULE_POSITION_ID]);
if (IS_ERR(old_rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION_ID]);
return PTR_ERR(old_rule);
@@ -3406,7 +3515,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
}
err = -ENOMEM;
- rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL);
+ rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL_ACCOUNT);
if (rule == NULL)
goto err_release_expr;
@@ -3505,6 +3614,7 @@ err_release_expr:
}
static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
+ const struct nft_chain *chain,
const struct nlattr *nla)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -3515,6 +3625,7 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
struct nft_rule *rule = nft_trans_rule(trans);
if (trans->msg_type == NFT_MSG_NEWRULE &&
+ trans->ctx.chain == chain &&
id == nft_trans_rule_id(trans))
return rule;
}
@@ -3564,7 +3675,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
err = nft_delrule(&ctx, rule);
} else if (nla[NFTA_RULE_ID]) {
- rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_ID]);
+ rule = nft_rule_lookup_byid(net, chain, nla[NFTA_RULE_ID]);
if (IS_ERR(rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_ID]);
return PTR_ERR(rule);
@@ -3743,6 +3854,7 @@ static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table,
}
static struct nft_set *nft_set_lookup_byid(const struct net *net,
+ const struct nft_table *table,
const struct nlattr *nla, u8 genmask)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -3754,6 +3866,7 @@ static struct nft_set *nft_set_lookup_byid(const struct net *net,
struct nft_set *set = nft_trans_set(trans);
if (id == nft_trans_set_id(trans) &&
+ set->table == table &&
nft_active_genmask(set, genmask))
return set;
}
@@ -3774,7 +3887,7 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
if (!nla_set_id)
return set;
- set = nft_set_lookup_byid(net, nla_set_id, genmask);
+ set = nft_set_lookup_byid(net, table, nla_set_id, genmask);
}
return set;
}
@@ -3800,7 +3913,7 @@ cont:
list_for_each_entry(i, &ctx->table->sets, list) {
int tmp;
- if (!nft_is_active_next(ctx->net, set))
+ if (!nft_is_active_next(ctx->net, i))
continue;
if (!sscanf(i->name, name, &tmp))
continue;
@@ -3819,7 +3932,7 @@ cont:
free_page((unsigned long)inuse);
}
- set->name = kasprintf(GFP_KERNEL, name, min + n);
+ set->name = kasprintf(GFP_KERNEL_ACCOUNT, name, min + n);
if (!set->name)
return -ENOMEM;
@@ -4026,7 +4139,7 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = nft_net->base_seq;
+ cb->seq = READ_ONCE(nft_net->base_seq);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (ctx->family != NFPROTO_UNSPEC &&
@@ -4164,6 +4277,9 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr,
u32 len;
int err;
+ if (desc->field_count >= ARRAY_SIZE(desc->field_len))
+ return -E2BIG;
+
err = nla_parse_nested_deprecated(tb, NFTA_SET_FIELD_MAX, attr,
nft_concat_policy, NULL);
if (err < 0)
@@ -4173,9 +4289,8 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr,
return -EINVAL;
len = ntohl(nla_get_be32(tb[NFTA_SET_FIELD_LEN]));
-
- if (len * BITS_PER_BYTE / 32 > NFT_REG32_COUNT)
- return -E2BIG;
+ if (!len || len > U8_MAX)
+ return -EINVAL;
desc->field_len[desc->field_count++] = len;
@@ -4186,7 +4301,8 @@ static int nft_set_desc_concat(struct nft_set_desc *desc,
const struct nlattr *nla)
{
struct nlattr *attr;
- int rem, err;
+ u32 num_regs = 0;
+ int rem, err, i;
nla_for_each_nested(attr, nla, rem) {
if (nla_type(attr) != NFTA_LIST_ELEM)
@@ -4197,6 +4313,12 @@ static int nft_set_desc_concat(struct nft_set_desc *desc,
return err;
}
+ for (i = 0; i < desc->field_count; i++)
+ num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32));
+
+ if (num_regs > NFT_REG32_COUNT)
+ return -E2BIG;
+
return 0;
}
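/*
 * Note: a concatenated field of len bytes occupies
 * DIV_ROUND_UP(len, 4) 32-bit registers, so capping each field alone
 * does not bound the total; the loop above sums the register budget
 * and rejects overflows.  The arithmetic in isolation (16 here stands
 * for NFT_REG32_COUNT):
 */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/types.h>

static int concat_check_regs(const u8 *field_len, unsigned int count)
{
	u32 num_regs = 0;
	unsigned int i;

	for (i = 0; i < count; i++)
		num_regs += DIV_ROUND_UP(field_len[i], sizeof(u32));

	return num_regs > 16 ? -E2BIG : 0;
}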
@@ -4335,6 +4457,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]);
if (err < 0)
return err;
+
+ if (desc.field_count > 1 && !(flags & NFT_SET_CONCAT))
+ return -EINVAL;
+ } else if (flags & NFT_SET_CONCAT) {
+ return -EINVAL;
}
if (nla[NFTA_SET_EXPR] || nla[NFTA_SET_EXPRESSIONS])
@@ -4383,11 +4510,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
alloc_size = sizeof(*set) + size + udlen;
if (alloc_size < size || alloc_size > INT_MAX)
return -ENOMEM;
- set = kvzalloc(alloc_size, GFP_KERNEL);
+ set = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT);
if (!set)
return -ENOMEM;
- name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL);
+ name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL_ACCOUNT);
if (!name) {
err = -ENOMEM;
goto err_set_name;
@@ -4503,7 +4630,7 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
list_del_rcu(&catchall->list);
nft_set_elem_destroy(set, catchall->elem, true);
- kfree_rcu(catchall);
+ kfree_rcu(catchall, rcu);
}
}
@@ -4945,6 +5072,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
nft_net = nft_pernet(net);
+ cb->seq = READ_ONCE(nft_net->base_seq);
+
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (dump_ctx->ctx.family != NFPROTO_UNSPEC &&
dump_ctx->ctx.family != table->family)
@@ -5080,6 +5209,9 @@ static int nft_setelem_parse_flags(const struct nft_set *set,
if (!(set->flags & NFT_SET_INTERVAL) &&
*flags & NFT_SET_ELEM_INTERVAL_END)
return -EINVAL;
+ if ((*flags & (NFT_SET_ELEM_INTERVAL_END | NFT_SET_ELEM_CATCHALL)) ==
+ (NFT_SET_ELEM_INTERVAL_END | NFT_SET_ELEM_CATCHALL))
+ return -EINVAL;
return 0;
}
@@ -5087,19 +5219,13 @@ static int nft_setelem_parse_flags(const struct nft_set *set,
static int nft_setelem_parse_key(struct nft_ctx *ctx, struct nft_set *set,
struct nft_data *key, struct nlattr *attr)
{
- struct nft_data_desc desc;
- int err;
-
- err = nft_data_init(ctx, key, NFT_DATA_VALUE_MAXLEN, &desc, attr);
- if (err < 0)
- return err;
-
- if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) {
- nft_data_release(key, desc.type);
- return -EINVAL;
- }
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = NFT_DATA_VALUE_MAXLEN,
+ .len = set->klen,
+ };
- return 0;
+ return nft_data_init(ctx, key, &desc, attr);
}
static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set,
@@ -5107,18 +5233,19 @@ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set,
struct nft_data *data,
struct nlattr *attr)
{
- int err;
+ u32 dtype;
- err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr);
- if (err < 0)
- return err;
+ if (set->dtype == NFT_DATA_VERDICT)
+ dtype = NFT_DATA_VERDICT;
+ else
+ dtype = NFT_DATA_VALUE;
- if (desc->type != NFT_DATA_VERDICT && desc->len != set->dlen) {
- nft_data_release(data, desc->type);
- return -EINVAL;
- }
+ desc->type = dtype;
+ desc->size = NFT_DATA_VALUE_MAXLEN;
+ desc->len = set->dlen;
+ desc->flags = NFT_DATA_DESC_SETELEM;
- return 0;
+ return nft_data_init(ctx, data, desc, attr);
}
static void *nft_setelem_catchall_get(const struct net *net,
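/*
 * Note: nft_data_init() now takes its expectations as *input* in the
 * descriptor -- type, size (capacity of the destination) and len
 * (exact required length, 0 for "whatever the attribute carries") --
 * so callers like the two parsers above no longer re-check and
 * re-release on mismatch.  A sketch of the value path, mirroring the
 * reworked nft_value_init() further down:
 */
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>

struct data_desc {
	unsigned int size;	/* in: destination capacity */
	unsigned int len;	/* in: required length (0 = any); out: actual */
};

static int value_init(void *dst, struct data_desc *desc,
		      const void *src, unsigned int srclen)
{
	if (!srclen)
		return -EINVAL;
	if (srclen > desc->size)
		return -EOVERFLOW;
	if (desc->len && srclen != desc->len)
		return -EINVAL;		/* exact length was demanded */

	desc->len = srclen;
	memcpy(dst, src, srclen);
	return 0;
}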
@@ -5266,8 +5393,10 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_get_set_elem(&ctx, set, attr);
- if (err < 0)
+ if (err < 0) {
+ NL_SET_BAD_ATTR(extack, attr);
break;
+ }
}
return err;
@@ -5335,9 +5464,6 @@ struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx,
return expr;
err = -EOPNOTSUPP;
- if (!(expr->ops->type->flags & NFT_EXPR_STATEFUL))
- goto err_set_elem_expr;
-
if (expr->ops->type->flags & NFT_EXPR_GC) {
if (set->flags & NFT_SET_TIMEOUT)
goto err_set_elem_expr;
@@ -5353,6 +5479,27 @@ err_set_elem_expr:
return ERR_PTR(err);
}
+static int nft_set_ext_check(const struct nft_set_ext_tmpl *tmpl, u8 id, u32 len)
+{
+ len += nft_set_ext_types[id].len;
+ if (len > tmpl->ext_len[id] ||
+ len > U8_MAX)
+ return -1;
+
+ return 0;
+}
+
+static int nft_set_ext_memcpy(const struct nft_set_ext_tmpl *tmpl, u8 id,
+ void *to, const void *from, u32 len)
+{
+ if (nft_set_ext_check(tmpl, id, len) < 0)
+ return -1;
+
+ memcpy(to, from, len);
+
+ return 0;
+}
+
void *nft_set_elem_init(const struct nft_set *set,
const struct nft_set_ext_tmpl *tmpl,
const u32 *key, const u32 *key_end,
@@ -5363,17 +5510,26 @@ void *nft_set_elem_init(const struct nft_set *set,
elem = kzalloc(set->ops->elemsize + tmpl->len, gfp);
if (elem == NULL)
- return NULL;
+ return ERR_PTR(-ENOMEM);
ext = nft_set_elem_ext(set, elem);
nft_set_ext_init(ext, tmpl);
- if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY))
- memcpy(nft_set_ext_key(ext), key, set->klen);
- if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END))
- memcpy(nft_set_ext_key_end(ext), key_end, set->klen);
- if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
- memcpy(nft_set_ext_data(ext), data, set->dlen);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY) &&
+ nft_set_ext_memcpy(tmpl, NFT_SET_EXT_KEY,
+ nft_set_ext_key(ext), key, set->klen) < 0)
+ goto err_ext_check;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END) &&
+ nft_set_ext_memcpy(tmpl, NFT_SET_EXT_KEY_END,
+ nft_set_ext_key_end(ext), key_end, set->klen) < 0)
+ goto err_ext_check;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
+ nft_set_ext_memcpy(tmpl, NFT_SET_EXT_DATA,
+ nft_set_ext_data(ext), data, set->dlen) < 0)
+ goto err_ext_check;
+
if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
*nft_set_ext_expiration(ext) = get_jiffies_64() + expiration;
if (expiration == 0)
@@ -5383,6 +5539,11 @@ void *nft_set_elem_init(const struct nft_set *set,
*nft_set_ext_timeout(ext) = timeout;
return elem;
+
+err_ext_check:
+ kfree(elem);
+
+ return ERR_PTR(-EINVAL);
}
static void __nft_set_elem_expr_destroy(const struct nft_ctx *ctx,
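/*
 * Note: nft_set_elem_init() now reports *why* it failed (-ENOMEM for
 * the allocation, -EINVAL for a template/length mismatch) through
 * ERR_PTR() instead of a bare NULL, so callers switch to the usual
 * idiom.  Sketch, with alloc_elem() as a stand-in for the real
 * initializer:
 */
#include <linux/err.h>
#include <linux/types.h>

void *alloc_elem(size_t size);	/* returns ERR_PTR() on failure */

static int elem_user(size_t size)
{
	void *elem = alloc_elem(size);

	if (IS_ERR(elem))
		return PTR_ERR(elem);	/* -ENOMEM and -EINVAL stay distinct */

	/* ... insert elem ... */
	return 0;
}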
@@ -5448,13 +5609,13 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
int err, i, k;
for (i = 0; i < set->num_exprs; i++) {
- expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL);
+ expr = kzalloc(set->exprs[i]->ops->size, GFP_KERNEL_ACCOUNT);
if (!expr)
goto err_expr;
err = nft_expr_clone(expr, set->exprs[i]);
if (err < 0) {
- nft_expr_destroy(ctx, expr);
+ kfree(expr);
goto err_expr;
}
expr_array[i] = expr;
@@ -5470,14 +5631,25 @@ err_expr:
}
static int nft_set_elem_expr_setup(struct nft_ctx *ctx,
+ const struct nft_set_ext_tmpl *tmpl,
const struct nft_set_ext *ext,
struct nft_expr *expr_array[],
u32 num_exprs)
{
struct nft_set_elem_expr *elem_expr = nft_set_ext_expr(ext);
+ u32 len = sizeof(struct nft_set_elem_expr);
struct nft_expr *expr;
int i, err;
+ if (num_exprs == 0)
+ return 0;
+
+ for (i = 0; i < num_exprs; i++)
+ len += expr_array[i]->ops->size;
+
+ if (nft_set_ext_check(tmpl, NFT_SET_EXT_EXPRESSIONS, len) < 0)
+ return -EINVAL;
+
for (i = 0; i < num_exprs; i++) {
expr = nft_setelem_expr_at(elem_expr, elem_expr->size);
err = nft_expr_clone(expr, expr_array[i]);
@@ -5670,7 +5842,7 @@ static void nft_setelem_catchall_remove(const struct net *net,
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
if (catchall->elem == elem->priv) {
list_del_rcu(&catchall->list);
- kfree_rcu(catchall);
+ kfree_rcu(catchall, rcu);
break;
}
}
@@ -5686,6 +5858,25 @@ static void nft_setelem_remove(const struct net *net,
set->ops->remove(net, set, elem);
}
+static bool nft_setelem_valid_key_end(const struct nft_set *set,
+ struct nlattr **nla, u32 flags)
+{
+ if ((set->flags & (NFT_SET_CONCAT | NFT_SET_INTERVAL)) ==
+ (NFT_SET_CONCAT | NFT_SET_INTERVAL)) {
+ if (flags & NFT_SET_ELEM_INTERVAL_END)
+ return false;
+
+ if (nla[NFTA_SET_ELEM_KEY_END] &&
+ flags & NFT_SET_ELEM_CATCHALL)
+ return false;
+ } else {
+ if (nla[NFTA_SET_ELEM_KEY_END])
+ return false;
+ }
+
+ return true;
+}
+
static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr, u32 nlmsg_flags)
{
@@ -5721,8 +5912,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL))
return -EINVAL;
- if (flags != 0)
- nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
+ if (flags != 0) {
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
+ if (err < 0)
+ return err;
+ }
if (set->flags & NFT_SET_MAP) {
if (nla[NFTA_SET_ELEM_DATA] == NULL &&
@@ -5733,6 +5927,18 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return -EINVAL;
}
+ if (set->flags & NFT_SET_OBJECT) {
+ if (!nla[NFTA_SET_ELEM_OBJREF] &&
+ !(flags & NFT_SET_ELEM_INTERVAL_END))
+ return -EINVAL;
+ } else {
+ if (nla[NFTA_SET_ELEM_OBJREF])
+ return -EINVAL;
+ }
+
+ if (!nft_setelem_valid_key_end(set, nla, flags))
+ return -EINVAL;
+
if ((flags & NFT_SET_ELEM_INTERVAL_END) &&
(nla[NFTA_SET_ELEM_DATA] ||
nla[NFTA_SET_ELEM_OBJREF] ||
@@ -5740,6 +5946,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
nla[NFTA_SET_ELEM_EXPIRATION] ||
nla[NFTA_SET_ELEM_USERDATA] ||
nla[NFTA_SET_ELEM_EXPR] ||
+ nla[NFTA_SET_ELEM_KEY_END] ||
nla[NFTA_SET_ELEM_EXPRESSIONS]))
return -EINVAL;
@@ -5831,7 +6038,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
goto err_set_elem_expr;
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+ if (err < 0)
+ goto err_parse_key;
}
if (nla[NFTA_SET_ELEM_KEY_END]) {
@@ -5840,29 +6049,34 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
goto err_parse_key;
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen);
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen);
+ if (err < 0)
+ goto err_parse_key_end;
}
if (timeout > 0) {
- nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
- if (timeout != set->timeout)
- nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
+ if (err < 0)
+ goto err_parse_key_end;
+
+ if (timeout != set->timeout) {
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
+ if (err < 0)
+ goto err_parse_key_end;
+ }
}
if (num_exprs) {
for (i = 0; i < num_exprs; i++)
size += expr_array[i]->ops->size;
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS,
- sizeof(struct nft_set_elem_expr) +
- size);
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS,
+ sizeof(struct nft_set_elem_expr) + size);
+ if (err < 0)
+ goto err_parse_key_end;
}
if (nla[NFTA_SET_ELEM_OBJREF] != NULL) {
- if (!(set->flags & NFT_SET_OBJECT)) {
- err = -EINVAL;
- goto err_parse_key_end;
- }
obj = nft_obj_lookup(ctx->net, ctx->table,
nla[NFTA_SET_ELEM_OBJREF],
set->objtype, genmask);
@@ -5870,7 +6084,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
err = PTR_ERR(obj);
goto err_parse_key_end;
}
- nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
+ if (err < 0)
+ goto err_parse_key_end;
}
if (nla[NFTA_SET_ELEM_DATA] != NULL) {
@@ -5904,7 +6120,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
NFT_VALIDATE_NEED);
}
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len);
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len);
+ if (err < 0)
+ goto err_parse_data;
}
/* The full maximum length of userdata can exceed the maximum
@@ -5914,22 +6132,31 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
ulen = 0;
if (nla[NFTA_SET_ELEM_USERDATA] != NULL) {
ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]);
- if (ulen > 0)
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA,
- ulen);
+ if (ulen > 0) {
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA,
+ ulen);
+ if (err < 0)
+ goto err_parse_data;
+ }
}
- err = -ENOMEM;
elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data,
elem.key_end.val.data, elem.data.val.data,
- timeout, expiration, GFP_KERNEL);
- if (elem.priv == NULL)
+ timeout, expiration, GFP_KERNEL_ACCOUNT);
+ if (IS_ERR(elem.priv)) {
+ err = PTR_ERR(elem.priv);
goto err_parse_data;
+ }
ext = nft_set_elem_ext(set, elem.priv);
if (flags)
*nft_set_ext_flags(ext) = flags;
+
if (ulen > 0) {
+ if (nft_set_ext_check(&tmpl, NFT_SET_EXT_USERDATA, ulen) < 0) {
+ err = -EINVAL;
+ goto err_elem_userdata;
+ }
udata = nft_set_ext_userdata(ext);
udata->len = ulen - 1;
nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen);
@@ -5938,14 +6165,14 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
*nft_set_ext_obj(ext) = obj;
obj->use++;
}
- err = nft_set_elem_expr_setup(ctx, ext, expr_array, num_exprs);
+ err = nft_set_elem_expr_setup(ctx, &tmpl, ext, expr_array, num_exprs);
if (err < 0)
- goto err_elem_expr;
+ goto err_elem_free;
trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
if (trans == NULL) {
err = -ENOMEM;
- goto err_elem_expr;
+ goto err_elem_free;
}
ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
@@ -5991,10 +6218,10 @@ err_set_full:
nft_setelem_remove(ctx->net, set, &elem);
err_element_clash:
kfree(trans);
-err_elem_expr:
+err_elem_free:
if (obj)
obj->use--;
-
+err_elem_userdata:
nf_tables_set_elem_destroy(ctx, set, elem.priv);
err_parse_data:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
@@ -6047,8 +6274,10 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_add_set_elem(&ctx, set, attr, info->nlh->nlmsg_flags);
- if (err < 0)
+ if (err < 0) {
+ NL_SET_BAD_ATTR(extack, attr);
return err;
+ }
}
if (nft_net->validate_state == NFT_VALIDATE_DO)
@@ -6140,10 +6369,16 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL))
return -EINVAL;
+ if (!nft_setelem_valid_key_end(set, nla, flags))
+ return -EINVAL;
+
nft_set_ext_prepare(&tmpl);
- if (flags != 0)
- nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
+ if (flags != 0) {
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
+ if (err < 0)
+ return err;
+ }
if (nla[NFTA_SET_ELEM_KEY]) {
err = nft_setelem_parse_key(ctx, set, &elem.key.val,
@@ -6151,24 +6386,30 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
return err;
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+ if (err < 0)
+ goto fail_elem;
}
if (nla[NFTA_SET_ELEM_KEY_END]) {
err = nft_setelem_parse_key(ctx, set, &elem.key_end.val,
nla[NFTA_SET_ELEM_KEY_END]);
if (err < 0)
- return err;
+ goto fail_elem;
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen);
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen);
+ if (err < 0)
+ goto fail_elem_key_end;
}
err = -ENOMEM;
elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data,
elem.key_end.val.data, NULL, 0, 0,
- GFP_KERNEL);
- if (elem.priv == NULL)
- goto fail_elem;
+ GFP_KERNEL_ACCOUNT);
+ if (IS_ERR(elem.priv)) {
+ err = PTR_ERR(elem.priv);
+ goto fail_elem_key_end;
+ }
ext = nft_set_elem_ext(set, elem.priv);
if (flags)
@@ -6192,6 +6433,8 @@ fail_ops:
kfree(trans);
fail_trans:
kfree(elem.priv);
+fail_elem_key_end:
+ nft_data_release(&elem.key_end.val, NFT_DATA_VALUE);
fail_elem:
nft_data_release(&elem.key.val, NFT_DATA_VALUE);
return err;
@@ -6318,8 +6561,10 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_del_setelem(&ctx, set, attr);
- if (err < 0)
+ if (err < 0) {
+ NL_SET_BAD_ATTR(extack, attr);
break;
+ }
}
return err;
}
@@ -6478,7 +6723,7 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
}
err = -ENOMEM;
- obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL);
+ obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL_ACCOUNT);
if (!obj)
goto err2;
@@ -6552,12 +6797,15 @@ static int nf_tables_updobj(const struct nft_ctx *ctx,
{
struct nft_object *newobj;
struct nft_trans *trans;
- int err;
+ int err = -ENOMEM;
+
+ if (!try_module_get(type->owner))
+ return -ENOENT;
trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ,
sizeof(struct nft_trans_obj));
if (!trans)
- return -ENOMEM;
+ goto err_trans;
newobj = nft_obj_init(ctx, type, attr);
if (IS_ERR(newobj)) {
@@ -6574,6 +6822,8 @@ static int nf_tables_updobj(const struct nft_ctx *ctx,
err_free_trans:
kfree(trans);
+err_trans:
+ module_put(type->owner);
return err;
}
@@ -6639,7 +6889,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
obj->key.table = table;
obj->handle = nf_tables_alloc_handle(table);
- obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
+ obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL_ACCOUNT);
if (!obj->key.name) {
err = -ENOMEM;
goto err_strdup;
@@ -6738,7 +6988,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = nft_net->base_seq;
+ cb->seq = READ_ONCE(nft_net->base_seq);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -7208,13 +7458,25 @@ static void nft_unregister_flowtable_hook(struct net *net,
FLOW_BLOCK_UNBIND);
}
-static void nft_unregister_flowtable_net_hooks(struct net *net,
- struct list_head *hook_list)
+static void __nft_unregister_flowtable_net_hooks(struct net *net,
+ struct list_head *hook_list,
+ bool release_netdev)
{
- struct nft_hook *hook;
+ struct nft_hook *hook, *next;
- list_for_each_entry(hook, hook_list, list)
+ list_for_each_entry_safe(hook, next, hook_list, list) {
nf_unregister_net_hook(net, &hook->ops);
+ if (release_netdev) {
+ list_del(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
+ }
+}
+
+static void nft_unregister_flowtable_net_hooks(struct net *net,
+ struct list_head *hook_list)
+{
+ __nft_unregister_flowtable_net_hooks(net, hook_list, false);
}
static int nft_register_flowtable_net_hooks(struct net *net,
@@ -7307,11 +7569,15 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh,
if (nla[NFTA_FLOWTABLE_FLAGS]) {
flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS]));
- if (flags & ~NFT_FLOWTABLE_MASK)
- return -EOPNOTSUPP;
+ if (flags & ~NFT_FLOWTABLE_MASK) {
+ err = -EOPNOTSUPP;
+ goto err_flowtable_update_hook;
+ }
if ((flowtable->data.flags & NFT_FLOWTABLE_HW_OFFLOAD) ^
- (flags & NFT_FLOWTABLE_HW_OFFLOAD))
- return -EOPNOTSUPP;
+ (flags & NFT_FLOWTABLE_HW_OFFLOAD)) {
+ err = -EOPNOTSUPP;
+ goto err_flowtable_update_hook;
+ }
} else {
flags = flowtable->data.flags;
}
@@ -7400,7 +7666,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
- flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
+ flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL_ACCOUNT);
if (!flowtable)
return -ENOMEM;
@@ -7408,7 +7674,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
flowtable->handle = nf_tables_alloc_handle(table);
INIT_LIST_HEAD(&flowtable->hook_list);
- flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
+ flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL_ACCOUNT);
if (!flowtable->name) {
err = -ENOMEM;
goto err1;
@@ -7492,6 +7758,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
{
const struct nlattr * const *nla = ctx->nla;
struct nft_flowtable_hook flowtable_hook;
+ LIST_HEAD(flowtable_del_list);
struct nft_hook *this, *hook;
struct nft_trans *trans;
int err;
@@ -7507,7 +7774,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
err = -ENOENT;
goto err_flowtable_del_hook;
}
- hook->inactive = true;
+ list_move(&hook->list, &flowtable_del_list);
}
trans = nft_trans_alloc(ctx, NFT_MSG_DELFLOWTABLE,
@@ -7520,6 +7787,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
nft_trans_flowtable(trans) = flowtable;
nft_trans_flowtable_update(trans) = true;
INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
+ list_splice(&flowtable_del_list, &nft_trans_flowtable_hooks(trans));
nft_flowtable_hook_release(&flowtable_hook);
nft_trans_commit_list_add_tail(ctx->net, trans);
@@ -7527,13 +7795,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
return 0;
err_flowtable_del_hook:
- list_for_each_entry(this, &flowtable_hook.list, list) {
- hook = nft_hook_list_find(&flowtable->hook_list, this);
- if (!hook)
- break;
-
- hook->inactive = false;
- }
+ list_splice(&flowtable_del_list, &flowtable->hook_list);
nft_flowtable_hook_release(&flowtable_hook);
return err;
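/*
 * Note: the per-hook "inactive" flag is gone; hooks selected for
 * deletion are physically moved to a private list, handed to the
 * transaction on success, and spliced straight back on error or
 * abort.  The pattern with the real list helpers (the predicate and
 * naming are illustrative):
 */
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/types.h>

static int select_for_deletion(struct list_head *live,
			       struct list_head *selected,
			       bool (*want)(struct list_head *))
{
	struct list_head *e, *next;

	list_for_each_safe(e, next, live) {
		if (want(e))
			list_move(e, selected);	/* tentative removal */
	}

	if (list_empty(selected))
		return -ENOENT;

	/* on a later failure: list_splice(selected, live) undoes it all */
	return 0;
}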
@@ -7658,7 +7920,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb,
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = nft_net->base_seq;
+ cb->seq = READ_ONCE(nft_net->base_seq);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -8186,7 +8448,7 @@ static void nft_obj_commit_update(struct nft_trans *trans)
if (obj->ops->update)
obj->ops->update(obj, newobj);
- kfree(newobj);
+ nft_obj_destroy(&trans->ctx, newobj);
}
static void nft_commit_release(struct nft_trans *trans)
@@ -8256,6 +8518,12 @@ void nf_tables_trans_destroy_flush_work(void)
}
EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
+static bool nft_expr_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ return false;
+}
+
static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
{
const struct nft_expr *expr, *last;
@@ -8264,17 +8532,13 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
void *data, *data_boundary;
struct nft_rule_dp *prule;
struct nft_rule *rule;
- int i;
/* already handled or inactive chain? */
if (chain->blob_next || !nft_is_active_next(net, chain))
return 0;
- rule = list_entry(&chain->rules, struct nft_rule, list);
- i = 0;
-
data_size = 0;
- list_for_each_entry_continue(rule, &chain->rules, list) {
+ list_for_each_entry(rule, &chain->rules, list) {
if (nft_is_active_next(net, rule)) {
data_size += sizeof(*prule) + rule->dlen;
if (data_size > INT_MAX)
@@ -8291,7 +8555,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
data_boundary = data + data_size;
size = 0;
- list_for_each_entry_continue(rule, &chain->rules, list) {
+ list_for_each_entry(rule, &chain->rules, list) {
if (!nft_is_active_next(net, rule))
continue;
@@ -8301,12 +8565,11 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
return -ENOMEM;
size = 0;
- track.last = last;
+ track.last = nft_expr_last(rule);
nft_rule_for_each_expr(expr, last, rule) {
track.cur = expr;
- if (expr->ops->reduce &&
- expr->ops->reduce(&track, expr)) {
+ if (nft_expr_reduce(&track, expr)) {
expr = track.cur;
continue;
}
@@ -8436,17 +8699,6 @@ void nft_chain_del(struct nft_chain *chain)
list_del_rcu(&chain->list);
}
-static void nft_flowtable_hooks_del(struct nft_flowtable *flowtable,
- struct list_head *hook_list)
-{
- struct nft_hook *hook, *next;
-
- list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
- if (hook->inactive)
- list_move(&hook->list, hook_list);
- }
-}
-
static void nf_tables_module_autoload_cleanup(struct net *net)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -8598,6 +8850,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
struct nft_trans_elem *te;
struct nft_chain *chain;
struct nft_table *table;
+ unsigned int base_seq;
LIST_HEAD(adl);
int err;
@@ -8647,9 +8900,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
* Bump generation counter, invalidate any dump in progress.
* Cannot fail after this point.
*/
- while (++nft_net->base_seq == 0)
+ base_seq = READ_ONCE(nft_net->base_seq);
+ while (++base_seq == 0)
;
+ WRITE_ONCE(nft_net->base_seq, base_seq);
+
/* step 3. Start new generation, rules_gen_X now in use. */
net->nft.gencursor = nft_gencursor_next(net);
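/*
 * Note: dump callbacks now sample base_seq with READ_ONCE() outside
 * the commit mutex, so the generation bump is computed on a local
 * copy and published with a single WRITE_ONCE(); zero is skipped
 * because it never denotes a valid generation.  The pattern in
 * isolation:
 */
#include <linux/compiler.h>

static void bump_base_seq(unsigned int *seqp)
{
	unsigned int seq = READ_ONCE(*seqp);

	while (++seq == 0)	/* 0 is reserved, step over the wrap */
		;

	WRITE_ONCE(*seqp, seq);
}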
@@ -8701,6 +8957,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_rule_notify(&trans->ctx,
nft_trans_rule(trans),
NFT_MSG_NEWRULE);
+ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+
nft_trans_destroy(trans);
break;
case NFT_MSG_DELRULE:
@@ -8711,6 +8970,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_rule_expr_deactivate(&trans->ctx,
nft_trans_rule(trans),
NFT_TRANS_COMMIT);
+
+ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_NEWSET:
nft_clear(net, nft_trans_set(trans));
@@ -8791,8 +9053,6 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
break;
case NFT_MSG_DELFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
- nft_flowtable_hooks_del(nft_trans_flowtable(trans),
- &nft_trans_flowtable_hooks(trans));
nf_tables_flowtable_notify(&trans->ctx,
nft_trans_flowtable(trans),
&nft_trans_flowtable_hooks(trans),
@@ -8873,7 +9133,6 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
- struct nft_hook *hook;
if (action == NFNL_ABORT_VALIDATE &&
nf_tables_validate(net) < 0)
@@ -8979,7 +9238,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_NEWOBJ:
if (nft_trans_obj_update(trans)) {
- kfree(nft_trans_obj_newobj(trans));
+ nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans));
nft_trans_destroy(trans);
} else {
trans->ctx.table->use--;
@@ -9004,8 +9263,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_DELFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
- list_for_each_entry(hook, &nft_trans_flowtable(trans)->hook_list, list)
- hook->inactive = false;
+ list_splice(&nft_trans_flowtable_hooks(trans),
+ &nft_trans_flowtable(trans)->hook_list);
} else {
trans->ctx.table->use++;
nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
@@ -9268,17 +9527,23 @@ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
}
EXPORT_SYMBOL_GPL(nft_parse_u32_check);
-static unsigned int nft_parse_register(const struct nlattr *attr)
+static int nft_parse_register(const struct nlattr *attr, u32 *preg)
{
unsigned int reg;
reg = ntohl(nla_get_be32(attr));
switch (reg) {
case NFT_REG_VERDICT...NFT_REG_4:
- return reg * NFT_REG_SIZE / NFT_REG32_SIZE;
+ *preg = reg * NFT_REG_SIZE / NFT_REG32_SIZE;
+ break;
+ case NFT_REG32_00...NFT_REG32_15:
+ *preg = reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
+ break;
default:
- return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
+ return -ERANGE;
}
+
+ return 0;
}
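/*
 * Note: register numbers arriving from userspace are now validated
 * instead of being remapped unconditionally; anything outside the
 * verdict/16-byte window and the NFT_REG32_* window earns -ERANGE
 * before the load/store validators even run.  Open-coded equivalent
 * using the uapi constants:
 */
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/netfilter/nf_tables.h>

static int parse_reg(u32 reg, u32 *preg)
{
	if (reg <= NFT_REG_4)
		*preg = reg * NFT_REG_SIZE / NFT_REG32_SIZE;
	else if (reg >= NFT_REG32_00 && reg <= NFT_REG32_15)
		*preg = reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
	else
		return -ERANGE;

	return 0;
}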
/**
@@ -9320,7 +9585,10 @@ int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
u32 reg;
int err;
- reg = nft_parse_register(attr);
+ err = nft_parse_register(attr, &reg);
+ if (err < 0)
+ return err;
+
err = nft_validate_register_load(reg, len);
if (err < 0)
return err;
@@ -9375,7 +9643,10 @@ int nft_parse_register_store(const struct nft_ctx *ctx,
int err;
u32 reg;
- reg = nft_parse_register(attr);
+ err = nft_parse_register(attr, &reg);
+ if (err < 0)
+ return err;
+
err = nft_validate_register_store(ctx, reg, data, type, len);
if (err < 0)
return err;
@@ -9431,7 +9702,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
tb[NFTA_VERDICT_CHAIN],
genmask);
} else if (tb[NFTA_VERDICT_CHAIN_ID]) {
- chain = nft_chain_lookup_byid(ctx->net,
+ chain = nft_chain_lookup_byid(ctx->net, ctx->table,
tb[NFTA_VERDICT_CHAIN_ID]);
if (IS_ERR(chain))
return PTR_ERR(chain);
@@ -9443,6 +9714,11 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
return PTR_ERR(chain);
if (nft_is_base_chain(chain))
return -EOPNOTSUPP;
+ if (nft_chain_is_bound(chain))
+ return -EINVAL;
+ if (desc->flags & NFT_DATA_DESC_SETELEM &&
+ chain->flags & NFT_CHAIN_BINDING)
+ return -EINVAL;
chain->use++;
data->verdict.chain = chain;
@@ -9450,7 +9726,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
}
desc->len = sizeof(data->verdict);
- desc->type = NFT_DATA_VERDICT;
+
return 0;
}
@@ -9503,20 +9779,25 @@ nla_put_failure:
}
static int nft_value_init(const struct nft_ctx *ctx,
- struct nft_data *data, unsigned int size,
- struct nft_data_desc *desc, const struct nlattr *nla)
+ struct nft_data *data, struct nft_data_desc *desc,
+ const struct nlattr *nla)
{
unsigned int len;
len = nla_len(nla);
if (len == 0)
return -EINVAL;
- if (len > size)
+ if (len > desc->size)
return -EOVERFLOW;
+ if (desc->len) {
+ if (len != desc->len)
+ return -EINVAL;
+ } else {
+ desc->len = len;
+ }
nla_memcpy(data->data, nla, len);
- desc->type = NFT_DATA_VALUE;
- desc->len = len;
+
return 0;
}
@@ -9536,7 +9817,6 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
*
* @ctx: context of the expression using the data
* @data: destination struct nft_data
- * @size: maximum data length
* @desc: data description
* @nla: netlink attribute containing data
*
@@ -9546,24 +9826,35 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
* The caller can indicate that it only wants to accept data of type
* NFT_DATA_VALUE by passing NULL for the ctx argument.
*/
-int nft_data_init(const struct nft_ctx *ctx,
- struct nft_data *data, unsigned int size,
+int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
struct nft_data_desc *desc, const struct nlattr *nla)
{
struct nlattr *tb[NFTA_DATA_MAX + 1];
int err;
+ if (WARN_ON_ONCE(!desc->size))
+ return -EINVAL;
+
err = nla_parse_nested_deprecated(tb, NFTA_DATA_MAX, nla,
nft_data_policy, NULL);
if (err < 0)
return err;
- if (tb[NFTA_DATA_VALUE])
- return nft_value_init(ctx, data, size, desc,
- tb[NFTA_DATA_VALUE]);
- if (tb[NFTA_DATA_VERDICT] && ctx != NULL)
- return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
- return -EINVAL;
+ if (tb[NFTA_DATA_VALUE]) {
+ if (desc->type != NFT_DATA_VALUE)
+ return -EINVAL;
+
+ err = nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]);
+ } else if (tb[NFTA_DATA_VERDICT] && ctx != NULL) {
+ if (desc->type != NFT_DATA_VERDICT)
+ return -EINVAL;
+
+ err = nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
+ } else {
+ err = -EINVAL;
+ }
+
+ return err;
}
EXPORT_SYMBOL_GPL(nft_data_init);
@@ -9639,10 +9930,14 @@ EXPORT_SYMBOL_GPL(__nft_release_basechain);
static void __nft_release_hook(struct net *net, struct nft_table *table)
{
+ struct nft_flowtable *flowtable;
struct nft_chain *chain;
list_for_each_entry(chain, &table->chains, list)
- nf_tables_unregister_hook(net, table, chain);
+ __nf_tables_unregister_hook(net, table, chain, true);
+ list_for_each_entry(flowtable, &table->flowtables, list)
+ __nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list,
+ true);
}
static void __nft_release_hooks(struct net *net)
@@ -9735,6 +10030,8 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
nft_net = nft_pernet(net);
deleted = 0;
mutex_lock(&nft_net->commit_mutex);
+ if (!list_empty(&nf_tables_destroy_list))
+ rcu_barrier();
again:
list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
@@ -9781,7 +10078,11 @@ static int __net_init nf_tables_init_net(struct net *net)
static void __net_exit nf_tables_pre_exit_net(struct net *net)
{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+
+ mutex_lock(&nft_net->commit_mutex);
__nft_release_hooks(net);
+ mutex_unlock(&nft_net->commit_mutex);
}
static void __net_exit nf_tables_exit_net(struct net *net)
@@ -9789,7 +10090,8 @@ static void __net_exit nf_tables_exit_net(struct net *net)
struct nftables_pernet *nft_net = nft_pernet(net);
mutex_lock(&nft_net->commit_mutex);
- if (!list_empty(&nft_net->commit_list))
+ if (!list_empty(&nft_net->commit_list) ||
+ !list_empty(&nft_net->module_list))
__nf_tables_abort(net, NFNL_ABORT_NONE);
__nft_release_tables(net);
mutex_unlock(&nft_net->commit_mutex);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 36e73f9828c5..cee3e4e905ec 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -25,9 +25,7 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info,
const struct nft_chain *chain,
enum nft_trace_types type)
{
- const struct nft_pktinfo *pkt = info->pkt;
-
- if (!info->trace || !pkt->skb->nf_trace)
+ if (!info->trace || !info->nf_trace)
return;
info->chain = chain;
@@ -36,17 +34,28 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info,
nft_trace_notify(info);
}
-static inline void nft_trace_packet(struct nft_traceinfo *info,
+static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
+ struct nft_traceinfo *info,
const struct nft_chain *chain,
const struct nft_rule_dp *rule,
enum nft_trace_types type)
{
if (static_branch_unlikely(&nft_trace_enabled)) {
+ info->nf_trace = pkt->skb->nf_trace;
info->rule = rule;
__nft_trace_packet(info, chain, type);
}
}
+static inline void nft_trace_copy_nftrace(const struct nft_pktinfo *pkt,
+ struct nft_traceinfo *info)
+{
+ if (static_branch_unlikely(&nft_trace_enabled)) {
+ if (info->trace)
+ info->nf_trace = pkt->skb->nf_trace;
+ }
+}
+
static void nft_bitwise_fast_eval(const struct nft_expr *expr,
struct nft_regs *regs)
{
@@ -67,6 +76,20 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
}
+static void nft_cmp16_fast_eval(const struct nft_expr *expr,
+ struct nft_regs *regs)
+{
+ const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
+ const u64 *reg_data = (const u64 *)&regs->data[priv->sreg];
+ const u64 *mask = (const u64 *)&priv->mask;
+ const u64 *data = (const u64 *)&priv->data;
+
+ if (((reg_data[0] & mask[0]) == data[0] &&
+ ((reg_data[1] & mask[1]) == data[1])) ^ priv->inv)
+ return;
+ regs->verdict.code = NFT_BREAK;
+}
+
static noinline void __nft_trace_verdict(struct nft_traceinfo *info,
const struct nft_chain *chain,
const struct nft_regs *regs)
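/*
 * Note: nft_cmp16_fast_eval() above handles up to 16-byte compares
 * without an indirect call by testing (reg & mask) == data as two
 * 64-bit words; XOR with the inversion bit folds NFT_CMP_NEQ into the
 * same branchless predicate:
 */
#include <linux/types.h>

static bool cmp16_match(const u64 reg[2], const u64 mask[2],
			const u64 data[2], bool inv)
{
	return ((reg[0] & mask[0]) == data[0] &&
		(reg[1] & mask[1]) == data[1]) ^ inv;
}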
@@ -78,8 +101,15 @@ static noinline void __nft_trace_verdict(struct nft_traceinfo *info,
case NFT_RETURN:
type = NFT_TRACETYPE_RETURN;
break;
+ case NF_STOLEN:
+ type = NFT_TRACETYPE_RULE;
+ /* can't access skb->nf_trace; use copy */
+ break;
default:
type = NFT_TRACETYPE_RULE;
+
+ if (info->trace)
+ info->nf_trace = info->pkt->skb->nf_trace;
break;
}
@@ -201,7 +231,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
const struct nft_rule_dp *rule, *last_rule;
const struct net *net = nft_net(pkt);
const struct nft_expr *expr, *last;
- struct nft_regs regs;
+ struct nft_regs regs = {};
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
bool genbit = READ_ONCE(net->nft.gencursor);
@@ -225,6 +255,8 @@ next_rule:
nft_rule_dp_for_each_expr(expr, last, rule) {
if (expr->ops == &nft_cmp_fast_ops)
nft_cmp_fast_eval(expr, &regs);
+ else if (expr->ops == &nft_cmp16_fast_ops)
+ nft_cmp16_fast_eval(expr, &regs);
else if (expr->ops == &nft_bitwise_fast_ops)
nft_bitwise_fast_eval(expr, &regs);
else if (expr->ops != &nft_payload_fast_ops ||
@@ -238,9 +270,10 @@ next_rule:
switch (regs.verdict.code) {
case NFT_BREAK:
regs.verdict.code = NFT_CONTINUE;
+ nft_trace_copy_nftrace(pkt, &info);
continue;
case NFT_CONTINUE:
- nft_trace_packet(&info, chain, rule,
+ nft_trace_packet(pkt, &info, chain, rule,
NFT_TRACETYPE_RULE);
continue;
}
@@ -284,7 +317,7 @@ next_rule:
goto next_rule;
}
- nft_trace_packet(&info, basechain, NULL, NFT_TRACETYPE_POLICY);
+ nft_trace_packet(pkt, &info, basechain, NULL, NFT_TRACETYPE_POLICY);
if (static_branch_unlikely(&nft_counters_enabled))
nft_update_chain_stats(basechain, pkt);
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 9656c1646222..910ef881c3b8 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -94,7 +94,8 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net,
expr = nft_expr_first(rule);
while (nft_expr_more(rule, expr)) {
- if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION)
+ if (expr->ops->offload_action &&
+ expr->ops->offload_action(expr))
num_actions++;
expr = nft_expr_next(expr);
@@ -207,7 +208,7 @@ static int nft_setup_cb_call(enum tc_setup_type type, void *type_data,
return 0;
}
-int nft_chain_offload_priority(struct nft_base_chain *basechain)
+static int nft_chain_offload_priority(const struct nft_base_chain *basechain)
{
if (basechain->ops.priority <= 0 ||
basechain->ops.priority > USHRT_MAX)
@@ -216,6 +217,27 @@ int nft_chain_offload_priority(struct nft_base_chain *basechain)
return 0;
}
+bool nft_chain_offload_support(const struct nft_base_chain *basechain)
+{
+ struct net_device *dev;
+ struct nft_hook *hook;
+
+ if (nft_chain_offload_priority(basechain) < 0)
+ return false;
+
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (hook->ops.pf != NFPROTO_NETDEV ||
+ hook->ops.hooknum != NF_NETDEV_INGRESS)
+ return false;
+
+ dev = hook->ops.dev;
+ if (!dev->netdev_ops->ndo_setup_tc && !flow_indr_dev_exists())
+ return false;
+ }
+
+ return true;
+}
+
static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow,
const struct nft_base_chain *basechain,
const struct nft_rule *rule,
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index 5041725423c2..1163ba9c1401 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -7,7 +7,7 @@
#include <linux/module.h>
#include <linux/static_key.h>
#include <linux/hash.h>
-#include <linux/jhash.h>
+#include <linux/siphash.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/skbuff.h>
@@ -25,22 +25,6 @@
DEFINE_STATIC_KEY_FALSE(nft_trace_enabled);
EXPORT_SYMBOL_GPL(nft_trace_enabled);
-static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb)
-{
- __be32 id;
-
- /* using skb address as ID results in a limited number of
- * values (and quick reuse).
- *
- * So we attempt to use as many skb members that will not
- * change while skb is with netfilter.
- */
- id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb),
- skb->skb_iif);
-
- return nla_put_be32(nlskb, NFTA_TRACE_ID, id);
-}
-
static int trace_fill_header(struct sk_buff *nlskb, u16 type,
const struct sk_buff *skb,
int off, unsigned int len)
@@ -186,6 +170,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
struct nlmsghdr *nlh;
struct sk_buff *skb;
unsigned int size;
+ u32 mark = 0;
u16 event;
if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE))
@@ -229,7 +214,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type)))
goto nla_put_failure;
- if (trace_fill_id(skb, pkt->skb))
+ if (nla_put_u32(skb, NFTA_TRACE_ID, info->skbid))
goto nla_put_failure;
if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name))
@@ -249,16 +234,24 @@ void nft_trace_notify(struct nft_traceinfo *info)
case NFT_TRACETYPE_RULE:
if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict))
goto nla_put_failure;
+
+ /* pkt->skb undefined iff NF_STOLEN, disable dump */
+ if (info->verdict->code == NF_STOLEN)
+ info->packet_dumped = true;
+ else
+ mark = pkt->skb->mark;
+
break;
case NFT_TRACETYPE_POLICY:
+ mark = pkt->skb->mark;
+
if (nla_put_be32(skb, NFTA_TRACE_POLICY,
htonl(info->basechain->policy)))
goto nla_put_failure;
break;
}
- if (pkt->skb->mark &&
- nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark)))
+ if (mark && nla_put_be32(skb, NFTA_TRACE_MARK, htonl(mark)))
goto nla_put_failure;
if (!info->packet_dumped) {
@@ -283,9 +276,20 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
const struct nft_verdict *verdict,
const struct nft_chain *chain)
{
+ static siphash_key_t trace_key __read_mostly;
+ struct sk_buff *skb = pkt->skb;
+
info->basechain = nft_base_chain(chain);
info->trace = true;
+ info->nf_trace = pkt->skb->nf_trace;
info->packet_dumped = false;
info->pkt = pkt;
info->verdict = verdict;
+
+ net_get_random_once(&trace_key, sizeof(trace_key));
+
+ info->skbid = (u32)siphash_3u32(hash32_ptr(skb),
+ skb_get_hash(skb),
+ skb->skb_iif,
+ &trace_key);
}
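/*
 * Note: the trace ID moves from an unkeyed jhash to siphash with a
 * net_get_random_once() key, and it is computed once in
 * nft_trace_init() rather than per notification, so every message for
 * one packet carries the same, non-guessable ID.  The helper really
 * is siphash_3u32(); the inputs are shown as plain values:
 */
#include <linux/siphash.h>
#include <linux/types.h>

static u32 make_trace_id(u32 skb_ptr_hash, u32 flow_hash, u32 iif,
			 const siphash_key_t *key)
{
	/* keyed: stable per skb, but not predictable by a peer */
	return (u32)siphash_3u32(skb_ptr_hash, flow_hash, iif, key);
}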
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 7e2c8dd01408..6d18fb346868 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -44,6 +44,10 @@ MODULE_DESCRIPTION("Netfilter messages via netlink socket");
static unsigned int nfnetlink_pernet_id __read_mostly;
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static DEFINE_SPINLOCK(nfnl_grp_active_lock);
+#endif
+
struct nfnl_net {
struct sock *nfnl;
};
@@ -290,6 +294,7 @@ replay:
nfnl_lock(subsys_id);
if (nfnl_dereference_protected(subsys_id) != ss ||
nfnetlink_find_client(type, ss) != nc) {
+ nfnl_unlock(subsys_id);
err = -EAGAIN;
break;
}
@@ -626,7 +631,7 @@ static void nfnetlink_rcv_skb_batch(struct sk_buff *skb, struct nlmsghdr *nlh)
nfgenmsg = nlmsg_data(nlh);
skb_pull(skb, msglen);
/* Work around old nft using host byte order */
- if (nfgenmsg->res_id == NFNL_SUBSYS_NFTABLES)
+ if (nfgenmsg->res_id == (__force __be16)NFNL_SUBSYS_NFTABLES)
res_id = NFNL_SUBSYS_NFTABLES;
else
res_id = ntohs(nfgenmsg->res_id);
@@ -654,7 +659,44 @@ static void nfnetlink_rcv(struct sk_buff *skb)
netlink_rcv_skb(skb, nfnetlink_rcv_msg);
}
-#ifdef CONFIG_MODULES
+static void nfnetlink_bind_event(struct net *net, unsigned int group)
+{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ int type, group_bit;
+ u8 v;
+
+ /* All NFNLGRP_CONNTRACK_* group bits fit into u8.
+ * The other groups are not relevant and can be ignored.
+ */
+ if (group >= 8)
+ return;
+
+ type = nfnl_group2type[group];
+
+ switch (type) {
+ case NFNL_SUBSYS_CTNETLINK:
+ break;
+ case NFNL_SUBSYS_CTNETLINK_EXP:
+ break;
+ default:
+ return;
+ }
+
+ group_bit = (1 << group);
+
+ spin_lock(&nfnl_grp_active_lock);
+ v = READ_ONCE(net->ct.ctnetlink_has_listener);
+ if ((v & group_bit) == 0) {
+ v |= group_bit;
+
+ /* read concurrently without nfnl_grp_active_lock held. */
+ WRITE_ONCE(net->ct.ctnetlink_has_listener, v);
+ }
+
+ spin_unlock(&nfnl_grp_active_lock);
+#endif
+}
+
static int nfnetlink_bind(struct net *net, int group)
{
const struct nfnetlink_subsystem *ss;
@@ -670,9 +712,48 @@ static int nfnetlink_bind(struct net *net, int group)
rcu_read_unlock();
if (!ss)
request_module_nowait("nfnetlink-subsys-%d", type);
+
+ nfnetlink_bind_event(net, group);
return 0;
}
+
+static void nfnetlink_unbind(struct net *net, int group)
+{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ int type, group_bit;
+
+ if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX)
+ return;
+
+ type = nfnl_group2type[group];
+
+ switch (type) {
+ case NFNL_SUBSYS_CTNETLINK:
+ break;
+ case NFNL_SUBSYS_CTNETLINK_EXP:
+ break;
+ default:
+ return;
+ }
+
+ /* ctnetlink_has_listener is u8 */
+ if (group >= 8)
+ return;
+
+ group_bit = (1 << group);
+
+ spin_lock(&nfnl_grp_active_lock);
+ if (!nfnetlink_has_listeners(net, group)) {
+ u8 v = READ_ONCE(net->ct.ctnetlink_has_listener);
+
+ v &= ~group_bit;
+
+ /* read concurrently without nfnl_grp_active_lock held. */
+ WRITE_ONCE(net->ct.ctnetlink_has_listener, v);
+ }
+ spin_unlock(&nfnl_grp_active_lock);
#endif
+}
static int __net_init nfnetlink_net_init(struct net *net)
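/*
 * Note: conntrack event delivery can now bail out early by reading a
 * per-netns u8 where bit N means "netlink group N has listeners";
 * only the NFNLGRP_CONNTRACK_* groups (all below 8) are tracked.  The
 * bit maintenance, with the lock elided (upstream holds
 * nfnl_grp_active_lock around the read-modify-write):
 */
#include <linux/compiler.h>
#include <linux/types.h>

static void set_group_bit(u8 *map, unsigned int group, bool on)
{
	u8 v;

	if (group >= 8)		/* wider groups are not tracked */
		return;

	v = READ_ONCE(*map);
	if (on)
		v |= 1 << group;
	else
		v &= ~(1 << group);

	WRITE_ONCE(*map, v);	/* readers never take the lock */
}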
{
@@ -680,9 +761,8 @@ static int __net_init nfnetlink_net_init(struct net *net)
struct netlink_kernel_cfg cfg = {
.groups = NFNLGRP_MAX,
.input = nfnetlink_rcv,
-#ifdef CONFIG_MODULES
.bind = nfnetlink_bind,
-#endif
+ .unbind = nfnetlink_unbind,
};
nfnlnet->nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, &cfg);
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 5c622f55c9d6..97248963a7d3 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -96,11 +96,13 @@ static int
nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct)
{
struct nf_conn_help *help = nfct_help(ct);
+ const struct nf_conntrack_helper *helper;
if (attr == NULL)
return -EINVAL;
- if (help->helper->data_len == 0)
+ helper = rcu_dereference(help->helper);
+ if (!helper || helper->data_len == 0)
return -EINVAL;
nla_memcpy(help->data, attr, sizeof(help->data));
@@ -111,9 +113,11 @@ static int
nfnl_cthelper_to_nlattr(struct sk_buff *skb, const struct nf_conn *ct)
{
const struct nf_conn_help *help = nfct_help(ct);
+ const struct nf_conntrack_helper *helper;
- if (help->helper->data_len &&
- nla_put(skb, CTA_HELP_INFO, help->helper->data_len, &help->data))
+ helper = rcu_dereference(help->helper);
+ if (helper && helper->data_len &&
+ nla_put(skb, CTA_HELP_INFO, helper->data_len, &help->data))
goto nla_put_failure;
return 0;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index c57673d499be..f466af4f8531 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -33,8 +33,20 @@
static unsigned int nfct_timeout_id __read_mostly;
+struct ctnl_timeout {
+ struct list_head head;
+ struct list_head free_head;
+ struct rcu_head rcu_head;
+ refcount_t refcnt;
+ char name[CTNL_TIMEOUT_NAME_MAX];
+
+ /* must be at the end */
+ struct nf_ct_timeout timeout;
+};
+
struct nfct_timeout_pernet {
struct list_head nfct_timeout_list;
+ struct list_head nfct_timeout_freelist;
};
MODULE_LICENSE("GPL");
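/*
 * Note: struct ctnl_timeout is now private to this file; the embedded
 * struct nf_ct_timeout must stay last because it ends in a flexible
 * data[] array, and the wrapper is recovered from the embedded member
 * the usual way (this mirrors the container_of() call already used in
 * ctnl_timeout_put() below):
 */
#include <linux/kernel.h>

static struct ctnl_timeout *to_ctnl_timeout(struct nf_ct_timeout *t)
{
	return container_of(t, struct ctnl_timeout, timeout);
}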
@@ -158,6 +170,7 @@ static int cttimeout_new_timeout(struct sk_buff *skb,
timeout->timeout.l3num = l3num;
timeout->timeout.l4proto = l4proto;
refcount_set(&timeout->refcnt, 1);
+ __module_get(THIS_MODULE);
list_add_tail_rcu(&timeout->head, &pernet->nfct_timeout_list);
return 0;
@@ -506,13 +519,8 @@ static struct nf_ct_timeout *ctnl_timeout_find_get(struct net *net,
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
- if (!try_module_get(THIS_MODULE))
- goto err;
-
- if (!refcount_inc_not_zero(&timeout->refcnt)) {
- module_put(THIS_MODULE);
+ if (!refcount_inc_not_zero(&timeout->refcnt))
goto err;
- }
matching = timeout;
break;
}
@@ -525,10 +533,10 @@ static void ctnl_timeout_put(struct nf_ct_timeout *t)
struct ctnl_timeout *timeout =
container_of(t, struct ctnl_timeout, timeout);
- if (refcount_dec_and_test(&timeout->refcnt))
+ if (refcount_dec_and_test(&timeout->refcnt)) {
kfree_rcu(timeout, rcu_head);
-
- module_put(THIS_MODULE);
+ module_put(THIS_MODULE);
+ }
}
static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = {
@@ -578,20 +586,36 @@ static int __net_init cttimeout_net_init(struct net *net)
struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
INIT_LIST_HEAD(&pernet->nfct_timeout_list);
+ INIT_LIST_HEAD(&pernet->nfct_timeout_freelist);
return 0;
}
+static void __net_exit cttimeout_net_pre_exit(struct net *net)
+{
+ struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
+ struct ctnl_timeout *cur, *tmp;
+
+ list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_list, head) {
+ list_del_rcu(&cur->head);
+ list_add(&cur->free_head, &pernet->nfct_timeout_freelist);
+ }
+
+ /* core calls synchronize_rcu() after this */
+}
+
static void __net_exit cttimeout_net_exit(struct net *net)
{
struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
struct ctnl_timeout *cur, *tmp;
- nf_ct_unconfirmed_destroy(net);
+ if (list_empty(&pernet->nfct_timeout_freelist))
+ return;
+
nf_ct_untimeout(net, NULL);
- list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_list, head) {
- list_del_rcu(&cur->head);
+ list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_freelist, free_head) {
+ list_del(&cur->free_head);
if (refcount_dec_and_test(&cur->refcnt))
kfree_rcu(cur, rcu_head);
@@ -600,11 +624,17 @@ static void __net_exit cttimeout_net_exit(struct net *net)
static struct pernet_operations cttimeout_ops = {
.init = cttimeout_net_init,
+ .pre_exit = cttimeout_net_pre_exit,
.exit = cttimeout_net_exit,
.id = &nfct_timeout_id,
.size = sizeof(struct nfct_timeout_pernet),
};
+static const struct nf_ct_timeout_hooks hooks = {
+ .timeout_find_get = ctnl_timeout_find_get,
+ .timeout_put = ctnl_timeout_put,
+};
+
static int __init cttimeout_init(void)
{
int ret;
@@ -619,8 +649,7 @@ static int __init cttimeout_init(void)
"nfnetlink.\n");
goto err_out;
}
- RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, ctnl_timeout_find_get);
- RCU_INIT_POINTER(nf_ct_timeout_put_hook, ctnl_timeout_put);
+ RCU_INIT_POINTER(nf_ct_timeout_hook, &hooks);
return 0;
err_out:
@@ -628,14 +657,24 @@ err_out:
return ret;
}
+static int untimeout(struct nf_conn *ct, void *timeout)
+{
+ struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct);
+
+ if (timeout_ext)
+ RCU_INIT_POINTER(timeout_ext->timeout, NULL);
+
+ return 0;
+}
+
static void __exit cttimeout_exit(void)
{
nfnetlink_subsys_unregister(&cttimeout_subsys);
unregister_pernet_subsys(&cttimeout_ops);
- RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
- RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
- synchronize_rcu();
+ RCU_INIT_POINTER(nf_ct_timeout_hook, NULL);
+
+ nf_ct_iterate_destroy(untimeout, NULL);
}
module_init(cttimeout_init);
diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
index 71e29adac48b..8120aadf6a0f 100644
--- a/net/netfilter/nfnetlink_hook.c
+++ b/net/netfilter/nfnetlink_hook.c
@@ -215,13 +215,6 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de
hook_head = rcu_dereference(net->nf.hooks_bridge[hook]);
#endif
break;
-#if IS_ENABLED(CONFIG_DECNET)
- case NFPROTO_DECNET:
- if (hook >= ARRAY_SIZE(net->nf.hooks_decnet))
- return ERR_PTR(-EINVAL);
- hook_head = rcu_dereference(net->nf.hooks_decnet[hook]);
- break;
-#endif
#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS)
case NFPROTO_NETDEV:
if (hook >= NF_NETDEV_NUMHOOKS)
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index ae9c0756bba5..d97eb280cb2e 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -460,6 +460,7 @@ __build_packet_message(struct nfnl_log_net *log,
sk_buff_data_t old_tail = inst->skb->tail;
struct sock *sk;
const unsigned char *hwhdrp;
+ ktime_t tstamp;
nlh = nfnl_msg_put(inst->skb, 0, 0,
nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET),
@@ -588,9 +589,10 @@ __build_packet_message(struct nfnl_log_net *log,
goto nla_put_failure;
}
- if (hooknum <= NF_INET_FORWARD && skb->tstamp) {
+ tstamp = skb_tstamp_cond(skb, false);
+ if (hooknum <= NF_INET_FORWARD && tstamp) {
struct nfulnl_msg_packet_timestamp ts;
- struct timespec64 kts = ktime_to_timespec64(skb->tstamp);
+ struct timespec64 kts = ktime_to_timespec64(tstamp);
ts.sec = cpu_to_be64(kts.tv_sec);
ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index 0fa2e2030427..ee6840bd5933 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -269,6 +269,7 @@ bool nf_osf_find(const struct sk_buff *skb,
struct nf_osf_hdr_ctx ctx;
const struct tcphdr *tcp;
struct tcphdr _tcph;
+ bool found = false;
memset(&ctx, 0, sizeof(ctx));
@@ -283,10 +284,11 @@ bool nf_osf_find(const struct sk_buff *skb,
data->genre = f->genre;
data->version = f->version;
+ found = true;
break;
}
- return true;
+ return found;
}
EXPORT_SYMBOL_GPL(nf_osf_find);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index ea2d9c2a44cf..87a9009d5234 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -392,6 +392,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
bool csum_verify;
char *secdata = NULL;
u32 seclen = 0;
+ ktime_t tstamp;
size = nlmsg_total_size(sizeof(struct nfgenmsg))
+ nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
@@ -402,11 +403,13 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
#endif
+ nla_total_size(sizeof(u_int32_t)) /* mark */
+ + nla_total_size(sizeof(u_int32_t)) /* priority */
+ nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
+ nla_total_size(sizeof(u_int32_t)) /* skbinfo */
+ nla_total_size(sizeof(u_int32_t)); /* cap_len */
- if (entskb->tstamp)
+ tstamp = skb_tstamp_cond(entskb, false);
+ if (tstamp)
size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
size += nfqnl_get_bridge_size(entry);
@@ -559,6 +562,10 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
nla_put_be32(skb, NFQA_MARK, htonl(entskb->mark)))
goto nla_put_failure;
+ if (entskb->priority &&
+ nla_put_be32(skb, NFQA_PRIORITY, htonl(entskb->priority)))
+ goto nla_put_failure;
+
if (indev && entskb->dev &&
skb_mac_header_was_set(entskb) &&
skb_mac_header_len(entskb) != 0) {
@@ -577,9 +584,9 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
if (nfqnl_put_bridge(entry, skb) < 0)
goto nla_put_failure;
- if (entry->state.hook <= NF_INET_FORWARD && entskb->tstamp) {
+ if (entry->state.hook <= NF_INET_FORWARD && tstamp) {
struct nfqnl_msg_packet_timestamp ts;
- struct timespec64 kts = ktime_to_timespec64(entskb->tstamp);
+ struct timespec64 kts = ktime_to_timespec64(tstamp);
ts.sec = cpu_to_be64(kts.tv_sec);
ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
@@ -710,9 +717,15 @@ static struct nf_queue_entry *
nf_queue_entry_dup(struct nf_queue_entry *e)
{
struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
- if (entry)
- nf_queue_entry_get_refs(entry);
- return entry;
+
+ if (!entry)
+ return NULL;
+
+ if (nf_queue_entry_get_refs(entry))
+ return entry;
+
+ kfree(entry);
+ return NULL;
}
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
@@ -830,11 +843,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
}
static int
-nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
+nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff)
{
struct sk_buff *nskb;
if (diff < 0) {
+ unsigned int min_len = skb_transport_offset(e->skb);
+
+ if (data_len < min_len)
+ return -EINVAL;
+
if (pskb_trim(e->skb, data_len))
return -ENOMEM;
} else if (diff > 0) {
@@ -1014,11 +1032,13 @@ static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = {
[NFQA_CT] = { .type = NLA_UNSPEC },
[NFQA_EXP] = { .type = NLA_UNSPEC },
[NFQA_VLAN] = { .type = NLA_NESTED },
+ [NFQA_PRIORITY] = { .type = NLA_U32 },
};
static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
[NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) },
[NFQA_MARK] = { .type = NLA_U32 },
+ [NFQA_PRIORITY] = { .type = NLA_U32 },
};
static struct nfqnl_instance *
@@ -1099,6 +1119,9 @@ static int nfqnl_recv_verdict_batch(struct sk_buff *skb,
if (nfqa[NFQA_MARK])
entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
+ if (nfqa[NFQA_PRIORITY])
+ entry->skb->priority = ntohl(nla_get_be32(nfqa[NFQA_PRIORITY]));
+
nfqnl_reinject(entry, verdict);
}
return 0;
@@ -1225,6 +1248,9 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
if (nfqa[NFQA_MARK])
entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
+ if (nfqa[NFQA_PRIORITY])
+ entry->skb->priority = ntohl(nla_get_be32(nfqa[NFQA_PRIORITY]));
+
nfqnl_reinject(entry, verdict);
return 0;
}
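NFQA_PRIORITY is new ABI: a verdict (single or batched) may now carry the attribute to rewrite skb->priority on reinject, mirroring what NFQA_MARK already did for skb->mark. A hedged userspace sketch using libmnl plus libnetfilter_queue (signatures as assumed; error handling omitted):

	#include <arpa/inet.h>
	#include <libmnl/libmnl.h>
	#include <linux/netfilter.h>
	#include <libnetfilter_queue/libnetfilter_queue.h>

	static void accept_with_prio(struct mnl_socket *nl, uint32_t queue_num,
				     uint32_t packet_id, uint32_t prio)
	{
		char buf[MNL_SOCKET_BUFFER_SIZE];
		struct nlmsghdr *nlh;

		nlh = nfq_nlmsg_put(buf, NFQNL_MSG_VERDICT, queue_num);
		nfq_nlmsg_verdict_put(nlh, packet_id, NF_ACCEPT);
		/* kernel reads this via ntohl(nla_get_be32(...)) */
		mnl_attr_put_u32(nlh, NFQA_PRIORITY, htonl(prio));
		mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
	}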
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 7b727d3ebf9d..e6e402b247d0 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -30,7 +30,7 @@ static void nft_bitwise_eval_bool(u32 *dst, const u32 *src,
{
unsigned int i;
- for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++)
+ for (i = 0; i < DIV_ROUND_UP(priv->len, sizeof(u32)); i++)
dst[i] = (src[i] & priv->mask.data[i]) ^ priv->xor.data[i];
}
@@ -93,7 +93,16 @@ static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
static int nft_bitwise_init_bool(struct nft_bitwise *priv,
const struct nlattr *const tb[])
{
- struct nft_data_desc mask, xor;
+ struct nft_data_desc mask = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->mask),
+ .len = priv->len,
+ };
+ struct nft_data_desc xor = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->xor),
+ .len = priv->len,
+ };
int err;
if (tb[NFTA_BITWISE_DATA])
@@ -103,36 +112,30 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv,
!tb[NFTA_BITWISE_XOR])
return -EINVAL;
- err = nft_data_init(NULL, &priv->mask, sizeof(priv->mask), &mask,
- tb[NFTA_BITWISE_MASK]);
+ err = nft_data_init(NULL, &priv->mask, &mask, tb[NFTA_BITWISE_MASK]);
if (err < 0)
return err;
- if (mask.type != NFT_DATA_VALUE || mask.len != priv->len) {
- err = -EINVAL;
- goto err1;
- }
- err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &xor,
- tb[NFTA_BITWISE_XOR]);
+ err = nft_data_init(NULL, &priv->xor, &xor, tb[NFTA_BITWISE_XOR]);
if (err < 0)
- goto err1;
- if (xor.type != NFT_DATA_VALUE || xor.len != priv->len) {
- err = -EINVAL;
- goto err2;
- }
+ goto err_xor_err;
return 0;
-err2:
- nft_data_release(&priv->xor, xor.type);
-err1:
+
+err_xor_err:
nft_data_release(&priv->mask, mask.type);
+
return err;
}
static int nft_bitwise_init_shift(struct nft_bitwise *priv,
const struct nlattr *const tb[])
{
- struct nft_data_desc d;
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->data),
+ .len = sizeof(u32),
+ };
int err;
if (tb[NFTA_BITWISE_MASK] ||
@@ -142,13 +145,12 @@ static int nft_bitwise_init_shift(struct nft_bitwise *priv,
if (!tb[NFTA_BITWISE_DATA])
return -EINVAL;
- err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &d,
- tb[NFTA_BITWISE_DATA]);
+ err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_BITWISE_DATA]);
if (err < 0)
return err;
- if (d.type != NFT_DATA_VALUE || d.len != sizeof(u32) ||
- priv->data.data[0] >= BITS_PER_TYPE(u32)) {
- nft_data_release(&priv->data, d.type);
+
+ if (priv->data.data[0] >= BITS_PER_TYPE(u32)) {
+ nft_data_release(&priv->data, desc.type);
return -EINVAL;
}
@@ -283,12 +285,16 @@ static bool nft_bitwise_reduce(struct nft_regs_track *track,
{
const struct nft_bitwise *priv = nft_expr_priv(expr);
const struct nft_bitwise *bitwise;
+ unsigned int regcount;
+ u8 dreg;
+ int i;
if (!track->regs[priv->sreg].selector)
return false;
- bitwise = nft_expr_priv(expr);
+ bitwise = nft_expr_priv(track->regs[priv->dreg].selector);
if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector &&
+ track->regs[priv->sreg].num_reg == 0 &&
track->regs[priv->dreg].bitwise &&
track->regs[priv->dreg].bitwise->ops == expr->ops &&
priv->sreg == bitwise->sreg &&
@@ -302,17 +308,21 @@ static bool nft_bitwise_reduce(struct nft_regs_track *track,
return true;
}
- if (track->regs[priv->sreg].bitwise) {
- track->regs[priv->dreg].selector = NULL;
- track->regs[priv->dreg].bitwise = NULL;
+ if (track->regs[priv->sreg].bitwise ||
+ track->regs[priv->sreg].num_reg != 0) {
+ nft_reg_track_cancel(track, priv->dreg, priv->len);
return false;
}
if (priv->sreg != priv->dreg) {
- track->regs[priv->dreg].selector =
- track->regs[priv->sreg].selector;
+ nft_reg_track_update(track, track->regs[priv->sreg].selector,
+ priv->dreg, priv->len);
}
- track->regs[priv->dreg].bitwise = expr;
+
+ dreg = priv->dreg;
+ regcount = DIV_ROUND_UP(priv->len, NFT_REG32_SIZE);
+ for (i = 0; i < regcount; i++, dreg++)
+ track->regs[dreg].bitwise = expr;
return false;
}
@@ -330,22 +340,21 @@ static const struct nft_expr_ops nft_bitwise_ops = {
static int
nft_bitwise_extract_u32_data(const struct nlattr * const tb, u32 *out)
{
- struct nft_data_desc desc;
struct nft_data data;
- int err = 0;
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(data),
+ .len = sizeof(u32),
+ };
+ int err;
- err = nft_data_init(NULL, &data, sizeof(data), &desc, tb);
+ err = nft_data_init(NULL, &data, &desc, tb);
if (err < 0)
return err;
- if (desc.type != NFT_DATA_VALUE || desc.len != sizeof(u32)) {
- err = -EINVAL;
- goto err;
- }
*out = data.data[0];
-err:
- nft_data_release(&data, desc.type);
- return err;
+
+ return 0;
}
static int nft_bitwise_fast_init(const struct nft_ctx *ctx,
@@ -434,7 +443,7 @@ static bool nft_bitwise_fast_reduce(struct nft_regs_track *track,
if (!track->regs[priv->sreg].selector)
return false;
- bitwise = nft_expr_priv(expr);
+ bitwise = nft_expr_priv(track->regs[priv->dreg].selector);
if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector &&
track->regs[priv->dreg].bitwise &&
track->regs[priv->dreg].bitwise->ops == expr->ops &&
@@ -447,8 +456,7 @@ static bool nft_bitwise_fast_reduce(struct nft_regs_track *track,
}
if (track->regs[priv->sreg].bitwise) {
- track->regs[priv->dreg].selector = NULL;
- track->regs[priv->dreg].bitwise = NULL;
+ nft_reg_track_cancel(track, priv->dreg, NFT_REG32_SIZE);
return false;
}
@@ -522,3 +530,4 @@ bool nft_expr_reduce_bitwise(struct nft_regs_track *track,
return false;
}
+EXPORT_SYMBOL_GPL(nft_expr_reduce_bitwise);
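The nft_data_init() conversions in this file (and in nft_cmp and nft_immediate below) all follow the same new calling convention: the caller pre-fills struct nft_data_desc with the expected type, the destination buffer size and, where known, the exact length, and nft_data_init() does the checking and the release-on-mismatch centrally. That is what lets every expression drop its hand-rolled "if (desc.type != NFT_DATA_VALUE ...) { nft_data_release(); return -EINVAL; }" tail. Caller sketch, field meanings as inferred from the hunks above:

	struct nft_data_desc desc = {
		.type = NFT_DATA_VALUE,		/* refuse verdicts   */
		.size = sizeof(priv->mask),	/* buffer capacity   */
		.len  = priv->len,		/* exact length      */
	};
	int err;

	err = nft_data_init(NULL, &priv->mask, &desc, tb[NFTA_BITWISE_MASK]);
	if (err < 0)
		return err;	/* mismatches already released inside */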
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index 9d5947ab8d4e..f952a80275a8 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -44,7 +44,8 @@ void nft_byteorder_eval(const struct nft_expr *expr,
case NFT_BYTEORDER_NTOH:
for (i = 0; i < priv->len / 8; i++) {
src64 = nft_reg_load64(&src[i]);
- nft_reg_store64(&dst[i], be64_to_cpu(src64));
+ nft_reg_store64(&dst[i],
+ be64_to_cpu((__force __be64)src64));
}
break;
case NFT_BYTEORDER_HTON:
@@ -167,12 +168,23 @@ nla_put_failure:
return -1;
}
+static bool nft_byteorder_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ struct nft_byteorder *priv = nft_expr_priv(expr);
+
+ nft_reg_track_cancel(track, priv->dreg, priv->len);
+
+ return false;
+}
+
static const struct nft_expr_ops nft_byteorder_ops = {
.type = &nft_byteorder_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)),
.eval = nft_byteorder_eval,
.init = nft_byteorder_init,
.dump = nft_byteorder_dump,
+ .reduce = nft_byteorder_reduce,
};
struct nft_expr_type nft_byteorder_type __read_mostly = {
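nft_byteorder always overwrites its destination register, so its new ->reduce simply cancels whatever was tracked there. The contract these patches establish for ->reduce callbacks, as a generic sketch (struct nft_foo and priv->len are placeholders):

	static bool foo_reduce(struct nft_regs_track *track,
			       const struct nft_expr *expr)
	{
		const struct nft_foo *priv = nft_expr_priv(expr);

		if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
			/* record what dreg will hold from now on */
			nft_reg_track_update(track, expr, priv->dreg, priv->len);
			return false;
		}
		/* dreg already holds this selector: expr is redundant */
		return true;
	}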
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 47b6d05f1ae6..963cf831799c 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -73,20 +73,16 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_cmp_expr *priv = nft_expr_priv(expr);
- struct nft_data_desc desc;
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->data),
+ };
int err;
- err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc,
- tb[NFTA_CMP_DATA]);
+ err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
if (err < 0)
return err;
- if (desc.type != NFT_DATA_VALUE) {
- err = -EINVAL;
- nft_data_release(&priv->data, desc.type);
- return err;
- }
-
err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
if (err < 0)
return err;
@@ -125,13 +121,13 @@ static void nft_payload_n2h(union nft_cmp_offload_data *data,
{
switch (len) {
case 2:
- data->val16 = ntohs(*((u16 *)val));
+ data->val16 = ntohs(*((__be16 *)val));
break;
case 4:
- data->val32 = ntohl(*((u32 *)val));
+ data->val32 = ntohl(*((__be32 *)val));
break;
case 8:
- data->val64 = be64_to_cpu(*((u64 *)val));
+ data->val64 = be64_to_cpu(*((__be64 *)val));
break;
default:
WARN_ON_ONCE(1);
@@ -193,20 +189,35 @@ static const struct nft_expr_ops nft_cmp_ops = {
.eval = nft_cmp_eval,
.init = nft_cmp_init,
.dump = nft_cmp_dump,
+ .reduce = NFT_REDUCE_READONLY,
.offload = nft_cmp_offload,
};
+/* Calculate the mask for the nft_cmp_fast expression. On big endian the
+ * mask needs to include the *upper* bytes when interpreting that data as
+ * something smaller than the full u32, therefore a cpu_to_le32 is done.
+ */
+static u32 nft_cmp_fast_mask(unsigned int len)
+{
+ __le32 mask = cpu_to_le32(~0U >> (sizeof_field(struct nft_cmp_fast_expr,
+ data) * BITS_PER_BYTE - len));
+
+ return (__force u32)mask;
+}
+
static int nft_cmp_fast_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
- struct nft_data_desc desc;
struct nft_data data;
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(data),
+ };
int err;
- err = nft_data_init(NULL, &data, sizeof(data), &desc,
- tb[NFTA_CMP_DATA]);
+ err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
if (err < 0)
return err;
@@ -269,15 +280,113 @@ const struct nft_expr_ops nft_cmp_fast_ops = {
.eval = NULL, /* inlined */
.init = nft_cmp_fast_init,
.dump = nft_cmp_fast_dump,
+ .reduce = NFT_REDUCE_READONLY,
.offload = nft_cmp_fast_offload,
};
+static u32 nft_cmp_mask(u32 bitlen)
+{
+ return (__force u32)cpu_to_le32(~0U >> (sizeof(u32) * BITS_PER_BYTE - bitlen));
+}
+
+static void nft_cmp16_fast_mask(struct nft_data *data, unsigned int bitlen)
+{
+ int len = bitlen / BITS_PER_BYTE;
+ int i, words = len / sizeof(u32);
+
+ for (i = 0; i < words; i++) {
+ data->data[i] = 0xffffffff;
+ bitlen -= sizeof(u32) * BITS_PER_BYTE;
+ }
+
+ if (len % sizeof(u32))
+ data->data[i++] = nft_cmp_mask(bitlen);
+
+ for (; i < 4; i++)
+ data->data[i] = 0;
+}
+
+static int nft_cmp16_fast_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->data),
+ };
+ int err;
+
+ err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
+ if (err < 0)
+ return err;
+
+ err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
+ if (err < 0)
+ return err;
+
+ nft_cmp16_fast_mask(&priv->mask, desc.len * BITS_PER_BYTE);
+ priv->inv = ntohl(nla_get_be32(tb[NFTA_CMP_OP])) != NFT_CMP_EQ;
+ priv->len = desc.len;
+
+ return 0;
+}
+
+static int nft_cmp16_fast_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr)
+{
+ const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
+ struct nft_cmp_expr cmp = {
+ .data = priv->data,
+ .sreg = priv->sreg,
+ .len = priv->len,
+ .op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ,
+ };
+
+ return __nft_cmp_offload(ctx, flow, &cmp);
+}
+
+static int nft_cmp16_fast_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
+ enum nft_cmp_ops op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ;
+
+ if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_CMP_OP, htonl(op)))
+ goto nla_put_failure;
+
+ if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data,
+ NFT_DATA_VALUE, priv->len) < 0)
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+const struct nft_expr_ops nft_cmp16_fast_ops = {
+ .type = &nft_cmp_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp16_fast_expr)),
+ .eval = NULL, /* inlined */
+ .init = nft_cmp16_fast_init,
+ .dump = nft_cmp16_fast_dump,
+ .reduce = NFT_REDUCE_READONLY,
+ .offload = nft_cmp16_fast_offload,
+};
+
static const struct nft_expr_ops *
nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
{
- struct nft_data_desc desc;
struct nft_data data;
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(data),
+ };
enum nft_cmp_ops op;
+ u8 sreg;
int err;
if (tb[NFTA_CMP_SREG] == NULL ||
@@ -298,21 +407,21 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
return ERR_PTR(-EINVAL);
}
- err = nft_data_init(NULL, &data, sizeof(data), &desc,
- tb[NFTA_CMP_DATA]);
+ err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
if (err < 0)
return ERR_PTR(err);
- if (desc.type != NFT_DATA_VALUE)
- goto err1;
-
- if (desc.len <= sizeof(u32) && (op == NFT_CMP_EQ || op == NFT_CMP_NEQ))
- return &nft_cmp_fast_ops;
+ sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
+ if (op == NFT_CMP_EQ || op == NFT_CMP_NEQ) {
+ if (desc.len <= sizeof(u32))
+ return &nft_cmp_fast_ops;
+ else if (desc.len <= sizeof(data) &&
+ ((sreg >= NFT_REG_1 && sreg <= NFT_REG_4) ||
+ (sreg >= NFT_REG32_00 && sreg <= NFT_REG32_12 && sreg % 2 == 0)))
+ return &nft_cmp16_fast_ops;
+ }
return &nft_cmp_ops;
-err1:
- nft_data_release(&data, desc.type);
- return ERR_PTR(-EINVAL);
}
struct nft_expr_type nft_cmp_type __read_mostly = {
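A worked example of the mask layout built by nft_cmp16_fast_mask(), for a 6-byte (48-bit) comparison on a little-endian host:

	/* bitlen = 48: one full 32-bit word, then 16 bits remain */
	data->data[0] = 0xffffffff;		/* full word */
	data->data[1] = nft_cmp_mask(16);	/* cpu_to_le32(~0U >> 16),
						 * i.e. 0x0000ffff on LE */
	data->data[2] = 0;
	data->data[3] = 0;
	/* eval then tests (reg & mask) == data, two u64 words at a time */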
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index f69cc73c5813..c16172427622 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -731,6 +731,14 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = {
static struct nft_expr_type nft_match_type;
+static bool nft_match_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct xt_match *match = expr->ops->data;
+
+ return strcmp(match->name, "comment") == 0;
+}
+
static const struct nft_expr_ops *
nft_match_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -773,6 +781,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
ops->dump = nft_match_dump;
ops->validate = nft_match_validate;
ops->data = match;
+ ops->reduce = nft_match_reduce;
matchsize = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
if (matchsize > NFT_MATCH_LARGE_THRESH) {
@@ -862,6 +871,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
ops->dump = nft_target_dump;
ops->validate = nft_target_validate;
ops->data = target;
+ ops->reduce = NFT_REDUCE_READONLY;
if (family == NFPROTO_BRIDGE)
ops->eval = nft_target_eval_bridge;
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index 58dcafe8bf79..d657f999a11b 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -62,6 +62,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
{
bool invert = false;
u32 flags, limit;
+ int err;
if (!tb[NFTA_CONNLIMIT_COUNT])
return -EINVAL;
@@ -76,7 +77,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
invert = true;
}
- priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL);
+ priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL_ACCOUNT);
if (!priv->list)
return -ENOMEM;
@@ -84,7 +85,15 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
priv->limit = limit;
priv->invert = invert;
- return nf_ct_netns_get(ctx->net, ctx->family);
+ err = nf_ct_netns_get(ctx->net, ctx->family);
+ if (err < 0)
+ goto err_netns;
+
+ return 0;
+err_netns:
+ kfree(priv->list);
+
+ return err;
}
static void nft_connlimit_do_destroy(const struct nft_ctx *ctx,
@@ -206,7 +215,7 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
struct nft_connlimit *priv_src = nft_expr_priv(src);
priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
- if (priv_dst->list)
+ if (!priv_dst->list)
return -ENOMEM;
nf_conncount_list_init(priv_dst->list);
@@ -248,6 +257,7 @@ static const struct nft_expr_ops nft_connlimit_ops = {
.destroy_clone = nft_connlimit_destroy_clone,
.dump = nft_connlimit_dump,
.gc = nft_connlimit_gc,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_connlimit_type __read_mostly = {
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index f179e8c3b0ca..f4d3573e8782 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -62,7 +62,7 @@ static int nft_counter_do_init(const struct nlattr * const tb[],
struct nft_counter __percpu *cpu_stats;
struct nft_counter *this_cpu;
- cpu_stats = alloc_percpu(struct nft_counter);
+ cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_KERNEL_ACCOUNT);
if (cpu_stats == NULL)
return -ENOMEM;
@@ -293,6 +293,7 @@ static const struct nft_expr_ops nft_counter_ops = {
.destroy_clone = nft_counter_destroy,
.dump = nft_counter_dump,
.clone = nft_counter_clone,
+ .reduce = NFT_REDUCE_READONLY,
.offload = nft_counter_offload,
.offload_stats = nft_counter_offload_stats,
};
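GFP_KERNEL_ACCOUNT here (and in the nft_connlimit, nft_last and nft_limit hunks in this series) charges state allocated at rule-insert time to the inserting task's memory cgroup, so a container cannot grow unbounded, unaccounted kernel memory by adding stateful expressions. The pattern, sketched with an illustrative field name:

	/* charged to the inserting task's memcg via __GFP_ACCOUNT */
	priv->stats = alloc_percpu_gfp(struct nft_counter, GFP_KERNEL_ACCOUNT);
	if (!priv->stats)
		return -ENOMEM;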
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 518d96c8c247..a3f01f209a53 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -26,6 +26,7 @@
struct nft_ct {
enum nft_ct_keys key:8;
enum ip_conntrack_dir dir:8;
+ u8 len;
union {
u8 dreg;
u8 sreg;
@@ -203,12 +204,12 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
case NFT_CT_SRC_IP:
if (nf_ct_l3num(ct) != NFPROTO_IPV4)
goto err;
- *dest = tuple->src.u3.ip;
+ *dest = (__force __u32)tuple->src.u3.ip;
return;
case NFT_CT_DST_IP:
if (nf_ct_l3num(ct) != NFPROTO_IPV4)
goto err;
- *dest = tuple->dst.u3.ip;
+ *dest = (__force __u32)tuple->dst.u3.ip;
return;
case NFT_CT_SRC_IP6:
if (nf_ct_l3num(ct) != NFPROTO_IPV6)
@@ -260,9 +261,12 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
ct = this_cpu_read(nft_ct_pcpu_template);
if (likely(refcount_read(&ct->ct_general.use) == 1)) {
+ refcount_inc(&ct->ct_general.use);
nf_ct_zone_add(ct, &zone);
} else {
- /* previous skb got queued to userspace */
+ /* previous skb got queued to userspace, allocate temporary
+ * one until percpu template can be reused.
+ */
ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC);
if (!ct) {
regs->verdict.code = NF_DROP;
@@ -497,6 +501,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
}
}
+ priv->len = len;
err = nft_parse_register_store(ctx, tb[NFTA_CT_DREG], &priv->dreg, NULL,
NFT_DATA_VALUE, len);
if (err < 0)
@@ -605,6 +610,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
}
}
+ priv->len = len;
err = nft_parse_register_load(tb[NFTA_CT_SREG], &priv->sreg, len);
if (err < 0)
goto err1;
@@ -674,6 +680,29 @@ nla_put_failure:
return -1;
}
+static bool nft_ct_get_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_ct *priv = nft_expr_priv(expr);
+ const struct nft_ct *ct;
+
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ ct = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->key != ct->key) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return nft_expr_reduce_bitwise(track, expr);
+}
+
static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_ct *priv = nft_expr_priv(expr);
@@ -707,8 +736,27 @@ static const struct nft_expr_ops nft_ct_get_ops = {
.init = nft_ct_get_init,
.destroy = nft_ct_get_destroy,
.dump = nft_ct_get_dump,
+ .reduce = nft_ct_get_reduce,
};
+static bool nft_ct_set_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ int i;
+
+ for (i = 0; i < NFT_REG32_NUM; i++) {
+ if (!track->regs[i].selector)
+ continue;
+
+ if (track->regs[i].selector->ops != &nft_ct_get_ops)
+ continue;
+
+ __nft_reg_track_cancel(track, i);
+ }
+
+ return false;
+}
+
static const struct nft_expr_ops nft_ct_set_ops = {
.type = &nft_ct_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
@@ -716,6 +764,7 @@ static const struct nft_expr_ops nft_ct_set_ops = {
.init = nft_ct_set_init,
.destroy = nft_ct_set_destroy,
.dump = nft_ct_set_dump,
+ .reduce = nft_ct_set_reduce,
};
#ifdef CONFIG_NF_CONNTRACK_ZONES
@@ -726,6 +775,7 @@ static const struct nft_expr_ops nft_ct_set_zone_ops = {
.init = nft_ct_set_init,
.destroy = nft_ct_set_destroy,
.dump = nft_ct_set_dump,
+ .reduce = nft_ct_set_reduce,
};
#endif
@@ -782,6 +832,7 @@ static const struct nft_expr_ops nft_notrack_ops = {
.type = &nft_notrack_type,
.size = NFT_EXPR_SIZE(0),
.eval = nft_notrack_eval,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_notrack_type __read_mostly = {
diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
index bbf3fcba3df4..63507402716d 100644
--- a/net/netfilter/nft_dup_netdev.c
+++ b/net/netfilter/nft_dup_netdev.c
@@ -67,6 +67,11 @@ static int nft_dup_netdev_offload(struct nft_offload_ctx *ctx,
return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_MIRRED, oif);
}
+static bool nft_dup_netdev_offload_action(const struct nft_expr *expr)
+{
+ return true;
+}
+
static struct nft_expr_type nft_dup_netdev_type;
static const struct nft_expr_ops nft_dup_netdev_ops = {
.type = &nft_dup_netdev_type,
@@ -74,7 +79,9 @@ static const struct nft_expr_ops nft_dup_netdev_ops = {
.eval = nft_dup_netdev_eval,
.init = nft_dup_netdev_init,
.dump = nft_dup_netdev_dump,
+ .reduce = NFT_REDUCE_READONLY,
.offload = nft_dup_netdev_offload,
+ .offload_action = nft_dup_netdev_offload_action,
};
static struct nft_expr_type nft_dup_netdev_type __read_mostly = {
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 87f3af4645d9..6983e6ddeef9 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -60,7 +60,7 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
&regs->data[priv->sreg_key], NULL,
&regs->data[priv->sreg_data],
timeout, 0, GFP_ATOMIC);
- if (elem == NULL)
+ if (IS_ERR(elem))
goto err1;
ext = nft_set_elem_ext(set, elem);
@@ -413,6 +413,7 @@ static const struct nft_expr_ops nft_dynset_ops = {
.activate = nft_dynset_activate,
.deactivate = nft_dynset_deactivate,
.dump = nft_dynset_dump,
+ .reduce = NFT_REDUCE_READONLY,
};
struct nft_expr_type nft_dynset_type __read_mostly = {
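The dynset fix is the classic ERR_PTR mix-up: nft_set_elem_init() reports failure as an error pointer, which is non-NULL, so the old `elem == NULL` test never fired and the error pointer flowed on into nft_set_elem_ext(). The general rule, sketched with a hypothetical callee:

	void *obj = alloc_returning_err_ptr();	/* hypothetical */

	if (IS_ERR(obj))
		return PTR_ERR(obj);	/* not: if (!obj) */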
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index dbe1f2e7dd9e..a67ea9c3ae57 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -167,7 +167,7 @@ nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
{
struct tcphdr *tcph;
- if (pkt->tprot != IPPROTO_TCP)
+ if (pkt->tprot != IPPROTO_TCP || pkt->fragoff)
return NULL;
tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer);
@@ -266,7 +266,7 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
switch (priv->len) {
case 2:
- old.v16 = get_unaligned((u16 *)(opt + offset));
+ old.v16 = (__force __be16)get_unaligned((u16 *)(opt + offset));
new.v16 = (__force __be16)nft_reg_load16(
&regs->data[priv->sreg]);
@@ -281,18 +281,18 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
if (old.v16 == new.v16)
return;
- put_unaligned(new.v16, (u16*)(opt + offset));
+ put_unaligned(new.v16, (__be16*)(opt + offset));
inet_proto_csum_replace2(&tcph->check, pkt->skb,
old.v16, new.v16, false);
break;
case 4:
- new.v32 = regs->data[priv->sreg];
- old.v32 = get_unaligned((u32 *)(opt + offset));
+ new.v32 = nft_reg_load_be32(&regs->data[priv->sreg]);
+ old.v32 = (__force __be32)get_unaligned((u32 *)(opt + offset));
if (old.v32 == new.v32)
return;
- put_unaligned(new.v32, (u32*)(opt + offset));
+ put_unaligned(new.v32, (__be32*)(opt + offset));
inet_proto_csum_replace4(&tcph->check, pkt->skb,
old.v32, new.v32, false);
break;
@@ -308,6 +308,63 @@ err:
regs->verdict.code = NFT_BREAK;
}
+static void nft_exthdr_tcp_strip_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ unsigned int i, tcphdr_len, optl;
+ struct tcphdr *tcph;
+ u8 *opt;
+
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
+ if (!tcph)
+ goto err;
+
+ if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
+ goto drop;
+
+ opt = (u8 *)nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
+ if (!opt)
+ goto err;
+ for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
+ unsigned int j;
+
+ optl = optlen(opt, i);
+ if (priv->type != opt[i])
+ continue;
+
+ if (i + optl > tcphdr_len)
+ goto drop;
+
+ for (j = 0; j < optl; ++j) {
+ u16 n = TCPOPT_NOP;
+ u16 o = opt[i+j];
+
+ if ((i + j) % 2 == 0) {
+ o <<= 8;
+ n <<= 8;
+ }
+ inet_proto_csum_replace2(&tcph->check, pkt->skb, htons(o),
+ htons(n), false);
+ }
+ memset(opt + i, TCPOPT_NOP, optl);
+ return;
+ }
+
+ /* option not found, continue. This allows multiple
+ * option removals per rule.
+ */
+ return;
+err:
+ regs->verdict.code = NFT_BREAK;
+ return;
+drop:
+ /* can't remove, no choice but to drop */
+ regs->verdict.code = NF_DROP;
+}
+
static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -457,6 +514,28 @@ static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
priv->len);
}
+static int nft_exthdr_tcp_strip_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+
+ if (tb[NFTA_EXTHDR_SREG] ||
+ tb[NFTA_EXTHDR_DREG] ||
+ tb[NFTA_EXTHDR_FLAGS] ||
+ tb[NFTA_EXTHDR_OFFSET] ||
+ tb[NFTA_EXTHDR_LEN])
+ return -EINVAL;
+
+ if (!tb[NFTA_EXTHDR_TYPE])
+ return -EINVAL;
+
+ priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
+ priv->op = NFT_EXTHDR_OP_TCPOPT;
+
+ return 0;
+}
+
static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -517,12 +596,47 @@ static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr)
return nft_exthdr_dump_common(skb, priv);
}
+static int nft_exthdr_dump_strip(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+ return nft_exthdr_dump_common(skb, priv);
+}
+
+static bool nft_exthdr_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_exthdr *priv = nft_expr_priv(expr);
+ const struct nft_exthdr *exthdr;
+
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ exthdr = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->type != exthdr->type ||
+ priv->op != exthdr->op ||
+ priv->flags != exthdr->flags ||
+ priv->offset != exthdr->offset ||
+ priv->len != exthdr->len) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return nft_expr_reduce_bitwise(track, expr);
+}
+
static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
.type = &nft_exthdr_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
.eval = nft_exthdr_ipv6_eval,
.init = nft_exthdr_init,
.dump = nft_exthdr_dump,
+ .reduce = nft_exthdr_reduce,
};
static const struct nft_expr_ops nft_exthdr_ipv4_ops = {
@@ -531,6 +645,7 @@ static const struct nft_expr_ops nft_exthdr_ipv4_ops = {
.eval = nft_exthdr_ipv4_eval,
.init = nft_exthdr_ipv4_init,
.dump = nft_exthdr_dump,
+ .reduce = nft_exthdr_reduce,
};
static const struct nft_expr_ops nft_exthdr_tcp_ops = {
@@ -539,6 +654,7 @@ static const struct nft_expr_ops nft_exthdr_tcp_ops = {
.eval = nft_exthdr_tcp_eval,
.init = nft_exthdr_init,
.dump = nft_exthdr_dump,
+ .reduce = nft_exthdr_reduce,
};
static const struct nft_expr_ops nft_exthdr_tcp_set_ops = {
@@ -547,6 +663,16 @@ static const struct nft_expr_ops nft_exthdr_tcp_set_ops = {
.eval = nft_exthdr_tcp_set_eval,
.init = nft_exthdr_tcp_set_init,
.dump = nft_exthdr_dump_set,
+ .reduce = NFT_REDUCE_READONLY,
+};
+
+static const struct nft_expr_ops nft_exthdr_tcp_strip_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_tcp_strip_eval,
+ .init = nft_exthdr_tcp_strip_init,
+ .dump = nft_exthdr_dump_strip,
+ .reduce = NFT_REDUCE_READONLY,
};
static const struct nft_expr_ops nft_exthdr_sctp_ops = {
@@ -555,6 +681,7 @@ static const struct nft_expr_ops nft_exthdr_sctp_ops = {
.eval = nft_exthdr_sctp_eval,
.init = nft_exthdr_init,
.dump = nft_exthdr_dump,
+ .reduce = nft_exthdr_reduce,
};
static const struct nft_expr_ops *
@@ -576,7 +703,7 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
return &nft_exthdr_tcp_set_ops;
if (tb[NFTA_EXTHDR_DREG])
return &nft_exthdr_tcp_ops;
- break;
+ return &nft_exthdr_tcp_strip_ops;
case NFT_EXTHDR_OP_IPV6:
if (tb[NFTA_EXTHDR_DREG])
return &nft_exthdr_ipv6_ops;
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index b10ce732b337..1f12d7ade606 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -35,6 +35,10 @@ int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
case NFT_FIB_RESULT_OIF:
case NFT_FIB_RESULT_OIFNAME:
hooks = (1 << NF_INET_PRE_ROUTING);
+ if (priv->flags & NFTA_FIB_F_IIF) {
+ hooks |= (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD);
+ }
break;
case NFT_FIB_RESULT_ADDRTYPE:
if (priv->flags & NFTA_FIB_F_IIF)
@@ -156,5 +160,47 @@ void nft_fib_store_result(void *reg, const struct nft_fib *priv,
}
EXPORT_SYMBOL_GPL(nft_fib_store_result);
+bool nft_fib_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+ unsigned int len = NFT_REG32_SIZE;
+ const struct nft_fib *fib;
+
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ break;
+ case NFT_FIB_RESULT_OIFNAME:
+ if (priv->flags & NFTA_FIB_F_PRESENT)
+ len = NFT_REG32_SIZE;
+ else
+ len = IFNAMSIZ;
+ break;
+ case NFT_FIB_RESULT_ADDRTYPE:
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, len);
+ return false;
+ }
+
+ fib = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->result != fib->result ||
+ priv->flags != fib->flags) {
+ nft_reg_track_update(track, expr, priv->dreg, len);
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(nft_fib_reduce);
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
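Extending the valid hooks for NFTA_FIB_F_IIF with OIF/OIFNAME results means the classic strict reverse-path-filter rule is no longer confined to prerouting; assuming standard nft syntax, something like

	nft add rule inet filter forward fib saddr . iif oif missing drop

can now sit in the forward (or input) hook and drop packets whose source address is not routable back through the interface they arrived on.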
diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c
index a88d44e163d1..666a3741d20b 100644
--- a/net/netfilter/nft_fib_inet.c
+++ b/net/netfilter/nft_fib_inet.c
@@ -49,6 +49,7 @@ static const struct nft_expr_ops nft_fib_inet_ops = {
.init = nft_fib_init,
.dump = nft_fib_dump,
.validate = nft_fib_validate,
+ .reduce = nft_fib_reduce,
};
static struct nft_expr_type nft_fib_inet_type __read_mostly = {
diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c
index 3f3478abd845..9121ec64e918 100644
--- a/net/netfilter/nft_fib_netdev.c
+++ b/net/netfilter/nft_fib_netdev.c
@@ -58,6 +58,7 @@ static const struct nft_expr_ops nft_fib_netdev_ops = {
.init = nft_fib_init,
.dump = nft_fib_dump,
.validate = nft_fib_validate,
+ .reduce = nft_fib_reduce,
};
static struct nft_expr_type nft_fib_netdev_type __read_mostly = {
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 0af34ad41479..a25c88bc8b75 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -36,6 +36,15 @@ static void nft_default_forward_path(struct nf_flow_route *route,
route->tuple[dir].xmit_type = nft_xmit_type(dst_cache);
}
+static bool nft_is_valid_ether_device(const struct net_device *dev)
+{
+ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
+ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
+ return false;
+
+ return true;
+}
+
static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
const struct dst_entry *dst_cache,
const struct nf_conn *ct,
@@ -47,6 +56,9 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
struct neighbour *n;
u8 nud_state;
+ if (!nft_is_valid_ether_device(dev))
+ goto out;
+
n = dst_neigh_lookup(dst_cache, daddr);
if (!n)
return -1;
@@ -60,6 +72,7 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
if (!(nud_state & NUD_VALID))
return -1;
+out:
return dev_fill_forward_path(dev, ha, stack);
}
@@ -78,15 +91,6 @@ struct nft_forward_info {
enum flow_offload_xmit_type xmit_type;
};
-static bool nft_is_valid_ether_device(const struct net_device *dev)
-{
- if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
- dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
- return false;
-
- return true;
-}
-
static void nft_dev_path_info(const struct net_device_path_stack *stack,
struct nft_forward_info *info,
unsigned char *ha, struct nf_flowtable *flowtable)
@@ -119,7 +123,8 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
info->indev = NULL;
break;
}
- info->outdev = path->dev;
+ if (!info->outdev)
+ info->outdev = path->dev;
info->encap[info->num_encaps].id = path->encap.id;
info->encap[info->num_encaps].proto = path->encap.proto;
info->num_encaps++;
@@ -227,11 +232,21 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
switch (nft_pf(pkt)) {
case NFPROTO_IPV4:
fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
+ fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip;
fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
+ fl.u.ip4.flowi4_iif = this_dst->dev->ifindex;
+ fl.u.ip4.flowi4_tos = RT_TOS(ip_hdr(pkt->skb)->tos);
+ fl.u.ip4.flowi4_mark = pkt->skb->mark;
+ fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
break;
case NFPROTO_IPV6:
fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.src.u3.in6;
fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex;
+ fl.u.ip6.flowi6_iif = this_dst->dev->ifindex;
+ fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb));
+ fl.u.ip6.flowi6_mark = pkt->skb->mark;
+ fl.u.ip6.flowi6_flags = FLOWI_FLAG_ANYSRC;
break;
}
@@ -293,11 +308,25 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
case IPPROTO_TCP:
tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt),
sizeof(_tcph), &_tcph);
- if (unlikely(!tcph || tcph->fin || tcph->rst))
+ if (unlikely(!tcph || tcph->fin || tcph->rst ||
+ !nf_conntrack_tcp_established(ct)))
goto out;
break;
case IPPROTO_UDP:
break;
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ case IPPROTO_GRE: {
+ struct nf_conntrack_tuple *tuple;
+
+ if (ct->status & IPS_NAT_MASK)
+ goto out;
+ tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ /* No support for GRE v1 */
+ if (tuple->src.u.gre.key || tuple->dst.u.gre.key)
+ goto out;
+ break;
+ }
+#endif
default:
goto out;
}
@@ -428,6 +457,7 @@ static const struct nft_expr_ops nft_flow_offload_ops = {
.destroy = nft_flow_offload_destroy,
.validate = nft_flow_offload_validate,
.dump = nft_flow_offload_dump,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_flow_offload_type __read_mostly = {
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index fa9301ca6033..7c5876dc9ff2 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -79,6 +79,11 @@ static int nft_fwd_netdev_offload(struct nft_offload_ctx *ctx,
return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_REDIRECT, oif);
}
+static bool nft_fwd_netdev_offload_action(const struct nft_expr *expr)
+{
+ return true;
+}
+
struct nft_fwd_neigh {
u8 sreg_dev;
u8 sreg_addr;
@@ -140,7 +145,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr,
return;
skb->dev = dev;
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
neigh_xmit(neigh_table, dev, addr, skb);
out:
regs->verdict.code = verdict;
@@ -212,6 +217,7 @@ static const struct nft_expr_ops nft_fwd_neigh_netdev_ops = {
.init = nft_fwd_neigh_init,
.dump = nft_fwd_neigh_dump,
.validate = nft_fwd_validate,
+ .reduce = NFT_REDUCE_READONLY,
};
static const struct nft_expr_ops nft_fwd_netdev_ops = {
@@ -221,7 +227,9 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = {
.init = nft_fwd_netdev_init,
.dump = nft_fwd_netdev_dump,
.validate = nft_fwd_validate,
+ .reduce = NFT_REDUCE_READONLY,
.offload = nft_fwd_netdev_offload,
+ .offload_action = nft_fwd_netdev_offload_action,
};
static const struct nft_expr_ops *
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index f829f5289e16..e5631e88b285 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -165,6 +165,16 @@ nla_put_failure:
return -1;
}
+static bool nft_jhash_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_jhash *priv = nft_expr_priv(expr);
+
+ nft_reg_track_cancel(track, priv->dreg, sizeof(u32));
+
+ return false;
+}
+
static int nft_symhash_dump(struct sk_buff *skb,
const struct nft_expr *expr)
{
@@ -185,6 +195,30 @@ nla_put_failure:
return -1;
}
+static bool nft_symhash_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ struct nft_symhash *priv = nft_expr_priv(expr);
+ struct nft_symhash *symhash;
+
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, sizeof(u32));
+ return false;
+ }
+
+ symhash = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->offset != symhash->offset ||
+ priv->modulus != symhash->modulus) {
+ nft_reg_track_update(track, expr, priv->dreg, sizeof(u32));
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return false;
+}
+
static struct nft_expr_type nft_hash_type;
static const struct nft_expr_ops nft_jhash_ops = {
.type = &nft_hash_type,
@@ -192,6 +226,7 @@ static const struct nft_expr_ops nft_jhash_ops = {
.eval = nft_jhash_eval,
.init = nft_jhash_init,
.dump = nft_jhash_dump,
+ .reduce = nft_jhash_reduce,
};
static const struct nft_expr_ops nft_symhash_ops = {
@@ -200,6 +235,7 @@ static const struct nft_expr_ops nft_symhash_ops = {
.eval = nft_symhash_eval,
.init = nft_symhash_init,
.dump = nft_symhash_dump,
+ .reduce = nft_symhash_reduce,
};
static const struct nft_expr_ops *
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 90c64d27ae53..5f28b21abc7d 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -29,20 +29,36 @@ static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = {
[NFTA_IMMEDIATE_DATA] = { .type = NLA_NESTED },
};
+static enum nft_data_types nft_reg_to_type(const struct nlattr *nla)
+{
+ enum nft_data_types type;
+ u8 reg;
+
+ reg = ntohl(nla_get_be32(nla));
+ if (reg == NFT_REG_VERDICT)
+ type = NFT_DATA_VERDICT;
+ else
+ type = NFT_DATA_VALUE;
+
+ return type;
+}
+
static int nft_immediate_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_immediate_expr *priv = nft_expr_priv(expr);
- struct nft_data_desc desc;
+ struct nft_data_desc desc = {
+ .size = sizeof(priv->data),
+ };
int err;
if (tb[NFTA_IMMEDIATE_DREG] == NULL ||
tb[NFTA_IMMEDIATE_DATA] == NULL)
return -EINVAL;
- err = nft_data_init(ctx, &priv->data, sizeof(priv->data), &desc,
- tb[NFTA_IMMEDIATE_DATA]);
+ desc.type = nft_reg_to_type(tb[NFTA_IMMEDIATE_DREG]);
+ err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]);
if (err < 0)
return err;
@@ -213,6 +229,27 @@ static int nft_immediate_offload(struct nft_offload_ctx *ctx,
return 0;
}
+static bool nft_immediate_offload_action(const struct nft_expr *expr)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+ if (priv->dreg == NFT_REG_VERDICT)
+ return true;
+
+ return false;
+}
+
+static bool nft_immediate_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+ if (priv->dreg != NFT_REG_VERDICT)
+ nft_reg_track_cancel(track, priv->dreg, priv->dlen);
+
+ return false;
+}
+
static const struct nft_expr_ops nft_imm_ops = {
.type = &nft_imm_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
@@ -223,8 +260,9 @@ static const struct nft_expr_ops nft_imm_ops = {
.destroy = nft_immediate_destroy,
.dump = nft_immediate_dump,
.validate = nft_immediate_validate,
+ .reduce = nft_immediate_reduce,
.offload = nft_immediate_offload,
- .offload_flags = NFT_OFFLOAD_F_ACTION,
+ .offload_action = nft_immediate_offload_action,
};
struct nft_expr_type nft_imm_type __read_mostly = {
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
index 5ee33d0ccd4e..bb15a55dad5c 100644
--- a/net/netfilter/nft_last.c
+++ b/net/netfilter/nft_last.c
@@ -30,7 +30,7 @@ static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
u64 last_jiffies;
int err;
- last = kzalloc(sizeof(*last), GFP_KERNEL);
+ last = kzalloc(sizeof(*last), GFP_KERNEL_ACCOUNT);
if (!last)
return -ENOMEM;
@@ -106,7 +106,7 @@ static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src)
struct nft_last_priv *priv_dst = nft_expr_priv(dst);
priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC);
- if (priv_dst->last)
+ if (!priv_dst->last)
return -ENOMEM;
return 0;
@@ -120,6 +120,7 @@ static const struct nft_expr_ops nft_last_ops = {
.destroy = nft_last_destroy,
.clone = nft_last_clone,
.dump = nft_last_dump,
+ .reduce = NFT_REDUCE_READONLY,
};
struct nft_expr_type nft_last_type __read_mostly = {
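This one-character clone fix recurs in nft_limit and nft_connlimit in the same series: the allocation test was inverted, so the clone path returned -ENOMEM exactly when kzalloc() succeeded and otherwise handed back an expression holding a NULL pointer:

	priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC);
	if (!priv_dst->last)	/* was: if (priv_dst->last) */
		return -ENOMEM;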
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index f04be5be73a0..981addb2d051 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -90,7 +90,7 @@ static int nft_limit_init(struct nft_limit_priv *priv,
priv->rate);
}
- priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL);
+ priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL_ACCOUNT);
if (!priv->limit)
return -ENOMEM;
@@ -145,7 +145,7 @@ static int nft_limit_clone(struct nft_limit_priv *priv_dst,
priv_dst->invert = priv_src->invert;
priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC);
- if (priv_dst->limit)
+ if (!priv_dst->limit)
return -ENOMEM;
spin_lock_init(&priv_dst->limit->lock);
@@ -213,6 +213,8 @@ static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src
struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst);
struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src);
+ priv_dst->cost = priv_src->cost;
+
return nft_limit_clone(&priv_dst->limit, &priv_src->limit);
}
@@ -225,6 +227,7 @@ static const struct nft_expr_ops nft_limit_pkts_ops = {
.destroy = nft_limit_pkts_destroy,
.clone = nft_limit_pkts_clone,
.dump = nft_limit_pkts_dump,
+ .reduce = NFT_REDUCE_READONLY,
};
static void nft_limit_bytes_eval(const struct nft_expr *expr,
@@ -279,6 +282,7 @@ static const struct nft_expr_ops nft_limit_bytes_ops = {
.dump = nft_limit_bytes_dump,
.clone = nft_limit_bytes_clone,
.destroy = nft_limit_bytes_destroy,
+ .reduce = NFT_REDUCE_READONLY,
};
static const struct nft_expr_ops *
@@ -340,11 +344,20 @@ static int nft_limit_obj_pkts_dump(struct sk_buff *skb,
return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
}
+static void nft_limit_obj_pkts_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ struct nft_limit_priv_pkts *priv = nft_obj_data(obj);
+
+ nft_limit_destroy(ctx, &priv->limit);
+}
+
static struct nft_object_type nft_limit_obj_type;
static const struct nft_object_ops nft_limit_obj_pkts_ops = {
.type = &nft_limit_obj_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_limit_priv_pkts)),
.init = nft_limit_obj_pkts_init,
+ .destroy = nft_limit_obj_pkts_destroy,
.eval = nft_limit_obj_pkts_eval,
.dump = nft_limit_obj_pkts_dump,
};
@@ -378,11 +391,20 @@ static int nft_limit_obj_bytes_dump(struct sk_buff *skb,
return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
}
+static void nft_limit_obj_bytes_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ struct nft_limit_priv *priv = nft_obj_data(obj);
+
+ nft_limit_destroy(ctx, priv);
+}
+
static struct nft_object_type nft_limit_obj_type;
static const struct nft_object_ops nft_limit_obj_bytes_ops = {
.type = &nft_limit_obj_type,
.size = sizeof(struct nft_limit_priv),
.init = nft_limit_obj_bytes_init,
+ .destroy = nft_limit_obj_bytes_destroy,
.eval = nft_limit_obj_bytes_eval,
.dump = nft_limit_obj_bytes_dump,
};
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 54f6c2035e84..0e13c003f0c1 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -290,6 +290,7 @@ static const struct nft_expr_ops nft_log_ops = {
.init = nft_log_init,
.destroy = nft_log_destroy,
.dump = nft_log_dump,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_log_type __read_mostly = {
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 90becbf5bff3..dfae12759c7c 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -253,6 +253,17 @@ static int nft_lookup_validate(const struct nft_ctx *ctx,
return 0;
}
+static bool nft_lookup_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_lookup *priv = nft_expr_priv(expr);
+
+ if (priv->set->flags & NFT_SET_MAP)
+ nft_reg_track_cancel(track, priv->dreg, priv->set->dlen);
+
+ return false;
+}
+
static const struct nft_expr_ops nft_lookup_ops = {
.type = &nft_lookup_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
@@ -263,6 +274,7 @@ static const struct nft_expr_ops nft_lookup_ops = {
.destroy = nft_lookup_destroy,
.dump = nft_lookup_dump,
.validate = nft_lookup_validate,
+ .reduce = nft_lookup_reduce,
};
struct nft_expr_type nft_lookup_type __read_mostly = {
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 9953e8053753..2a0adc497bbb 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -129,6 +129,7 @@ static const struct nft_expr_ops nft_masq_ipv4_ops = {
.destroy = nft_masq_ipv4_destroy,
.dump = nft_masq_dump,
.validate = nft_masq_validate,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_masq_ipv4_type __read_mostly = {
@@ -175,6 +176,7 @@ static const struct nft_expr_ops nft_masq_ipv6_ops = {
.destroy = nft_masq_ipv6_destroy,
.dump = nft_masq_dump,
.validate = nft_masq_validate,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_masq_ipv6_type __read_mostly = {
@@ -230,6 +232,7 @@ static const struct nft_expr_ops nft_masq_inet_ops = {
.destroy = nft_masq_inet_destroy,
.dump = nft_masq_dump,
.validate = nft_masq_validate,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_masq_inet_type __read_mostly = {
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 5ab4df56c945..55d2d49c3425 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -14,6 +14,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/random.h>
#include <linux/smp.h>
#include <linux/static_key.h>
#include <net/dst.h>
@@ -32,8 +33,6 @@
#define NFT_META_SECS_PER_DAY 86400
#define NFT_META_DAYS_PER_WEEK 7
-static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
-
static u8 nft_meta_weekday(void)
{
time64_t secs = ktime_get_real_seconds();
@@ -271,13 +270,6 @@ static bool nft_meta_get_eval_ifname(enum nft_meta_keys key, u32 *dest,
return true;
}
-static noinline u32 nft_prandom_u32(void)
-{
- struct rnd_state *state = this_cpu_ptr(&nft_prandom_state);
-
- return prandom_u32_state(state);
-}
-
#ifdef CONFIG_IP_ROUTE_CLASSID
static noinline bool
nft_meta_get_eval_rtclassid(const struct sk_buff *skb, u32 *dest)
@@ -389,7 +381,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
break;
#endif
case NFT_META_PRANDOM:
- *dest = nft_prandom_u32();
+ *dest = get_random_u32();
break;
#ifdef CONFIG_XFRM
case NFT_META_SECPATH:
@@ -518,7 +510,6 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
len = IFNAMSIZ;
break;
case NFT_META_PRANDOM:
- prandom_init_once(&nft_prandom_state);
len = sizeof(u32);
break;
#ifdef CONFIG_XFRM
@@ -539,6 +530,7 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
+ priv->len = len;
return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg,
NULL, NFT_DATA_VALUE, len);
}
@@ -664,6 +656,7 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
+ priv->len = len;
err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len);
if (err < 0)
return err;
@@ -750,24 +743,21 @@ static int nft_meta_get_offload(struct nft_offload_ctx *ctx,
return 0;
}
-static bool nft_meta_get_reduce(struct nft_regs_track *track,
- const struct nft_expr *expr)
+bool nft_meta_get_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
const struct nft_meta *meta;
- if (!track->regs[priv->dreg].selector ||
- track->regs[priv->dreg].selector->ops != expr->ops) {
- track->regs[priv->dreg].selector = expr;
- track->regs[priv->dreg].bitwise = NULL;
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
return false;
}
meta = nft_expr_priv(track->regs[priv->dreg].selector);
if (priv->key != meta->key ||
priv->dreg != meta->dreg) {
- track->regs[priv->dreg].selector = expr;
- track->regs[priv->dreg].bitwise = NULL;
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
return false;
}
@@ -776,6 +766,7 @@ static bool nft_meta_get_reduce(struct nft_regs_track *track,
return nft_expr_reduce_bitwise(track, expr);
}
+EXPORT_SYMBOL_GPL(nft_meta_get_reduce);
static const struct nft_expr_ops nft_meta_get_ops = {
.type = &nft_meta_type,
@@ -800,8 +791,7 @@ static bool nft_meta_set_reduce(struct nft_regs_track *track,
if (track->regs[i].selector->ops != &nft_meta_get_ops)
continue;
- track->regs[i].selector = NULL;
- track->regs[i].bitwise = NULL;
+ __nft_reg_track_cancel(track, i);
}
return false;
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index be1595d6979d..e5fd6995e4bf 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -317,6 +317,7 @@ static const struct nft_expr_ops nft_nat_ops = {
.destroy = nft_nat_destroy,
.dump = nft_nat_dump,
.validate = nft_nat_validate,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_nat_type __read_mostly = {
@@ -334,7 +335,8 @@ static void nft_nat_inet_eval(const struct nft_expr *expr,
{
const struct nft_nat *priv = nft_expr_priv(expr);
- if (priv->family == nft_pf(pkt))
+ if (priv->family == nft_pf(pkt) ||
+ priv->family == NFPROTO_INET)
nft_nat_eval(expr, regs, pkt);
}
@@ -346,6 +348,7 @@ static const struct nft_expr_ops nft_nat_inet_ops = {
.destroy = nft_nat_destroy,
.dump = nft_nat_dump,
.validate = nft_nat_validate,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_inet_nat_type __read_mostly = {
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 1d378efd8823..45d3dc9e96f2 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -9,12 +9,11 @@
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
+#include <linux/random.h>
#include <linux/static_key.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
-static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
-
struct nft_ng_inc {
u8 dreg;
u32 modulus;
@@ -85,6 +84,16 @@ err:
return err;
}
+static bool nft_ng_inc_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+ nft_reg_track_cancel(track, priv->dreg, NFT_REG32_SIZE);
+
+ return false;
+}
+
static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
u32 modulus, enum nft_ng_types type, u32 offset)
{
@@ -125,12 +134,9 @@ struct nft_ng_random {
u32 offset;
};
-static u32 nft_ng_random_gen(struct nft_ng_random *priv)
+static u32 nft_ng_random_gen(const struct nft_ng_random *priv)
{
- struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state);
-
- return reciprocal_scale(prandom_u32_state(state), priv->modulus) +
- priv->offset;
+ return reciprocal_scale(get_random_u32(), priv->modulus) + priv->offset;
}
static void nft_ng_random_eval(const struct nft_expr *expr,
@@ -158,8 +164,6 @@ static int nft_ng_random_init(const struct nft_ctx *ctx,
if (priv->offset + priv->modulus - 1 < priv->offset)
return -EOVERFLOW;
- prandom_init_once(&nft_numgen_prandom_state);
-
return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
NULL, NFT_DATA_VALUE, sizeof(u32));
}
@@ -172,6 +176,16 @@ static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
priv->offset);
}
+static bool nft_ng_random_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_ng_random *priv = nft_expr_priv(expr);
+
+ nft_reg_track_cancel(track, priv->dreg, NFT_REG32_SIZE);
+
+ return false;
+}
+
static struct nft_expr_type nft_ng_type;
static const struct nft_expr_ops nft_ng_inc_ops = {
.type = &nft_ng_type,
@@ -180,6 +194,7 @@ static const struct nft_expr_ops nft_ng_inc_ops = {
.init = nft_ng_inc_init,
.destroy = nft_ng_inc_destroy,
.dump = nft_ng_inc_dump,
+ .reduce = nft_ng_inc_reduce,
};
static const struct nft_expr_ops nft_ng_random_ops = {
@@ -188,6 +203,7 @@ static const struct nft_expr_ops nft_ng_random_ops = {
.eval = nft_ng_random_eval,
.init = nft_ng_random_init,
.dump = nft_ng_random_dump,
+ .reduce = nft_ng_random_reduce,
};
static const struct nft_expr_ops *
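nft_meta and nft_numgen both drop their private per-cpu prandom state in favour of get_random_u32(), which is seeded from the CRNG and already batched per cpu; the offset/modulus mapping keeps using reciprocal_scale() to avoid a division. A self-contained sketch of the generator:

	#include <linux/kernel.h>	/* reciprocal_scale() */
	#include <linux/random.h>

	/* pick a value in [offset, offset + modulus) without dividing */
	static u32 ng_pick_sketch(u32 modulus, u32 offset)
	{
		return reciprocal_scale(get_random_u32(), modulus) + offset;
	}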
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 94b2327e71dc..5d8d91b3904d 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -91,6 +91,7 @@ static const struct nft_expr_ops nft_objref_ops = {
.activate = nft_objref_activate,
.deactivate = nft_objref_deactivate,
.dump = nft_objref_dump,
+ .reduce = NFT_REDUCE_READONLY,
};
struct nft_objref_map {
@@ -204,6 +205,7 @@ static const struct nft_expr_ops nft_objref_map_ops = {
.deactivate = nft_objref_map_deactivate,
.destroy = nft_objref_map_destroy,
.dump = nft_objref_map_dump,
+ .reduce = NFT_REDUCE_READONLY,
};
static const struct nft_expr_ops *
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index d82677e83400..adacf95b6e2b 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -51,7 +51,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
snprintf(os_match, NFT_OSF_MAXGENRELEN, "%s:%s",
data.genre, data.version);
else
- strlcpy(os_match, data.genre, NFT_OSF_MAXGENRELEN);
+ strscpy(os_match, data.genre, NFT_OSF_MAXGENRELEN);
strncpy((char *)dest, os_match, NFT_OSF_MAXGENRELEN);
}
@@ -99,7 +99,7 @@ static int nft_osf_dump(struct sk_buff *skb, const struct nft_expr *expr)
if (nla_put_u8(skb, NFTA_OSF_TTL, priv->ttl))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_OSF_FLAGS, ntohl(priv->flags)))
+ if (nla_put_u32(skb, NFTA_OSF_FLAGS, ntohl((__force __be32)priv->flags)))
goto nla_put_failure;
if (nft_dump_register(skb, NFTA_OSF_DREG, priv->dreg))
@@ -115,9 +115,45 @@ static int nft_osf_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
- return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) |
- (1 << NF_INET_PRE_ROUTING) |
- (1 << NF_INET_FORWARD));
+ unsigned int hooks;
+
+ switch (ctx->family) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ hooks = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_FORWARD);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+
+static bool nft_osf_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ struct nft_osf *priv = nft_expr_priv(expr);
+ struct nft_osf *osf;
+
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, NFT_OSF_MAXGENRELEN);
+ return false;
+ }
+
+ osf = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->flags != osf->flags ||
+ priv->ttl != osf->ttl) {
+ nft_reg_track_update(track, expr, priv->dreg, NFT_OSF_MAXGENRELEN);
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return false;
}
static struct nft_expr_type nft_osf_type;
@@ -128,6 +164,7 @@ static const struct nft_expr_ops nft_osf_op = {
.dump = nft_osf_dump,
.type = &nft_osf_type,
.validate = nft_osf_validate,
+ .reduce = nft_osf_reduce,
};
static struct nft_expr_type nft_osf_type __read_mostly = {
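nft_osf_reduce() above instantiates the comparison idiom: when two consecutive rules load the OS fingerprint into the same register, the second load is redundant only if every knob that influences the output matches and nothing modified the register in between. Condensed into the test it performs (fragment, with priv and dreg as in the function above):

	/* the previous writer of dreg is known to be an osf expression
	 * here, because nft_reg_track_cmp() already matched expr->ops
	 */
	const struct nft_osf *prev = nft_expr_priv(track->regs[priv->dreg].selector);
	bool reusable = prev->flags == priv->flags &&
			prev->ttl == priv->ttl &&
			!track->regs[priv->dreg].bitwise;

The bitwise check is why the function can still return false for a matched selector: a bitwise expression that transformed the register since the last store invalidates direct reuse.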
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 940fed9a760b..4edd899aeb9b 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -83,7 +83,7 @@ static int __nft_payload_inner_offset(struct nft_pktinfo *pkt)
{
unsigned int thoff = nft_thoff(pkt);
- if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff)
return -1;
switch (pkt->tprot) {
@@ -147,7 +147,7 @@ void nft_payload_eval(const struct nft_expr *expr,
offset = skb_network_offset(skb);
break;
case NFT_PAYLOAD_TRANSPORT_HEADER:
- if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff)
goto err;
offset = nft_thoff(pkt);
break;
@@ -173,10 +173,10 @@ static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
[NFTA_PAYLOAD_SREG] = { .type = NLA_U32 },
[NFTA_PAYLOAD_DREG] = { .type = NLA_U32 },
[NFTA_PAYLOAD_BASE] = { .type = NLA_U32 },
- [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 },
- [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_OFFSET] = NLA_POLICY_MAX(NLA_BE32, 255),
+ [NFTA_PAYLOAD_LEN] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 },
- [NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_CSUM_OFFSET] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_PAYLOAD_CSUM_FLAGS] = { .type = NLA_U32 },
};
@@ -216,10 +216,8 @@ static bool nft_payload_reduce(struct nft_regs_track *track,
const struct nft_payload *priv = nft_expr_priv(expr);
const struct nft_payload *payload;
- if (!track->regs[priv->dreg].selector ||
- track->regs[priv->dreg].selector->ops != expr->ops) {
- track->regs[priv->dreg].selector = expr;
- track->regs[priv->dreg].bitwise = NULL;
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
return false;
}
@@ -227,8 +225,7 @@ static bool nft_payload_reduce(struct nft_regs_track *track,
if (priv->base != payload->base ||
priv->offset != payload->offset ||
priv->len != payload->len) {
- track->regs[priv->dreg].selector = expr;
- track->regs[priv->dreg].bitwise = NULL;
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
return false;
}
@@ -688,7 +685,7 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
offset = skb_network_offset(skb);
break;
case NFT_PAYLOAD_TRANSPORT_HEADER:
- if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
+ if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff)
goto err;
offset = nft_thoff(pkt);
break;
@@ -728,7 +725,8 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
if (priv->csum_type == NFT_PAYLOAD_CSUM_SCTP &&
pkt->tprot == IPPROTO_SCTP &&
skb->ip_summed != CHECKSUM_PARTIAL) {
- if (nft_payload_csum_sctp(skb, nft_thoff(pkt)))
+ if (pkt->fragoff == 0 &&
+ nft_payload_csum_sctp(skb, nft_thoff(pkt)))
goto err;
}
@@ -742,17 +740,23 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_payload_set *priv = nft_expr_priv(expr);
+ u32 csum_offset, csum_type = NFT_PAYLOAD_CSUM_NONE;
+ int err;
priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
if (tb[NFTA_PAYLOAD_CSUM_TYPE])
- priv->csum_type =
- ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
- if (tb[NFTA_PAYLOAD_CSUM_OFFSET])
- priv->csum_offset =
- ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET]));
+ csum_type = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
+ if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) {
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_CSUM_OFFSET], U8_MAX,
+ &csum_offset);
+ if (err < 0)
+ return err;
+
+ priv->csum_offset = csum_offset;
+ }
if (tb[NFTA_PAYLOAD_CSUM_FLAGS]) {
u32 flags;
@@ -763,7 +767,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
priv->csum_flags = flags;
}
- switch (priv->csum_type) {
+ switch (csum_type) {
case NFT_PAYLOAD_CSUM_NONE:
case NFT_PAYLOAD_CSUM_INET:
break;
@@ -777,6 +781,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
default:
return -EOPNOTSUPP;
}
+ priv->csum_type = csum_type;
return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg,
priv->len);
@@ -814,8 +819,7 @@ static bool nft_payload_set_reduce(struct nft_regs_track *track,
track->regs[i].selector->ops != &nft_payload_fast_ops)
continue;
- track->regs[i].selector = NULL;
- track->regs[i].bitwise = NULL;
+ __nft_reg_track_cancel(track, i);
}
return false;
@@ -836,6 +840,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
{
enum nft_payload_bases base;
unsigned int offset, len;
+ int err;
if (tb[NFTA_PAYLOAD_BASE] == NULL ||
tb[NFTA_PAYLOAD_OFFSET] == NULL ||
@@ -862,8 +867,13 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_PAYLOAD_DREG] == NULL)
return ERR_PTR(-EINVAL);
- offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
- len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U8_MAX, &offset);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_LEN], U8_MAX, &len);
+ if (err < 0)
+ return ERR_PTR(err);
if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) &&
base != NFT_PAYLOAD_LL_HEADER && base != NFT_PAYLOAD_INNER_HEADER)
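Both the tightened netlink policy (NLA_POLICY_MAX(NLA_BE32, 255)) and the explicit nft_parse_u32_check() calls above enforce the same bound, which matters because offset, len and csum_offset land in u8 fields where an unchecked u32 would silently truncate. A hedged reconstruction of what the helper is expected to do; the real body lives elsewhere in nf_tables:

	static int example_parse_u32_check(const struct nlattr *attr,
					   unsigned int max, u32 *dest)
	{
		u32 val;

		val = ntohl(nla_get_be32(attr));
		if (val > max)
			return -ERANGE;	/* would not fit the target field */

		*dest = val;
		return 0;
	}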
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index 9ba1de51ac07..da29e92c03e2 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -68,6 +68,31 @@ static void nft_queue_sreg_eval(const struct nft_expr *expr,
regs->verdict.code = ret;
}
+static int nft_queue_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ static const unsigned int supported_hooks = ((1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING));
+
+ switch (ctx->family) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ case NFPROTO_BRIDGE:
+ break;
+ case NFPROTO_NETDEV: /* lacks okfn */
+ fallthrough;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, supported_hooks);
+}
+
static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = {
[NFTA_QUEUE_NUM] = { .type = NLA_U16 },
[NFTA_QUEUE_TOTAL] = { .type = NLA_U16 },
@@ -164,6 +189,8 @@ static const struct nft_expr_ops nft_queue_ops = {
.eval = nft_queue_eval,
.init = nft_queue_init,
.dump = nft_queue_dump,
+ .validate = nft_queue_validate,
+ .reduce = NFT_REDUCE_READONLY,
};
static const struct nft_expr_ops nft_queue_sreg_ops = {
@@ -172,6 +199,8 @@ static const struct nft_expr_ops nft_queue_sreg_ops = {
.eval = nft_queue_sreg_eval,
.init = nft_queue_sreg_init,
.dump = nft_queue_sreg_dump,
+ .validate = nft_queue_validate,
+ .reduce = NFT_REDUCE_READONLY,
};
static const struct nft_expr_ops *
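nft_queue_validate() refuses chain placements where an NF_QUEUE verdict cannot work; as the inline comment notes, netdev-family hooks lack the okfn needed to reinject an accepted packet. The mask test it delegates to is assumed to look roughly like this (nft_base_chain layout as in nf_tables):

	static int example_validate_hooks(const struct nft_base_chain *basechain,
					  unsigned int allowed_mask)
	{
		if ((1 << basechain->ops.hooknum) & allowed_mask)
			return 0;

		return -EOPNOTSUPP;	/* hook outside the expression's mask */
	}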
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 0484aef74273..e6b0df68feea 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -90,7 +90,7 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
return -EOPNOTSUPP;
}
- priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL);
+ priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL_ACCOUNT);
if (!priv->consumed)
return -ENOMEM;
@@ -237,7 +237,7 @@ static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
struct nft_quota *priv_dst = nft_expr_priv(dst);
priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC);
- if (priv_dst->consumed)
+ if (!priv_dst->consumed)
return -ENOMEM;
atomic64_set(priv_dst->consumed, 0);
@@ -254,6 +254,7 @@ static const struct nft_expr_ops nft_quota_ops = {
.destroy = nft_quota_destroy,
.clone = nft_quota_clone,
.dump = nft_quota_dump,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_quota_type __read_mostly = {
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index e4a1c44d7f51..832f0d725a9e 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -51,7 +51,14 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
const struct nlattr * const tb[])
{
struct nft_range_expr *priv = nft_expr_priv(expr);
- struct nft_data_desc desc_from, desc_to;
+ struct nft_data_desc desc_from = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->data_from),
+ };
+ struct nft_data_desc desc_to = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->data_to),
+ };
int err;
u32 op;
@@ -61,26 +68,16 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
!tb[NFTA_RANGE_TO_DATA])
return -EINVAL;
- err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from),
- &desc_from, tb[NFTA_RANGE_FROM_DATA]);
+ err = nft_data_init(NULL, &priv->data_from, &desc_from,
+ tb[NFTA_RANGE_FROM_DATA]);
if (err < 0)
return err;
- if (desc_from.type != NFT_DATA_VALUE) {
- err = -EINVAL;
- goto err1;
- }
-
- err = nft_data_init(NULL, &priv->data_to, sizeof(priv->data_to),
- &desc_to, tb[NFTA_RANGE_TO_DATA]);
+ err = nft_data_init(NULL, &priv->data_to, &desc_to,
+ tb[NFTA_RANGE_TO_DATA]);
if (err < 0)
goto err1;
- if (desc_to.type != NFT_DATA_VALUE) {
- err = -EINVAL;
- goto err2;
- }
-
if (desc_from.len != desc_to.len) {
err = -EINVAL;
goto err2;
@@ -140,6 +137,7 @@ static const struct nft_expr_ops nft_range_ops = {
.eval = nft_range_eval,
.init = nft_range_init,
.dump = nft_range_dump,
+ .reduce = NFT_REDUCE_READONLY,
};
struct nft_expr_type nft_range_type __read_mostly = {
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index ba09890dddb5..5086adfe731c 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -134,6 +134,7 @@ static const struct nft_expr_ops nft_redir_ipv4_ops = {
.destroy = nft_redir_ipv4_destroy,
.dump = nft_redir_dump,
.validate = nft_redir_validate,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_redir_ipv4_type __read_mostly = {
@@ -183,6 +184,7 @@ static const struct nft_expr_ops nft_redir_ipv6_ops = {
.destroy = nft_redir_ipv6_destroy,
.dump = nft_redir_dump,
.validate = nft_redir_validate,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_redir_ipv6_type __read_mostly = {
@@ -225,6 +227,7 @@ static const struct nft_expr_ops nft_redir_inet_ops = {
.destroy = nft_redir_inet_destroy,
.dump = nft_redir_dump,
.validate = nft_redir_validate,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_redir_inet_type __read_mostly = {
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 554caf967baa..973fa31a9dd6 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -80,6 +80,7 @@ static const struct nft_expr_ops nft_reject_inet_ops = {
.init = nft_reject_init,
.dump = nft_reject_dump,
.validate = nft_reject_inet_validate,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_reject_inet_type __read_mostly = {
diff --git a/net/netfilter/nft_reject_netdev.c b/net/netfilter/nft_reject_netdev.c
index 61cd8c4ac385..7865cd8b11bb 100644
--- a/net/netfilter/nft_reject_netdev.c
+++ b/net/netfilter/nft_reject_netdev.c
@@ -159,6 +159,7 @@ static const struct nft_expr_ops nft_reject_netdev_ops = {
.init = nft_reject_init,
.dump = nft_reject_dump,
.validate = nft_reject_netdev_validate,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_reject_netdev_type __read_mostly = {
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index bcd01a63e38f..71931ec91721 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -191,6 +191,7 @@ static const struct nft_expr_ops nft_rt_get_ops = {
.init = nft_rt_get_init,
.dump = nft_rt_get_dump,
.validate = nft_rt_validate,
+ .reduce = NFT_REDUCE_READONLY,
};
struct nft_expr_type nft_rt_type __read_mostly = {
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index e7ae5914971e..96081ac8d2b4 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -21,7 +21,7 @@ struct nft_bitmap_elem {
* the element state in the current and the future generation.
*
* An element can be in three states. The generation cursor is represented using
- * the ^ character, note that this cursor shifts on every succesful transaction.
+ * the ^ character, note that this cursor shifts on every successful transaction.
* If no transaction is going on, we observe all elements are in the following
* state:
*
@@ -39,7 +39,7 @@ struct nft_bitmap_elem {
* 10 = this element is active in the current generation and it becomes inactive
* ^ in the next one. This happens when the element is deactivated but commit
path has not been executed yet, so removal is still pending. On
- * transation abortion, the next generation bit is reset to go back to
+ * transaction abortion, the next generation bit is reset to go back to
* restore its previous state.
*/
struct nft_bitmap {
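A worked reading of the two-bit encoding described above, with the leftmost digit being the generation under the ^ cursor and the rightmost the next generation; commit advances the cursor, abort clears the next-generation bit back to the current one:

	/*   00  inactive in both generations (fresh, or awaiting gc)
	 *   01  addition pending: invisible now, active after commit
	 *   11  active and staying active
	 *   10  removal pending: active now, gone once commit runs
	 */
	static inline bool example_elem_is_active(u8 pair, unsigned int cur_bit)
	{
		/* cur_bit selects which of the two bits is "current" */
		return pair & (1u << cur_bit);
	}

The helper and bit layout are illustrative; the real accessors operate on the packed bitmap in struct nft_bitmap.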
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index df40314de21f..76de6c8d9865 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -143,6 +143,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
/* Another cpu may race to insert the element with the same key */
if (prev) {
nft_set_elem_destroy(set, he, true);
+ atomic_dec(&set->nelems);
he = prev;
}
@@ -152,6 +153,7 @@ out:
err2:
nft_set_elem_destroy(set, he, true);
+ atomic_dec(&set->nelems);
err1:
return false;
}
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 2c8051d8cca6..4f9299b9dcdd 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -2125,6 +2125,32 @@ out_scratch:
}
/**
+ * nft_set_pipapo_match_destroy() - Destroy elements from key mapping array
+ * @set: nftables API set representation
+ * @m: matching data pointing to key mapping array
+ */
+static void nft_set_pipapo_match_destroy(const struct nft_set *set,
+ struct nft_pipapo_match *m)
+{
+ struct nft_pipapo_field *f;
+ int i, r;
+
+ for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
+ ;
+
+ for (r = 0; r < f->rules; r++) {
+ struct nft_pipapo_elem *e;
+
+ if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e)
+ continue;
+
+ e = f->mt[r].e;
+
+ nft_set_elem_destroy(set, e, true);
+ }
+}
+
+/**
* nft_pipapo_destroy() - Free private data for set and all committed elements
* @set: nftables API set representation
*/
@@ -2132,26 +2158,13 @@ static void nft_pipapo_destroy(const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m;
- struct nft_pipapo_field *f;
- int i, r, cpu;
+ int cpu;
m = rcu_dereference_protected(priv->match, true);
if (m) {
rcu_barrier();
- for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
- ;
-
- for (r = 0; r < f->rules; r++) {
- struct nft_pipapo_elem *e;
-
- if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e)
- continue;
-
- e = f->mt[r].e;
-
- nft_set_elem_destroy(set, e, true);
- }
+ nft_set_pipapo_match_destroy(set, m);
#ifdef NFT_PIPAPO_ALIGN
free_percpu(m->scratch_aligned);
@@ -2165,6 +2178,11 @@ static void nft_pipapo_destroy(const struct nft_set *set)
}
if (priv->clone) {
+ m = priv->clone;
+
+ if (priv->dirty)
+ nft_set_pipapo_match_destroy(set, m);
+
#ifdef NFT_PIPAPO_ALIGN
free_percpu(priv->clone->scratch_aligned);
#endif
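Factoring the element walk out into nft_set_pipapo_match_destroy() is what allows the second hunk to also release elements reachable only through a dirty clone, plugging a leak on transaction abort. The walk itself has one subtlety: after range expansion, several consecutive rules in the last field can map to the same element, so destruction must skip the duplicates, as in this condensed fragment of the function above:

	for (r = 0; r < f->rules; r++) {
		/* the same element continues into the next rule: free it
		 * only once, at the end of its run
		 */
		if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e)
			continue;

		nft_set_elem_destroy(set, f->mt[r].e, true);
	}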
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index d600a566da32..7325bee7d144 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -349,7 +349,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
*ext = &rbe->ext;
return -EEXIST;
} else {
- p = &parent->rb_left;
+ overlap = false;
+ if (nft_rbtree_interval_end(rbe))
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
}
}
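The insertion fix changes how duplicate keys are ordered: previously both interval-start and interval-end nodes with an equal key descended left, which could mis-order the boundary nodes of adjacent intervals. With end nodes going left and start nodes right, the boundaries of [a, b) and [b, c) settle as end(b) before start(b), so a lookup for b matches the start of the second interval and the shared boundary is no longer flagged (hence overlap = false on this path). The descent rule, annotated:

	} else {
		/* equal keys: an interval-end boundary sorts before an
		 * interval-start boundary carrying the same value
		 */
		overlap = false;
		if (nft_rbtree_interval_end(rbe))
			p = &parent->rb_left;
		else
			p = &parent->rb_right;
	}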
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index d601974c9d2e..49a5348a6a14 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -10,6 +10,7 @@
struct nft_socket {
enum nft_socket_keys key:8;
u8 level;
+ u8 len;
union {
u8 dreg;
};
@@ -36,24 +37,50 @@ static void nft_socket_wildcard(const struct nft_pktinfo *pkt,
#ifdef CONFIG_SOCK_CGROUP_DATA
static noinline bool
-nft_sock_get_eval_cgroupv2(u32 *dest, const struct nft_pktinfo *pkt, u32 level)
+nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo *pkt, u32 level)
{
- struct sock *sk = skb_to_full_sk(pkt->skb);
struct cgroup *cgrp;
+ u64 cgid;
- if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk)))
+ if (!sk_fullsock(sk))
return false;
- cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- if (level > cgrp->level)
+ cgrp = cgroup_ancestor(sock_cgroup_ptr(&sk->sk_cgrp_data), level);
+ if (!cgrp)
return false;
- memcpy(dest, &cgrp->ancestor_ids[level], sizeof(u64));
-
+ cgid = cgroup_id(cgrp);
+ memcpy(dest, &cgid, sizeof(u64));
return true;
}
#endif
+static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt)
+{
+ const struct net_device *indev = nft_in(pkt);
+ const struct sk_buff *skb = pkt->skb;
+ struct sock *sk = NULL;
+
+ if (!indev)
+ return NULL;
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, indev);
+ break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ case NFPROTO_IPV6:
+ sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, indev);
+ break;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return sk;
+}
+
static void nft_socket_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -67,20 +94,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
sk = NULL;
if (!sk)
- switch(nft_pf(pkt)) {
- case NFPROTO_IPV4:
- sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt));
- break;
-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
- case NFPROTO_IPV6:
- sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt));
- break;
-#endif
- default:
- WARN_ON_ONCE(1);
- regs->verdict.code = NFT_BREAK;
- return;
- }
+ sk = nft_socket_do_lookup(pkt);
if (!sk) {
regs->verdict.code = NFT_BREAK;
@@ -108,7 +122,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
break;
#ifdef CONFIG_SOCK_CGROUP_DATA
case NFT_SOCKET_CGROUPV2:
- if (!nft_sock_get_eval_cgroupv2(dest, pkt, priv->level)) {
+ if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) {
regs->verdict.code = NFT_BREAK;
return;
}
@@ -150,7 +164,7 @@ static int nft_socket_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- priv->key = ntohl(nla_get_u32(tb[NFTA_SOCKET_KEY]));
+ priv->key = ntohl(nla_get_be32(tb[NFTA_SOCKET_KEY]));
switch(priv->key) {
case NFT_SOCKET_TRANSPARENT:
case NFT_SOCKET_WILDCARD:
@@ -166,7 +180,7 @@ static int nft_socket_init(const struct nft_ctx *ctx,
if (!tb[NFTA_SOCKET_LEVEL])
return -EINVAL;
- level = ntohl(nla_get_u32(tb[NFTA_SOCKET_LEVEL]));
+ level = ntohl(nla_get_be32(tb[NFTA_SOCKET_LEVEL]));
if (level > 255)
return -EOPNOTSUPP;
@@ -179,6 +193,7 @@ static int nft_socket_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
+ priv->len = len;
return nft_parse_register_store(ctx, tb[NFTA_SOCKET_DREG], &priv->dreg,
NULL, NFT_DATA_VALUE, len);
}
@@ -188,16 +203,51 @@ static int nft_socket_dump(struct sk_buff *skb,
{
const struct nft_socket *priv = nft_expr_priv(expr);
- if (nla_put_u32(skb, NFTA_SOCKET_KEY, htonl(priv->key)))
+ if (nla_put_be32(skb, NFTA_SOCKET_KEY, htonl(priv->key)))
return -1;
if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg))
return -1;
if (priv->key == NFT_SOCKET_CGROUPV2 &&
- nla_put_u32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level)))
+ nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level)))
return -1;
return 0;
}
+static bool nft_socket_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_socket *priv = nft_expr_priv(expr);
+ const struct nft_socket *socket;
+
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ socket = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->key != socket->key ||
+ priv->dreg != socket->dreg ||
+ priv->level != socket->level) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return nft_expr_reduce_bitwise(track, expr);
+}
+
+static int nft_socket_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT));
+}
+
static struct nft_expr_type nft_socket_type;
static const struct nft_expr_ops nft_socket_ops = {
.type = &nft_socket_type,
@@ -205,6 +255,8 @@ static const struct nft_expr_ops nft_socket_ops = {
.eval = nft_socket_eval,
.init = nft_socket_init,
.dump = nft_socket_dump,
+ .validate = nft_socket_validate,
+ .reduce = nft_socket_reduce,
};
static struct nft_expr_type nft_socket_type __read_mostly = {
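Two behavioural changes ride along in the cgroupv2 path: the caller now resolves the socket once (including the slow-path lookup via nft_socket_do_lookup()), and the ancestor walk goes through the generic cgroup helpers instead of reading ancestor_ids[] directly. The helper semantics this relies on, as assumed from the diff:

	/* cgroup_ancestor() returns NULL when @level is deeper than the
	 * socket's own cgroup level, which subsumes the old open-coded
	 * "level > cgrp->level" bailout; cgroup_id() hides the id lookup.
	 */
	cgrp = cgroup_ancestor(sock_cgroup_ptr(&sk->sk_cgrp_data), level);
	if (!cgrp)
		return false;

	cgid = cgroup_id(cgrp);
	memcpy(dest, &cgid, sizeof(u64));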
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index a0109fa1e92d..6cf9a04fbfe2 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -191,8 +191,10 @@ static int nft_synproxy_do_init(const struct nft_ctx *ctx,
if (err)
goto nf_ct_failure;
err = nf_synproxy_ipv6_init(snet, ctx->net);
- if (err)
+ if (err) {
+ nf_synproxy_ipv4_fini(snet, ctx->net);
goto nf_ct_failure;
+ }
break;
}
@@ -286,6 +288,7 @@ static const struct nft_expr_ops nft_synproxy_ops = {
.dump = nft_synproxy_dump,
.type = &nft_synproxy_type,
.validate = nft_synproxy_validate,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_synproxy_type __read_mostly = {
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index b5b09a902c7a..62da25ad264b 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -52,11 +52,11 @@ static void nft_tproxy_eval_v4(const struct nft_expr *expr,
skb->dev, NF_TPROXY_LOOKUP_ESTABLISHED);
if (priv->sreg_addr)
- taddr = regs->data[priv->sreg_addr];
+ taddr = nft_reg_load_be32(&regs->data[priv->sreg_addr]);
taddr = nf_tproxy_laddr4(skb, taddr, iph->daddr);
if (priv->sreg_port)
- tport = nft_reg_load16(&regs->data[priv->sreg_port]);
+ tport = nft_reg_load_be16(&regs->data[priv->sreg_port]);
if (!tport)
tport = hp->dest;
@@ -124,7 +124,7 @@ static void nft_tproxy_eval_v6(const struct nft_expr *expr,
taddr = *nf_tproxy_laddr6(skb, &taddr, &iph->daddr);
if (priv->sreg_port)
- tport = nft_reg_load16(&regs->data[priv->sreg_port]);
+ tport = nft_reg_load_be16(&regs->data[priv->sreg_port]);
if (!tport)
tport = hp->dest;
@@ -312,6 +312,13 @@ static int nft_tproxy_dump(struct sk_buff *skb,
return 0;
}
+static int nft_tproxy_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING);
+}
+
static struct nft_expr_type nft_tproxy_type;
static const struct nft_expr_ops nft_tproxy_ops = {
.type = &nft_tproxy_type,
@@ -320,6 +327,8 @@ static const struct nft_expr_ops nft_tproxy_ops = {
.init = nft_tproxy_init,
.destroy = nft_tproxy_destroy,
.dump = nft_tproxy_dump,
+ .reduce = NFT_REDUCE_READONLY,
+ .validate = nft_tproxy_validate,
};
static struct nft_expr_type nft_tproxy_type __read_mostly = {
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 3b27926d5382..983ade4be3b3 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -17,6 +17,7 @@ struct nft_tunnel {
enum nft_tunnel_keys key:8;
u8 dreg;
enum nft_tunnel_mode mode:8;
+ u8 len;
};
static void nft_tunnel_get_eval(const struct nft_expr *expr,
@@ -101,6 +102,7 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx,
priv->mode = NFT_TUNNEL_MODE_NONE;
}
+ priv->len = len;
return nft_parse_register_store(ctx, tb[NFTA_TUNNEL_DREG], &priv->dreg,
NULL, NFT_DATA_VALUE, len);
}
@@ -122,6 +124,31 @@ nla_put_failure:
return -1;
}
+static bool nft_tunnel_get_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_tunnel *priv = nft_expr_priv(expr);
+ const struct nft_tunnel *tunnel;
+
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ tunnel = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->key != tunnel->key ||
+ priv->dreg != tunnel->dreg ||
+ priv->mode != tunnel->mode) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return false;
+}
+
static struct nft_expr_type nft_tunnel_type;
static const struct nft_expr_ops nft_tunnel_get_ops = {
.type = &nft_tunnel_type,
@@ -129,10 +156,12 @@ static const struct nft_expr_ops nft_tunnel_get_ops = {
.eval = nft_tunnel_get_eval,
.init = nft_tunnel_get_init,
.dump = nft_tunnel_get_dump,
+ .reduce = nft_tunnel_get_reduce,
};
static struct nft_expr_type nft_tunnel_type __read_mostly = {
.name = "tunnel",
+ .family = NFPROTO_NETDEV,
.ops = &nft_tunnel_get_ops,
.policy = nft_tunnel_policy,
.maxattr = NFTA_TUNNEL_MAX,
@@ -355,8 +384,9 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx,
struct ip_tunnel_info *info,
struct nft_tunnel_opts *opts)
{
- int err, rem, type = 0;
struct nlattr *nla;
+ __be16 type = 0;
+ int err, rem;
err = nla_validate_nested_deprecated(attr, NFTA_TUNNEL_KEY_OPTS_MAX,
nft_tunnel_opts_policy, NULL);
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
index cbbbc4ecad3a..1c5343c936a8 100644
--- a/net/netfilter/nft_xfrm.c
+++ b/net/netfilter/nft_xfrm.c
@@ -27,6 +27,7 @@ struct nft_xfrm {
u8 dreg;
u8 dir;
u8 spnum;
+ u8 len;
};
static int nft_xfrm_get_init(const struct nft_ctx *ctx,
@@ -50,7 +51,7 @@ static int nft_xfrm_get_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- priv->key = ntohl(nla_get_u32(tb[NFTA_XFRM_KEY]));
+ priv->key = ntohl(nla_get_be32(tb[NFTA_XFRM_KEY]));
switch (priv->key) {
case NFT_XFRM_KEY_REQID:
case NFT_XFRM_KEY_SPI:
@@ -86,6 +87,7 @@ static int nft_xfrm_get_init(const struct nft_ctx *ctx,
priv->spnum = spnum;
+ priv->len = len;
return nft_parse_register_store(ctx, tb[NFTA_XFRM_DREG], &priv->dreg,
NULL, NFT_DATA_VALUE, len);
}
@@ -132,13 +134,13 @@ static void nft_xfrm_state_get_key(const struct nft_xfrm *priv,
WARN_ON_ONCE(1);
break;
case NFT_XFRM_KEY_DADDR_IP4:
- *dest = state->id.daddr.a4;
+ *dest = (__force __u32)state->id.daddr.a4;
return;
case NFT_XFRM_KEY_DADDR_IP6:
memcpy(dest, &state->id.daddr.in6, sizeof(struct in6_addr));
return;
case NFT_XFRM_KEY_SADDR_IP4:
- *dest = state->props.saddr.a4;
+ *dest = (__force __u32)state->props.saddr.a4;
return;
case NFT_XFRM_KEY_SADDR_IP6:
memcpy(dest, &state->props.saddr.in6, sizeof(struct in6_addr));
@@ -147,7 +149,7 @@ static void nft_xfrm_state_get_key(const struct nft_xfrm *priv,
*dest = state->props.reqid;
return;
case NFT_XFRM_KEY_SPI:
- *dest = state->id.spi;
+ *dest = (__force __u32)state->id.spi;
return;
}
@@ -252,6 +254,31 @@ static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *e
return nft_chain_validate_hooks(ctx->chain, hooks);
}
+static bool nft_xfrm_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_xfrm *priv = nft_expr_priv(expr);
+ const struct nft_xfrm *xfrm;
+
+ if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ xfrm = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->key != xfrm->key ||
+ priv->dreg != xfrm->dreg ||
+ priv->dir != xfrm->dir ||
+ priv->spnum != xfrm->spnum) {
+ nft_reg_track_update(track, expr, priv->dreg, priv->len);
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return nft_expr_reduce_bitwise(track, expr);
+}
static struct nft_expr_type nft_xfrm_type;
static const struct nft_expr_ops nft_xfrm_get_ops = {
@@ -261,6 +288,7 @@ static const struct nft_expr_ops nft_xfrm_get_ops = {
.init = nft_xfrm_get_init,
.dump = nft_xfrm_get_dump,
.validate = nft_xfrm_validate,
+ .reduce = nft_xfrm_reduce,
};
static struct nft_expr_type nft_xfrm_type __read_mostly = {
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 25524e393349..470282cf3fae 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -766,7 +766,7 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
msize += off;
m->u.user.match_size = msize;
- strlcpy(name, match->name, sizeof(name));
+ strscpy(name, match->name, sizeof(name));
module_put(match->me);
strncpy(m->u.user.name, name, sizeof(m->u.user.name));
@@ -1146,7 +1146,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
tsize += off;
t->u.user.target_size = tsize;
- strlcpy(name, target->name, sizeof(name));
+ strscpy(name, target->name, sizeof(name));
module_put(target->me);
strncpy(t->u.user.name, name, sizeof(t->u.user.name));
@@ -1517,7 +1517,7 @@ EXPORT_SYMBOL_GPL(xt_unregister_table);
#ifdef CONFIG_PROC_FS
static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
{
- u8 af = (unsigned long)PDE_DATA(file_inode(seq->file));
+ u8 af = (unsigned long)pde_data(file_inode(seq->file));
struct net *net = seq_file_net(seq);
struct xt_pernet *xt_net;
@@ -1529,7 +1529,7 @@ static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- u8 af = (unsigned long)PDE_DATA(file_inode(seq->file));
+ u8 af = (unsigned long)pde_data(file_inode(seq->file));
struct net *net = seq_file_net(seq);
struct xt_pernet *xt_net;
@@ -1540,7 +1540,7 @@ static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void xt_table_seq_stop(struct seq_file *seq, void *v)
{
- u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file));
+ u_int8_t af = (unsigned long)pde_data(file_inode(seq->file));
mutex_unlock(&xt[af].mutex);
}
@@ -1584,7 +1584,7 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
[MTTG_TRAV_NFP_UNSPEC] = MTTG_TRAV_NFP_SPEC,
[MTTG_TRAV_NFP_SPEC] = MTTG_TRAV_DONE,
};
- uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file));
+ uint8_t nfproto = (unsigned long)pde_data(file_inode(seq->file));
struct nf_mttg_trav *trav = seq->private;
if (ppos != NULL)
@@ -1633,7 +1633,7 @@ static void *xt_mttg_seq_start(struct seq_file *seq, loff_t *pos,
static void xt_mttg_seq_stop(struct seq_file *seq, void *v)
{
- uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file));
+ uint8_t nfproto = (unsigned long)pde_data(file_inode(seq->file));
struct nf_mttg_trav *trav = seq->private;
switch (trav->class) {
@@ -1827,7 +1827,7 @@ int xt_proto_init(struct net *net, u_int8_t af)
root_uid = make_kuid(net->user_ns, 0);
root_gid = make_kgid(net->user_ns, 0);
- strlcpy(buf, xt_prefix[af], sizeof(buf));
+ strscpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TABLES, sizeof(buf));
proc = proc_create_net_data(buf, 0440, net->proc_net, &xt_table_seq_ops,
sizeof(struct seq_net_private),
@@ -1837,7 +1837,7 @@ int xt_proto_init(struct net *net, u_int8_t af)
if (uid_valid(root_uid) && gid_valid(root_gid))
proc_set_user(proc, root_uid, root_gid);
- strlcpy(buf, xt_prefix[af], sizeof(buf));
+ strscpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_MATCHES, sizeof(buf));
proc = proc_create_seq_private(buf, 0440, net->proc_net,
&xt_match_seq_ops, sizeof(struct nf_mttg_trav),
@@ -1847,7 +1847,7 @@ int xt_proto_init(struct net *net, u_int8_t af)
if (uid_valid(root_uid) && gid_valid(root_gid))
proc_set_user(proc, root_uid, root_gid);
- strlcpy(buf, xt_prefix[af], sizeof(buf));
+ strscpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TARGETS, sizeof(buf));
proc = proc_create_seq_private(buf, 0440, net->proc_net,
&xt_target_seq_ops, sizeof(struct nf_mttg_trav),
@@ -1862,12 +1862,12 @@ int xt_proto_init(struct net *net, u_int8_t af)
#ifdef CONFIG_PROC_FS
out_remove_matches:
- strlcpy(buf, xt_prefix[af], sizeof(buf));
+ strscpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_MATCHES, sizeof(buf));
remove_proc_entry(buf, net->proc_net);
out_remove_tables:
- strlcpy(buf, xt_prefix[af], sizeof(buf));
+ strscpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TABLES, sizeof(buf));
remove_proc_entry(buf, net->proc_net);
out:
@@ -1881,15 +1881,15 @@ void xt_proto_fini(struct net *net, u_int8_t af)
#ifdef CONFIG_PROC_FS
char buf[XT_FUNCTION_MAXNAMELEN];
- strlcpy(buf, xt_prefix[af], sizeof(buf));
+ strscpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TABLES, sizeof(buf));
remove_proc_entry(buf, net->proc_net);
- strlcpy(buf, xt_prefix[af], sizeof(buf));
+ strscpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TARGETS, sizeof(buf));
remove_proc_entry(buf, net->proc_net);
- strlcpy(buf, xt_prefix[af], sizeof(buf));
+ strscpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_MATCHES, sizeof(buf));
remove_proc_entry(buf, net->proc_net);
#endif /*CONFIG_PROC_FS*/
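The strlcpy() to strscpy() conversions in this file (and in nft_osf and xt_RATEEST) are more than a spelling change: strlcpy() returns strlen(src), so it always walks the entire source string even when the destination is smaller, while strscpy() stops at the destination bound and signals truncation. Usage sketch, with the truncation handling being hypothetical:

	char buf[XT_FUNCTION_MAXNAMELEN];
	ssize_t n;

	/* returns bytes copied (excluding the NUL), or -E2BIG when src
	 * did not fit; never reads src past what it copies
	 */
	n = strscpy(buf, xt_prefix[af], sizeof(buf));
	if (n < 0)
		return -EINVAL;

These call sites ignore the return value because xt_prefix[] entries are known to fit, so the change is about not over-reading the source, not about new error handling.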
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 267757b0392a..2be2f7a7b60f 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -96,7 +96,7 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name,
return -ENOMEM;
}
- help->helper = helper;
+ rcu_assign_pointer(help->helper, helper);
return 0;
}
@@ -136,6 +136,21 @@ static u16 xt_ct_flags_to_dir(const struct xt_ct_target_info_v1 *info)
}
}
+static void xt_ct_put_helper(struct nf_conn_help *help)
+{
+ struct nf_conntrack_helper *helper;
+
+ if (!help)
+ return;
+
+ /* not yet exposed to other cpus, or ruleset
+ * already detached (post-replacement).
+ */
+ helper = rcu_dereference_raw(help->helper);
+ if (helper)
+ nf_conntrack_helper_put(helper);
+}
+
static int xt_ct_tg_check(const struct xt_tgchk_param *par,
struct xt_ct_target_info_v1 *info)
{
@@ -207,8 +222,7 @@ out:
err4:
help = nfct_help(ct);
- if (help)
- nf_conntrack_helper_put(help->helper);
+ xt_ct_put_helper(help);
err3:
nf_ct_tmpl_free(ct);
err2:
@@ -270,8 +284,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par,
if (ct) {
help = nfct_help(ct);
- if (help)
- nf_conntrack_helper_put(help->helper);
+ xt_ct_put_helper(help);
nf_ct_netns_put(par->net, par->family);
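help->helper is an RCU-managed pointer in conntrack, so the plain reads and writes here were annotation violations. The write side now publishes with rcu_assign_pointer(); the new xt_ct_put_helper() reads with rcu_dereference_raw(), which is legitimate only because, as its comment says, both call sites run when no concurrent reader can exist (template not yet shared, or ruleset already detached). The pairing in one place:

	rcu_assign_pointer(help->helper, helper);	/* publish, with barrier */

	/* teardown-only read: the object is private again, so no
	 * rcu_read_lock() is needed, hence the _raw variant
	 */
	helper = rcu_dereference_raw(help->helper);
	if (helper)
		nf_conntrack_helper_put(helper);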
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index eababc354ff1..cfa44515ab72 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -24,6 +24,8 @@ MODULE_ALIAS("ip6t_DSCP");
MODULE_ALIAS("ipt_TOS");
MODULE_ALIAS("ip6t_TOS");
+#define XT_DSCP_ECN_MASK 3u
+
static unsigned int
dscp_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
@@ -34,8 +36,7 @@ dscp_tg(struct sk_buff *skb, const struct xt_action_param *par)
if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return NF_DROP;
- ipv4_change_dsfield(ip_hdr(skb),
- (__force __u8)(~XT_DSCP_MASK),
+ ipv4_change_dsfield(ip_hdr(skb), XT_DSCP_ECN_MASK,
dinfo->dscp << XT_DSCP_SHIFT);
}
@@ -52,8 +53,7 @@ dscp_tg6(struct sk_buff *skb, const struct xt_action_param *par)
if (skb_ensure_writable(skb, sizeof(struct ipv6hdr)))
return NF_DROP;
- ipv6_change_dsfield(ipv6_hdr(skb),
- (__force __u8)(~XT_DSCP_MASK),
+ ipv6_change_dsfield(ipv6_hdr(skb), XT_DSCP_ECN_MASK,
dinfo->dscp << XT_DSCP_SHIFT);
}
return XT_CONTINUE;
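XT_DSCP_ECN_MASK spells out what (__force __u8)~XT_DSCP_MASK only produced via truncation. The TOS byte is a 6-bit DSCP field above a 2-bit ECN field, and the change-dsfield helpers preserve exactly the bits set in their mask argument. With the uapi values XT_DSCP_MASK = 0xfc and XT_DSCP_SHIFT = 2 (assumed from the header):

	/* TOS byte: bits 7..2 = DSCP, bits 1..0 = ECN
	 *
	 * ~0xfc is 0xffffff03 as an int; cast to __u8 it is 0x03, i.e.
	 * exactly XT_DSCP_ECN_MASK, so the new constant is value-identical
	 * while saying what is meant: keep ECN, rewrite DSCP.
	 */
	ipv4_change_dsfield(ip_hdr(skb), XT_DSCP_ECN_MASK,
			    dinfo->dscp << XT_DSCP_SHIFT);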
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 8aec1b529364..80f6624e2355 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -144,7 +144,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
goto err1;
gnet_stats_basic_sync_init(&est->bstats);
- strlcpy(est->name, info->name, sizeof(est->name));
+ strscpy(est->name, info->name, sizeof(est->name));
spin_lock_init(&est->lock);
est->refcnt = 1;
est->params.interval = info->interval;
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 122db9fbb9f4..116a885adb3c 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -239,8 +239,8 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
oldlen = ipv6h->payload_len;
newlen = htons(ntohs(oldlen) + ret);
if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->csum = csum_add(csum_sub(skb->csum, oldlen),
- newlen);
+ skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)oldlen),
+ (__force __wsum)newlen);
ipv6h->payload_len = newlen;
}
return XT_CONTINUE;
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 459d0696c91a..e4bea1d346cf 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -74,18 +74,10 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
/* This should be in a separate target, but we don't do multiple
targets on the same rule yet */
skb->mark = (skb->mark & ~mark_mask) ^ mark_value;
-
- pr_debug("redirecting: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n",
- iph->protocol, &iph->daddr, ntohs(hp->dest),
- &laddr, ntohs(lport), skb->mark);
-
nf_tproxy_assign_sock(skb, sk);
return NF_ACCEPT;
}
- pr_debug("no socket, dropping: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n",
- iph->protocol, &iph->saddr, ntohs(hp->source),
- &iph->daddr, ntohs(hp->dest), skb->mark);
return NF_DROP;
}
@@ -122,16 +114,12 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
int tproto;
tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
- if (tproto < 0) {
- pr_debug("unable to find transport header in IPv6 packet, dropping\n");
+ if (tproto < 0)
return NF_DROP;
- }
hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
- if (hp == NULL) {
- pr_debug("unable to grab transport header contents in IPv6 packet, dropping\n");
+ if (!hp)
return NF_DROP;
- }
/* check if there's an ongoing connection on the packet
* addresses, this happens if the redirect already happened
@@ -168,19 +156,10 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
/* This should be in a separate target, but we don't do multiple
targets on the same rule yet */
skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value;
-
- pr_debug("redirecting: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n",
- tproto, &iph->saddr, ntohs(hp->source),
- laddr, ntohs(lport), skb->mark);
-
nf_tproxy_assign_sock(skb, sk);
return NF_ACCEPT;
}
- pr_debug("no socket, dropping: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n",
- tproto, &iph->saddr, ntohs(hp->source),
- &iph->daddr, ntohs(hp->dest), skb->mark);
-
return NF_DROP;
}
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 46fcac75f726..5d04ef80a61d 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -62,10 +62,10 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
key[4] = zone->id;
} else {
const struct iphdr *iph = ip_hdr(skb);
- key[0] = (info->flags & XT_CONNLIMIT_DADDR) ?
- iph->daddr : iph->saddr;
- key[0] &= info->mask.ip;
+ key[0] = (info->flags & XT_CONNLIMIT_DADDR) ?
+ (__force __u32)iph->daddr : (__force __u32)iph->saddr;
+ key[0] &= (__force __u32)info->mask.ip;
key[1] = zone->id;
}
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 9c5cfd74a0ee..0859b8f76764 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -1052,7 +1052,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
static void *dl_seq_start(struct seq_file *s, loff_t *pos)
__acquires(htable->lock)
{
- struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
unsigned int *bucket;
spin_lock_bh(&htable->lock);
@@ -1069,7 +1069,7 @@ static void *dl_seq_start(struct seq_file *s, loff_t *pos)
static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
- struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
unsigned int *bucket = v;
*pos = ++(*bucket);
@@ -1083,7 +1083,7 @@ static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
static void dl_seq_stop(struct seq_file *s, void *v)
__releases(htable->lock)
{
- struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
unsigned int *bucket = v;
if (!IS_ERR(bucket))
@@ -1125,7 +1125,7 @@ static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family,
static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family,
struct seq_file *s)
{
- struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *ht = pde_data(file_inode(s->file));
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
@@ -1140,7 +1140,7 @@ static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family,
static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
struct seq_file *s)
{
- struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *ht = pde_data(file_inode(s->file));
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
@@ -1155,7 +1155,7 @@ static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
struct seq_file *s)
{
- struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *ht = pde_data(file_inode(s->file));
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
@@ -1169,7 +1169,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
static int dl_seq_show_v2(struct seq_file *s, void *v)
{
- struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
unsigned int *bucket = (unsigned int *)v;
struct dsthash_ent *ent;
@@ -1183,7 +1183,7 @@ static int dl_seq_show_v2(struct seq_file *s, void *v)
static int dl_seq_show_v1(struct seq_file *s, void *v)
{
- struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
unsigned int *bucket = v;
struct dsthash_ent *ent;
@@ -1197,7 +1197,7 @@ static int dl_seq_show_v1(struct seq_file *s, void *v)
static int dl_seq_show(struct seq_file *s, void *v)
{
- struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
+ struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file));
unsigned int *bucket = v;
struct dsthash_ent *ent;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 0446307516cd..7ddb9a78e3fc 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -551,7 +551,7 @@ static int recent_seq_open(struct inode *inode, struct file *file)
if (st == NULL)
return -ENOMEM;
- st->table = PDE_DATA(inode);
+ st->table = pde_data(inode);
return 0;
}
@@ -559,7 +559,7 @@ static ssize_t
recent_mt_proc_write(struct file *file, const char __user *input,
size_t size, loff_t *loff)
{
- struct recent_table *t = PDE_DATA(file_inode(file));
+ struct recent_table *t = pde_data(file_inode(file));
struct recent_entry *e;
char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")];
const char *c = buf;
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 5e6459e11605..7013f55f05d1 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -220,8 +220,10 @@ static void socket_mt_destroy(const struct xt_mtdtor_param *par)
{
if (par->family == NFPROTO_IPV4)
nf_defrag_ipv4_disable(par->net);
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
else if (par->family == NFPROTO_IPV6)
- nf_defrag_ipv4_disable(par->net);
+ nf_defrag_ipv6_disable(par->net);
+#endif
}
static struct xt_match socket_mt_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 203e24ae472c..b26c1dcfc27b 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -34,7 +34,7 @@ statistic_mt(const struct sk_buff *skb, struct xt_action_param *par)
switch (info->mode) {
case XT_STATISTIC_MODE_RANDOM:
- if ((prandom_u32() & 0x7FFFFFFF) < info->u.random.probability)
+ if ((get_random_u32() & 0x7FFFFFFF) < info->u.random.probability)
ret = !ret;
break;
case XT_STATISTIC_MODE_NTH:
diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c
index 91a19c3ea1a3..f1d5b8465217 100644
--- a/net/netlabel/netlabel_calipso.c
+++ b/net/netlabel/netlabel_calipso.c
@@ -344,6 +344,7 @@ static struct genl_family netlbl_calipso_gnl_family __ro_after_init = {
.module = THIS_MODULE,
.small_ops = netlbl_calipso_ops,
.n_small_ops = ARRAY_SIZE(netlbl_calipso_ops),
+ .resv_start_op = NLBL_CALIPSO_C_LISTALL + 1,
};
/* NetLabel Generic NETLINK Protocol Functions
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 894e6b8f1a86..fa08ee75ac06 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -767,6 +767,7 @@ static struct genl_family netlbl_cipsov4_gnl_family __ro_after_init = {
.module = THIS_MODULE,
.small_ops = netlbl_cipsov4_ops,
.n_small_ops = ARRAY_SIZE(netlbl_cipsov4_ops),
+ .resv_start_op = NLBL_CIPSOV4_C_LISTALL + 1,
};
/*
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index beb0e573266d..54c083003947 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -885,6 +885,8 @@ int netlbl_bitmap_walk(const unsigned char *bitmap, u32 bitmap_len,
unsigned char bitmask;
unsigned char byte;
+ if (offset >= bitmap_len)
+ return -1;
byte_offset = offset / 8;
byte = bitmap[byte_offset];
bit_spot = offset;
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 032b7d7b32c7..689eaa2afbec 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -826,6 +826,7 @@ static struct genl_family netlbl_mgmt_gnl_family __ro_after_init = {
.module = THIS_MODULE,
.small_ops = netlbl_mgmt_genl_ops,
.n_small_ops = ARRAY_SIZE(netlbl_mgmt_genl_ops),
+ .resv_start_op = NLBL_MGMT_C_VERSION + 1,
};
/*
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 8490e46359ae..9996883bf2b7 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -885,7 +885,7 @@ static int netlbl_unlabel_staticadd(struct sk_buff *skb,
/* Don't allow users to add both IPv4 and IPv6 addresses for a
* single entry. However, allow users to create two entries, one each
- * for IPv4 and IPv4, with the same LSM security context which should
+ * for IPv4 and IPv6, with the same LSM security context which should
* achieve the same result. */
if (!info->attrs[NLBL_UNLABEL_A_SECCTX] ||
!info->attrs[NLBL_UNLABEL_A_IFACE] ||
@@ -1374,6 +1374,7 @@ static struct genl_family netlbl_unlabel_gnl_family __ro_after_init = {
.module = THIS_MODULE,
.small_ops = netlbl_unlabel_genl_ops,
.n_small_ops = ARRAY_SIZE(netlbl_unlabel_genl_ops),
+ .resv_start_op = NLBL_UNLABEL_C_STATICLISTDEF + 1,
};
/*
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 7b344035bfe3..a662e8a5ff84 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -159,6 +159,8 @@ EXPORT_SYMBOL(do_trace_netlink_extack);
static inline u32 netlink_group_mask(u32 group)
{
+ if (group > 32)
+ return 0;
return group ? 1 << (group - 1) : 0;
}
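The added bound matters because the shift is otherwise undefined behaviour: legacy netlink multicast membership is a 32-bit mask, and a group number above 32 would shift a u32 by 32 or more. Guarded helper, sketched:

	static inline u32 example_group_mask(u32 group)
	{
		/* groups are 1-based; 0 means no group, and anything past
		 * 32 cannot be represented in the legacy bitmask
		 */
		if (group == 0 || group > 32)
			return 0;

		return 1u << (group - 1);
	}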
@@ -1929,7 +1931,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
struct scm_cookie scm;
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
- int noblock = flags & MSG_DONTWAIT;
size_t copied;
struct sk_buff *skb, *data_skb;
int err, ret;
@@ -1939,7 +1940,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
copied = 0;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (skb == NULL)
goto out;
@@ -1973,7 +1974,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
copied = len;
}
- skb_reset_transport_header(data_skb);
err = skb_copy_datagram_msg(data_skb, 0, msg, copied);
if (msg->msg_name) {
@@ -2261,6 +2261,13 @@ static int netlink_dump(struct sock *sk)
* single netdev. The outcome is a MSG_TRUNC error.
*/
skb_reserve(skb, skb_tailroom(skb) - alloc_size);
+
+ /* Make sure malicious BPF programs cannot read uninitialized memory
+ * from skb->head -> skb->data
+ */
+ skb_reset_network_header(skb);
+ skb_reset_mac_header(skb);
+
netlink_skb_set_owner_r(skb, sk);
if (nlk->dump_done_errno > 0) {
@@ -2393,6 +2400,69 @@ error_free:
}
EXPORT_SYMBOL(__netlink_dump_start);
+static size_t
+netlink_ack_tlv_len(struct netlink_sock *nlk, int err,
+ const struct netlink_ext_ack *extack)
+{
+ size_t tlvlen;
+
+ if (!extack || !(nlk->flags & NETLINK_F_EXT_ACK))
+ return 0;
+
+ tlvlen = 0;
+ if (extack->_msg)
+ tlvlen += nla_total_size(strlen(extack->_msg) + 1);
+ if (extack->cookie_len)
+ tlvlen += nla_total_size(extack->cookie_len);
+
+ /* The following attributes are only reported for errors, not warnings */
+ if (!err)
+ return tlvlen;
+
+ if (extack->bad_attr)
+ tlvlen += nla_total_size(sizeof(u32));
+ if (extack->policy)
+ tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy);
+ if (extack->miss_type)
+ tlvlen += nla_total_size(sizeof(u32));
+ if (extack->miss_nest)
+ tlvlen += nla_total_size(sizeof(u32));
+
+ return tlvlen;
+}
+
+static void
+netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb,
+ struct nlmsghdr *nlh, int err,
+ const struct netlink_ext_ack *extack)
+{
+ if (extack->_msg)
+ WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg));
+ if (extack->cookie_len)
+ WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
+ extack->cookie_len, extack->cookie));
+
+ if (!err)
+ return;
+
+ if (extack->bad_attr &&
+ !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
+ (u8 *)extack->bad_attr >= in_skb->data + in_skb->len))
+ WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
+ (u8 *)extack->bad_attr - (u8 *)nlh));
+ if (extack->policy)
+ netlink_policy_dump_write_attr(skb, extack->policy,
+ NLMSGERR_ATTR_POLICY);
+ if (extack->miss_type)
+ WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_TYPE,
+ extack->miss_type));
+ if (extack->miss_nest &&
+ !WARN_ON((u8 *)extack->miss_nest < in_skb->data ||
+ (u8 *)extack->miss_nest > in_skb->data + in_skb->len))
+ WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_NEST,
+ (u8 *)extack->miss_nest - (u8 *)nlh));
+}
+
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
const struct netlink_ext_ack *extack)
{
@@ -2400,29 +2470,20 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
struct nlmsghdr *rep;
struct nlmsgerr *errmsg;
size_t payload = sizeof(*errmsg);
- size_t tlvlen = 0;
struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk);
unsigned int flags = 0;
- bool nlk_has_extack = nlk->flags & NETLINK_F_EXT_ACK;
+ size_t tlvlen;
/* Error messages get the original request appended, unless the user
* requests to cap the error message, and get extra error data if
* requested.
*/
- if (nlk_has_extack && extack && extack->_msg)
- tlvlen += nla_total_size(strlen(extack->_msg) + 1);
-
if (err && !(nlk->flags & NETLINK_F_CAP_ACK))
payload += nlmsg_len(nlh);
else
flags |= NLM_F_CAPPED;
- if (err && nlk_has_extack && extack && extack->bad_attr)
- tlvlen += nla_total_size(sizeof(u32));
- if (nlk_has_extack && extack && extack->cookie_len)
- tlvlen += nla_total_size(extack->cookie_len);
- if (err && nlk_has_extack && extack && extack->policy)
- tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy);
+ tlvlen = netlink_ack_tlv_len(nlk, err, extack);
if (tlvlen)
flags |= NLM_F_ACK_TLVS;
@@ -2433,31 +2494,16 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
return;
}
- rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
- NLMSG_ERROR, payload, flags);
+ rep = nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
+ NLMSG_ERROR, payload, flags);
errmsg = nlmsg_data(rep);
errmsg->error = err;
- memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh));
+ unsafe_memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg)
+ ? nlh->nlmsg_len : sizeof(*nlh),
+ /* Bounds checked by the skb layer. */);
- if (nlk_has_extack && extack) {
- if (extack->_msg) {
- WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG,
- extack->_msg));
- }
- if (err && extack->bad_attr &&
- !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
- (u8 *)extack->bad_attr >= in_skb->data +
- in_skb->len))
- WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
- (u8 *)extack->bad_attr -
- (u8 *)nlh));
- if (extack->cookie_len)
- WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
- extack->cookie_len, extack->cookie));
- if (extack->policy)
- netlink_policy_dump_write_attr(skb, extack->policy,
- NLMSGERR_ATTR_POLICY);
- }
+ if (tlvlen)
+ netlink_ack_tlv_fill(in_skb, skb, nlh, err, extack);
nlmsg_end(skb, rep);
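Splitting netlink_ack() into netlink_ack_tlv_len() and netlink_ack_tlv_fill() replaces the old scattered size bookkeeping, but introduces a pairing invariant: every attribute the fill helper can emit must be counted by the len helper, because the reply skb is sized once, before filling, and nlmsg_new() cannot grow it (the WARN_ONs around the nla_put*() calls catch a mismatch). The call order, condensed from the function above:

	tlvlen = netlink_ack_tlv_len(nlk, err, extack);
	if (tlvlen)
		flags |= NLM_F_ACK_TLVS;

	skb = nlmsg_new(payload + tlvlen, GFP_KERNEL);	/* sized here, once */
	if (!skb)
		return;

	/* ... nlmsg_put() and errmsg setup as above ... */

	if (tlvlen)
		netlink_ack_tlv_fill(in_skb, skb, nlh, err, extack);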
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 1afca2a6c2ac..3e16527beb91 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -78,10 +78,29 @@ static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_CTRL) |
static unsigned long *mc_groups = &mc_group_start;
static unsigned long mc_groups_longs = 1;
+/* We need the last attribute with a non-zero ID, hence the 2-entry array */
+static struct nla_policy genl_policy_reject_all[] = {
+ { .type = NLA_REJECT },
+ { .type = NLA_REJECT },
+};
+
static int genl_ctrl_event(int event, const struct genl_family *family,
const struct genl_multicast_group *grp,
int grp_id);
+static void
+genl_op_fill_in_reject_policy(const struct genl_family *family,
+ struct genl_ops *op)
+{
+ BUILD_BUG_ON(ARRAY_SIZE(genl_policy_reject_all) - 1 != 1);
+
+ if (op->policy || op->cmd < family->resv_start_op)
+ return;
+
+ op->policy = genl_policy_reject_all;
+ op->maxattr = 1;
+}
+
static const struct genl_family *genl_family_find_byid(unsigned int id)
{
return idr_find(&genl_fam_idr, id);
@@ -113,6 +132,8 @@ static void genl_op_from_full(const struct genl_family *family,
op->maxattr = family->maxattr;
if (!op->policy)
op->policy = family->policy;
+
+ genl_op_fill_in_reject_policy(family, op);
}
static int genl_get_cmd_full(u32 cmd, const struct genl_family *family,
@@ -142,6 +163,8 @@ static void genl_op_from_small(const struct genl_family *family,
op->maxattr = family->maxattr;
op->policy = family->policy;
+
+ genl_op_fill_in_reject_policy(family, op);
}
static int genl_get_cmd_small(u32 cmd, const struct genl_family *family,
@@ -357,6 +380,8 @@ static int genl_validate_ops(const struct genl_family *family)
genl_get_cmd_by_index(i, family, &op);
if (op.dumpit == NULL && op.doit == NULL)
return -EINVAL;
+ if (WARN_ON(op.cmd >= family->resv_start_op && op.validate))
+ return -EINVAL;
for (j = i + 1; j < genl_get_cmd_cnt(family); j++) {
struct genl_ops op2;
@@ -739,6 +764,36 @@ out:
return err;
}
+static int genl_header_check(const struct genl_family *family,
+ struct nlmsghdr *nlh, struct genlmsghdr *hdr,
+ struct netlink_ext_ack *extack)
+{
+ u16 flags;
+
+ /* Only for commands added after we started validating */
+ if (hdr->cmd < family->resv_start_op)
+ return 0;
+
+ if (hdr->reserved) {
+ NL_SET_ERR_MSG(extack, "genlmsghdr.reserved field is not 0");
+ return -EINVAL;
+ }
+
+ /* Old netlink flags have pretty loose semantics, allow only the flags
+ * consumed by the core where we can enforce the meaning.
+ */
+ flags = nlh->nlmsg_flags;
+ if ((flags & NLM_F_DUMP) == NLM_F_DUMP) /* DUMP is 2 bits */
+ flags &= ~NLM_F_DUMP;
+ if (flags & ~(NLM_F_REQUEST | NLM_F_ACK | NLM_F_ECHO)) {
+ NL_SET_ERR_MSG(extack,
+ "ambiguous or reserved bits set in nlmsg_flags");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int genl_family_rcv_msg(const struct genl_family *family,
struct sk_buff *skb,
struct nlmsghdr *nlh,
@@ -757,6 +812,9 @@ static int genl_family_rcv_msg(const struct genl_family *family,
if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
return -EINVAL;
+ if (genl_header_check(family, nlh, hdr, extack))
+ return -EINVAL;
+
if (genl_get_cmd(hdr->cmd, family, &op))
return -EOPNOTSUPP;
@@ -1174,13 +1232,17 @@ static int ctrl_dumppolicy_start(struct netlink_callback *cb)
op.policy,
op.maxattr);
if (err)
- return err;
+ goto err_free_state;
}
}
if (!ctx->state)
return -ENODATA;
return 0;
+
+err_free_state:
+ netlink_policy_dump_free(ctx->state);
+ return err;
}
static void *ctrl_dumppolicy_prep(struct sk_buff *skb,
@@ -1344,6 +1406,7 @@ static struct genl_family genl_ctrl __ro_after_init = {
.module = THIS_MODULE,
.ops = genl_ctrl_ops,
.n_ops = ARRAY_SIZE(genl_ctrl_ops),
+ .resv_start_op = CTRL_CMD_GETPOLICY + 1,
.mcgrps = genl_ctrl_groups,
.n_mcgrps = ARRAY_SIZE(genl_ctrl_groups),
.id = GENL_ID_CTRL,
@@ -1358,7 +1421,7 @@ static int genl_bind(struct net *net, int group)
unsigned int id;
int ret = 0;
- genl_lock_all();
+ down_read(&cb_lock);
idr_for_each_entry(&genl_fam_idr, family, id) {
const struct genl_multicast_group *grp;
@@ -1379,7 +1442,7 @@ static int genl_bind(struct net *net, int group)
break;
}
- genl_unlock_all();
+ up_read(&cb_lock);
return ret;
}
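
For readers following the resv_start_op mechanics above, a minimal sketch of how a family would opt in; all foo_* names here are hypothetical, not from this patch. Any command numbered at or above resv_start_op is given genl_policy_reject_all when it declares no policy of its own, must leave genlmsghdr.reserved zeroed, and may not set GENL_DONT_VALIDATE_* flags:

static struct genl_family foo_genl_family __ro_after_init = {
	.name		= "foo",		/* hypothetical */
	.version	= 1,
	.maxattr	= FOO_ATTR_MAX,		/* hypothetical */
	.ops		= foo_genl_ops,		/* hypothetical */
	.n_ops		= ARRAY_SIZE(foo_genl_ops),
	/* commands >= this value get the strict core validation above */
	.resv_start_op	= FOO_CMD_LAST + 1,	/* hypothetical */
	.module		= THIS_MODULE,
};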
diff --git a/net/netlink/policy.c b/net/netlink/policy.c
index 8d7c900e27f4..87e3de0fde89 100644
--- a/net/netlink/policy.c
+++ b/net/netlink/policy.c
@@ -144,7 +144,7 @@ int netlink_policy_dump_add_policy(struct netlink_policy_dump_state **pstate,
err = add_policy(&state, policy, maxtype);
if (err)
- return err;
+ goto err_try_undo;
for (policy_idx = 0;
policy_idx < state->n_alloc && state->policies[policy_idx].policy;
@@ -164,7 +164,7 @@ int netlink_policy_dump_add_policy(struct netlink_policy_dump_state **pstate,
policy[type].nested_policy,
policy[type].len);
if (err)
- return err;
+ goto err_try_undo;
break;
default:
break;
@@ -174,6 +174,16 @@ int netlink_policy_dump_add_policy(struct netlink_policy_dump_state **pstate,
*pstate = state;
return 0;
+
+err_try_undo:
+ /* Try to preserve reasonable unwind semantics - if we're starting from
+ * scratch, clean up fully; otherwise record what we got and let the caller clean up.
+ */
+ if (!*pstate)
+ netlink_policy_dump_free(state);
+ else
+ *pstate = state;
+ return err;
}
static bool
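
The err_try_undo label above gives netlink_policy_dump_add_policy() a predictable failure contract; a hedged caller-side sketch, with policy_a/policy_b and the maxtype values purely illustrative:

	struct netlink_policy_dump_state *state = NULL;
	int err;

	err = netlink_policy_dump_add_policy(&state, policy_a, maxtype_a);
	if (!err)
		err = netlink_policy_dump_add_policy(&state, policy_b, maxtype_b);
	if (err) {
		/* state is still NULL or was recorded by the unwind above;
		 * netlink_policy_dump_free() copes with either.
		 */
		netlink_policy_dump_free(state);
		return err;
	}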
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index fa9dc2ba3941..6f7f4392cffb 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1159,7 +1159,8 @@ static int nr_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
}
/* Now we can treat all alike */
- if ((skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &er)) == NULL) {
+ skb = skb_recv_datagram(sk, flags, &er);
+ if (!skb) {
release_sock(sk);
return er;
}
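
This is the first of several skb_recv_datagram() conversions in this series (nfc/llcp_sock.c, nfc/rawsock.c and packet/af_packet.c follow the same pattern): the separate noblock parameter is gone and MSG_DONTWAIT is honoured directly from flags. A minimal sketch:

	struct sk_buff *skb;
	int err;

	/* before: skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
	 *			      flags & MSG_DONTWAIT, &err);
	 */
	skb = skb_recv_datagram(sk, flags, &err);
	if (!skb)
		return err;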
diff --git a/net/nfc/core.c b/net/nfc/core.c
index dc7a2404efdf..eb2c0959e5b6 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -38,7 +38,7 @@ int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -94,7 +94,7 @@ int nfc_dev_up(struct nfc_dev *dev)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -142,7 +142,7 @@ int nfc_dev_down(struct nfc_dev *dev)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -207,7 +207,7 @@ int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -246,7 +246,7 @@ int nfc_stop_poll(struct nfc_dev *dev)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -291,7 +291,7 @@ int nfc_dep_link_up(struct nfc_dev *dev, int target_index, u8 comm_mode)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -335,7 +335,7 @@ int nfc_dep_link_down(struct nfc_dev *dev)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -401,7 +401,7 @@ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -448,7 +448,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -495,7 +495,7 @@ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb,
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
kfree_skb(skb);
goto error;
@@ -552,7 +552,7 @@ int nfc_enable_se(struct nfc_dev *dev, u32 se_idx)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -601,7 +601,7 @@ int nfc_disable_se(struct nfc_dev *dev, u32 se_idx)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -975,7 +975,7 @@ static void nfc_release(struct device *d)
kfree(se);
}
- ida_simple_remove(&nfc_index_ida, dev->idx);
+ ida_free(&nfc_index_ida, dev->idx);
kfree(dev);
}
@@ -1066,7 +1066,7 @@ struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops,
if (!dev)
return NULL;
- rc = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL);
+ rc = ida_alloc(&nfc_index_ida, GFP_KERNEL);
if (rc < 0)
goto err_free_dev;
dev->idx = rc;
@@ -1134,6 +1134,7 @@ int nfc_register_device(struct nfc_dev *dev)
dev->rfkill = NULL;
}
}
+ dev->shutting_down = false;
device_unlock(&dev->dev);
rc = nfc_genl_device_added(dev);
@@ -1165,13 +1166,12 @@ void nfc_unregister_device(struct nfc_dev *dev)
if (dev->rfkill) {
rfkill_unregister(dev->rfkill);
rfkill_destroy(dev->rfkill);
+ dev->rfkill = NULL;
}
+ dev->shutting_down = true;
device_unlock(&dev->dev);
if (dev->ops->check_presence) {
- device_lock(&dev->dev);
- dev->shutting_down = true;
- device_unlock(&dev->dev);
del_timer_sync(&dev->check_pres_timer);
cancel_work_sync(&dev->check_pres_work);
}
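
The ida_simple_get()/ida_simple_remove() calls above map one-to-one onto the current API: with a lower bound of 0 and no upper bound, ida_simple_get(ida, 0, 0, gfp) is exactly ida_alloc(ida, gfp). A self-contained sketch, example_ida being illustrative:

#include <linux/idr.h>

static DEFINE_IDA(example_ida);		/* illustrative */

static int example_get_index(void)
{
	int id = ida_alloc(&example_ida, GFP_KERNEL);	/* smallest free ID */

	if (id < 0)
		return id;		/* -ENOMEM or -ENOSPC */
	/* ... use id, then pair with ida_free(&example_ida, id) ... */
	return id;
}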
diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c
index 05c60988f59a..4902f5064098 100644
--- a/net/nfc/hci/hcp.c
+++ b/net/nfc/hci/hcp.c
@@ -73,14 +73,12 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe,
if (firstfrag) {
firstfrag = false;
packet->message.header = HCP_HEADER(type, instruction);
- if (ptr) {
- memcpy(packet->message.data, ptr,
- data_link_len - 1);
- ptr += data_link_len - 1;
- }
} else {
- memcpy(&packet->message, ptr, data_link_len);
- ptr += data_link_len;
+ packet->message.header = *ptr++;
+ }
+ if (ptr) {
+ memcpy(packet->message.data, ptr, data_link_len - 1);
+ ptr += data_link_len - 1;
}
/* This is the last fragment, set the cb bit */
diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h
index d49d4bf2e37c..c1d9be636933 100644
--- a/net/nfc/llcp.h
+++ b/net/nfc/llcp.h
@@ -6,7 +6,6 @@
enum llcp_state {
LLCP_CONNECTED = 1, /* wait_for_packet() wants that */
LLCP_CONNECTING,
- LLCP_DISCONNECTING,
LLCP_CLOSED,
LLCP_BOUND,
LLCP_LISTEN,
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index 5ad5157aa9c5..3364caabef8b 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -383,7 +383,7 @@ u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local,
pr_debug("WKS %d\n", ssap);
/* This is a WKS, let's check if it's free */
- if (local->local_wks & BIT(ssap)) {
+ if (test_bit(ssap, &local->local_wks)) {
mutex_unlock(&local->sdp_lock);
return LLCP_SAP_MAX;
@@ -737,13 +737,6 @@ static void nfc_llcp_tx_work(struct work_struct *work)
print_hex_dump_debug("LLCP Tx: ", DUMP_PREFIX_OFFSET,
16, 1, skb->data, skb->len, true);
- if (ptype == LLCP_PDU_DISC && sk != NULL &&
- sk->sk_state == LLCP_DISCONNECTING) {
- nfc_llcp_sock_unlink(&local->sockets, sk);
- sock_orphan(sk);
- sock_put(sk);
- }
-
if (ptype == LLCP_PDU_I)
copy_skb = skb_copy(skb, GFP_ATOMIC);
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 6cfd30fc0798..77642d18a3b4 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -108,21 +108,13 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
llcp_sock->service_name_len,
GFP_KERNEL);
if (!llcp_sock->service_name) {
- nfc_llcp_local_put(llcp_sock->local);
- llcp_sock->local = NULL;
- llcp_sock->dev = NULL;
ret = -ENOMEM;
- goto put_dev;
+ goto sock_llcp_put_local;
}
llcp_sock->ssap = nfc_llcp_get_sdp_ssap(local, llcp_sock);
if (llcp_sock->ssap == LLCP_SAP_MAX) {
- nfc_llcp_local_put(llcp_sock->local);
- llcp_sock->local = NULL;
- kfree(llcp_sock->service_name);
- llcp_sock->service_name = NULL;
- llcp_sock->dev = NULL;
ret = -EADDRINUSE;
- goto put_dev;
+ goto free_service_name;
}
llcp_sock->reserved_ssap = llcp_sock->ssap;
@@ -132,6 +124,19 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
pr_debug("Socket bound to SAP %d\n", llcp_sock->ssap);
sk->sk_state = LLCP_BOUND;
+ nfc_put_device(dev);
+ release_sock(sk);
+
+ return 0;
+
+free_service_name:
+ kfree(llcp_sock->service_name);
+ llcp_sock->service_name = NULL;
+
+sock_llcp_put_local:
+ nfc_llcp_local_put(llcp_sock->local);
+ llcp_sock->local = NULL;
+ llcp_sock->dev = NULL;
put_dev:
nfc_put_device(dev);
@@ -626,23 +631,16 @@ static int llcp_sock_release(struct socket *sock)
}
}
- if (llcp_sock->reserved_ssap < LLCP_SAP_MAX)
- nfc_llcp_put_ssap(llcp_sock->local, llcp_sock->ssap);
-
- release_sock(sk);
-
- /* Keep this sock alive and therefore do not remove it from the sockets
- * list until the DISC PDU has been actually sent. Otherwise we would
- * reply with DM PDUs before sending the DISC one.
- */
- if (sk->sk_state == LLCP_DISCONNECTING)
- return err;
-
if (sock->type == SOCK_RAW)
nfc_llcp_sock_unlink(&local->raw_sockets, sk);
else
nfc_llcp_sock_unlink(&local->sockets, sk);
+ if (llcp_sock->reserved_ssap < LLCP_SAP_MAX)
+ nfc_llcp_put_ssap(llcp_sock->local, llcp_sock->ssap);
+
+ release_sock(sk);
+
out:
sock_orphan(sk);
sock_put(sk);
@@ -712,10 +710,8 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
llcp_sock->local = nfc_llcp_local_get(local);
llcp_sock->ssap = nfc_llcp_get_local_ssap(local);
if (llcp_sock->ssap == LLCP_SAP_MAX) {
- nfc_llcp_local_put(llcp_sock->local);
- llcp_sock->local = NULL;
ret = -ENOMEM;
- goto put_dev;
+ goto sock_llcp_put_local;
}
llcp_sock->reserved_ssap = llcp_sock->ssap;
@@ -760,8 +756,11 @@ sock_unlink:
sock_llcp_release:
nfc_llcp_put_ssap(local, llcp_sock->ssap);
+
+sock_llcp_put_local:
nfc_llcp_local_put(llcp_sock->local);
llcp_sock->local = NULL;
+ llcp_sock->dev = NULL;
put_dev:
nfc_put_device(dev);
@@ -789,6 +788,11 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg,
lock_sock(sk);
+ if (!llcp_sock->local) {
+ release_sock(sk);
+ return -ENODEV;
+ }
+
if (sk->sk_type == SOCK_DGRAM) {
DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, addr,
msg->msg_name);
@@ -817,7 +821,6 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg,
static int llcp_sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
- int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
unsigned int copied, rlen;
struct sk_buff *skb, *cskb;
@@ -838,7 +841,7 @@ static int llcp_sock_recvmsg(struct socket *sock, struct msghdr *msg,
if (flags & (MSG_OOB))
return -EOPNOTSUPP;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb) {
pr_err("Recv datagram failed state %d %d %d",
sk->sk_state, err, sock_error(sk));
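
The bind()/connect() error-path consolidation above follows the standard kernel unwind shape: one label per acquired resource, released in reverse order of acquisition, so each failure site needs only a single goto. A generic hedged skeleton (get_a()/put_a() and friends are illustrative):

static int example_setup(void)
{
	int err;

	err = get_a();
	if (err)
		return err;
	err = get_b();
	if (err)
		goto put_a;
	err = get_c();
	if (err)
		goto put_b;
	return 0;

put_b:
	put_b();
put_a:
	put_a();
	return err;
}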
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index d2537383a3e8..6a193cce2a75 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -560,6 +560,10 @@ static int nci_close_device(struct nci_dev *ndev)
mutex_lock(&ndev->req_lock);
if (!test_and_clear_bit(NCI_UP, &ndev->flags)) {
+ /* Need to flush the cmd wq in case
+ * there is a queued/running cmd_work
+ */
+ flush_workqueue(ndev->cmd_wq);
del_timer_sync(&ndev->cmd_timer);
del_timer_sync(&ndev->data_timer);
mutex_unlock(&ndev->req_lock);
diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c
index 6055dc9a82aa..aa5e712adf07 100644
--- a/net/nfc/nci/data.c
+++ b/net/nfc/nci/data.c
@@ -118,7 +118,7 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev,
skb_frag = nci_skb_alloc(ndev,
(NCI_DATA_HDR_SIZE + frag_len),
- GFP_KERNEL);
+ GFP_ATOMIC);
if (skb_frag == NULL) {
rc = -ENOMEM;
goto free_exit;
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index 19703a649b5a..78c4b6addf15 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -153,7 +153,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
i = 0;
skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len +
- NCI_DATA_HDR_SIZE, GFP_KERNEL);
+ NCI_DATA_HDR_SIZE, GFP_ATOMIC);
if (!skb)
return -ENOMEM;
@@ -184,7 +184,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
if (i < data_len) {
skb = nci_skb_alloc(ndev,
conn_info->max_pkt_payload_len +
- NCI_DATA_HDR_SIZE, GFP_KERNEL);
+ NCI_DATA_HDR_SIZE, GFP_ATOMIC);
if (!skb)
return -ENOMEM;
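
The GFP_KERNEL -> GFP_ATOMIC switches in nci/data.c and nci/hci.c (and nlmsg_new()/genlmsg_multicast() in netlink.c below) matter because these paths can run where sleeping is forbidden, e.g. under a spinlock or from timer/softirq context. The trade-off, sketched:

	/* sleepable process context: may block while memory is reclaimed */
	skb = nci_skb_alloc(ndev, len, GFP_KERNEL);

	/* atomic context: never sleeps, but fails more readily, so every
	 * caller must handle NULL
	 */
	skb = nci_skb_alloc(ndev, len, GFP_ATOMIC);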
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index c027c76d493c..cc8fa9e36159 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -317,14 +317,13 @@ static void nci_uart_tty_receive(struct tty_struct *tty, const u8 *data,
* Arguments:
*
* tty pointer to tty instance data
- * file pointer to open file object for device
* cmd IOCTL command code
* arg argument for IOCTL call (cmd dependent)
*
* Return Value: Command dependent
*/
-static int nci_uart_tty_ioctl(struct tty_struct *tty, struct file *file,
- unsigned int cmd, unsigned long arg)
+static int nci_uart_tty_ioctl(struct tty_struct *tty, unsigned int cmd,
+ unsigned long arg)
{
struct nci_uart *nu = (void *)tty->disc_data;
int err = 0;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index f184b0db79d4..9d91087b9399 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1244,7 +1244,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
struct sk_buff *msg;
void *hdr;
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!msg)
return -ENOMEM;
@@ -1260,7 +1260,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
genlmsg_end(msg, hdr);
- genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);
+ genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC);
return 0;
@@ -1783,6 +1783,7 @@ static struct genl_family nfc_genl_family __ro_after_init = {
.module = THIS_MODULE,
.ops = nfc_genl_ops,
.n_ops = ARRAY_SIZE(nfc_genl_ops),
+ .resv_start_op = NFC_CMD_DEACTIVATE_TARGET + 1,
.mcgrps = nfc_genl_mcgrps,
.n_mcgrps = ARRAY_SIZE(nfc_genl_mcgrps),
};
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 0ca214ab5aef..8dd569765f96 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -238,7 +238,6 @@ static int rawsock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
static int rawsock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags)
{
- int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
struct sk_buff *skb;
int copied;
@@ -246,7 +245,7 @@ static int rawsock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
pr_debug("sock=%p sk=%p len=%zu flags=%d\n", sock, sk, len, flags);
- skb = skb_recv_datagram(sk, flags, noblock, &rc);
+ skb = skb_recv_datagram(sk, flags, &rc);
if (!skb)
return rc;
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 076774034bb9..ca3ebfdb3023 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -373,6 +373,7 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
update_ip_l4_checksum(skb, nh, *addr, new_addr);
csum_replace4(&nh->check, *addr, new_addr);
skb_clear_hash(skb);
+ ovs_ct_clear(skb, NULL);
*addr = new_addr;
}
@@ -420,15 +421,47 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
update_ipv6_checksum(skb, l4_proto, addr, new_addr);
skb_clear_hash(skb);
+ ovs_ct_clear(skb, NULL);
memcpy(addr, new_addr, sizeof(__be32[4]));
}
-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
+static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, u8 ipv6_tclass, u8 mask)
{
+ u8 old_ipv6_tclass = ipv6_get_dsfield(nh);
+
+ ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask);
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ csum_replace(&skb->csum, (__force __wsum)(old_ipv6_tclass << 12),
+ (__force __wsum)(ipv6_tclass << 12));
+
+ ipv6_change_dsfield(nh, ~mask, ipv6_tclass);
+}
+
+static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 mask)
+{
+ u32 ofl;
+
+ ofl = nh->flow_lbl[0] << 16 | nh->flow_lbl[1] << 8 | nh->flow_lbl[2];
+ fl = OVS_MASKED(ofl, fl, mask);
+
/* Bits 21-24 are always unmasked, so this retains their values. */
- OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
- OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
- OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
+ nh->flow_lbl[0] = (u8)(fl >> 16);
+ nh->flow_lbl[1] = (u8)(fl >> 8);
+ nh->flow_lbl[2] = (u8)fl;
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ csum_replace(&skb->csum, (__force __wsum)htonl(ofl), (__force __wsum)htonl(fl));
+}
+
+static void set_ipv6_ttl(struct sk_buff *skb, struct ipv6hdr *nh, u8 new_ttl, u8 mask)
+{
+ new_ttl = OVS_MASKED(nh->hop_limit, new_ttl, mask);
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ csum_replace(&skb->csum, (__force __wsum)(nh->hop_limit << 8),
+ (__force __wsum)(new_ttl << 8));
+ nh->hop_limit = new_ttl;
}
static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
@@ -546,18 +579,17 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
}
}
if (mask->ipv6_tclass) {
- ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
+ set_ipv6_dsfield(skb, nh, key->ipv6_tclass, mask->ipv6_tclass);
flow_key->ip.tos = ipv6_get_dsfield(nh);
}
if (mask->ipv6_label) {
- set_ipv6_fl(nh, ntohl(key->ipv6_label),
+ set_ipv6_fl(skb, nh, ntohl(key->ipv6_label),
ntohl(mask->ipv6_label));
flow_key->ipv6.label =
*(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
}
if (mask->ipv6_hlimit) {
- OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit,
- mask->ipv6_hlimit);
+ set_ipv6_ttl(skb, nh, key->ipv6_hlimit, mask->ipv6_hlimit);
flow_key->ip.ttl = nh->hop_limit;
}
return 0;
@@ -630,6 +662,7 @@ static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
static void set_tp_port(struct sk_buff *skb, __be16 *port,
__be16 new_port, __sum16 *check)
{
+ ovs_ct_clear(skb, NULL);
inet_proto_csum_replace2(check, skb, *port, new_port, false);
*port = new_port;
}
@@ -669,6 +702,7 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
uh->dest = dst;
flow_key->tp.src = src;
flow_key->tp.dst = dst;
+ ovs_ct_clear(skb, NULL);
}
skb_clear_hash(skb);
@@ -731,6 +765,8 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
skb_clear_hash(skb);
+ ovs_ct_clear(skb, NULL);
+
flow_key->tp.src = sh->source;
flow_key->tp.dst = sh->dest;
@@ -997,7 +1033,7 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
actions = nla_next(sample_arg, &rem);
if ((arg->probability != U32_MAX) &&
- (!arg->probability || prandom_u32() > arg->probability)) {
+ (!arg->probability || get_random_u32() > arg->probability)) {
if (last)
consume_skb(skb);
return 0;
@@ -1021,7 +1057,7 @@ static int clone(struct datapath *dp, struct sk_buff *skb,
int rem = nla_len(attr);
bool dont_clone_flow_key;
- /* The first action is always 'OVS_CLONE_ATTR_ARG'. */
+ /* The first action is always 'OVS_CLONE_ATTR_EXEC'. */
clone_arg = nla_data(attr);
dont_clone_flow_key = nla_get_u32(clone_arg);
actions = nla_next(clone_arg, &rem);
@@ -1509,8 +1545,8 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
pr_warn("%s: deferred action limit reached, drop sample action\n",
ovs_dp_name(dp));
} else { /* Recirc action */
- pr_warn("%s: deferred action limit reached, drop recirc action\n",
- ovs_dp_name(dp));
+ pr_warn("%s: deferred action limit reached, drop recirc action (recirc_id=%#x)\n",
+ ovs_dp_name(dp), recirc_id);
}
}
}
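
The new set_ipv6_dsfield(), set_ipv6_fl() and set_ipv6_ttl() helpers share one pattern: under CHECKSUM_COMPLETE, skb->csum caches a sum over the packet bytes, so rewriting a header field must patch that cache with csum_replace(). A hedged sketch for a generic byte-wide field, where shift positions the byte inside its 16-bit checksum word (cf. hop_limit << 8 above):

static void rewrite_csummed_u8(struct sk_buff *skb, u8 *field,
			       u8 new_val, int shift)	/* illustrative */
{
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		csum_replace(&skb->csum,
			     (__force __wsum)(*field << shift),
			     (__force __wsum)(new_val << shift));
	*field = new_val;
}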
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index c07afff57dd3..c7b10234cf7c 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -734,6 +734,57 @@ static bool skb_nfct_cached(struct net *net,
}
#if IS_ENABLED(CONFIG_NF_NAT)
+static void ovs_nat_update_key(struct sw_flow_key *key,
+ const struct sk_buff *skb,
+ enum nf_nat_manip_type maniptype)
+{
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ __be16 src;
+
+ key->ct_state |= OVS_CS_F_SRC_NAT;
+ if (key->eth.type == htons(ETH_P_IP))
+ key->ipv4.addr.src = ip_hdr(skb)->saddr;
+ else if (key->eth.type == htons(ETH_P_IPV6))
+ memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
+ sizeof(key->ipv6.addr.src));
+ else
+ return;
+
+ if (key->ip.proto == IPPROTO_UDP)
+ src = udp_hdr(skb)->source;
+ else if (key->ip.proto == IPPROTO_TCP)
+ src = tcp_hdr(skb)->source;
+ else if (key->ip.proto == IPPROTO_SCTP)
+ src = sctp_hdr(skb)->source;
+ else
+ return;
+
+ key->tp.src = src;
+ } else {
+ __be16 dst;
+
+ key->ct_state |= OVS_CS_F_DST_NAT;
+ if (key->eth.type == htons(ETH_P_IP))
+ key->ipv4.addr.dst = ip_hdr(skb)->daddr;
+ else if (key->eth.type == htons(ETH_P_IPV6))
+ memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
+ sizeof(key->ipv6.addr.dst));
+ else
+ return;
+
+ if (key->ip.proto == IPPROTO_UDP)
+ dst = udp_hdr(skb)->dest;
+ else if (key->ip.proto == IPPROTO_TCP)
+ dst = tcp_hdr(skb)->dest;
+ else if (key->ip.proto == IPPROTO_SCTP)
+ dst = sctp_hdr(skb)->dest;
+ else
+ return;
+
+ key->tp.dst = dst;
+ }
+}
+
/* Modelled after nf_nat_ipv[46]_fn().
* range is only used for new, uninitialized NAT state.
* Returns either NF_ACCEPT or NF_DROP.
@@ -741,7 +792,7 @@ static bool skb_nfct_cached(struct net *net,
static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype)
+ enum nf_nat_manip_type maniptype, struct sw_flow_key *key)
{
int hooknum, nh_off, err = NF_ACCEPT;
@@ -813,58 +864,11 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
push:
skb_push_rcsum(skb, nh_off);
- return err;
-}
-
-static void ovs_nat_update_key(struct sw_flow_key *key,
- const struct sk_buff *skb,
- enum nf_nat_manip_type maniptype)
-{
- if (maniptype == NF_NAT_MANIP_SRC) {
- __be16 src;
-
- key->ct_state |= OVS_CS_F_SRC_NAT;
- if (key->eth.type == htons(ETH_P_IP))
- key->ipv4.addr.src = ip_hdr(skb)->saddr;
- else if (key->eth.type == htons(ETH_P_IPV6))
- memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
- sizeof(key->ipv6.addr.src));
- else
- return;
-
- if (key->ip.proto == IPPROTO_UDP)
- src = udp_hdr(skb)->source;
- else if (key->ip.proto == IPPROTO_TCP)
- src = tcp_hdr(skb)->source;
- else if (key->ip.proto == IPPROTO_SCTP)
- src = sctp_hdr(skb)->source;
- else
- return;
-
- key->tp.src = src;
- } else {
- __be16 dst;
-
- key->ct_state |= OVS_CS_F_DST_NAT;
- if (key->eth.type == htons(ETH_P_IP))
- key->ipv4.addr.dst = ip_hdr(skb)->daddr;
- else if (key->eth.type == htons(ETH_P_IPV6))
- memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
- sizeof(key->ipv6.addr.dst));
- else
- return;
-
- if (key->ip.proto == IPPROTO_UDP)
- dst = udp_hdr(skb)->dest;
- else if (key->ip.proto == IPPROTO_TCP)
- dst = tcp_hdr(skb)->dest;
- else if (key->ip.proto == IPPROTO_SCTP)
- dst = sctp_hdr(skb)->dest;
- else
- return;
+ /* Update the flow key if NAT was successful. */
+ if (err == NF_ACCEPT)
+ ovs_nat_update_key(key, skb, maniptype);
- key->tp.dst = dst;
- }
+ return err;
}
/* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */
@@ -906,7 +910,7 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
} else {
return NF_ACCEPT; /* Connection is not NATed. */
}
- err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype);
+ err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype, key);
if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
if (ct->status & IPS_SRC_NAT) {
@@ -916,17 +920,13 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
maniptype = NF_NAT_MANIP_SRC;
err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range,
- maniptype);
+ maniptype, key);
} else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL,
- NF_NAT_MANIP_SRC);
+ NF_NAT_MANIP_SRC, key);
}
}
- /* Mark NAT done if successful and update the flow key. */
- if (err == NF_ACCEPT)
- ovs_nat_update_key(key, skb, maniptype);
-
return err;
}
#else /* !CONFIG_NF_NAT */
@@ -1015,7 +1015,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
* connections which we will commit, we may need to attach
* the helper here.
*/
- if (info->commit && info->helper && !nfct_help(ct)) {
+ if (!nf_ct_is_confirmed(ct) && info->commit &&
+ info->helper && !nfct_help(ct)) {
int err = __nf_ct_try_assign_helper(ct, info->ct,
GFP_ATOMIC);
if (err)
@@ -1342,7 +1343,9 @@ int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
nf_ct_put(ct);
nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
- ovs_ct_fill_key(skb, key, false);
+
+ if (key)
+ ovs_ct_fill_key(skb, key, false);
return 0;
}
@@ -1980,7 +1983,8 @@ static int ovs_ct_limit_set_zone_limit(struct nlattr *nla_zone_limit,
} else {
struct ovs_ct_limit *ct_limit;
- ct_limit = kmalloc(sizeof(*ct_limit), GFP_KERNEL);
+ ct_limit = kmalloc(sizeof(*ct_limit),
+ GFP_KERNEL_ACCOUNT);
if (!ct_limit)
return -ENOMEM;
@@ -2250,14 +2254,16 @@ exit_err:
static const struct genl_small_ops ct_limit_genl_ops[] = {
{ .cmd = OVS_CT_LIMIT_CMD_SET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
- * privilege. */
+ .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+ * privilege.
+ */
.doit = ovs_ct_limit_cmd_set,
},
{ .cmd = OVS_CT_LIMIT_CMD_DEL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
- * privilege. */
+ .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+ * privilege.
+ */
.doit = ovs_ct_limit_cmd_del,
},
{ .cmd = OVS_CT_LIMIT_CMD_GET,
@@ -2281,6 +2287,7 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
.parallel_ops = true,
.small_ops = ct_limit_genl_ops,
.n_small_ops = ARRAY_SIZE(ct_limit_genl_ops),
+ .resv_start_op = OVS_CT_LIMIT_CMD_GET + 1,
.mcgrps = &ovs_ct_limit_multicast_group,
.n_mcgrps = 1,
.module = THIS_MODULE,
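
On GENL_ADMIN_PERM vs GENL_UNS_ADMIN_PERM above: the former demands CAP_NET_ADMIN in the initial user namespace, the latter accepts CAP_NET_ADMIN in the user namespace owning the socket's network namespace, which is what per-netns OVS management needs. Declaration sketch with illustrative EXAMPLE_* names:

	{ .cmd	    = EXAMPLE_CMD_SET,		/* illustrative */
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags    = GENL_UNS_ADMIN_PERM,	/* ns_capable(), not capable() */
	  .doit	    = example_cmd_set,		/* illustrative */
	},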
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 67ad08320886..8b84869eb2ac 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -37,6 +37,7 @@
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/pkt_cls.h>
#include "datapath.h"
#include "flow.h"
@@ -251,10 +252,17 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
upcall.mru = OVS_CB(skb)->mru;
error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
- if (unlikely(error))
- kfree_skb(skb);
- else
+ switch (error) {
+ case 0:
+ case -EAGAIN:
+ case -ERESTARTSYS:
+ case -EINTR:
consume_skb(skb);
+ break;
+ default:
+ kfree_skb(skb);
+ break;
+ }
stats_counter = &stats->n_missed;
goto out;
}
@@ -550,8 +558,9 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
out:
if (err)
skb_tx_error(skb);
- kfree_skb(user_skb);
- kfree_skb(nskb);
+ consume_skb(user_skb);
+ consume_skb(nskb);
+
return err;
}
@@ -683,6 +692,7 @@ static struct genl_family dp_packet_genl_family __ro_after_init = {
.parallel_ops = true,
.small_ops = dp_packet_genl_ops,
.n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
+ .resv_start_op = OVS_PACKET_CMD_EXECUTE + 1,
.module = THIS_MODULE,
};
@@ -1500,6 +1510,7 @@ static struct genl_family dp_flow_genl_family __ro_after_init = {
.parallel_ops = true,
.small_ops = dp_flow_genl_ops,
.n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
+ .resv_start_op = OVS_FLOW_CMD_SET + 1,
.mcgrps = &ovs_dp_flow_multicast_group,
.n_mcgrps = 1,
.module = THIS_MODULE,
@@ -1514,6 +1525,7 @@ static size_t ovs_dp_cmd_msg_size(void)
msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
+ msgsize += nla_total_size(sizeof(u32) * nr_cpu_ids); /* OVS_DP_ATTR_PER_CPU_PIDS */
return msgsize;
}
@@ -1525,7 +1537,8 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
struct ovs_header *ovs_header;
struct ovs_dp_stats dp_stats;
struct ovs_dp_megaflow_stats dp_megaflow_stats;
- int err;
+ struct dp_nlsk_pids *pids = ovsl_dereference(dp->upcall_portids);
+ int err, pids_len;
ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
flags, cmd);
@@ -1555,6 +1568,12 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
ovs_flow_tbl_masks_cache_size(&dp->table)))
goto nla_put_failure;
+ if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && pids) {
+ pids_len = min(pids->n_pids, nr_cpu_ids) * sizeof(u32);
+ if (nla_put(skb, OVS_DP_ATTR_PER_CPU_PIDS, pids_len, &pids->pids))
+ goto nla_put_failure;
+ }
+
genlmsg_end(skb, ovs_header);
return 0;
@@ -1597,12 +1616,11 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb,
if (IS_ERR(dp))
return;
- WARN(dp->user_features, "Dropping previously announced user features\n");
+ pr_warn("%s: Dropping previously announced user features\n",
+ ovs_dp_name(dp));
dp->user_features = 0;
}
-DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
-
static int ovs_dp_set_upcall_portids(struct datapath *dp,
const struct nlattr *ids)
{
@@ -1657,7 +1675,7 @@ u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
{
- u32 user_features = 0;
+ u32 user_features = 0, old_features = dp->user_features;
int err;
if (a[OVS_DP_ATTR_USER_FEATURES]) {
@@ -1696,10 +1714,12 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
return err;
}
- if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
- static_branch_enable(&tc_recirc_sharing_support);
- else
- static_branch_disable(&tc_recirc_sharing_support);
+ if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
+ !(old_features & OVS_DP_F_TC_RECIRC_SHARING))
+ tc_skb_ext_tc_enable();
+ else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
+ (old_features & OVS_DP_F_TC_RECIRC_SHARING))
+ tc_skb_ext_tc_disable();
return 0;
}
@@ -1778,6 +1798,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
parms.dp = dp;
parms.port_no = OVSP_LOCAL;
parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
+ parms.desired_ifindex = a[OVS_DP_ATTR_IFINDEX]
+ ? nla_get_u32(a[OVS_DP_ATTR_IFINDEX]) : 0;
/* So far only local changes have been made, now need the lock. */
ovs_lock();
@@ -1801,7 +1823,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_dp_reset_user_features(skb, info);
}
- goto err_unlock_and_destroy_meters;
+ goto err_destroy_portids;
}
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
@@ -1816,6 +1838,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_notify(&dp_datapath_genl_family, reply, info);
return 0;
+err_destroy_portids:
+ kfree(rcu_dereference_raw(dp->upcall_portids));
err_unlock_and_destroy_meters:
ovs_unlock();
ovs_meters_exit(dp);
@@ -1839,6 +1863,9 @@ static void __dp_destroy(struct datapath *dp)
struct flow_table *table = &dp->table;
int i;
+ if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
+ tc_skb_ext_tc_disable();
+
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
struct vport *vport;
struct hlist_node *n;
@@ -1992,6 +2019,7 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
[OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0,
PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
+ [OVS_DP_ATTR_IFINDEX] = {.type = NLA_U32 },
};
static const struct genl_small_ops dp_datapath_genl_ops[] = {
@@ -2028,6 +2056,7 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = {
.parallel_ops = true,
.small_ops = dp_datapath_genl_ops,
.n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
+ .resv_start_op = OVS_DP_CMD_SET + 1,
.mcgrps = &ovs_dp_datapath_multicast_group,
.n_mcgrps = 1,
.module = THIS_MODULE,
@@ -2195,7 +2224,10 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
!a[OVS_VPORT_ATTR_UPCALL_PID])
return -EINVAL;
- if (a[OVS_VPORT_ATTR_IFINDEX])
+
+ parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
+
+ if (a[OVS_VPORT_ATTR_IFINDEX] && parms.type != OVS_VPORT_TYPE_INTERNAL)
return -EOPNOTSUPP;
port_no = a[OVS_VPORT_ATTR_PORT_NO]
@@ -2232,11 +2264,12 @@ restart:
}
parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
- parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
parms.options = a[OVS_VPORT_ATTR_OPTIONS];
parms.dp = dp;
parms.port_no = port_no;
parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
+ parms.desired_ifindex = a[OVS_VPORT_ATTR_IFINDEX]
+ ? nla_get_u32(a[OVS_VPORT_ATTR_IFINDEX]) : 0;
vport = new_vport(&parms);
err = PTR_ERR(vport);
@@ -2511,6 +2544,7 @@ struct genl_family dp_vport_genl_family __ro_after_init = {
.parallel_ops = true,
.small_ops = dp_vport_genl_ops,
.n_small_ops = ARRAY_SIZE(dp_vport_genl_ops),
+ .resv_start_op = OVS_VPORT_CMD_SET + 1,
.mcgrps = &ovs_dp_vport_multicast_group,
.n_mcgrps = 1,
.module = THIS_MODULE,
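
The kfree_skb()/consume_skb() split introduced in ovs_dp_process_packet() and queue_userspace_packet() is about drop accounting: kfree_skb() flags the packet as dropped and is visible to dropwatch/perf drop probes, while consume_skb() marks an ordinary end of life. An equivalent sketch of the switch above:

	if (err && err != -EAGAIN && err != -ERESTARTSYS && err != -EINTR)
		kfree_skb(skb);		/* genuine failure: count as a drop */
	else
		consume_skb(skb);	/* delivered or benign: not a drop */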
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index fcfe6cb46441..0cd29971a907 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -253,8 +253,6 @@ static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
extern struct notifier_block ovs_dp_device_notifier;
extern struct genl_family dp_vport_genl_family;
-DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
-
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
void ovs_dp_detach_port(struct vport *);
int ovs_dp_upcall(struct datapath *, struct sk_buff *,
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 02096f2ec678..e20d1a973417 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -34,6 +34,7 @@
#include <net/mpls.h>
#include <net/ndisc.h>
#include <net/nsh.h>
+#include <net/pkt_cls.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include "conntrack.h"
@@ -240,6 +241,144 @@ static bool icmphdr_ok(struct sk_buff *skb)
sizeof(struct icmphdr));
}
+/**
+ * get_ipv6_ext_hdrs() - Parses packet and sets IPv6 extension header flags.
+ *
+ * @skb: buffer where extension header data starts in packet
+ * @nh: ipv6 header
+ * @ext_hdrs: flags are stored here
+ *
+ * OFPIEH12_UNREP is set if more than one of a given IPv6 extension header
+ * is unexpectedly encountered. (Two destination options headers may be
+ * expected and would not cause this bit to be set.)
+ *
+ * OFPIEH12_UNSEQ is set if IPv6 extension headers were not in the order
+ * preferred (but not required) by RFC 2460:
+ *
+ * When more than one extension header is used in the same packet, it is
+ * recommended that those headers appear in the following order:
+ * IPv6 header
+ * Hop-by-Hop Options header
+ * Destination Options header
+ * Routing header
+ * Fragment header
+ * Authentication header
+ * Encapsulating Security Payload header
+ * Destination Options header
+ * upper-layer header
+ */
+static void get_ipv6_ext_hdrs(struct sk_buff *skb, struct ipv6hdr *nh,
+ u16 *ext_hdrs)
+{
+ u8 next_type = nh->nexthdr;
+ unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
+ int dest_options_header_count = 0;
+
+ *ext_hdrs = 0;
+
+ while (ipv6_ext_hdr(next_type)) {
+ struct ipv6_opt_hdr _hdr, *hp;
+
+ switch (next_type) {
+ case IPPROTO_NONE:
+ *ext_hdrs |= OFPIEH12_NONEXT;
+ /* stop parsing */
+ return;
+
+ case IPPROTO_ESP:
+ if (*ext_hdrs & OFPIEH12_ESP)
+ *ext_hdrs |= OFPIEH12_UNREP;
+ if ((*ext_hdrs & ~(OFPIEH12_HOP | OFPIEH12_DEST |
+ OFPIEH12_ROUTER | OFPIEH12_FRAG |
+ OFPIEH12_AUTH | OFPIEH12_UNREP)) ||
+ dest_options_header_count >= 2) {
+ *ext_hdrs |= OFPIEH12_UNSEQ;
+ }
+ *ext_hdrs |= OFPIEH12_ESP;
+ break;
+
+ case IPPROTO_AH:
+ if (*ext_hdrs & OFPIEH12_AUTH)
+ *ext_hdrs |= OFPIEH12_UNREP;
+ if ((*ext_hdrs &
+ ~(OFPIEH12_HOP | OFPIEH12_DEST | OFPIEH12_ROUTER |
+ OFPIEH12_FRAG | OFPIEH12_UNREP)) ||
+ dest_options_header_count >= 2) {
+ *ext_hdrs |= OFPIEH12_UNSEQ;
+ }
+ *ext_hdrs |= OFPIEH12_AUTH;
+ break;
+
+ case IPPROTO_DSTOPTS:
+ if (dest_options_header_count == 0) {
+ if (*ext_hdrs &
+ ~(OFPIEH12_HOP | OFPIEH12_UNREP))
+ *ext_hdrs |= OFPIEH12_UNSEQ;
+ *ext_hdrs |= OFPIEH12_DEST;
+ } else if (dest_options_header_count == 1) {
+ if (*ext_hdrs &
+ ~(OFPIEH12_HOP | OFPIEH12_DEST |
+ OFPIEH12_ROUTER | OFPIEH12_FRAG |
+ OFPIEH12_AUTH | OFPIEH12_ESP |
+ OFPIEH12_UNREP)) {
+ *ext_hdrs |= OFPIEH12_UNSEQ;
+ }
+ } else {
+ *ext_hdrs |= OFPIEH12_UNREP;
+ }
+ dest_options_header_count++;
+ break;
+
+ case IPPROTO_FRAGMENT:
+ if (*ext_hdrs & OFPIEH12_FRAG)
+ *ext_hdrs |= OFPIEH12_UNREP;
+ if ((*ext_hdrs & ~(OFPIEH12_HOP |
+ OFPIEH12_DEST |
+ OFPIEH12_ROUTER |
+ OFPIEH12_UNREP)) ||
+ dest_options_header_count >= 2) {
+ *ext_hdrs |= OFPIEH12_UNSEQ;
+ }
+ *ext_hdrs |= OFPIEH12_FRAG;
+ break;
+
+ case IPPROTO_ROUTING:
+ if (*ext_hdrs & OFPIEH12_ROUTER)
+ *ext_hdrs |= OFPIEH12_UNREP;
+ if ((*ext_hdrs & ~(OFPIEH12_HOP |
+ OFPIEH12_DEST |
+ OFPIEH12_UNREP)) ||
+ dest_options_header_count >= 2) {
+ *ext_hdrs |= OFPIEH12_UNSEQ;
+ }
+ *ext_hdrs |= OFPIEH12_ROUTER;
+ break;
+
+ case IPPROTO_HOPOPTS:
+ if (*ext_hdrs & OFPIEH12_HOP)
+ *ext_hdrs |= OFPIEH12_UNREP;
+ /* OFPIEH12_HOP is set to 1 if a hop-by-hop IPv6
+ * extension header is present as the first
+ * extension header in the packet.
+ */
+ if (*ext_hdrs == 0)
+ *ext_hdrs |= OFPIEH12_HOP;
+ else
+ *ext_hdrs |= OFPIEH12_UNSEQ;
+ break;
+
+ default:
+ return;
+ }
+
+ hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
+ if (!hp)
+ break;
+ next_type = hp->nexthdr;
+ start += ipv6_optlen(hp);
+ }
+}
+
static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
{
unsigned short frag_off;
@@ -255,6 +394,8 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
nh = ipv6_hdr(skb);
+ get_ipv6_ext_hdrs(skb, nh, &key->ipv6.exthdrs);
+
key->ip.proto = NEXTHDR_NONE;
key->ip.tos = ipv6_get_dsfield(nh);
key->ip.ttl = nh->hop_limit;
@@ -266,7 +407,7 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
if (flags & IP6_FH_F_FRAG) {
if (frag_off) {
key->ip.frag = OVS_FRAG_TYPE_LATER;
- key->ip.proto = nexthdr;
+ key->ip.proto = NEXTHDR_FRAGMENT;
return 0;
}
key->ip.frag = OVS_FRAG_TYPE_FIRST;
@@ -895,7 +1036,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
key->mac_proto = res;
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
- if (static_branch_unlikely(&tc_recirc_sharing_support)) {
+ if (tc_skb_ext_tc_enabled()) {
tc_ext = skb_ext_find(skb, TC_SKB_EXT);
key->recirc_id = tc_ext ? tc_ext->chain : 0;
OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
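
get_ipv6_ext_hdrs() walks the extension chain with skb_header_pointer(), which stays safe on non-linear skbs by copying into the caller's on-stack buffer whenever the requested bytes straddle fragments. The core idiom, sketched:

	struct ipv6_opt_hdr _hdr;
	const struct ipv6_opt_hdr *hp;

	hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
	if (!hp)
		return;			/* offset runs past the packet */
	next_type = hp->nexthdr;
	start += ipv6_optlen(hp);	/* 8 * (hdrlen + 1) bytes */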
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 758a8c77f736..073ab73ffeaa 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -32,6 +32,19 @@ enum sw_flow_mac_proto {
#define SW_FLOW_KEY_INVALID 0x80
#define MPLS_LABEL_DEPTH 3
+/* Bit definitions for IPv6 Extension Header pseudo-field. */
+enum ofp12_ipv6exthdr_flags {
+ OFPIEH12_NONEXT = 1 << 0, /* "No next header" encountered. */
+ OFPIEH12_ESP = 1 << 1, /* Encrypted Sec Payload header present. */
+ OFPIEH12_AUTH = 1 << 2, /* Authentication header present. */
+ OFPIEH12_DEST = 1 << 3, /* 1 or 2 dest headers present. */
+ OFPIEH12_FRAG = 1 << 4, /* Fragment header present. */
+ OFPIEH12_ROUTER = 1 << 5, /* Router header present. */
+ OFPIEH12_HOP = 1 << 6, /* Hop-by-hop header present. */
+ OFPIEH12_UNREP = 1 << 7, /* Unexpected repeats encountered. */
+ OFPIEH12_UNSEQ = 1 << 8 /* Unexpected sequencing encountered. */
+};
+
/* Store options at the end of the array if they are less than the
* maximum size. This allows us to get the benefits of variable length
* matching for small options.
@@ -121,6 +134,7 @@ struct sw_flow_key {
struct in6_addr dst; /* IPv6 destination address. */
} addr;
__be32 label; /* IPv6 flow label. */
+ u16 exthdrs; /* IPv6 extension header flags */
union {
struct {
struct in6_addr src;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index fd1f809e9bc1..4a07ab094a84 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -346,7 +346,7 @@ size_t ovs_key_attr_size(void)
/* Whenever adding new OVS_KEY_ FIELDS, we should consider
* updating this function.
*/
- BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29);
+ BUILD_BUG_ON(OVS_KEY_ATTR_MAX != 32);
return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
@@ -369,7 +369,8 @@ size_t ovs_key_attr_size(void)
+ nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */
+ nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */
- + nla_total_size(28); /* OVS_KEY_ATTR_ND */
+ + nla_total_size(28) /* OVS_KEY_ATTR_ND */
+ + nla_total_size(2); /* OVS_KEY_ATTR_IPV6_EXTHDRS */
}
static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = {
@@ -437,6 +438,8 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
.len = sizeof(struct ovs_key_ct_tuple_ipv6) },
[OVS_KEY_ATTR_NSH] = { .len = OVS_ATTR_NESTED,
.next = ovs_nsh_key_attr_lens, },
+ [OVS_KEY_ATTR_IPV6_EXTHDRS] = {
+ .len = sizeof(struct ovs_key_ipv6_exthdrs) },
};
static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
@@ -479,7 +482,14 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
return -EINVAL;
}
- if (attrs & (1 << type)) {
+ if (type == OVS_KEY_ATTR_PACKET_TYPE ||
+ type == OVS_KEY_ATTR_ND_EXTENSIONS ||
+ type == OVS_KEY_ATTR_TUNNEL_INFO) {
+ OVS_NLERR(log, "Key type %d is not supported", type);
+ return -EINVAL;
+ }
+
+ if (attrs & (1ULL << type)) {
OVS_NLERR(log, "Duplicate key (type %d).", type);
return -EINVAL;
}
@@ -492,7 +502,7 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
}
if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) {
- attrs |= 1 << type;
+ attrs |= 1ULL << type;
a[type] = nla;
}
}
@@ -1597,6 +1607,17 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
}
+ if (attrs & (1ULL << OVS_KEY_ATTR_IPV6_EXTHDRS)) {
+ const struct ovs_key_ipv6_exthdrs *ipv6_exthdrs_key;
+
+ ipv6_exthdrs_key = nla_data(a[OVS_KEY_ATTR_IPV6_EXTHDRS]);
+
+ SW_FLOW_KEY_PUT(match, ipv6.exthdrs,
+ ipv6_exthdrs_key->hdrs, is_mask);
+
+ attrs &= ~(1ULL << OVS_KEY_ATTR_IPV6_EXTHDRS);
+ }
+
if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
const struct ovs_key_arp *arp_key;
@@ -2099,6 +2120,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
ipv4_key->ipv4_frag = output->ip.frag;
} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
struct ovs_key_ipv6 *ipv6_key;
+ struct ovs_key_ipv6_exthdrs *ipv6_exthdrs_key;
nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
if (!nla)
@@ -2113,6 +2135,13 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
ipv6_key->ipv6_tclass = output->ip.tos;
ipv6_key->ipv6_hlimit = output->ip.ttl;
ipv6_key->ipv6_frag = output->ip.frag;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6_EXTHDRS,
+ sizeof(*ipv6_exthdrs_key));
+ if (!nla)
+ goto nla_put_failure;
+ ipv6_exthdrs_key = nla_data(nla);
+ ipv6_exthdrs_key->hdrs = output->ipv6.exthdrs;
} else if (swkey->eth.type == htons(ETH_P_NSH)) {
if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
goto nla_put_failure;
@@ -2201,8 +2230,8 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
icmpv6_key->icmpv6_type = ntohs(output->tp.src);
icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
- if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
- icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
+ if (swkey->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+ swkey->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
struct ovs_key_nd *nd_key;
nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
@@ -2288,6 +2317,62 @@ static struct sw_flow_actions *nla_alloc_flow_actions(int size)
return sfa;
}
+static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len);
+
+static void ovs_nla_free_check_pkt_len_action(const struct nlattr *action)
+{
+ const struct nlattr *a;
+ int rem;
+
+ nla_for_each_nested(a, action, rem) {
+ switch (nla_type(a)) {
+ case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL:
+ case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER:
+ ovs_nla_free_nested_actions(nla_data(a), nla_len(a));
+ break;
+ }
+ }
+}
+
+static void ovs_nla_free_clone_action(const struct nlattr *action)
+{
+ const struct nlattr *a = nla_data(action);
+ int rem = nla_len(action);
+
+ switch (nla_type(a)) {
+ case OVS_CLONE_ATTR_EXEC:
+ /* The real list of actions follows this attribute. */
+ a = nla_next(a, &rem);
+ ovs_nla_free_nested_actions(a, rem);
+ break;
+ }
+}
+
+static void ovs_nla_free_dec_ttl_action(const struct nlattr *action)
+{
+ const struct nlattr *a = nla_data(action);
+
+ switch (nla_type(a)) {
+ case OVS_DEC_TTL_ATTR_ACTION:
+ ovs_nla_free_nested_actions(nla_data(a), nla_len(a));
+ break;
+ }
+}
+
+static void ovs_nla_free_sample_action(const struct nlattr *action)
+{
+ const struct nlattr *a = nla_data(action);
+ int rem = nla_len(action);
+
+ switch (nla_type(a)) {
+ case OVS_SAMPLE_ATTR_ARG:
+ /* The real list of actions follows this attribute. */
+ a = nla_next(a, &rem);
+ ovs_nla_free_nested_actions(a, rem);
+ break;
+ }
+}
+
static void ovs_nla_free_set_action(const struct nlattr *a)
{
const struct nlattr *ovs_key = nla_data(a);
@@ -2301,25 +2386,54 @@ static void ovs_nla_free_set_action(const struct nlattr *a)
}
}
-void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
{
const struct nlattr *a;
int rem;
- if (!sf_acts)
+ /* Whenever new actions are added, consider whether this
+ * function needs updating.
+ */
+ BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23);
+
+ if (!actions)
return;
- nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) {
+ nla_for_each_attr(a, actions, len, rem) {
switch (nla_type(a)) {
- case OVS_ACTION_ATTR_SET:
- ovs_nla_free_set_action(a);
+ case OVS_ACTION_ATTR_CHECK_PKT_LEN:
+ ovs_nla_free_check_pkt_len_action(a);
break;
+
+ case OVS_ACTION_ATTR_CLONE:
+ ovs_nla_free_clone_action(a);
+ break;
+
case OVS_ACTION_ATTR_CT:
ovs_ct_free_action(a);
break;
+
+ case OVS_ACTION_ATTR_DEC_TTL:
+ ovs_nla_free_dec_ttl_action(a);
+ break;
+
+ case OVS_ACTION_ATTR_SAMPLE:
+ ovs_nla_free_sample_action(a);
+ break;
+
+ case OVS_ACTION_ATTR_SET:
+ ovs_nla_free_set_action(a);
+ break;
}
}
+}
+
+void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+{
+ if (!sf_acts)
+ return;
+ ovs_nla_free_nested_actions(sf_acts->actions, sf_acts->actions_len);
kfree(sf_acts);
}
@@ -2351,7 +2465,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);
if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
- if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
+ if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) {
OVS_NLERR(log, "Flow action size exceeds max %u",
MAX_ACTIONS_BUFSIZE);
return ERR_PTR(-EMSGSIZE);
@@ -3190,7 +3304,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
/* Disallow subsequent L2.5+ set actions and mpls_pop
* actions once the last MPLS label in the packet is
- * is popped as there is no check here to ensure that
+ * popped as there is no check here to ensure that
* the new eth type is valid and thus set actions could
* write off the end of the packet or otherwise corrupt
* it.
@@ -3429,7 +3543,9 @@ static int clone_action_to_attr(const struct nlattr *attr,
if (!start)
return -EMSGSIZE;
- err = ovs_nla_put_actions(nla_data(attr), rem, skb);
+ /* Skip OVS_CLONE_ATTR_EXEC, which is always the first attribute. */
+ attr = nla_next(nla_data(attr), &rem);
+ err = ovs_nla_put_actions(attr, rem, skb);
if (err)
nla_nest_cancel(skb, start);
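
The 1 << type to 1ULL << type conversions in __parse_flow_nlattrs() above are load-bearing, not style: attrs is a 64-bit mask, and OVS_KEY_ATTR_IPV6_EXTHDRS pushes attribute types toward bit 32, where shifting a plain int is undefined behaviour. Sketched:

	u64 attrs = 0;
	int type = OVS_KEY_ATTR_IPV6_EXTHDRS;	/* may be >= 32 */

	attrs |= 1ULL << type;	/* well-defined 64-bit shift */
	/* attrs |= 1 << type;	   UB once type reaches the width of int */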
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 04a060ac7fdf..6e38f68f88c2 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -343,7 +343,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
return ERR_PTR(-EINVAL);
/* Allocate and set up the meter before locking anything. */
- meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL);
+ meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL_ACCOUNT);
if (!meter)
return ERR_PTR(-ENOMEM);
@@ -687,9 +687,9 @@ static const struct genl_small_ops dp_meter_genl_ops[] = {
},
{ .cmd = OVS_METER_CMD_SET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
- * privilege.
- */
+ .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+ * privilege.
+ */
.doit = ovs_meter_cmd_set,
},
{ .cmd = OVS_METER_CMD_GET,
@@ -699,9 +699,9 @@ static const struct genl_small_ops dp_meter_genl_ops[] = {
},
{ .cmd = OVS_METER_CMD_DEL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
- * privilege.
- */
+ .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+ * privilege.
+ */
.doit = ovs_meter_cmd_del
},
};
@@ -720,6 +720,7 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
.parallel_ops = true,
.small_ops = dp_meter_genl_ops,
.n_small_ops = ARRAY_SIZE(dp_meter_genl_ops),
+ .resv_start_op = OVS_METER_CMD_GET + 1,
.mcgrps = &ovs_meter_multicast_group,
.n_mcgrps = 1,
.module = THIS_MODULE,
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 5b2ee9c1c00b..74c88a6baa43 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -65,7 +65,7 @@ static int internal_dev_stop(struct net_device *netdev)
static void internal_dev_getinfo(struct net_device *netdev,
struct ethtool_drvinfo *info)
{
- strlcpy(info->driver, "openvswitch", sizeof(info->driver));
+ strscpy(info->driver, "openvswitch", sizeof(info->driver));
}
static const struct ethtool_ops internal_dev_ethtool_ops = {
@@ -147,6 +147,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
}
dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
+ dev->ifindex = parms->desired_ifindex;
internal_dev = internal_dev_priv(vport->dev);
internal_dev->vport = vport;
@@ -189,7 +190,7 @@ static void internal_dev_destroy(struct vport *vport)
rtnl_unlock();
}
-static netdev_tx_t internal_dev_recv(struct sk_buff *skb)
+static int internal_dev_recv(struct sk_buff *skb)
{
struct net_device *netdev = skb->dev;
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index b498dac4e1e0..2f61d5bdce1a 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -115,7 +115,7 @@ error_master_upper_dev_unlink:
error_unlock:
rtnl_unlock();
error_put:
- dev_put_track(vport->dev, &vport->dev_tracker);
+ netdev_put(vport->dev, &vport->dev_tracker);
error_free_vport:
ovs_vport_free(vport);
return ERR_PTR(err);
@@ -137,7 +137,7 @@ static void vport_netdev_free(struct rcu_head *rcu)
{
struct vport *vport = container_of(rcu, struct vport, rcu);
- dev_put_track(vport->dev, &vport->dev_tracker);
+ netdev_put(vport->dev, &vport->dev_tracker);
ovs_vport_free(vport);
}
@@ -173,7 +173,7 @@ void ovs_netdev_tunnel_destroy(struct vport *vport)
*/
if (vport->dev->reg_state == NETREG_REGISTERED)
rtnl_delete_link(vport->dev);
- dev_put_track(vport->dev, &vport->dev_tracker);
+ netdev_put(vport->dev, &vport->dev_tracker);
vport->dev = NULL;
rtnl_unlock();
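
The dev_put_track()/dev_hold_track() to netdev_put()/netdev_hold() renames here and in af_packet.c below are API churn only; the tracker argument still pairs every hold with its put so reference-count leaks can be attributed to a specific holder. The pairing, sketched:

	netdev_hold(dev, &vport->dev_tracker, GFP_KERNEL);	/* take ref */
	/* ... device in use ... */
	netdev_put(dev, &vport->dev_tracker);			/* drop ref */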
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index cf2ce5812489..82a74f998966 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -507,7 +507,7 @@ void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
}
skb->dev = vport->dev;
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
vport->ops->send(skb);
return;
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 9de5030d9801..6ff45e8a0868 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -90,12 +90,14 @@ struct vport {
* @type: New vport's type.
* @options: %OVS_VPORT_ATTR_OPTIONS attribute from Netlink message, %NULL if
* none was supplied.
+ * @desired_ifindex: New vport's ifindex.
* @dp: New vport's datapath.
* @port_no: New vport's port number.
*/
struct vport_parms {
const char *name;
enum ovs_vport_type type;
+ int desired_ifindex;
struct nlattr *options;
/* For ovs_vport_alloc(). */
@@ -130,7 +132,7 @@ struct vport_ops {
int (*set_options)(struct vport *, struct nlattr *);
int (*get_options)(const struct vport *, struct sk_buff *);
- netdev_tx_t (*send) (struct sk_buff *skb);
+ int (*send)(struct sk_buff *skb);
struct module *owner;
struct list_head list;
};
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 5bd409ab4cc2..6ce8dd19f33c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -460,7 +460,7 @@ static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts,
return TP_STATUS_TS_RAW_HARDWARE;
if ((flags & SOF_TIMESTAMPING_SOFTWARE) &&
- ktime_to_timespec64_cond(skb->tstamp, ts))
+ ktime_to_timespec64_cond(skb_tstamp(skb), ts))
return TP_STATUS_TS_SOFTWARE;
return 0;
@@ -1350,7 +1350,7 @@ static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb)
if (READ_ONCE(history[i]) == rxhash)
count++;
- victim = prandom_u32() % ROLLOVER_HLEN;
+ victim = prandom_u32_max(ROLLOVER_HLEN);
/* Avoid dirtying the cache line if possible */
if (READ_ONCE(history[victim]) != rxhash)
@@ -1774,6 +1774,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
match->prot_hook.dev = po->prot_hook.dev;
match->prot_hook.func = packet_rcv_fanout;
match->prot_hook.af_packet_priv = match;
+ match->prot_hook.af_packet_net = read_pnet(&match->net);
match->prot_hook.id_match = match_fanout_group;
match->max_num_members = args->max_num_members;
list_add(&match->list, &fanout_list);
@@ -1788,7 +1789,10 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
err = -ENOSPC;
if (refcount_read(&match->sk_ref) < match->max_num_members) {
__dev_remove_pack(&po->prot_hook);
- po->fanout = match;
+
+ /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
+ WRITE_ONCE(po->fanout, match);
+
po->rollover = rollover;
rollover = NULL;
refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
@@ -1901,7 +1905,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
*/
spkt->spkt_family = dev->type;
- strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
+ strscpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
spkt->spkt_protocol = skb->protocol;
/*
@@ -1920,12 +1924,22 @@ oom:
static void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
{
+ int depth;
+
if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) &&
sock->type == SOCK_RAW) {
skb_reset_mac_header(skb);
skb->protocol = dev_parse_header_protocol(skb);
}
+ /* Move network header to the right position for VLAN tagged packets */
+ if (likely(skb->dev->type == ARPHRD_ETHER) &&
+ eth_type_vlan(skb->protocol) &&
+ __vlan_get_protocol(skb, skb->protocol, &depth) != 0) {
+ if (pskb_may_pull(skb, depth))
+ skb_set_network_header(skb, depth);
+ }
+
skb_probe_transport_header(skb);
}
@@ -2195,6 +2209,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
spin_lock(&sk->sk_receive_queue.lock);
po->stats.stats1.tp_packets++;
sock_skb_set_dropcount(sk, skb);
+ skb_clear_delivery_time(skb);
__skb_queue_tail(&sk->sk_receive_queue, skb);
spin_unlock(&sk->sk_receive_queue.lock);
sk->sk_data_ready(sk);
@@ -2313,8 +2328,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
copy_skb = skb_get(skb);
skb_head = skb->data;
}
- if (copy_skb)
+ if (copy_skb) {
+ memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0,
+ sizeof(PACKET_SKB_CB(copy_skb)->sa.ll));
skb_set_owner_r(copy_skb, sk);
+ }
}
snaplen = po->rx_ring.frame_size - macoff;
if ((int)snaplen < 0) {
@@ -2373,6 +2391,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
po->stats.stats1.tp_packets++;
if (copy_skb) {
status |= TP_STATUS_COPY;
+ skb_clear_delivery_time(copy_skb);
__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
}
spin_unlock(&sk->sk_receive_queue.lock);
@@ -2849,8 +2868,9 @@ tpacket_error:
status = TP_STATUS_SEND_REQUEST;
err = po->xmit(skb);
- if (unlikely(err > 0)) {
- err = net_xmit_errno(err);
+ if (unlikely(err != 0)) {
+ if (err > 0)
+ err = net_xmit_errno(err);
if (err && __packet_get_status(po, ph) ==
TP_STATUS_AVAILABLE) {
/* skb was destructed already */
@@ -3017,8 +3037,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (err)
goto out_free;
- if (sock->type == SOCK_RAW &&
- !dev_validate_header(dev, skb->data, len)) {
+ if ((sock->type == SOCK_RAW &&
+ !dev_validate_header(dev, skb->data, len)) || !skb->len) {
err = -EINVAL;
goto out_free;
}
@@ -3037,6 +3057,11 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
skb->mark = sockc.mark;
skb->tstamp = sockc.transmit_time;
+ if (unlikely(extra_len == 4))
+ skb->no_fcs = 1;
+
+ packet_parse_headers(skb, sock);
+
if (has_vnet_hdr) {
err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
if (err)
@@ -3045,14 +3070,13 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
virtio_net_hdr_set_proto(skb, &vnet_hdr);
}
- packet_parse_headers(skb, sock);
-
- if (unlikely(extra_len == 4))
- skb->no_fcs = 1;
-
err = po->xmit(skb);
- if (err > 0 && (err = net_xmit_errno(err)) != 0)
- goto out_unlock;
+ if (unlikely(err != 0)) {
+ if (err > 0)
+ err = net_xmit_errno(err);
+ if (err)
+ goto out_unlock;
+ }
dev_put(dev);
@@ -3110,7 +3134,7 @@ static int packet_release(struct socket *sock)
packet_cached_dev_reset(po);
if (po->prot_hook.dev) {
- dev_put_track(po->prot_hook.dev, &po->prot_hook.dev_tracker);
+ netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker);
po->prot_hook.dev = NULL;
}
spin_unlock(&po->bind_lock);
@@ -3211,15 +3235,15 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
WRITE_ONCE(po->num, proto);
po->prot_hook.type = proto;
- dev_put_track(po->prot_hook.dev, &po->prot_hook.dev_tracker);
+ netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker);
if (unlikely(unlisted)) {
po->prot_hook.dev = NULL;
WRITE_ONCE(po->ifindex, -1);
packet_cached_dev_reset(po);
} else {
- dev_hold_track(dev, &po->prot_hook.dev_tracker,
- GFP_ATOMIC);
+ netdev_hold(dev, &po->prot_hook.dev_tracker,
+ GFP_ATOMIC);
po->prot_hook.dev = dev;
WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0);
packet_cached_dev_assign(po, dev);
@@ -3353,6 +3377,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
po->prot_hook.func = packet_rcv_spkt;
po->prot_hook.af_packet_priv = sk;
+ po->prot_hook.af_packet_net = sock_net(sk);
if (proto) {
po->prot_hook.type = proto;
@@ -3411,7 +3436,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
* but then it will block.
*/
- skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
/*
* An error occurred so return it. Because skb_recv_datagram()
@@ -3454,9 +3479,11 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
sll->sll_protocol = skb->protocol;
}
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (msg->msg_name) {
+ const size_t max_len = min(sizeof(skb->cb),
+ sizeof(struct sockaddr_storage));
int copy_len;
/* If the address length field is there to be filled
@@ -3479,6 +3506,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
msg->msg_namelen = sizeof(struct sockaddr_ll);
}
}
+ if (WARN_ON_ONCE(copy_len > max_len)) {
+ copy_len = max_len;
+ msg->msg_namelen = copy_len;
+ }
memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
}
@@ -3534,7 +3565,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
rcu_read_lock();
dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex));
if (dev)
- strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
+ strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
rcu_read_unlock();
return sizeof(*uaddr);
@@ -3932,7 +3963,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
}
case PACKET_FANOUT_DATA:
{
- if (!po->fanout)
+ /* Paired with the WRITE_ONCE() in fanout_add() */
+ if (!READ_ONCE(po->fanout))
return -EINVAL;
return fanout_set_data(po, optval, optlen);
@@ -4135,8 +4167,8 @@ static int packet_notifier(struct notifier_block *this,
if (msg == NETDEV_UNREGISTER) {
packet_cached_dev_reset(po);
WRITE_ONCE(po->ifindex, -1);
- dev_put_track(po->prot_hook.dev,
- &po->prot_hook.dev_tracker);
+ netdev_put(po->prot_hook.dev,
+ &po->prot_hook.dev_tracker);
po->prot_hook.dev = NULL;
}
spin_unlock(&po->bind_lock);
@@ -4693,37 +4725,37 @@ static struct pernet_operations packet_net_ops = {
static void __exit packet_exit(void)
{
- unregister_netdevice_notifier(&packet_netdev_notifier);
- unregister_pernet_subsys(&packet_net_ops);
sock_unregister(PF_PACKET);
proto_unregister(&packet_proto);
+ unregister_netdevice_notifier(&packet_netdev_notifier);
+ unregister_pernet_subsys(&packet_net_ops);
}
static int __init packet_init(void)
{
int rc;
- rc = proto_register(&packet_proto, 0);
- if (rc)
- goto out;
- rc = sock_register(&packet_family_ops);
- if (rc)
- goto out_proto;
rc = register_pernet_subsys(&packet_net_ops);
if (rc)
- goto out_sock;
+ goto out;
rc = register_netdevice_notifier(&packet_netdev_notifier);
if (rc)
goto out_pernet;
+ rc = proto_register(&packet_proto, 0);
+ if (rc)
+ goto out_notifier;
+ rc = sock_register(&packet_family_ops);
+ if (rc)
+ goto out_proto;
return 0;
-out_pernet:
- unregister_pernet_subsys(&packet_net_ops);
-out_sock:
- sock_unregister(PF_PACKET);
out_proto:
proto_unregister(&packet_proto);
+out_notifier:
+ unregister_netdevice_notifier(&packet_netdev_notifier);
+out_pernet:
+ unregister_pernet_subsys(&packet_net_ops);
out:
return rc;
}
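
The af_packet init/exit reordering above is the classic dependency-ordered bring-up: per-netns state and the netdevice notifier must exist before proto_register()/sock_register() make PF_PACKET visible, and teardown must run in exact reverse. A minimal sketch of the pattern, with hypothetical example_* symbols standing in for the real ones:

static int __init example_init(void)
{
	int rc;

	rc = register_pernet_subsys(&example_net_ops);	/* per-netns state */
	if (rc)
		goto out;
	rc = register_netdevice_notifier(&example_notifier); /* device events */
	if (rc)
		goto out_pernet;
	rc = proto_register(&example_proto, 0);		/* protocol slab */
	if (rc)
		goto out_notifier;
	rc = sock_register(&example_family_ops);	/* now userspace-visible */
	if (rc)
		goto out_proto;
	return 0;

out_proto:
	proto_unregister(&example_proto);
out_notifier:
	unregister_netdevice_notifier(&example_notifier);
out_pernet:
	unregister_pernet_subsys(&example_net_ops);
out:
	return rc;
}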
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 65218b7ce9f9..2b582da1e88c 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -146,7 +146,7 @@ EXPORT_SYMBOL(phonet_header_ops);
* Prepends an ISI header and sends a datagram.
*/
static int pn_send(struct sk_buff *skb, struct net_device *dev,
- u16 dst, u16 src, u8 res, u8 irq)
+ u16 dst, u16 src, u8 res)
{
struct phonethdr *ph;
int err;
@@ -182,7 +182,7 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev,
if (skb->pkt_type == PACKET_LOOPBACK) {
skb_reset_mac_header(skb);
skb_orphan(skb);
- err = (irq ? netif_rx(skb) : netif_rx_ni(skb)) ? -ENOBUFS : 0;
+ err = netif_rx(skb) ? -ENOBUFS : 0;
} else {
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
NULL, NULL, skb->len);
@@ -214,7 +214,7 @@ static int pn_raw_send(const void *data, int len, struct net_device *dev,
skb_reserve(skb, MAX_PHONET_HEADER);
__skb_put(skb, len);
skb_copy_to_linear_data(skb, data, len);
- return pn_send(skb, dev, dst, src, res, 1);
+ return pn_send(skb, dev, dst, src, res);
}
/*
@@ -269,7 +269,7 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
if (!pn_addr(src))
src = pn_object(saddr, pn_obj(src));
- err = pn_send(skb, dev, dst, src, res, 0);
+ err = pn_send(skb, dev, dst, src, res);
dev_put(dev);
return err;
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 393e6aa7a592..ff5f49ab236e 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -112,7 +112,7 @@ static int pn_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct sk_buff *skb = NULL;
struct sockaddr_pn sa;
@@ -123,7 +123,7 @@ static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
MSG_CMSG_COMPAT))
goto out_nofree;
- skb = skb_recv_datagram(sk, flags, noblock, &rval);
+ skb = skb_recv_datagram(sk, flags, &rval);
if (skb == NULL)
goto out_nofree;
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 65d463ad8770..83ea13a50690 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -772,7 +772,8 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
u8 pipe_handle, enabled, n_sb;
u8 aligned = 0;
- skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, errp);
+ skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
+ errp);
if (!skb)
return NULL;
@@ -1238,7 +1239,7 @@ struct sk_buff *pep_read(struct sock *sk)
}
static int pep_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct sk_buff *skb;
int err;
@@ -1267,7 +1268,7 @@ static int pep_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return -EINVAL;
}
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
lock_sock(sk);
if (skb == NULL) {
if (err == -ENOTCONN && sk->sk_state == TCP_CLOSE_WAIT)
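
Several hunks in this series adapt callers to the slimmed-down skb_recv_datagram() signature, which dropped the separate noblock argument; non-blocking behaviour is requested purely through MSG_DONTWAIT in flags, so O_NONBLOCK has to be translated by the caller, as pep_sock_accept() now does. A sketch of the new calling convention, using a hypothetical recvmsg handler:

/* Sketch: new-style skb_recv_datagram(); noblock is folded into @flags. */
static int example_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
			   int flags, int *addr_len)
{
	struct sk_buff *skb;
	int err, copied;

	/* MSG_DONTWAIT in @flags selects non-blocking mode directly. */
	skb = skb_recv_datagram(sk, flags, &err);
	if (!skb)
		return err;

	copied = min_t(size_t, len, skb->len);
	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	skb_free_datagram(sk, skb);
	return err ? err : copied;
}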
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 118d5d2a81a0..81a794e36f53 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -115,6 +115,7 @@ static struct genl_family psample_nl_family __ro_after_init = {
.mcgrps = psample_nl_mcgrps,
.small_ops = psample_nl_ops,
.n_small_ops = ARRAY_SIZE(psample_nl_ops),
+ .resv_start_op = PSAMPLE_CMD_GET_GROUP + 1,
.n_mcgrps = ARRAY_SIZE(psample_nl_mcgrps),
};
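
resv_start_op marks the first command number the generic netlink core treats as reserved: operations at or above it get the newer, stricter validation (for instance, rejecting non-zero reserved header fields), while existing commands keep their legacy behaviour. Setting it to the highest existing command plus one, as done for psample above, is the usual idiom. A sketch with a hypothetical family:

/* Sketch: a genl family opting into reserved-command validation. */
static struct genl_family example_nl_family __ro_after_init = {
	.name		= "example",
	.version	= 1,
	.small_ops	= example_nl_ops,
	.n_small_ops	= ARRAY_SIZE(example_nl_ops),
	/* Command numbers >= EXAMPLE_CMD_MAX + 1 are reserved and get
	 * the stricter validation rules. */
	.resv_start_op	= EXAMPLE_CMD_MAX + 1,
};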
diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c
index ec2322529727..5c2fb992803b 100644
--- a/net/qrtr/af_qrtr.c
+++ b/net/qrtr/af_qrtr.c
@@ -1035,8 +1035,7 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
return -EADDRNOTAVAIL;
}
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &rc);
+ skb = skb_recv_datagram(sk, flags, &rc);
if (!skb) {
release_sock(sk);
return rc;
diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c
index fa611678af05..9ced13c0627a 100644
--- a/net/qrtr/mhi.c
+++ b/net/qrtr/mhi.c
@@ -78,11 +78,6 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev,
struct qrtr_mhi_dev *qdev;
int rc;
- /* start channels */
- rc = mhi_prepare_for_transfer(mhi_dev);
- if (rc)
- return rc;
-
qdev = devm_kzalloc(&mhi_dev->dev, sizeof(*qdev), GFP_KERNEL);
if (!qdev)
return -ENOMEM;
@@ -96,6 +91,13 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev,
if (rc)
return rc;
+ /* start channels */
+ rc = mhi_prepare_for_transfer_autoqueue(mhi_dev);
+ if (rc) {
+ qrtr_endpoint_unregister(&qdev->ep);
+ return rc;
+ }
+
dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n");
return 0;
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index b239120dd9ca..3ff6995244e5 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -894,7 +894,7 @@ module_exit(rds_exit);
u32 rds_gen_num;
-static int rds_init(void)
+static int __init rds_init(void)
{
int ret;
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 5b5fb4ca8d3e..97a29172a8ee 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -104,7 +104,7 @@ static int rds_add_bound(struct rds_sock *rs, const struct in6_addr *addr,
return -EINVAL;
last = rover;
} else {
- rover = max_t(u16, prandom_u32(), 2);
+ rover = max_t(u16, get_random_u16(), 2);
last = rover - 1;
}
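
This series also replaces open-coded prandom_u32() arithmetic with the purpose-built helpers: prandom_u32_max(n) for an unbiased value in [0, n), as in the af_packet rollover hunk, and get_random_u16() where exactly 16 bits are needed. A sketch of both idioms, with a hypothetical stand-in for ROLLOVER_HLEN:

#include <linux/kernel.h>
#include <linux/prandom.h>
#include <linux/random.h>

#define EXAMPLE_HLEN 16			/* stand-in for ROLLOVER_HLEN */

u32 example_pick_victim(void)
{
	/* Uniform in [0, EXAMPLE_HLEN) without modulo bias. */
	return prandom_u32_max(EXAMPLE_HLEN);
}

u16 example_pick_rover(void)
{
	/* 16-bit value, floored at 2 as in the rds bind hunk above. */
	return max_t(u16, get_random_u16(), 2);
}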
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 24c9a9005a6f..9826fe7f9d00 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -154,8 +154,8 @@ static int rds_ib_add_one(struct ib_device *device)
rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE);
rds_ibdev->odp_capable =
- !!(device->attrs.device_cap_flags &
- IB_DEVICE_ON_DEMAND_PAGING) &&
+ !!(device->attrs.kernel_cap_flags &
+ IBK_ON_DEMAND_PAGING) &&
!!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps &
IB_ODP_SUPPORT_WRITE) &&
!!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps &
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 6fdedd9dbbc2..cfbf0e129cba 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -363,6 +363,7 @@ static int acquire_refill(struct rds_connection *conn)
static void release_refill(struct rds_connection *conn)
{
clear_bit(RDS_RECV_REFILL, &conn->c_flags);
+ smp_mb__after_atomic();
/* We don't use wait_on_bit()/wake_up_bit() because our waking is in a
* hot path and finding waiters is very rare. We don't want to walk
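
The smp_mb__after_atomic() added to release_refill() pairs the clear_bit(), which is not a barrier on its own, with the waiter check that follows: without it the store could be reordered past the waitqueue_active() test and a concurrent sleeper missed. The pattern in sketch form, with hypothetical names:

#include <linux/bitops.h>
#include <linux/wait.h>

struct example {
	unsigned long flags;		/* bit 0: EXAMPLE_BUSY */
	wait_queue_head_t waitq;
};
#define EXAMPLE_BUSY 0

static void example_release(struct example *ex)
{
	clear_bit(EXAMPLE_BUSY, &ex->flags);
	/* clear_bit() is not a full barrier: order the clear before the
	 * waitqueue_active() check so a concurrent sleeper is not missed. */
	smp_mb__after_atomic();
	if (waitqueue_active(&ex->waitq))
		wake_up_all(&ex->waitq);
}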
diff --git a/net/rds/message.c b/net/rds/message.c
index 799034e0f513..44dbc612ef54 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -354,7 +354,7 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
for (i = 0; i < rm->data.op_nents; ++i) {
sg_set_page(&rm->data.op_sg[i],
- virt_to_page(page_addrs[i]),
+ virt_to_page((void *)page_addrs[i]),
PAGE_SIZE, 0);
}
@@ -391,7 +391,7 @@ static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *
size_t start;
ssize_t copied;
- copied = iov_iter_get_pages(from, &pages, PAGE_SIZE,
+ copied = iov_iter_get_pages2(from, &pages, PAGE_SIZE,
1, &start);
if (copied < 0) {
struct mmpin *mmp;
@@ -405,7 +405,6 @@ static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *
goto err;
}
total_copied += copied;
- iov_iter_advance(from, copied);
length -= copied;
sg_set_page(sg, pages, copied, start);
rm->data.op_nents++;
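
iov_iter_get_pages2() differs from the older iov_iter_get_pages() in that it advances the iterator by the number of bytes it pinned, which is why the explicit iov_iter_advance() is deleted above. A sketch of the new contract, using a hypothetical caller:

#include <linux/uio.h>

/* Sketch: pinning user pages with the self-advancing iterator API. */
static ssize_t example_pin_one(struct iov_iter *from, struct page **pages)
{
	size_t start;
	ssize_t copied;

	copied = iov_iter_get_pages2(from, pages, PAGE_SIZE, 1, &start);
	if (copied < 0)
		return copied;
	/* No iov_iter_advance() here: iov_iter_get_pages2() has already
	 * consumed @copied bytes from @from. */
	return copied;
}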
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 6f1a50d50d06..fba82d36593a 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -742,7 +742,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
NULL, 0, rs, &local_odp_mr->r_key, NULL,
iov->addr, iov->bytes, ODP_VIRTUAL);
if (IS_ERR(local_odp_mr->r_trans_private)) {
- ret = IS_ERR(local_odp_mr->r_trans_private);
+ ret = PTR_ERR(local_odp_mr->r_trans_private);
rdsdebug("get_mr ret %d %p\"", ret,
local_odp_mr->r_trans_private);
kfree(local_odp_mr);
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index a9e4ff948a7d..d36f3f6b4351 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -291,7 +291,7 @@ static void rds_rdma_listen_stop(void)
#endif
}
-static int rds_rdma_init(void)
+static int __init rds_rdma_init(void)
{
int ret;
@@ -307,7 +307,7 @@ out:
}
module_init(rds_rdma_init);
-static void rds_rdma_exit(void)
+static void __exit rds_rdma_exit(void)
{
/* stop listening first to ensure no new connections are attempted */
rds_rdma_listen_stop();
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 5327d130c4b5..4444fd82b66d 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -166,10 +166,10 @@ void rds_tcp_reset_callbacks(struct socket *sock,
*/
atomic_set(&cp->cp_state, RDS_CONN_RESETTING);
wait_event(cp->cp_waitq, !test_bit(RDS_IN_XMIT, &cp->cp_flags));
- lock_sock(osock->sk);
/* reset receive side state for rds_tcp_data_recv() for osock */
cancel_delayed_work_sync(&cp->cp_send_w);
cancel_delayed_work_sync(&cp->cp_recv_w);
+ lock_sock(osock->sk);
if (tc->t_tinc) {
rds_inc_put(&tc->t_tinc->ti_inc);
tc->t_tinc = NULL;
@@ -487,14 +487,27 @@ struct rds_tcp_net {
/* All module specific customizations to the RDS-TCP socket should be done in
* rds_tcp_tune() and applied after socket creation.
*/
-void rds_tcp_tune(struct socket *sock)
+bool rds_tcp_tune(struct socket *sock)
{
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
- struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+ struct rds_tcp_net *rtn;
tcp_sock_set_nodelay(sock->sk);
lock_sock(sk);
+ /* TCP timer functions might access net namespace even after
+ * a process which created this net namespace terminated.
+ */
+ if (!sk->sk_net_refcnt) {
+ if (!maybe_get_net(net)) {
+ release_sock(sk);
+ return false;
+ }
+ sk->sk_net_refcnt = 1;
+ netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL);
+ sock_inuse_add(net, 1);
+ }
+ rtn = net_generic(net, rds_tcp_netid);
if (rtn->sndbuf_size > 0) {
sk->sk_sndbuf = rtn->sndbuf_size;
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
@@ -504,6 +517,7 @@ void rds_tcp_tune(struct socket *sock)
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
}
release_sock(sk);
+ return true;
}
static void rds_tcp_accept_worker(struct work_struct *work)
@@ -698,7 +712,7 @@ static void rds_tcp_exit(void)
}
module_exit(rds_tcp_exit);
-static int rds_tcp_init(void)
+static int __init rds_tcp_init(void)
{
int ret;
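
The rds_tcp_tune() change closes a use-after-free: kernel sockets normally do not pin their network namespace (sk_net_refcnt == 0), so TCP timers could still dereference a namespace after it was torn down. The fix upgrades the socket to a refcounted-netns one and fails gracefully if the namespace is already dying, which is why the function now returns bool and both callers map false to -EINVAL. A condensed sketch of the conversion, assuming the same sock fields:

#include <net/net_namespace.h>
#include <net/sock.h>

/* Sketch: pin the netns for a kernel socket whose timers may outlive
 * the process that created the namespace. */
static bool example_pin_netns(struct sock *sk)
{
	struct net *net = sock_net(sk);

	if (sk->sk_net_refcnt)
		return true;			/* already refcounted */
	if (!maybe_get_net(net))
		return false;			/* netns already going away */
	sk->sk_net_refcnt = 1;
	netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL);
	sock_inuse_add(net, 1);			/* keep sock accounting right */
	return true;
}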
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index dc8d745d6857..f8b5930d7b34 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -49,7 +49,7 @@ struct rds_tcp_statistics {
};
/* tcp.c */
-void rds_tcp_tune(struct socket *sock);
+bool rds_tcp_tune(struct socket *sock);
void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_restore_callbacks(struct socket *sock,
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 5461d77fff4f..f0c477c5d1db 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -124,7 +124,10 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
if (ret < 0)
goto out;
- rds_tcp_tune(sock);
+ if (!rds_tcp_tune(sock)) {
+ ret = -EINVAL;
+ goto out;
+ }
if (isv6) {
sin6.sin6_family = AF_INET6;
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 09cadd556d1e..7edf2e69d3fe 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -133,7 +133,10 @@ int rds_tcp_accept_one(struct socket *sock)
__module_get(new_sock->ops->owner);
rds_tcp_keepalive(new_sock);
- rds_tcp_tune(new_sock);
+ if (!rds_tcp_tune(new_sock)) {
+ ret = -EINVAL;
+ goto out;
+ }
inet = inet_sk(new_sock->sk);
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 5b1927d66f0d..dac4fdc7488a 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -78,6 +78,7 @@ struct rfkill_data {
struct mutex mtx;
wait_queue_head_t read_wait;
bool input_handler;
+ u8 max_size;
};
@@ -1153,6 +1154,8 @@ static int rfkill_fop_open(struct inode *inode, struct file *file)
if (!data)
return -ENOMEM;
+ data->max_size = RFKILL_EVENT_SIZE_V1;
+
INIT_LIST_HEAD(&data->events);
mutex_init(&data->mtx);
init_waitqueue_head(&data->read_wait);
@@ -1235,6 +1238,7 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
list);
sz = min_t(unsigned long, sizeof(ev->ev), count);
+ sz = min_t(unsigned long, sz, data->max_size);
ret = sz;
if (copy_to_user(buf, &ev->ev, sz))
ret = -EFAULT;
@@ -1249,6 +1253,7 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
static ssize_t rfkill_fop_write(struct file *file, const char __user *buf,
size_t count, loff_t *pos)
{
+ struct rfkill_data *data = file->private_data;
struct rfkill *rfkill;
struct rfkill_event_ext ev;
int ret;
@@ -1263,6 +1268,7 @@ static ssize_t rfkill_fop_write(struct file *file, const char __user *buf,
* our API version even in a write() call, if it cares.
*/
count = min(count, sizeof(ev));
+ count = min_t(size_t, count, data->max_size);
if (copy_from_user(&ev, buf, count))
return -EFAULT;
@@ -1322,31 +1328,47 @@ static int rfkill_fop_release(struct inode *inode, struct file *file)
return 0;
}
-#ifdef CONFIG_RFKILL_INPUT
static long rfkill_fop_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct rfkill_data *data = file->private_data;
+ int ret = -ENOSYS;
+ u32 size;
if (_IOC_TYPE(cmd) != RFKILL_IOC_MAGIC)
return -ENOSYS;
- if (_IOC_NR(cmd) != RFKILL_IOC_NOINPUT)
- return -ENOSYS;
-
mutex_lock(&data->mtx);
-
- if (!data->input_handler) {
- if (atomic_inc_return(&rfkill_input_disabled) == 1)
- printk(KERN_DEBUG "rfkill: input handler disabled\n");
- data->input_handler = true;
+ switch (_IOC_NR(cmd)) {
+#ifdef CONFIG_RFKILL_INPUT
+ case RFKILL_IOC_NOINPUT:
+ if (!data->input_handler) {
+ if (atomic_inc_return(&rfkill_input_disabled) == 1)
+ printk(KERN_DEBUG "rfkill: input handler disabled\n");
+ data->input_handler = true;
+ }
+ ret = 0;
+ break;
+#endif
+ case RFKILL_IOC_MAX_SIZE:
+ if (get_user(size, (__u32 __user *)arg)) {
+ ret = -EFAULT;
+ break;
+ }
+ if (size < RFKILL_EVENT_SIZE_V1 || size > U8_MAX) {
+ ret = -EINVAL;
+ break;
+ }
+ data->max_size = size;
+ ret = 0;
+ break;
+ default:
+ break;
}
-
mutex_unlock(&data->mtx);
- return 0;
+ return ret;
}
-#endif
static const struct file_operations rfkill_fops = {
.owner = THIS_MODULE,
@@ -1355,10 +1377,8 @@ static const struct file_operations rfkill_fops = {
.write = rfkill_fop_write,
.poll = rfkill_fop_poll,
.release = rfkill_fop_release,
-#ifdef CONFIG_RFKILL_INPUT
.unlocked_ioctl = rfkill_fop_ioctl,
.compat_ioctl = compat_ptr_ioctl,
-#endif
.llseek = no_llseek,
};
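
With the ioctl handler now built unconditionally, userspace can opt into extended rfkill events by declaring the largest event size it understands; reads and writes are clamped to that size, defaulting to the v1 layout for old binaries. A userspace sketch, assuming the RFKILL_IOCTL_MAX_SIZE definition added by the matching uapi change:

/* Sketch (userspace): opt into extended rfkill events. */
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/rfkill.h>

int open_rfkill_ext(void)
{
	__u32 max = sizeof(struct rfkill_event_ext);
	int fd = open("/dev/rfkill", O_RDWR);

	if (fd < 0)
		return -1;
	/* Declare the largest event we understand; without this, reads
	 * stay truncated to the v1 event size. Failure just means an
	 * older kernel, so fall back to v1-sized events. */
	ioctl(fd, RFKILL_IOCTL_MAX_SIZE, &max);
	return fd;
}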
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 30a1cf4c16c6..36fefc3957d7 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -192,6 +192,7 @@ static void rose_kill_by_device(struct net_device *dev)
rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0);
if (rose->neighbour)
rose->neighbour->use--;
+ netdev_put(rose->device, &rose->dev_tracker);
rose->device = NULL;
}
}
@@ -592,6 +593,8 @@ static struct sock *rose_make_new(struct sock *osk)
rose->idle = orose->idle;
rose->defer = orose->defer;
rose->device = orose->device;
+ if (rose->device)
+ netdev_hold(rose->device, &rose->dev_tracker, GFP_ATOMIC);
rose->qbitincl = orose->qbitincl;
return sk;
@@ -645,6 +648,7 @@ static int rose_release(struct socket *sock)
break;
}
+ netdev_put(rose->device, &rose->dev_tracker);
sock->sk = NULL;
release_sock(sk);
sock_put(sk);
@@ -696,6 +700,7 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
rose->source_addr = addr->srose_addr;
rose->device = dev;
+ netdev_tracker_alloc(rose->device, &rose->dev_tracker, GFP_KERNEL);
rose->source_ndigis = addr->srose_ndigis;
if (addr_len == sizeof(struct full_sockaddr_rose)) {
@@ -721,7 +726,6 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
struct rose_sock *rose = rose_sk(sk);
struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr;
unsigned char cause, diagnostic;
- struct net_device *dev;
ax25_uid_assoc *user;
int n, err = 0;
@@ -778,9 +782,12 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
}
if (sock_flag(sk, SOCK_ZAPPED)) { /* Must bind first - autobinding in this may or may not work */
+ struct net_device *dev;
+
sock_reset_flag(sk, SOCK_ZAPPED);
- if ((dev = rose_dev_first()) == NULL) {
+ dev = rose_dev_first();
+ if (!dev) {
err = -ENETUNREACH;
goto out_release;
}
@@ -788,12 +795,15 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
user = ax25_findbyuid(current_euid());
if (!user) {
err = -EINVAL;
+ dev_put(dev);
goto out_release;
}
memcpy(&rose->source_addr, dev->dev_addr, ROSE_ADDR_LEN);
rose->source_call = user->call;
rose->device = dev;
+ netdev_tracker_alloc(rose->device, &rose->dev_tracker,
+ GFP_KERNEL);
ax25_uid_put(user);
rose_insert_socket(sk); /* Finish the bind */
@@ -1017,6 +1027,9 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
make_rose->source_digis[n] = facilities.source_digis[n];
make_rose->neighbour = neigh;
make_rose->device = dev;
+ /* Caller got a reference for us. */
+ netdev_tracker_alloc(make_rose->device, &make_rose->dev_tracker,
+ GFP_ATOMIC);
make_rose->facilities = facilities;
make_rose->neighbour->use++;
@@ -1230,7 +1243,8 @@ static int rose_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
return -ENOTCONN;
/* Now we can treat all alike */
- if ((skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &er)) == NULL)
+ skb = skb_recv_datagram(sk, flags, &er);
+ if (!skb)
return er;
qbit = (skb->data[0] & ROSE_Q_BIT) == ROSE_Q_BIT;
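
The rose changes plug device refcount leaks by giving each socket a tracked reference on rose->device: rose_dev_first() now returns the device held, binds and inherited clones take their own reference, and every path that clears rose->device drops one. The tracked-reference API in sketch form, with a hypothetical socket structure:

/* Sketch: tracked netdevice references (debuggable refcounting). */
#include <linux/netdevice.h>

struct example_sock {
	struct net_device	*dev;
	netdevice_tracker	dev_tracker;
};

static void example_take(struct example_sock *es, struct net_device *dev)
{
	es->dev = dev;
	/* Pairs a hold with a tracker entry so leaks are attributable. */
	netdev_hold(dev, &es->dev_tracker, GFP_ATOMIC);
}

static void example_attach_held(struct example_sock *es, struct net_device *dev)
{
	/* Caller already holds a plain reference; just attach the tracker. */
	es->dev = dev;
	netdev_tracker_alloc(dev, &es->dev_tracker, GFP_KERNEL);
}

static void example_drop(struct example_sock *es)
{
	netdev_put(es->dev, &es->dev_tracker);	/* NULL-safe */
	es->dev = NULL;
}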
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index 8b96a56d3a49..0f77ae8ef944 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -236,6 +236,9 @@ void rose_transmit_clear_request(struct rose_neigh *neigh, unsigned int lci, uns
unsigned char *dptr;
int len;
+ if (!neigh->dev)
+ return;
+
len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 3;
if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 11c45c8c6c16..036d92c0ad79 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -96,7 +96,8 @@ static void rose_loopback_timer(struct timer_list *unused)
}
if (frametype == ROSE_CALL_REQUEST) {
- if (!rose_loopback_neigh->dev) {
+ if (!rose_loopback_neigh->dev &&
+ !rose_loopback_neigh->loopback) {
kfree_skb(skb);
continue;
}
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index e2e6b6b78578..fee772b4637c 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -227,8 +227,8 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh)
{
struct rose_neigh *s;
- rose_stop_ftimer(rose_neigh);
- rose_stop_t0timer(rose_neigh);
+ del_timer_sync(&rose_neigh->ftimer);
+ del_timer_sync(&rose_neigh->t0timer);
skb_queue_purge(&rose_neigh->queue);
@@ -615,6 +615,8 @@ struct net_device *rose_dev_first(void)
if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
first = dev;
}
+ if (first)
+ dev_hold(first);
rcu_read_unlock();
return first;
@@ -1128,22 +1130,15 @@ static int rose_node_show(struct seq_file *seq, void *v)
seq_puts(seq, "address mask n neigh neigh neigh\n");
else {
const struct rose_node *rose_node = v;
- /* if (rose_node->loopback) {
- seq_printf(seq, "%-10s %04d 1 loopback\n",
- rose2asc(rsbuf, &rose_node->address),
- rose_node->mask);
- } else { */
- seq_printf(seq, "%-10s %04d %d",
- rose2asc(rsbuf, &rose_node->address),
- rose_node->mask,
- rose_node->count);
-
- for (i = 0; i < rose_node->count; i++)
- seq_printf(seq, " %05d",
- rose_node->neighbour[i]->number);
-
- seq_puts(seq, "\n");
- /* } */
+ seq_printf(seq, "%-10s %04d %d",
+ rose2asc(rsbuf, &rose_node->address),
+ rose_node->mask,
+ rose_node->count);
+
+ for (i = 0; i < rose_node->count; i++)
+ seq_printf(seq, " %05d", rose_node->neighbour[i]->number);
+
+ seq_puts(seq, "\n");
}
return 0;
}
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c
index b3138fc2e552..f06ddbed3fed 100644
--- a/net/rose/rose_timer.c
+++ b/net/rose/rose_timer.c
@@ -31,89 +31,89 @@ static void rose_idletimer_expiry(struct timer_list *);
void rose_start_heartbeat(struct sock *sk)
{
- del_timer(&sk->sk_timer);
+ sk_stop_timer(sk, &sk->sk_timer);
sk->sk_timer.function = rose_heartbeat_expiry;
sk->sk_timer.expires = jiffies + 5 * HZ;
- add_timer(&sk->sk_timer);
+ sk_reset_timer(sk, &sk->sk_timer, sk->sk_timer.expires);
}
void rose_start_t1timer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->t1;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_t2timer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->t2;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_t3timer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->t3;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_hbtimer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->hb;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_idletimer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->idletimer);
+ sk_stop_timer(sk, &rose->idletimer);
if (rose->idle > 0) {
rose->idletimer.function = rose_idletimer_expiry;
rose->idletimer.expires = jiffies + rose->idle;
- add_timer(&rose->idletimer);
+ sk_reset_timer(sk, &rose->idletimer, rose->idletimer.expires);
}
}
void rose_stop_heartbeat(struct sock *sk)
{
- del_timer(&sk->sk_timer);
+ sk_stop_timer(sk, &sk->sk_timer);
}
void rose_stop_timer(struct sock *sk)
{
- del_timer(&rose_sk(sk)->timer);
+ sk_stop_timer(sk, &rose_sk(sk)->timer);
}
void rose_stop_idletimer(struct sock *sk)
{
- del_timer(&rose_sk(sk)->idletimer);
+ sk_stop_timer(sk, &rose_sk(sk)->idletimer);
}
static void rose_heartbeat_expiry(struct timer_list *t)
@@ -130,6 +130,7 @@ static void rose_heartbeat_expiry(struct timer_list *t)
(sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
bh_unlock_sock(sk);
rose_destroy_socket(sk);
+ sock_put(sk);
return;
}
break;
@@ -152,6 +153,7 @@ static void rose_heartbeat_expiry(struct timer_list *t)
rose_start_heartbeat(sk);
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void rose_timer_expiry(struct timer_list *t)
@@ -181,6 +183,7 @@ static void rose_timer_expiry(struct timer_list *t)
break;
}
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void rose_idletimer_expiry(struct timer_list *t)
@@ -205,4 +208,5 @@ static void rose_idletimer_expiry(struct timer_list *t)
sock_set_flag(sk, SOCK_DEAD);
}
bh_unlock_sock(sk);
+ sock_put(sk);
}
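
Switching the rose timers from raw add_timer()/del_timer() to sk_reset_timer()/sk_stop_timer() makes every armed timer hold a socket reference, closing the window in which a timer could fire against a freed sock; accordingly, each expiry handler must now drop that reference with sock_put() before returning. The pattern, sketched with hypothetical names:

/* Sketch: socket timers that pin the sock while armed. */
static void example_timer_expiry(struct timer_list *t)
{
	struct sock *sk = from_timer(sk, t, sk_timer);

	bh_lock_sock(sk);
	/* ... protocol timeout handling ... */
	bh_unlock_sock(sk);
	sock_put(sk);		/* drop the reference held for the timer */
}

static void example_start_timer(struct sock *sk, unsigned long timeout)
{
	sk_stop_timer(sk, &sk->sk_timer);	/* releases any pending ref */
	sk->sk_timer.function = example_timer_expiry;
	sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); /* takes a ref */
}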
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 2b5f89713e36..ceba28e9dce6 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -351,7 +351,7 @@ static void rxrpc_dummy_notify_rx(struct sock *sk, struct rxrpc_call *rxcall,
*/
void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
{
- _enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
+ _enter("%d{%d}", call->debug_id, refcount_read(&call->ref));
mutex_lock(&call->user_mutex);
rxrpc_release_call(rxrpc_sk(sock->sk), call);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 7bd6f8a66a3e..1ad0ec5afb50 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -15,14 +15,6 @@
#include <keys/rxrpc-type.h>
#include "protocol.h"
-#if 0
-#define CHECK_SLAB_OKAY(X) \
- BUG_ON(atomic_read((X)) >> (sizeof(atomic_t) - 2) == \
- (POISON_FREE << 8 | POISON_FREE))
-#else
-#define CHECK_SLAB_OKAY(X) do {} while (0)
-#endif
-
#define FCRYPT_BSIZE 8
struct rxrpc_crypt {
union {
@@ -68,7 +60,7 @@ struct rxrpc_net {
struct proc_dir_entry *proc_net; /* Subdir in /proc/net */
u32 epoch; /* Local epoch for detecting local-end reset */
struct list_head calls; /* List of calls active in this namespace */
- rwlock_t call_lock; /* Lock for ->calls */
+ spinlock_t call_lock; /* Lock for ->calls */
atomic_t nr_calls; /* Count of allocated calls */
atomic_t nr_conns;
@@ -88,7 +80,7 @@ struct rxrpc_net {
struct work_struct client_conn_reaper;
struct timer_list client_conn_reap_timer;
- struct list_head local_endpoints;
+ struct hlist_head local_endpoints;
struct mutex local_mutex; /* Lock for ->local_endpoints */
DECLARE_HASHTABLE (peer_hash, 10);
@@ -279,9 +271,9 @@ struct rxrpc_security {
struct rxrpc_local {
struct rcu_head rcu;
atomic_t active_users; /* Number of users of the local endpoint */
- atomic_t usage; /* Number of references to the structure */
+ refcount_t ref; /* Number of references to the structure */
struct rxrpc_net *rxnet; /* The network ns in which this resides */
- struct list_head link;
+ struct hlist_node link;
struct socket *socket; /* my UDP socket */
struct work_struct processor;
struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */
@@ -304,7 +296,7 @@ struct rxrpc_local {
*/
struct rxrpc_peer {
struct rcu_head rcu; /* This must be first */
- atomic_t usage;
+ refcount_t ref;
unsigned long hash_key;
struct hlist_node hash_link;
struct rxrpc_local *local;
@@ -406,7 +398,7 @@ enum rxrpc_conn_proto_state {
*/
struct rxrpc_bundle {
struct rxrpc_conn_parameters params;
- atomic_t usage;
+ refcount_t ref;
unsigned int debug_id;
bool try_upgrade; /* True if the bundle is attempting upgrade */
bool alloc_conn; /* True if someone's getting a conn */
@@ -427,7 +419,7 @@ struct rxrpc_connection {
struct rxrpc_conn_proto proto;
struct rxrpc_conn_parameters params;
- atomic_t usage;
+ refcount_t ref;
struct rcu_head rcu;
struct list_head cache_link;
@@ -609,7 +601,7 @@ struct rxrpc_call {
int error; /* Local error incurred */
enum rxrpc_call_state state; /* current state of call */
enum rxrpc_call_completion completion; /* Call completion condition */
- atomic_t usage;
+ refcount_t ref;
u16 service_id; /* service ID */
u8 security_ix; /* Security type */
enum rxrpc_interruptibility interruptibility; /* At what point call may be interrupted */
@@ -676,13 +668,12 @@ struct rxrpc_call {
spinlock_t input_lock; /* Lock for packet input to this call */
- /* receive-phase ACK management */
+ /* Receive-phase ACK management (ACKs we send). */
u8 ackr_reason; /* reason to ACK */
rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
- rxrpc_serial_t ackr_first_seq; /* first sequence number received */
- rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */
- rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */
- rxrpc_seq_t ackr_seen; /* Highest packet shown seen */
+ rxrpc_seq_t ackr_highest_seq; /* Highest sequence number received */
+ atomic_t ackr_nr_unacked; /* Number of unacked packets */
+ atomic_t ackr_nr_consumed; /* Number of packets needing hard ACK */
/* RTT management */
rxrpc_serial_t rtt_serial[4]; /* Serial number of DATA or PING sent */
@@ -692,8 +683,10 @@ struct rxrpc_call {
#define RXRPC_CALL_RTT_AVAIL_MASK 0xf
#define RXRPC_CALL_RTT_PEND_SHIFT 8
- /* transmission-phase ACK management */
+ /* Transmission-phase ACK management (ACKs we've received). */
ktime_t acks_latest_ts; /* Timestamp of latest ACK received */
+ rxrpc_seq_t acks_first_seq; /* first sequence number received */
+ rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */
rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */
rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */
@@ -777,21 +770,18 @@ void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool,
enum rxrpc_propose_ack_trace);
void rxrpc_process_call(struct work_struct *);
-static inline void rxrpc_reduce_call_timer(struct rxrpc_call *call,
- unsigned long expire_at,
- unsigned long now,
- enum rxrpc_timer_trace why)
-{
- trace_rxrpc_timer(call, why, now);
- timer_reduce(&call->timer, expire_at);
-}
+void rxrpc_reduce_call_timer(struct rxrpc_call *call,
+ unsigned long expire_at,
+ unsigned long now,
+ enum rxrpc_timer_trace why);
+
+void rxrpc_delete_call_timer(struct rxrpc_call *call);
/*
* call_object.c
*/
extern const char *const rxrpc_call_states[];
extern const char *const rxrpc_call_completions[];
-extern unsigned int rxrpc_max_call_lifetime;
extern struct kmem_cache *rxrpc_call_jar;
struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
@@ -808,6 +798,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
bool __rxrpc_queue_call(struct rxrpc_call *);
bool rxrpc_queue_call(struct rxrpc_call *);
void rxrpc_see_call(struct rxrpc_call *);
+bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op);
void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace);
void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace);
void rxrpc_cleanup_call(struct rxrpc_call *);
@@ -990,6 +981,7 @@ void rxrpc_send_keepalive(struct rxrpc_peer *);
/*
* peer_event.c
*/
+void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset);
void rxrpc_error_report(struct sock *);
void rxrpc_peer_keepalive_worker(struct work_struct *);
@@ -1015,6 +1007,7 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *);
extern const struct seq_operations rxrpc_call_seq_ops;
extern const struct seq_operations rxrpc_connection_seq_ops;
extern const struct seq_operations rxrpc_peer_seq_ops;
+extern const struct seq_operations rxrpc_local_seq_ops;
/*
* recvmsg.c
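
Much of the rxrpc churn in this series is a mechanical atomic_t to refcount_t conversion for the call, connection, peer, local and bundle objects: refcount_t saturates rather than wrapping on overflow, and the __refcount_*() variants return the old count so the existing tracepoints can keep reporting it. The recurring idioms, sketched on a hypothetical object:

#include <linux/refcount.h>
#include <linux/slab.h>

struct example { refcount_t ref; };

static bool example_try_get(struct example *e)
{
	int old;

	/* Fails once the count has already reached zero. */
	return __refcount_inc_not_zero(&e->ref, &old);
}

static void example_get(struct example *e)
{
	int old;

	__refcount_inc(&e->ref, &old);	/* @old feeds the tracepoint */
}

static void example_put(struct example *e)
{
	int old;

	if (__refcount_dec_and_test(&e->ref, &old))
		kfree(e);		/* last reference dropped */
}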
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 1ae90fb97936..99e10eea3732 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -91,7 +91,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
(head + 1) & (size - 1));
trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
- atomic_read(&conn->usage), here);
+ refcount_read(&conn->ref), here);
}
/* Now it gets complicated, because calls get registered with the
@@ -104,7 +104,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
call->state = RXRPC_CALL_SERVER_PREALLOC;
trace_rxrpc_call(call->debug_id, rxrpc_call_new_service,
- atomic_read(&call->usage),
+ refcount_read(&call->ref),
here, (const void *)user_call_ID);
write_lock(&rx->call_lock);
@@ -140,9 +140,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
write_unlock(&rx->call_lock);
rxnet = call->rxnet;
- write_lock(&rxnet->call_lock);
- list_add_tail(&call->link, &rxnet->calls);
- write_unlock(&rxnet->call_lock);
+ spin_lock_bh(&rxnet->call_lock);
+ list_add_tail_rcu(&call->link, &rxnet->calls);
+ spin_unlock_bh(&rxnet->call_lock);
b->call_backlog[call_head] = call;
smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1));
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 6be2672a65ea..2a93e7b5fbd0 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -157,7 +157,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
{
struct sk_buff *skb;
- unsigned long resend_at, rto_j;
+ unsigned long resend_at;
rxrpc_seq_t cursor, seq, top;
ktime_t now, max_age, oldest, ack_ts;
int ix;
@@ -165,10 +165,8 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
_enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
- rto_j = call->peer->rto_j;
-
now = ktime_get_real();
- max_age = ktime_sub(now, jiffies_to_usecs(rto_j));
+ max_age = ktime_sub_us(now, jiffies_to_usecs(call->peer->rto_j));
spin_lock_bh(&call->lock);
@@ -213,7 +211,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
}
resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest)));
- resend_at += jiffies + rto_j;
+ resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, retrans);
WRITE_ONCE(call->resend_at, resend_at);
if (unacked)
@@ -312,7 +310,7 @@ recheck_state:
}
if (call->state == RXRPC_CALL_COMPLETE) {
- del_timer_sync(&call->timer);
+ rxrpc_delete_call_timer(call);
goto out_put;
}
@@ -379,9 +377,9 @@ recheck_state:
if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
(int)call->conn->hi_serial - (int)call->rx_serial > 0) {
trace_rxrpc_call_reset(call);
- rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ECONNRESET);
+ rxrpc_abort_call("EXP", call, 0, RX_CALL_DEAD, -ECONNRESET);
} else {
- rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME);
+ rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME);
}
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
goto recheck_state;
@@ -408,7 +406,8 @@ recheck_state:
goto recheck_state;
}
- if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) {
+ if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) &&
+ call->state != RXRPC_CALL_CLIENT_RECV_REPLY) {
rxrpc_resend(call, now);
goto recheck_state;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 4eb91d958a48..6401cdf7a624 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -53,10 +53,30 @@ static void rxrpc_call_timer_expired(struct timer_list *t)
if (call->state < RXRPC_CALL_COMPLETE) {
trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies);
- rxrpc_queue_call(call);
+ __rxrpc_queue_call(call);
+ } else {
+ rxrpc_put_call(call, rxrpc_call_put);
}
}
+void rxrpc_reduce_call_timer(struct rxrpc_call *call,
+ unsigned long expire_at,
+ unsigned long now,
+ enum rxrpc_timer_trace why)
+{
+ if (rxrpc_try_get_call(call, rxrpc_call_got_timer)) {
+ trace_rxrpc_timer(call, why, now);
+ if (timer_reduce(&call->timer, expire_at))
+ rxrpc_put_call(call, rxrpc_call_put_notimer);
+ }
+}
+
+void rxrpc_delete_call_timer(struct rxrpc_call *call)
+{
+ if (del_timer_sync(&call->timer))
+ rxrpc_put_call(call, rxrpc_call_put_timer);
+}
+
static struct lock_class_key rxrpc_call_user_mutex_lock_class_key;
/*
@@ -92,7 +112,7 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx,
found_extant_call:
rxrpc_get_call(call, rxrpc_call_got);
read_unlock(&rx->call_lock);
- _leave(" = %p [%d]", call, atomic_read(&call->usage));
+ _leave(" = %p [%d]", call, refcount_read(&call->ref));
return call;
}
@@ -140,7 +160,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
spin_lock_init(&call->notify_lock);
spin_lock_init(&call->input_lock);
rwlock_init(&call->state_lock);
- atomic_set(&call->usage, 1);
+ refcount_set(&call->ref, 1);
call->debug_id = debug_id;
call->tx_total_len = -1;
call->next_rx_timo = 20 * HZ;
@@ -265,8 +285,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
_enter("%p,%lx", rx, p->user_call_ID);
limiter = rxrpc_get_call_slot(p, gfp);
- if (!limiter)
+ if (!limiter) {
+ release_sock(&rx->sk);
return ERR_PTR(-ERESTARTSYS);
+ }
call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id);
if (IS_ERR(call)) {
@@ -279,7 +301,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
call->interruptibility = p->interruptibility;
call->tx_total_len = p->tx_total_len;
trace_rxrpc_call(call->debug_id, rxrpc_call_new_client,
- atomic_read(&call->usage),
+ refcount_read(&call->ref),
here, (const void *)p->user_call_ID);
if (p->kernel)
__set_bit(RXRPC_CALL_KERNEL, &call->flags);
@@ -317,9 +339,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
write_unlock(&rx->call_lock);
rxnet = call->rxnet;
- write_lock(&rxnet->call_lock);
- list_add_tail(&call->link, &rxnet->calls);
- write_unlock(&rxnet->call_lock);
+ spin_lock_bh(&rxnet->call_lock);
+ list_add_tail_rcu(&call->link, &rxnet->calls);
+ spin_unlock_bh(&rxnet->call_lock);
/* From this point on, the call is protected by its own lock. */
release_sock(&rx->sk);
@@ -332,7 +354,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
goto error_attached_to_socket;
trace_rxrpc_call(call->debug_id, rxrpc_call_connected,
- atomic_read(&call->usage), here, NULL);
+ refcount_read(&call->ref), here, NULL);
rxrpc_start_call_timer(call);
@@ -352,7 +374,7 @@ error_dup_user_ID:
__rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
RX_CALL_DEAD, -EEXIST);
trace_rxrpc_call(call->debug_id, rxrpc_call_error,
- atomic_read(&call->usage), here, ERR_PTR(-EEXIST));
+ refcount_read(&call->ref), here, ERR_PTR(-EEXIST));
rxrpc_release_call(rx, call);
mutex_unlock(&call->user_mutex);
rxrpc_put_call(call, rxrpc_call_put);
@@ -366,7 +388,7 @@ error_dup_user_ID:
*/
error_attached_to_socket:
trace_rxrpc_call(call->debug_id, rxrpc_call_error,
- atomic_read(&call->usage), here, ERR_PTR(ret));
+ refcount_read(&call->ref), here, ERR_PTR(ret));
set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
__rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
RX_CALL_DEAD, ret);
@@ -422,8 +444,9 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
bool rxrpc_queue_call(struct rxrpc_call *call)
{
const void *here = __builtin_return_address(0);
- int n = atomic_fetch_add_unless(&call->usage, 1, 0);
- if (n == 0)
+ int n;
+
+ if (!__refcount_inc_not_zero(&call->ref, &n))
return false;
if (rxrpc_queue_work(&call->processor))
trace_rxrpc_call(call->debug_id, rxrpc_call_queued, n + 1,
@@ -439,7 +462,7 @@ bool rxrpc_queue_call(struct rxrpc_call *call)
bool __rxrpc_queue_call(struct rxrpc_call *call)
{
const void *here = __builtin_return_address(0);
- int n = atomic_read(&call->usage);
+ int n = refcount_read(&call->ref);
ASSERTCMP(n, >=, 1);
if (rxrpc_queue_work(&call->processor))
trace_rxrpc_call(call->debug_id, rxrpc_call_queued_ref, n,
@@ -456,22 +479,34 @@ void rxrpc_see_call(struct rxrpc_call *call)
{
const void *here = __builtin_return_address(0);
if (call) {
- int n = atomic_read(&call->usage);
+ int n = refcount_read(&call->ref);
trace_rxrpc_call(call->debug_id, rxrpc_call_seen, n,
here, NULL);
}
}
+bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
+{
+ const void *here = __builtin_return_address(0);
+ int n;
+
+ if (!__refcount_inc_not_zero(&call->ref, &n))
+ return false;
+ trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL);
+ return true;
+}
+
/*
* Note the addition of a ref on a call.
*/
void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
{
const void *here = __builtin_return_address(0);
- int n = atomic_inc_return(&call->usage);
+ int n;
- trace_rxrpc_call(call->debug_id, op, n, here, NULL);
+ __refcount_inc(&call->ref, &n);
+ trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL);
}
/*
@@ -496,10 +531,10 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
struct rxrpc_connection *conn = call->conn;
bool put = false;
- _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage));
+ _enter("{%d,%d}", call->debug_id, refcount_read(&call->ref));
trace_rxrpc_call(call->debug_id, rxrpc_call_release,
- atomic_read(&call->usage),
+ refcount_read(&call->ref),
here, (const void *)call->flags);
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
@@ -510,8 +545,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
spin_unlock_bh(&call->lock);
rxrpc_put_call_slot(call);
-
- del_timer_sync(&call->timer);
+ rxrpc_delete_call_timer(call);
/* Make sure we don't get any more notifications */
write_lock_bh(&rx->recvmsg_lock);
@@ -589,21 +623,21 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
struct rxrpc_net *rxnet = call->rxnet;
const void *here = __builtin_return_address(0);
unsigned int debug_id = call->debug_id;
+ bool dead;
int n;
ASSERT(call != NULL);
- n = atomic_dec_return(&call->usage);
+ dead = __refcount_dec_and_test(&call->ref, &n);
trace_rxrpc_call(debug_id, op, n, here, NULL);
- ASSERTCMP(n, >=, 0);
- if (n == 0) {
+ if (dead) {
_debug("call %d dead", call->debug_id);
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
if (!list_empty(&call->link)) {
- write_lock(&rxnet->call_lock);
+ spin_lock_bh(&rxnet->call_lock);
list_del_init(&call->link);
- write_unlock(&rxnet->call_lock);
+ spin_unlock_bh(&rxnet->call_lock);
}
rxrpc_cleanup_call(call);
@@ -618,6 +652,8 @@ static void rxrpc_destroy_call(struct work_struct *work)
struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor);
struct rxrpc_net *rxnet = call->rxnet;
+ rxrpc_delete_call_timer(call);
+
rxrpc_put_connection(call->conn);
rxrpc_put_peer(call->peer);
kfree(call->rxtx_buffer);
@@ -652,8 +688,6 @@ void rxrpc_cleanup_call(struct rxrpc_call *call)
memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
- del_timer_sync(&call->timer);
-
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
@@ -675,7 +709,7 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
_enter("");
if (!list_empty(&rxnet->calls)) {
- write_lock(&rxnet->call_lock);
+ spin_lock_bh(&rxnet->call_lock);
while (!list_empty(&rxnet->calls)) {
call = list_entry(rxnet->calls.next,
@@ -686,16 +720,16 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
list_del_init(&call->link);
pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
- call, atomic_read(&call->usage),
+ call, refcount_read(&call->ref),
rxrpc_call_states[call->state],
call->flags, call->events);
- write_unlock(&rxnet->call_lock);
+ spin_unlock_bh(&rxnet->call_lock);
cond_resched();
- write_lock(&rxnet->call_lock);
+ spin_lock_bh(&rxnet->call_lock);
}
- write_unlock(&rxnet->call_lock);
+ spin_unlock_bh(&rxnet->call_lock);
}
atomic_dec(&rxnet->nr_calls);
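
The call-timer rework above gives an armed timer its own reference on the call: rxrpc_reduce_call_timer() takes a ref before arming and drops it again when timer_reduce() reports a timer was already pending (that timer already owns one), while rxrpc_delete_call_timer() drops a ref only when del_timer_sync() actually deactivated a pending timer. A sketch of the invariant, reusing the hypothetical helpers from the refcount sketch above:

/* Sketch: exactly one reference per armed timer (struct example is
 * assumed to also carry a struct timer_list timer). */
static void example_arm(struct example *e, unsigned long expire_at)
{
	if (!example_try_get(e))	/* reference for the (re)armed timer */
		return;
	/* timer_reduce() returns nonzero if a timer was already pending,
	 * i.e. a reference is already held for it - drop the extra one. */
	if (timer_reduce(&e->timer, expire_at))
		example_put(e);
}

static void example_disarm(struct example *e)
{
	/* del_timer_sync() returns nonzero only if it deactivated a
	 * pending timer; that timer's reference is now ours to drop. */
	if (del_timer_sync(&e->timer))
		example_put(e);
}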
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 8120138dac01..3c9eeb5b750c 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -102,7 +102,7 @@ void rxrpc_destroy_client_conn_ids(void)
if (!idr_is_empty(&rxrpc_client_conn_ids)) {
idr_for_each_entry(&rxrpc_client_conn_ids, conn, id) {
pr_err("AF_RXRPC: Leaked client conn %p {%d}\n",
- conn, atomic_read(&conn->usage));
+ conn, refcount_read(&conn->ref));
}
BUG();
}
@@ -122,7 +122,7 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp,
if (bundle) {
bundle->params = *cp;
rxrpc_get_peer(bundle->params.peer);
- atomic_set(&bundle->usage, 1);
+ refcount_set(&bundle->ref, 1);
spin_lock_init(&bundle->channel_lock);
INIT_LIST_HEAD(&bundle->waiting_calls);
}
@@ -131,7 +131,7 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp,
struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle)
{
- atomic_inc(&bundle->usage);
+ refcount_inc(&bundle->ref);
return bundle;
}
@@ -144,10 +144,13 @@ static void rxrpc_free_bundle(struct rxrpc_bundle *bundle)
void rxrpc_put_bundle(struct rxrpc_bundle *bundle)
{
unsigned int d = bundle->debug_id;
- unsigned int u = atomic_dec_return(&bundle->usage);
+ bool dead;
+ int r;
- _debug("PUT B=%x %u", d, u);
- if (u == 0)
+ dead = __refcount_dec_and_test(&bundle->ref, &r);
+
+ _debug("PUT B=%x %d", d, r);
+ if (dead)
rxrpc_free_bundle(bundle);
}
@@ -169,7 +172,7 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp)
return ERR_PTR(-ENOMEM);
}
- atomic_set(&conn->usage, 1);
+ refcount_set(&conn->ref, 1);
conn->bundle = bundle;
conn->params = bundle->params;
conn->out_clientflag = RXRPC_CLIENT_INITIATED;
@@ -195,7 +198,7 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp)
key_get(conn->params.key);
trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_client,
- atomic_read(&conn->usage),
+ refcount_read(&conn->ref),
__builtin_return_address(0));
atomic_inc(&rxnet->nr_client_conns);
@@ -966,14 +969,13 @@ void rxrpc_put_client_conn(struct rxrpc_connection *conn)
{
const void *here = __builtin_return_address(0);
unsigned int debug_id = conn->debug_id;
- int n;
+ bool dead;
+ int r;
- n = atomic_dec_return(&conn->usage);
- trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, n, here);
- if (n <= 0) {
- ASSERTCMP(n, >=, 0);
+ dead = __refcount_dec_and_test(&conn->ref, &r);
+ trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, r - 1, here);
+ if (dead)
rxrpc_kill_client_conn(conn);
- }
}
/*
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index b2159dbf5412..22089e37e97f 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -104,7 +104,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
goto not_found;
*_peer = peer;
conn = rxrpc_find_service_conn_rcu(peer, skb);
- if (!conn || atomic_read(&conn->usage) == 0)
+ if (!conn || refcount_read(&conn->ref) == 0)
goto not_found;
_leave(" = %p", conn);
return conn;
@@ -114,7 +114,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
*/
conn = idr_find(&rxrpc_client_conn_ids,
sp->hdr.cid >> RXRPC_CIDSHIFT);
- if (!conn || atomic_read(&conn->usage) == 0) {
+ if (!conn || refcount_read(&conn->ref) == 0) {
_debug("no conn");
goto not_found;
}
@@ -183,7 +183,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
chan->last_type = RXRPC_PACKET_TYPE_ABORT;
break;
default:
- chan->last_abort = RX_USER_ABORT;
+ chan->last_abort = RX_CALL_DEAD;
chan->last_type = RXRPC_PACKET_TYPE_ABORT;
break;
}
@@ -263,11 +263,12 @@ void rxrpc_kill_connection(struct rxrpc_connection *conn)
bool rxrpc_queue_conn(struct rxrpc_connection *conn)
{
const void *here = __builtin_return_address(0);
- int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
- if (n == 0)
+ int r;
+
+ if (!__refcount_inc_not_zero(&conn->ref, &r))
return false;
if (rxrpc_queue_work(&conn->processor))
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, n + 1, here);
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, r + 1, here);
else
rxrpc_put_connection(conn);
return true;
@@ -280,7 +281,7 @@ void rxrpc_see_connection(struct rxrpc_connection *conn)
{
const void *here = __builtin_return_address(0);
if (conn) {
- int n = atomic_read(&conn->usage);
+ int n = refcount_read(&conn->ref);
trace_rxrpc_conn(conn->debug_id, rxrpc_conn_seen, n, here);
}
@@ -292,9 +293,10 @@ void rxrpc_see_connection(struct rxrpc_connection *conn)
struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn)
{
const void *here = __builtin_return_address(0);
- int n = atomic_inc_return(&conn->usage);
+ int r;
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n, here);
+ __refcount_inc(&conn->ref, &r);
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r, here);
return conn;
}
@@ -305,11 +307,11 @@ struct rxrpc_connection *
rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
{
const void *here = __builtin_return_address(0);
+ int r;
if (conn) {
- int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
- if (n > 0)
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n + 1, here);
+ if (__refcount_inc_not_zero(&conn->ref, &r))
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r + 1, here);
else
conn = NULL;
}
@@ -333,12 +335,11 @@ void rxrpc_put_service_conn(struct rxrpc_connection *conn)
{
const void *here = __builtin_return_address(0);
unsigned int debug_id = conn->debug_id;
- int n;
+ int r;
- n = atomic_dec_return(&conn->usage);
- trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, n, here);
- ASSERTCMP(n, >=, 0);
- if (n == 1)
+ __refcount_dec(&conn->ref, &r);
+ trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, r - 1, here);
+ if (r - 1 == 1)
rxrpc_set_service_reap_timer(conn->params.local->rxnet,
jiffies + rxrpc_connection_expiry);
}
@@ -351,9 +352,9 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu)
struct rxrpc_connection *conn =
container_of(rcu, struct rxrpc_connection, rcu);
- _enter("{%d,u=%d}", conn->debug_id, atomic_read(&conn->usage));
+ _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref));
- ASSERTCMP(atomic_read(&conn->usage), ==, 0);
+ ASSERTCMP(refcount_read(&conn->ref), ==, 0);
_net("DESTROY CONN %d", conn->debug_id);
@@ -392,8 +393,8 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
write_lock(&rxnet->conn_lock);
list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
- ASSERTCMP(atomic_read(&conn->usage), >, 0);
- if (likely(atomic_read(&conn->usage) > 1))
+ ASSERTCMP(refcount_read(&conn->ref), >, 0);
+ if (likely(refcount_read(&conn->ref) > 1))
continue;
if (conn->state == RXRPC_CONN_SERVICE_PREALLOC)
continue;
@@ -405,7 +406,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ;
_debug("reap CONN %d { u=%d,t=%ld }",
- conn->debug_id, atomic_read(&conn->usage),
+ conn->debug_id, refcount_read(&conn->ref),
(long)expire_at - (long)now);
if (time_before(now, expire_at)) {
@@ -418,7 +419,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
/* The usage count sits at 1 whilst the object is unused on the
* list; we reduce that to 0 to make the object unavailable.
*/
- if (atomic_cmpxchg(&conn->usage, 1, 0) != 1)
+ if (!refcount_dec_if_one(&conn->ref))
continue;
trace_rxrpc_conn(conn->debug_id, rxrpc_conn_reap_service, 0, NULL);
@@ -442,7 +443,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
link);
list_del_init(&conn->link);
- ASSERTCMP(atomic_read(&conn->usage), ==, 0);
+ ASSERTCMP(refcount_read(&conn->ref), ==, 0);
rxrpc_kill_connection(conn);
}
@@ -470,7 +471,7 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet)
write_lock(&rxnet->conn_lock);
list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
pr_err("AF_RXRPC: Leaked conn %p {%d}\n",
- conn, atomic_read(&conn->usage));
+ conn, refcount_read(&conn->ref));
leak = true;
}
write_unlock(&rxnet->conn_lock);
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index e1966dfc9152..6e6aa02c6f9e 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -9,7 +9,7 @@
#include "ar-internal.h"
static struct rxrpc_bundle rxrpc_service_dummy_bundle = {
- .usage = ATOMIC_INIT(1),
+ .ref = REFCOUNT_INIT(1),
.debug_id = UINT_MAX,
.channel_lock = __SPIN_LOCK_UNLOCKED(&rxrpc_service_dummy_bundle.channel_lock),
};
@@ -99,7 +99,7 @@ conn_published:
return;
found_extant_conn:
- if (atomic_read(&cursor->usage) == 0)
+ if (refcount_read(&cursor->ref) == 0)
goto replace_old_connection;
write_sequnlock_bh(&peer->service_conn_lock);
/* We should not be able to get here. rxrpc_incoming_connection() is
@@ -132,7 +132,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
* the rxrpc_connections list.
*/
conn->state = RXRPC_CONN_SERVICE_PREALLOC;
- atomic_set(&conn->usage, 2);
+ refcount_set(&conn->ref, 2);
conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle);
atomic_inc(&rxnet->nr_conns);
@@ -142,7 +142,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
write_unlock(&rxnet->conn_lock);
trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
- atomic_read(&conn->usage),
+ refcount_read(&conn->ref),
__builtin_return_address(0));
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index dc201363f2c4..721d847ba92b 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -412,8 +412,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
enum rxrpc_call_state state;
- unsigned int j, nr_subpackets;
- rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0;
+ unsigned int j, nr_subpackets, nr_unacked = 0;
+ rxrpc_serial_t serial = sp->hdr.serial, ack_serial = serial;
rxrpc_seq_t seq0 = sp->hdr.seq, hard_ack;
bool immediate_ack = false, jumbo_bad = false;
u8 ack = 0;
@@ -453,7 +453,6 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
!rxrpc_receiving_reply(call))
goto unlock;
- call->ackr_prev_seq = seq0;
hard_ack = READ_ONCE(call->rx_hard_ack);
nr_subpackets = sp->nr_subpackets;
@@ -534,6 +533,9 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
ack_serial = serial;
}
+ if (after(seq0, call->ackr_highest_seq))
+ call->ackr_highest_seq = seq0;
+
/* Queue the packet. We use a couple of memory barriers here as need
* to make sure that rx_top is perceived to be set after the buffer
* pointer and that the buffer pointer is set after the annotation and
@@ -567,6 +569,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
sp = NULL;
}
+ nr_unacked++;
+
if (last) {
set_bit(RXRPC_CALL_RX_LAST, &call->flags);
if (!ack) {
@@ -586,9 +590,14 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
}
call->rx_expect_next = seq + 1;
}
+ if (!ack)
+ ack_serial = serial;
}
ack:
+ if (atomic_add_return(nr_unacked, &call->ackr_nr_unacked) > 2 && !ack)
+ ack = RXRPC_ACK_IDLE;
+
if (ack)
rxrpc_propose_ACK(call, ack, ack_serial,
immediate_ack, true,
@@ -812,7 +821,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
static bool rxrpc_is_ack_valid(struct rxrpc_call *call,
rxrpc_seq_t first_pkt, rxrpc_seq_t prev_pkt)
{
- rxrpc_seq_t base = READ_ONCE(call->ackr_first_seq);
+ rxrpc_seq_t base = READ_ONCE(call->acks_first_seq);
if (after(first_pkt, base))
return true; /* The window advanced */
@@ -820,7 +829,7 @@ static bool rxrpc_is_ack_valid(struct rxrpc_call *call,
if (before(first_pkt, base))
return false; /* firstPacket regressed */
- if (after_eq(prev_pkt, call->ackr_prev_seq))
+ if (after_eq(prev_pkt, call->acks_prev_seq))
return true; /* previousPacket hasn't regressed. */
/* Some rx implementations put a serial number in previousPacket. */
@@ -903,11 +912,38 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_propose_ack_respond_to_ack);
}
+ /* If we get an EXCEEDS_WINDOW ACK from the server, it probably
+ * indicates that the client address changed due to NAT. The server
+ * lost the call because it switched to a different peer.
+ */
+ if (unlikely(buf.ack.reason == RXRPC_ACK_EXCEEDS_WINDOW) &&
+ first_soft_ack == 1 &&
+ prev_pkt == 0 &&
+ rxrpc_is_client_call(call)) {
+ rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
+ 0, -ENETRESET);
+ return;
+ }
+
+ /* If we get an OUT_OF_SEQUENCE ACK from the server, that can also
+ * indicate a change of address. However, we can retransmit the call
+ * if we still have it buffered to the beginning.
+ */
+ if (unlikely(buf.ack.reason == RXRPC_ACK_OUT_OF_SEQUENCE) &&
+ first_soft_ack == 1 &&
+ prev_pkt == 0 &&
+ call->tx_hard_ack == 0 &&
+ rxrpc_is_client_call(call)) {
+ rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
+ 0, -ENETRESET);
+ return;
+ }
+
/* Discard any out-of-order or duplicate ACKs (outside lock). */
if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) {
trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial,
- first_soft_ack, call->ackr_first_seq,
- prev_pkt, call->ackr_prev_seq);
+ first_soft_ack, call->acks_first_seq,
+ prev_pkt, call->acks_prev_seq);
return;
}
@@ -922,14 +958,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
/* Discard any out-of-order or duplicate ACKs (inside lock). */
if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) {
trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial,
- first_soft_ack, call->ackr_first_seq,
- prev_pkt, call->ackr_prev_seq);
+ first_soft_ack, call->acks_first_seq,
+ prev_pkt, call->acks_prev_seq);
goto out;
}
call->acks_latest_ts = skb->tstamp;
- call->ackr_first_seq = first_soft_ack;
- call->ackr_prev_seq = prev_pkt;
+ call->acks_first_seq = first_soft_ack;
+ call->acks_prev_seq = prev_pkt;
/* Parse rwind and mtu sizes if provided. */
if (buf.info.rxMTU)
@@ -1154,8 +1190,6 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
*/
static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
{
- CHECK_SLAB_OKAY(&local->usage);
-
if (rxrpc_get_local_maybe(local)) {
skb_queue_tail(&local->reject_queue, skb);
rxrpc_queue_local(local);
@@ -1413,7 +1447,7 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
}
}
- if (!call || atomic_read(&call->usage) == 0) {
+ if (!call || refcount_read(&call->ref) == 0) {
if (rxrpc_to_client(sp) ||
sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
goto bad_message;
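
Both the new ackr_highest_seq tracking and rxrpc_is_ack_valid() above depend on wrapping 32-bit comparisons; assuming rxrpc's before()/after() are the usual serial-number arithmetic over signed subtraction, the behaviour is easy to demonstrate standalone:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Serial-number comparison: a is later than b iff the signed 32-bit
     * difference is positive, which stays correct across the 2^32 wrap.
     */
    static bool seq_after(uint32_t a, uint32_t b)
    {
            return (int32_t)(a - b) > 0;
    }

    int main(void)
    {
            printf("%d\n", seq_after(2, 0xfffffff0)); /* 1: later despite wrap */
            printf("%d\n", seq_after(5, 9));          /* 0: earlier */
            return 0;
    }
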
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index a4111408ffd0..38ea98ff426b 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -79,10 +79,10 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
if (local) {
- atomic_set(&local->usage, 1);
+ refcount_set(&local->ref, 1);
atomic_set(&local->active_users, 1);
local->rxnet = rxnet;
- INIT_LIST_HEAD(&local->link);
+ INIT_HLIST_NODE(&local->link);
INIT_WORK(&local->processor, rxrpc_local_processor);
init_rwsem(&local->defrag_sem);
skb_queue_head_init(&local->reject_queue);
@@ -117,6 +117,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
local, srx->transport_type, srx->transport.family);
udp_conf.family = srx->transport.family;
+ udp_conf.use_udp_checksums = true;
if (udp_conf.family == AF_INET) {
udp_conf.local_ip = srx->transport.sin.sin_addr;
udp_conf.local_udp_port = srx->transport.sin.sin_port;
@@ -124,6 +125,8 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
} else {
udp_conf.local_ip6 = srx->transport.sin6.sin6_addr;
udp_conf.local_udp_port = srx->transport.sin6.sin6_port;
+ udp_conf.use_udp6_tx_checksums = true;
+ udp_conf.use_udp6_rx_checksums = true;
#endif
}
ret = udp_sock_create(net, &udp_conf, &local->socket);
@@ -134,6 +137,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
tuncfg.encap_type = UDP_ENCAP_RXRPC;
tuncfg.encap_rcv = rxrpc_input_packet;
+ tuncfg.encap_err_rcv = rxrpc_encap_err_rcv;
tuncfg.sk_user_data = local;
setup_udp_tunnel_sock(net, local->socket, &tuncfg);
@@ -177,7 +181,7 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
{
struct rxrpc_local *local;
struct rxrpc_net *rxnet = rxrpc_net(net);
- struct list_head *cursor;
+ struct hlist_node *cursor;
const char *age;
long diff;
int ret;
@@ -187,16 +191,12 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
mutex_lock(&rxnet->local_mutex);
- for (cursor = rxnet->local_endpoints.next;
- cursor != &rxnet->local_endpoints;
- cursor = cursor->next) {
- local = list_entry(cursor, struct rxrpc_local, link);
+ hlist_for_each(cursor, &rxnet->local_endpoints) {
+ local = hlist_entry(cursor, struct rxrpc_local, link);
diff = rxrpc_local_cmp_key(local, srx);
- if (diff < 0)
+ if (diff != 0)
continue;
- if (diff > 0)
- break;
/* Services aren't allowed to share transport sockets, so
* reject that here. It is possible that the object is dying -
@@ -208,9 +208,10 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
goto addr_in_use;
}
- /* Found a match. We replace a dying object. Attempting to
- * bind the transport socket may still fail if we're attempting
- * to use a local address that the dying object is still using.
+ /* Found a match. We want to replace a dying object.
+ * Attempting to bind the transport socket may still fail if
+ * we're attempting to use a local address that the dying
+ * object is still using.
*/
if (!rxrpc_use_local(local))
break;
@@ -227,10 +228,12 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
if (ret < 0)
goto sock_error;
- if (cursor != &rxnet->local_endpoints)
- list_replace_init(cursor, &local->link);
- else
- list_add_tail(&local->link, cursor);
+ if (cursor) {
+ hlist_replace_rcu(cursor, &local->link);
+ cursor->pprev = NULL;
+ } else {
+ hlist_add_head_rcu(&local->link, &rxnet->local_endpoints);
+ }
age = "new";
found:
@@ -263,10 +266,10 @@ addr_in_use:
struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local)
{
const void *here = __builtin_return_address(0);
- int n;
+ int r;
- n = atomic_inc_return(&local->usage);
- trace_rxrpc_local(local->debug_id, rxrpc_local_got, n, here);
+ __refcount_inc(&local->ref, &r);
+ trace_rxrpc_local(local->debug_id, rxrpc_local_got, r + 1, here);
return local;
}
@@ -276,12 +279,12 @@ struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local)
struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
{
const void *here = __builtin_return_address(0);
+ int r;
if (local) {
- int n = atomic_fetch_add_unless(&local->usage, 1, 0);
- if (n > 0)
+ if (__refcount_inc_not_zero(&local->ref, &r))
trace_rxrpc_local(local->debug_id, rxrpc_local_got,
- n + 1, here);
+ r + 1, here);
else
local = NULL;
}
@@ -295,10 +298,10 @@ void rxrpc_queue_local(struct rxrpc_local *local)
{
const void *here = __builtin_return_address(0);
unsigned int debug_id = local->debug_id;
- int n = atomic_read(&local->usage);
+ int r = refcount_read(&local->ref);
if (rxrpc_queue_work(&local->processor))
- trace_rxrpc_local(debug_id, rxrpc_local_queued, n, here);
+ trace_rxrpc_local(debug_id, rxrpc_local_queued, r + 1, here);
else
rxrpc_put_local(local);
}
@@ -310,15 +313,16 @@ void rxrpc_put_local(struct rxrpc_local *local)
{
const void *here = __builtin_return_address(0);
unsigned int debug_id;
- int n;
+ bool dead;
+ int r;
if (local) {
debug_id = local->debug_id;
- n = atomic_dec_return(&local->usage);
- trace_rxrpc_local(debug_id, rxrpc_local_put, n, here);
+ dead = __refcount_dec_and_test(&local->ref, &r);
+ trace_rxrpc_local(debug_id, rxrpc_local_put, r, here);
- if (n == 0)
+ if (dead)
call_rcu(&local->rcu, rxrpc_local_rcu);
}
}
@@ -371,7 +375,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local)
local->dead = true;
mutex_lock(&rxnet->local_mutex);
- list_del_init(&local->link);
+ hlist_del_init_rcu(&local->link);
mutex_unlock(&rxnet->local_mutex);
rxrpc_clean_up_local_conns(local);
@@ -402,8 +406,11 @@ static void rxrpc_local_processor(struct work_struct *work)
container_of(work, struct rxrpc_local, processor);
bool again;
+ if (local->dead)
+ return;
+
trace_rxrpc_local(local->debug_id, rxrpc_local_processing,
- atomic_read(&local->usage), NULL);
+ refcount_read(&local->ref), NULL);
do {
again = false;
@@ -455,11 +462,11 @@ void rxrpc_destroy_all_locals(struct rxrpc_net *rxnet)
flush_workqueue(rxrpc_workqueue);
- if (!list_empty(&rxnet->local_endpoints)) {
+ if (!hlist_empty(&rxnet->local_endpoints)) {
mutex_lock(&rxnet->local_mutex);
- list_for_each_entry(local, &rxnet->local_endpoints, link) {
+ hlist_for_each_entry(local, &rxnet->local_endpoints, link) {
pr_err("AF_RXRPC: Leaked local %p {%d}\n",
- local, atomic_read(&local->usage));
+ local, refcount_read(&local->ref));
}
mutex_unlock(&rxnet->local_mutex);
BUG();
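
The get/put helpers above switch to the __refcount_*() variants purely so the tracepoints can log the count: *oldp receives the value from before the operation, hence the r + 1 reported on a successful get. Stripped to its shape, with the tracepoint stubbed by pr_debug():

    #include <linux/printk.h>
    #include <linux/refcount.h>

    struct obj {
            refcount_t      ref;
    };

    /* __refcount_inc() stores the pre-increment count in *oldp, so the
     * post-operation value to report is r + 1.
     */
    static struct obj *obj_get(struct obj *obj)
    {
            int r;

            __refcount_inc(&obj->ref, &r);
            pr_debug("obj got, ref now %d\n", r + 1);
            return obj;
    }
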
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 25bbc4cc8b13..bb4c25d6df64 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -50,7 +50,7 @@ static __net_init int rxrpc_init_net(struct net *net)
rxnet->epoch |= RXRPC_RANDOM_EPOCH;
INIT_LIST_HEAD(&rxnet->calls);
- rwlock_init(&rxnet->call_lock);
+ spin_lock_init(&rxnet->call_lock);
atomic_set(&rxnet->nr_calls, 1);
atomic_set(&rxnet->nr_conns, 1);
@@ -72,7 +72,7 @@ static __net_init int rxrpc_init_net(struct net *net)
timer_setup(&rxnet->client_conn_reap_timer,
rxrpc_client_conn_reap_timeout, 0);
- INIT_LIST_HEAD(&rxnet->local_endpoints);
+ INIT_HLIST_HEAD(&rxnet->local_endpoints);
mutex_init(&rxnet->local_mutex);
hash_init(rxnet->peer_hash);
@@ -98,6 +98,9 @@ static __net_init int rxrpc_init_net(struct net *net)
proc_create_net("peers", 0444, rxnet->proc_net,
&rxrpc_peer_seq_ops,
sizeof(struct seq_net_private));
+ proc_create_net("locals", 0444, rxnet->proc_net,
+ &rxrpc_local_seq_ops,
+ sizeof(struct seq_net_private));
return 0;
err_proc:
@@ -115,6 +118,8 @@ static __net_exit void rxrpc_exit_net(struct net *net)
rxnet->live = false;
del_timer_sync(&rxnet->peer_keepalive_timer);
cancel_work_sync(&rxnet->peer_keepalive_work);
+ /* Remove the timer again as the worker may have restarted it. */
+ del_timer_sync(&rxnet->peer_keepalive_timer);
rxrpc_destroy_all_calls(rxnet);
rxrpc_destroy_all_connections(rxnet);
rxrpc_destroy_all_peers(rxnet);
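
The doubled del_timer_sync() above closes a narrow shutdown race: cancel_work_sync() waits out the keepalive worker, but a worker run that slipped in after the first deletion may already have re-armed the timer, so it has to be deleted again once the work is quiesced. The ordering, as a sketch:

    #include <linux/timer.h>
    #include <linux/workqueue.h>

    /* Tearing down a timer whose handler queues a worker that can
     * re-arm the timer: the timer must be killed on both sides of the
     * work flush.
     */
    static void teardown(struct timer_list *timer, struct work_struct *work)
    {
            del_timer_sync(timer);   /* stop any pending expiry */
            cancel_work_sync(work);  /* a worker run may re-arm the timer... */
            del_timer_sync(timer);   /* ...so delete it once more */
    }
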
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 10f2bf2e9068..9683617db704 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -74,11 +74,18 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
u8 reason)
{
rxrpc_serial_t serial;
+ unsigned int tmp;
rxrpc_seq_t hard_ack, top, seq;
int ix;
u32 mtu, jmax;
u8 *ackp = pkt->acks;
+ tmp = atomic_xchg(&call->ackr_nr_unacked, 0);
+ tmp |= atomic_xchg(&call->ackr_nr_consumed, 0);
+ if (!tmp && (reason == RXRPC_ACK_DELAY ||
+ reason == RXRPC_ACK_IDLE))
+ return 0;
+
/* Barrier against rxrpc_input_data(). */
serial = call->ackr_serial;
hard_ack = READ_ONCE(call->rx_hard_ack);
@@ -89,7 +96,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
pkt->ack.bufferSpace = htons(8);
pkt->ack.maxSkew = htons(0);
pkt->ack.firstPacket = htonl(hard_ack + 1);
- pkt->ack.previousPacket = htonl(call->ackr_prev_seq);
+ pkt->ack.previousPacket = htonl(call->ackr_highest_seq);
pkt->ack.serial = htonl(serial);
pkt->ack.reason = reason;
pkt->ack.nAcks = top - hard_ack;
@@ -223,6 +230,10 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason);
spin_unlock_bh(&call->lock);
+ if (n == 0) {
+ kfree(pkt);
+ return 0;
+ }
iov[0].iov_base = pkt;
iov[0].iov_len = sizeof(pkt->whdr) + sizeof(pkt->ack) + n;
@@ -259,13 +270,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
ntohl(pkt->ack.serial),
false, true,
rxrpc_propose_ack_retry_tx);
- } else {
- spin_lock_bh(&call->lock);
- if (after(hard_ack, call->ackr_consumed))
- call->ackr_consumed = hard_ack;
- if (after(top, call->ackr_seen))
- call->ackr_seen = top;
- spin_unlock_bh(&call->lock);
}
rxrpc_set_keepalive(call);
@@ -468,7 +472,7 @@ done:
if (call->peer->rtt_count > 1) {
unsigned long nowj = jiffies, ack_lost_at;
- ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans);
+ ack_lost_at = rxrpc_get_rto_backoff(call->peer, false);
ack_lost_at += nowj;
WRITE_ONCE(call->ack_lost_at, ack_lost_at);
rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
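
rxrpc_fill_out_ack() now drains both event counters with atomic_xchg() and returns 0 for a DELAY or IDLE ACK when nothing was pending, which is why rxrpc_send_ack_packet() gains the n == 0 early free above. The claim-and-reset idiom, reduced to its core:

    #include <linux/atomic.h>
    #include <linux/types.h>

    /* Drain two event counters atomically and report whether anything
     * was pending; producers just atomic_inc() them.  Each xchg() reads
     * and zeroes its counter in one step, so no event is counted twice
     * or lost to a racing producer.
     */
    static bool drain_events(atomic_t *unacked, atomic_t *consumed)
    {
            unsigned int n;

            n = atomic_xchg(unacked, 0);
            n |= atomic_xchg(consumed, 0);
            return n != 0;
    }
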
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index be032850ae8c..32561e9567fe 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -16,22 +16,105 @@
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/ip.h>
+#include <net/icmp.h>
#include "ar-internal.h"
+static void rxrpc_adjust_mtu(struct rxrpc_peer *, unsigned int);
static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *);
static void rxrpc_distribute_error(struct rxrpc_peer *, int,
enum rxrpc_call_completion);
/*
- * Find the peer associated with an ICMP packet.
+ * Find the peer associated with an ICMPv4 packet.
*/
static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
- const struct sk_buff *skb,
+ struct sk_buff *skb,
+ unsigned int udp_offset,
+ unsigned int *info,
struct sockaddr_rxrpc *srx)
{
- struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
+ struct iphdr *ip, *ip0 = ip_hdr(skb);
+ struct icmphdr *icmp = icmp_hdr(skb);
+ struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset);
- _enter("");
+ _enter("%u,%u,%u", ip0->protocol, icmp->type, icmp->code);
+
+ switch (icmp->type) {
+ case ICMP_DEST_UNREACH:
+ *info = ntohs(icmp->un.frag.mtu);
+ fallthrough;
+ case ICMP_TIME_EXCEEDED:
+ case ICMP_PARAMETERPROB:
+ ip = (struct iphdr *)((void *)icmp + 8);
+ break;
+ default:
+ return NULL;
+ }
+
+ memset(srx, 0, sizeof(*srx));
+ srx->transport_type = local->srx.transport_type;
+ srx->transport_len = local->srx.transport_len;
+ srx->transport.family = local->srx.transport.family;
+
+ /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice
+ * versa?
+ */
+ switch (srx->transport.family) {
+ case AF_INET:
+ srx->transport_len = sizeof(srx->transport.sin);
+ srx->transport.family = AF_INET;
+ srx->transport.sin.sin_port = udp->dest;
+ memcpy(&srx->transport.sin.sin_addr, &ip->daddr,
+ sizeof(struct in_addr));
+ break;
+
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ srx->transport_len = sizeof(srx->transport.sin);
+ srx->transport.family = AF_INET;
+ srx->transport.sin.sin_port = udp->dest;
+ memcpy(&srx->transport.sin.sin_addr, &ip->daddr,
+ sizeof(struct in_addr));
+ break;
+#endif
+
+ default:
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+
+ _net("ICMP {%pISp}", &srx->transport);
+ return rxrpc_lookup_peer_rcu(local, srx);
+}
+
+#ifdef CONFIG_AF_RXRPC_IPV6
+/*
+ * Find the peer associated with an ICMPv6 packet.
+ */
+static struct rxrpc_peer *rxrpc_lookup_peer_icmp6_rcu(struct rxrpc_local *local,
+ struct sk_buff *skb,
+ unsigned int udp_offset,
+ unsigned int *info,
+ struct sockaddr_rxrpc *srx)
+{
+ struct icmp6hdr *icmp = icmp6_hdr(skb);
+ struct ipv6hdr *ip, *ip0 = ipv6_hdr(skb);
+ struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset);
+
+ _enter("%u,%u,%u", ip0->nexthdr, icmp->icmp6_type, icmp->icmp6_code);
+
+ switch (icmp->icmp6_type) {
+ case ICMPV6_DEST_UNREACH:
+ *info = ntohl(icmp->icmp6_mtu);
+ fallthrough;
+ case ICMPV6_PKT_TOOBIG:
+ case ICMPV6_TIME_EXCEED:
+ case ICMPV6_PARAMPROB:
+ ip = (struct ipv6hdr *)((void *)icmp + 8);
+ break;
+ default:
+ return NULL;
+ }
memset(srx, 0, sizeof(*srx));
srx->transport_type = local->srx.transport_type;
@@ -43,6 +126,165 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
*/
switch (srx->transport.family) {
case AF_INET:
+ _net("Rx ICMP6 on v4 sock");
+ srx->transport_len = sizeof(srx->transport.sin);
+ srx->transport.family = AF_INET;
+ srx->transport.sin.sin_port = udp->dest;
+ memcpy(&srx->transport.sin.sin_addr,
+ &ip->daddr.s6_addr32[3], sizeof(struct in_addr));
+ break;
+ case AF_INET6:
+ _net("Rx ICMP6");
+ srx->transport.sin.sin_port = udp->dest;
+ memcpy(&srx->transport.sin6.sin6_addr, &ip->daddr,
+ sizeof(struct in6_addr));
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+
+ _net("ICMP {%pISp}", &srx->transport);
+ return rxrpc_lookup_peer_rcu(local, srx);
+}
+#endif /* CONFIG_AF_RXRPC_IPV6 */
+
+/*
+ * Handle an error received on the local endpoint as a tunnel.
+ */
+void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb,
+ unsigned int udp_offset)
+{
+ struct sock_extended_err ee;
+ struct sockaddr_rxrpc srx;
+ struct rxrpc_local *local;
+ struct rxrpc_peer *peer;
+ unsigned int info = 0;
+ int err;
+ u8 version = ip_hdr(skb)->version;
+ u8 type = icmp_hdr(skb)->type;
+ u8 code = icmp_hdr(skb)->code;
+
+ rcu_read_lock();
+ local = rcu_dereference_sk_user_data(sk);
+ if (unlikely(!local)) {
+ rcu_read_unlock();
+ return;
+ }
+
+ rxrpc_new_skb(skb, rxrpc_skb_received);
+
+ switch (ip_hdr(skb)->version) {
+ case IPVERSION:
+ peer = rxrpc_lookup_peer_icmp_rcu(local, skb, udp_offset,
+ &info, &srx);
+ break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case 6:
+ peer = rxrpc_lookup_peer_icmp6_rcu(local, skb, udp_offset,
+ &info, &srx);
+ break;
+#endif
+ default:
+ rcu_read_unlock();
+ return;
+ }
+
+ if (peer && !rxrpc_get_peer_maybe(peer))
+ peer = NULL;
+ if (!peer) {
+ rcu_read_unlock();
+ return;
+ }
+
+ memset(&ee, 0, sizeof(ee));
+
+ switch (version) {
+ case IPVERSION:
+ switch (type) {
+ case ICMP_DEST_UNREACH:
+ switch (code) {
+ case ICMP_FRAG_NEEDED:
+ rxrpc_adjust_mtu(peer, info);
+ rcu_read_unlock();
+ rxrpc_put_peer(peer);
+ return;
+ default:
+ break;
+ }
+
+ err = EHOSTUNREACH;
+ if (code <= NR_ICMP_UNREACH) {
+ /* Might want to do something different with
+ * non-fatal errors
+ */
+ //harderr = icmp_err_convert[code].fatal;
+ err = icmp_err_convert[code].errno;
+ }
+ break;
+
+ case ICMP_TIME_EXCEEDED:
+ err = EHOSTUNREACH;
+ break;
+ default:
+ err = EPROTO;
+ break;
+ }
+
+ ee.ee_origin = SO_EE_ORIGIN_ICMP;
+ ee.ee_type = type;
+ ee.ee_code = code;
+ ee.ee_errno = err;
+ break;
+
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case 6:
+ switch (type) {
+ case ICMPV6_PKT_TOOBIG:
+ rxrpc_adjust_mtu(peer, info);
+ rcu_read_unlock();
+ rxrpc_put_peer(peer);
+ return;
+ }
+
+ icmpv6_err_convert(type, code, &err);
+
+ if (err == EACCES)
+ err = EHOSTUNREACH;
+
+ ee.ee_origin = SO_EE_ORIGIN_ICMP6;
+ ee.ee_type = type;
+ ee.ee_code = code;
+ ee.ee_errno = err;
+ break;
+#endif
+ }
+
+ trace_rxrpc_rx_icmp(peer, &ee, &srx);
+
+ rxrpc_distribute_error(peer, err, RXRPC_CALL_NETWORK_ERROR);
+ rcu_read_unlock();
+ rxrpc_put_peer(peer);
+}
+
+/*
+ * Find the peer associated with a local error.
+ */
+static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local,
+ const struct sk_buff *skb,
+ struct sockaddr_rxrpc *srx)
+{
+ struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
+
+ _enter("");
+
+ memset(srx, 0, sizeof(*srx));
+ srx->transport_type = local->srx.transport_type;
+ srx->transport_len = local->srx.transport_len;
+ srx->transport.family = local->srx.transport.family;
+
+ switch (srx->transport.family) {
+ case AF_INET:
srx->transport_len = sizeof(srx->transport.sin);
srx->transport.family = AF_INET;
srx->transport.sin.sin_port = serr->port;
@@ -104,10 +346,8 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
/*
* Handle an MTU/fragmentation problem.
*/
-static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, struct sock_exterr_skb *serr)
+static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu)
{
- u32 mtu = serr->ee.ee_info;
-
_net("Rx ICMP Fragmentation Needed (%d)", mtu);
/* wind down the local interface MTU */
@@ -148,7 +388,7 @@ void rxrpc_error_report(struct sock *sk)
struct sock_exterr_skb *serr;
struct sockaddr_rxrpc srx;
struct rxrpc_local *local;
- struct rxrpc_peer *peer;
+ struct rxrpc_peer *peer = NULL;
struct sk_buff *skb;
rcu_read_lock();
@@ -172,41 +412,20 @@ void rxrpc_error_report(struct sock *sk)
}
rxrpc_new_skb(skb, rxrpc_skb_received);
serr = SKB_EXT_ERR(skb);
- if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
- _leave("UDP empty message");
- rcu_read_unlock();
- rxrpc_free_skb(skb, rxrpc_skb_freed);
- return;
- }
- peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx);
- if (peer && !rxrpc_get_peer_maybe(peer))
- peer = NULL;
- if (!peer) {
- rcu_read_unlock();
- rxrpc_free_skb(skb, rxrpc_skb_freed);
- _leave(" [no peer]");
- return;
- }
-
- trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);
-
- if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
- serr->ee.ee_type == ICMP_DEST_UNREACH &&
- serr->ee.ee_code == ICMP_FRAG_NEEDED)) {
- rxrpc_adjust_mtu(peer, serr);
- rcu_read_unlock();
- rxrpc_free_skb(skb, rxrpc_skb_freed);
- rxrpc_put_peer(peer);
- _leave(" [MTU update]");
- return;
+ if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) {
+ peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx);
+ if (peer && !rxrpc_get_peer_maybe(peer))
+ peer = NULL;
+ if (peer) {
+ trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);
+ rxrpc_store_error(peer, serr);
+ }
}
- rxrpc_store_error(peer, serr);
rcu_read_unlock();
rxrpc_free_skb(skb, rxrpc_skb_freed);
rxrpc_put_peer(peer);
-
_leave("");
}
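
The new rxrpc_encap_err_rcv() above is reached through the encap_err_rcv hook installed in the local_object.c hunk: rather than fishing ICMP errors back out of the socket error queue, the UDP tunnel core hands the quoted packet straight to the transport. A hedged registration sketch — the three-argument callback signature is the one this series assumes, and the demo names are hypothetical:

    #include <net/udp_tunnel.h>

    static void demo_encap_err_rcv(struct sock *sk, struct sk_buff *skb,
                                   unsigned int udp_offset);
    static int demo_encap_rcv(struct sock *sk, struct sk_buff *skb);

    static void demo_setup(struct net *net, struct socket *sock,
                           void *user_data)
    {
            struct udp_tunnel_sock_cfg cfg = {
                    .sk_user_data   = user_data,
                    .encap_type     = UDP_ENCAP_RXRPC,
                    .encap_rcv      = demo_encap_rcv,
                    .encap_err_rcv  = demo_encap_err_rcv,
            };

            setup_udp_tunnel_sock(net, sock, &cfg);
    }
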
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 0298fe2ad6d3..26d2ae9baaf2 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -121,7 +121,7 @@ static struct rxrpc_peer *__rxrpc_lookup_peer_rcu(
hash_for_each_possible_rcu(rxnet->peer_hash, peer, hash_link, hash_key) {
if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0 &&
- atomic_read(&peer->usage) > 0)
+ refcount_read(&peer->ref) > 0)
return peer;
}
@@ -140,7 +140,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local,
peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
if (peer) {
_net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
- _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
+ _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref));
}
return peer;
}
@@ -216,7 +216,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
peer = kzalloc(sizeof(struct rxrpc_peer), gfp);
if (peer) {
- atomic_set(&peer->usage, 1);
+ refcount_set(&peer->ref, 1);
peer->local = rxrpc_get_local(local);
INIT_HLIST_HEAD(&peer->error_targets);
peer->service_conns = RB_ROOT;
@@ -378,7 +378,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
_net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
- _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
+ _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref));
return peer;
}
@@ -388,10 +388,10 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer)
{
const void *here = __builtin_return_address(0);
- int n;
+ int r;
- n = atomic_inc_return(&peer->usage);
- trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n, here);
+ __refcount_inc(&peer->ref, &r);
+ trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here);
return peer;
}
@@ -401,11 +401,11 @@ struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer)
struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer)
{
const void *here = __builtin_return_address(0);
+ int r;
if (peer) {
- int n = atomic_fetch_add_unless(&peer->usage, 1, 0);
- if (n > 0)
- trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n + 1, here);
+ if (__refcount_inc_not_zero(&peer->ref, &r))
+ trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here);
else
peer = NULL;
}
@@ -436,13 +436,14 @@ void rxrpc_put_peer(struct rxrpc_peer *peer)
{
const void *here = __builtin_return_address(0);
unsigned int debug_id;
- int n;
+ bool dead;
+ int r;
if (peer) {
debug_id = peer->debug_id;
- n = atomic_dec_return(&peer->usage);
- trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here);
- if (n == 0)
+ dead = __refcount_dec_and_test(&peer->ref, &r);
+ trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here);
+ if (dead)
__rxrpc_put_peer(peer);
}
}
@@ -455,11 +456,12 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *peer)
{
const void *here = __builtin_return_address(0);
unsigned int debug_id = peer->debug_id;
- int n;
+ bool dead;
+ int r;
- n = atomic_dec_return(&peer->usage);
- trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here);
- if (n == 0) {
+ dead = __refcount_dec_and_test(&peer->ref, &r);
+ trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here);
+ if (dead) {
hash_del_rcu(&peer->hash_link);
list_del_init(&peer->keepalive_link);
rxrpc_free_peer(peer);
@@ -481,7 +483,7 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet)
hlist_for_each_entry(peer, &rxnet->peer_hash[i], hash_link) {
pr_err("Leaked peer %u {%u} %pISp\n",
peer->debug_id,
- atomic_read(&peer->usage),
+ refcount_read(&peer->ref),
&peer->srx.transport);
}
}
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index e2f990754f88..245418943e01 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -26,29 +26,23 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
*/
static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos)
__acquires(rcu)
- __acquires(rxnet->call_lock)
{
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
rcu_read_lock();
- read_lock(&rxnet->call_lock);
- return seq_list_start_head(&rxnet->calls, *_pos);
+ return seq_list_start_head_rcu(&rxnet->calls, *_pos);
}
static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
- return seq_list_next(v, &rxnet->calls, pos);
+ return seq_list_next_rcu(v, &rxnet->calls, pos);
}
static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
- __releases(rxnet->call_lock)
__releases(rcu)
{
- struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
-
- read_unlock(&rxnet->call_lock);
rcu_read_unlock();
}
@@ -107,7 +101,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
call->cid,
call->call_id,
rxrpc_is_service_call(call) ? "Svc" : "Clt",
- atomic_read(&call->usage),
+ refcount_read(&call->ref),
rxrpc_call_states[call->state],
call->abort_code,
call->debug_id,
@@ -189,7 +183,7 @@ print:
conn->service_id,
conn->proto.cid,
rxrpc_conn_is_service(conn) ? "Svc" : "Clt",
- atomic_read(&conn->usage),
+ refcount_read(&conn->ref),
rxrpc_conn_states[conn->state],
key_serial(conn->params.key),
atomic_read(&conn->serial),
@@ -239,7 +233,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
" %3u %5u %6llus %8u %8u\n",
lbuff,
rbuff,
- atomic_read(&peer->usage),
+ refcount_read(&peer->ref),
peer->cong_cwnd,
peer->mtu,
now - peer->last_tx_at,
@@ -334,3 +328,72 @@ const struct seq_operations rxrpc_peer_seq_ops = {
.stop = rxrpc_peer_seq_stop,
.show = rxrpc_peer_seq_show,
};
+
+/*
+ * Generate a list of extant virtual local endpoints in /proc/net/rxrpc/locals
+ */
+static int rxrpc_local_seq_show(struct seq_file *seq, void *v)
+{
+ struct rxrpc_local *local;
+ char lbuff[50];
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq,
+ "Proto Local "
+ " Use Act\n");
+ return 0;
+ }
+
+ local = hlist_entry(v, struct rxrpc_local, link);
+
+ sprintf(lbuff, "%pISpc", &local->srx.transport);
+
+ seq_printf(seq,
+ "UDP %-47.47s %3u %3u\n",
+ lbuff,
+ refcount_read(&local->ref),
+ atomic_read(&local->active_users));
+
+ return 0;
+}
+
+static void *rxrpc_local_seq_start(struct seq_file *seq, loff_t *_pos)
+ __acquires(rcu)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+ unsigned int n;
+
+ rcu_read_lock();
+
+ if (*_pos >= UINT_MAX)
+ return NULL;
+
+ n = *_pos;
+ if (n == 0)
+ return SEQ_START_TOKEN;
+
+ return seq_hlist_start_rcu(&rxnet->local_endpoints, n - 1);
+}
+
+static void *rxrpc_local_seq_next(struct seq_file *seq, void *v, loff_t *_pos)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ if (*_pos >= UINT_MAX)
+ return NULL;
+
+ return seq_hlist_next_rcu(v, &rxnet->local_endpoints, _pos);
+}
+
+static void rxrpc_local_seq_stop(struct seq_file *seq, void *v)
+ __releases(rcu)
+{
+ rcu_read_unlock();
+}
+
+const struct seq_operations rxrpc_local_seq_ops = {
+ .start = rxrpc_local_seq_start,
+ .next = rxrpc_local_seq_next,
+ .stop = rxrpc_local_seq_stop,
+ .show = rxrpc_local_seq_show,
+};
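
For reference, the show routine above prints one fixed-width line per endpoint under the two-part header, with the %pISpc address padded into a 47-character column. The same printf layout can be checked from userspace (the sample address is made up):

    #include <stdio.h>

    int main(void)
    {
            /* Mirrors the seq_printf() format above: left-justified,
             * width 47, truncated at 47, then two 3-wide counters.
             */
            printf("Proto Local                                          Use Act\n");
            printf("UDP   %-47.47s %3u %3u\n", "192.168.0.1:7001", 3u, 1u);
            return 0;
    }
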
diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h
index 49bb972539aa..d2cf8e1d218f 100644
--- a/net/rxrpc/protocol.h
+++ b/net/rxrpc/protocol.h
@@ -57,7 +57,7 @@ struct rxrpc_wire_header {
uint8_t userStatus; /* app-layer defined status */
#define RXRPC_USERSTATUS_SERVICE_UPGRADE 0x01 /* AuriStor service upgrade request */
-
+
uint8_t securityIndex; /* security protocol ID */
union {
__be16 _rsvd; /* reserved */
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index eca6dda26c77..7e39c262fd79 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -260,11 +260,9 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
rxrpc_end_rx_phase(call, serial);
} else {
/* Check to see if there's an ACK that needs sending. */
- if (after_eq(hard_ack, call->ackr_consumed + 2) ||
- after_eq(top, call->ackr_seen + 2) ||
- (hard_ack == top && after(hard_ack, call->ackr_consumed)))
- rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial,
- true, true,
+ if (atomic_inc_return(&call->ackr_nr_consumed) > 2)
+ rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, serial,
+ true, false,
rxrpc_propose_ack_rotate_rx);
if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY)
rxrpc_send_ack_packet(call, false, NULL);
@@ -773,46 +771,3 @@ call_complete:
goto out;
}
EXPORT_SYMBOL(rxrpc_kernel_recv_data);
-
-/**
- * rxrpc_kernel_get_reply_time - Get timestamp on first reply packet
- * @sock: The socket that the call exists on
- * @call: The call to query
- * @_ts: Where to put the timestamp
- *
- * Retrieve the timestamp from the first DATA packet of the reply if it is
- * in the ring. Returns true if successful, false if not.
- */
-bool rxrpc_kernel_get_reply_time(struct socket *sock, struct rxrpc_call *call,
- ktime_t *_ts)
-{
- struct sk_buff *skb;
- rxrpc_seq_t hard_ack, top, seq;
- bool success = false;
-
- mutex_lock(&call->user_mutex);
-
- if (READ_ONCE(call->state) != RXRPC_CALL_CLIENT_RECV_REPLY)
- goto out;
-
- hard_ack = call->rx_hard_ack;
- if (hard_ack != 0)
- goto out;
-
- seq = hard_ack + 1;
- top = smp_load_acquire(&call->rx_top);
- if (after(seq, top))
- goto out;
-
- skb = call->rxtx_buffer[seq & RXRPC_RXTX_BUFF_MASK];
- if (!skb)
- goto out;
-
- *_ts = skb_get_ktime(skb);
- success = true;
-
-out:
- mutex_unlock(&call->user_mutex);
- return success;
-}
-EXPORT_SYMBOL(rxrpc_kernel_get_reply_time);
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 08aab5c01437..78fa0524156f 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -431,7 +431,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call,
break;
}
- _leave(" = %d [set %hx]", ret, y);
+ _leave(" = %d [set %x]", ret, y);
return ret;
}
@@ -540,7 +540,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
* directly into the target buffer.
*/
sg = _sg;
- nsg = skb_shinfo(skb)->nr_frags;
+ nsg = skb_shinfo(skb)->nr_frags + 1;
if (nsg <= 4) {
nsg = 4;
} else {
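
The off-by-one fix above follows from how skb geometry maps onto a scatterlist: skb_to_sgvec() consumes one entry for the linear head plus one per page fragment, so sizing the array from nr_frags alone under-counts by the head entry. The counting rule (fraglist skbs, which need more, left aside):

    #include <linux/scatterlist.h>
    #include <linux/skbuff.h>

    /* Worst-case scatterlist entries for a simple, non-fraglist skb:
     * one for the linear data area plus one per paged fragment.
     */
    static unsigned int skb_sg_entries(const struct sk_buff *skb)
    {
            return skb_shinfo(skb)->nr_frags + 1;
    }
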
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index af8ad6c30b9f..3c3a626459de 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -51,10 +51,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
return sock_intr_errno(*timeo);
trace_rxrpc_transmit(call, rxrpc_transmit_wait);
- mutex_unlock(&call->user_mutex);
*timeo = schedule_timeout(*timeo);
- if (mutex_lock_interruptible(&call->user_mutex) < 0)
- return sock_intr_errno(*timeo);
}
}
@@ -290,37 +287,48 @@ out:
static int rxrpc_send_data(struct rxrpc_sock *rx,
struct rxrpc_call *call,
struct msghdr *msg, size_t len,
- rxrpc_notify_end_tx_t notify_end_tx)
+ rxrpc_notify_end_tx_t notify_end_tx,
+ bool *_dropped_lock)
{
struct rxrpc_skb_priv *sp;
struct sk_buff *skb;
struct sock *sk = &rx->sk;
+ enum rxrpc_call_state state;
long timeo;
- bool more;
- int ret, copied;
+ bool more = msg->msg_flags & MSG_MORE;
+ int ret, copied = 0;
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
/* this should be in poll */
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+reload:
+ ret = -EPIPE;
if (sk->sk_shutdown & SEND_SHUTDOWN)
- return -EPIPE;
-
- more = msg->msg_flags & MSG_MORE;
-
+ goto maybe_error;
+ state = READ_ONCE(call->state);
+ ret = -ESHUTDOWN;
+ if (state >= RXRPC_CALL_COMPLETE)
+ goto maybe_error;
+ ret = -EPROTO;
+ if (state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+ state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+ state != RXRPC_CALL_SERVER_SEND_REPLY)
+ goto maybe_error;
+
+ ret = -EMSGSIZE;
if (call->tx_total_len != -1) {
- if (len > call->tx_total_len)
- return -EMSGSIZE;
- if (!more && len != call->tx_total_len)
- return -EMSGSIZE;
+ if (len - copied > call->tx_total_len)
+ goto maybe_error;
+ if (!more && len - copied != call->tx_total_len)
+ goto maybe_error;
}
skb = call->tx_pending;
call->tx_pending = NULL;
rxrpc_see_skb(skb, rxrpc_skb_seen);
- copied = 0;
do {
/* Check to see if there's a ping ACK to reply to. */
if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE)
@@ -331,16 +339,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
_debug("alloc");
- if (!rxrpc_check_tx_space(call, NULL)) {
- ret = -EAGAIN;
- if (msg->msg_flags & MSG_DONTWAIT)
- goto maybe_error;
- ret = rxrpc_wait_for_tx_window(rx, call,
- &timeo,
- msg->msg_flags & MSG_WAITALL);
- if (ret < 0)
- goto maybe_error;
- }
+ if (!rxrpc_check_tx_space(call, NULL))
+ goto wait_for_space;
/* Work out the maximum size of a packet. Assume that
* the security header is going to be in the padded
@@ -444,6 +444,12 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
success:
ret = copied;
+ if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) {
+ read_lock_bh(&call->state_lock);
+ if (call->error < 0)
+ ret = call->error;
+ read_unlock_bh(&call->state_lock);
+ }
out:
call->tx_pending = skb;
_leave(" = %d", ret);
@@ -462,6 +468,27 @@ maybe_error:
efault:
ret = -EFAULT;
goto out;
+
+wait_for_space:
+ ret = -EAGAIN;
+ if (msg->msg_flags & MSG_DONTWAIT)
+ goto maybe_error;
+ mutex_unlock(&call->user_mutex);
+ *_dropped_lock = true;
+ ret = rxrpc_wait_for_tx_window(rx, call, &timeo,
+ msg->msg_flags & MSG_WAITALL);
+ if (ret < 0)
+ goto maybe_error;
+ if (call->interruptibility == RXRPC_INTERRUPTIBLE) {
+ if (mutex_lock_interruptible(&call->user_mutex) < 0) {
+ ret = sock_intr_errno(timeo);
+ goto maybe_error;
+ }
+ } else {
+ mutex_lock(&call->user_mutex);
+ }
+ *_dropped_lock = false;
+ goto reload;
}
/*
@@ -623,6 +650,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
enum rxrpc_call_state state;
struct rxrpc_call *call;
unsigned long now, j;
+ bool dropped_lock = false;
int ret;
struct rxrpc_send_params p = {
@@ -731,21 +759,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
ret = rxrpc_send_abort_packet(call);
} else if (p.command != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
- } else if (rxrpc_is_client_call(call) &&
- state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
- /* request phase complete for this client call */
- ret = -EPROTO;
- } else if (rxrpc_is_service_call(call) &&
- state != RXRPC_CALL_SERVER_ACK_REQUEST &&
- state != RXRPC_CALL_SERVER_SEND_REPLY) {
- /* Reply phase not begun or not complete for service call. */
- ret = -EPROTO;
} else {
- ret = rxrpc_send_data(rx, call, msg, len, NULL);
+ ret = rxrpc_send_data(rx, call, msg, len, NULL, &dropped_lock);
}
out_put_unlock:
- mutex_unlock(&call->user_mutex);
+ if (!dropped_lock)
+ mutex_unlock(&call->user_mutex);
error_put:
rxrpc_put_call(call, rxrpc_call_put);
_leave(" = %d", ret);
@@ -773,6 +793,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
struct msghdr *msg, size_t len,
rxrpc_notify_end_tx_t notify_end_tx)
{
+ bool dropped_lock = false;
int ret;
_enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
@@ -790,7 +811,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
case RXRPC_CALL_SERVER_ACK_REQUEST:
case RXRPC_CALL_SERVER_SEND_REPLY:
ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
- notify_end_tx);
+ notify_end_tx, &dropped_lock);
break;
case RXRPC_CALL_COMPLETE:
read_lock_bh(&call->state_lock);
@@ -804,7 +825,8 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
break;
}
- mutex_unlock(&call->user_mutex);
+ if (!dropped_lock)
+ mutex_unlock(&call->user_mutex);
_leave(" = %d", ret);
return ret;
}
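
The sendmsg rework above adopts a drop-the-lock-while-sleeping pattern: the user mutex is released before waiting for transmit-window space, and *_dropped_lock tells both callers whether they still own it on return. Reduced to its shape, with the interruptibility choice trimmed:

    #include <linux/errno.h>
    #include <linux/mutex.h>
    #include <linux/types.h>

    /* Returns with *dropped telling the caller whether it must still
     * unlock; after a failed interruptible relock the lock stays
     * dropped and the caller must not unlock it again.
     */
    static int op_with_wait(struct mutex *lock, bool *dropped)
    {
            *dropped = false;

            /* ...fast path under the lock, until we must sleep... */

            mutex_unlock(lock);
            *dropped = true;

            /* ...sleep here waiting for window space... */

            if (mutex_lock_interruptible(lock) < 0)
                    return -ERESTARTSYS;
            *dropped = false;
            return 0;
    }
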
diff --git a/net/rxrpc/server_key.c b/net/rxrpc/server_key.c
index ead3471307ee..ee269e0e6ee8 100644
--- a/net/rxrpc/server_key.c
+++ b/net/rxrpc/server_key.c
@@ -84,6 +84,9 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep)
prep->payload.data[1] = (struct rxrpc_security *)sec;
+ if (!sec->preparse_server_key)
+ return -EINVAL;
+
return sec->preparse_server_key(prep);
}
@@ -91,7 +94,7 @@ static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep)
{
const struct rxrpc_security *sec = prep->payload.data[1];
- if (sec)
+ if (sec && sec->free_preparse_server_key)
sec->free_preparse_server_key(prep);
}
@@ -99,7 +102,7 @@ static void rxrpc_destroy_s(struct key *key)
{
const struct rxrpc_security *sec = key->payload.data[1];
- if (sec)
+ if (sec && sec->destroy_server_key)
sec->destroy_server_key(key);
}
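
All three server_key fixes above share one defensive shape: a security class may not implement server keys at all, so a missing mandatory hook is reported as -EINVAL while missing cleanup hooks are silently skipped. In generic form:

    #include <linux/errno.h>

    struct demo_sec_ops {
            int     (*preparse)(void *ctx);
            void    (*destroy)(void *ctx);
    };

    static int demo_preparse(const struct demo_sec_ops *ops, void *ctx)
    {
            if (!ops->preparse)
                    return -EINVAL; /* mandatory op not implemented */
            return ops->preparse(ctx);
    }

    static void demo_destroy(const struct demo_sec_ops *ops, void *ctx)
    {
            if (ops && ops->destroy) /* optional: skip quietly */
                    ops->destroy(ctx);
    }
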
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index 0348d2bf6f7d..580a5acffee7 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -71,7 +71,6 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
const void *here = __builtin_return_address(0);
if (skb) {
int n;
- CHECK_SLAB_OKAY(&skb->users);
n = atomic_dec_return(select_skb_count(skb));
trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
rxrpc_skb(skb)->rx_flags, here);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 540351d6a5f4..555e0910786b 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -12,7 +12,7 @@
static struct ctl_table_header *rxrpc_sysctl_reg_table;
static const unsigned int four = 4;
-static const unsigned int thirtytwo = 32;
+static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1;
static const unsigned int n_65535 = 65535;
static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1;
static const unsigned long one_jiffy = 1;
@@ -89,7 +89,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = (void *)&four,
- .extra2 = (void *)&thirtytwo,
+ .extra2 = (void *)&max_backlog,
},
{
.procname = "rx_window_size",
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 32563cef85bf..9b31a10cc639 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -195,7 +195,7 @@ static int offload_action_init(struct flow_offload_action *fl_action,
if (act->ops->offload_act_setup) {
spin_lock_bh(&act->tcfa_lock);
err = act->ops->offload_act_setup(act, fl_action, NULL,
- false);
+ false, extack);
spin_unlock_bh(&act->tcfa_lock);
return err;
}
@@ -271,10 +271,10 @@ static int tcf_action_offload_add_ex(struct tc_action *action,
if (err)
goto fl_err;
- err = tc_setup_action(&fl_action->action, actions);
+ err = tc_setup_action(&fl_action->action, actions, extack);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
- "Failed to setup tc actions for offload\n");
+ "Failed to setup tc actions for offload");
goto fl_err;
}
@@ -588,7 +588,8 @@ static int tcf_idr_release_unsafe(struct tc_action *p)
}
static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct nlattr *nest;
int n_i = 0;
@@ -604,20 +605,25 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
if (nla_put_string(skb, TCA_KIND, ops->kind))
goto nla_put_failure;
+ ret = 0;
mutex_lock(&idrinfo->lock);
idr_for_each_entry_ul(idr, p, tmp, id) {
if (IS_ERR(p))
continue;
ret = tcf_idr_release_unsafe(p);
- if (ret == ACT_P_DELETED) {
+ if (ret == ACT_P_DELETED)
module_put(ops->owner);
- n_i++;
- } else if (ret < 0) {
- mutex_unlock(&idrinfo->lock);
- goto nla_put_failure;
- }
+ else if (ret < 0)
+ break;
+ n_i++;
}
mutex_unlock(&idrinfo->lock);
+ if (ret < 0) {
+ if (n_i)
+ NL_SET_ERR_MSG(extack, "Unable to flush all TC actions");
+ else
+ goto nla_put_failure;
+ }
ret = nla_put_u32(skb, TCA_FCNT, n_i);
if (ret)
@@ -638,7 +644,7 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
struct tcf_idrinfo *idrinfo = tn->idrinfo;
if (type == RTM_DELACTION) {
- return tcf_del_walker(idrinfo, skb, ops);
+ return tcf_del_walker(idrinfo, skb, ops, extack);
} else if (type == RTM_GETACTION) {
return tcf_dump_walker(idrinfo, skb, cb);
} else {
@@ -670,6 +676,31 @@ int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
}
EXPORT_SYMBOL(tcf_idr_search);
+static int __tcf_generic_walker(struct net *net, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
+{
+ struct tc_action_net *tn = net_generic(net, ops->net_id);
+
+ if (unlikely(ops->walk))
+ return ops->walk(net, skb, cb, type, ops, extack);
+
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
+}
+
+static int __tcf_idr_search(struct net *net,
+ const struct tc_action_ops *ops,
+ struct tc_action **a, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, ops->net_id);
+
+ if (unlikely(ops->lookup))
+ return ops->lookup(net, a, index);
+
+ return tcf_idr_search(tn, a, index);
+}
+
static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index)
{
struct tc_action *p;
@@ -920,7 +951,7 @@ int tcf_register_action(struct tc_action_ops *act,
struct tc_action_ops *a;
int ret;
- if (!act->act || !act->dump || !act->init || !act->walk || !act->lookup)
+ if (!act->act || !act->dump || !act->init)
return -EINVAL;
/* We have to register pernet ops before making the action ops visible,
@@ -1037,6 +1068,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
restart_act_graph:
for (i = 0; i < nr_actions; i++) {
const struct tc_action *a = actions[i];
+ int repeat_ttl;
if (jmp_prgcnt > 0) {
jmp_prgcnt -= 1;
@@ -1045,11 +1077,17 @@ restart_act_graph:
if (tc_act_skip_sw(a->tcfa_flags))
continue;
+
+ repeat_ttl = 32;
repeat:
ret = a->ops->act(skb, a, res);
- if (ret == TC_ACT_REPEAT)
- goto repeat; /* we need a ttl - JHS */
-
+ if (unlikely(ret == TC_ACT_REPEAT)) {
+ if (--repeat_ttl != 0)
+ goto repeat;
+ /* suspicious opcode, stop pipeline */
+ net_warn_ratelimited("TC_ACT_REPEAT abuse ?\n");
+ return TC_ACT_OK;
+ }
if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) {
jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK;
if (!jmp_prgcnt || (jmp_prgcnt > nr_actions)) {
@@ -1439,6 +1477,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
continue;
if (skip_sw != tc_act_skip_sw(act->tcfa_flags) ||
skip_hw != tc_act_skip_hw(act->tcfa_flags)) {
+ NL_SET_ERR_MSG(extack,
+ "Mismatch between action and filter offload flags");
err = -EINVAL;
goto err;
}
@@ -1623,7 +1663,7 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
goto err_out;
}
err = -ENOENT;
- if (ops->lookup(net, &a, index) == 0) {
+ if (__tcf_idr_search(net, ops, &a, index) == 0) {
NL_SET_ERR_MSG(extack, "TC action with specified index not found");
goto err_mod;
}
@@ -1688,7 +1728,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
goto out_module_put;
}
- err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops, extack);
+ err = __tcf_generic_walker(net, skb, &dcb, RTM_DELACTION, ops, extack);
if (err <= 0) {
nla_nest_cancel(skb, nest);
goto out_module_put;
@@ -2106,7 +2146,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
if (nest == NULL)
goto out_module_put;
- ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o, NULL);
+ ret = __tcf_generic_walker(net, skb, cb, RTM_GETACTION, a_o, NULL);
if (ret < 0)
goto out_module_put;
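
Two independent hardenings sit in the act_api hunks above: TC_ACT_REPEAT is bounded by a 32-iteration TTL, and .walk/.lookup become optional, with __tcf_generic_walker()/__tcf_idr_search() falling back to the generic IDR code keyed by ops->net_id. The repeat guard, as a tiny standalone demonstration:

    #include <stdio.h>

    enum { ACT_OK, ACT_REPEAT };

    /* Hypothetical action that requests re-execution too many times. */
    static int run_action(void)
    {
            static int n;

            return ++n < 40 ? ACT_REPEAT : ACT_OK;
    }

    int main(void)
    {
            int repeat_ttl = 32, ret;

            do
                    ret = run_action();
            while (ret == ACT_REPEAT && --repeat_ttl != 0);

            if (ret == ACT_REPEAT) /* TTL exhausted: stop the pipeline */
                    fprintf(stderr, "TC_ACT_REPEAT abuse?\n");
            return 0;
    }
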
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index a77d8908e737..b79eee44e24e 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -29,7 +29,6 @@ struct tcf_bpf_cfg {
bool is_ebpf;
};
-static unsigned int bpf_net_id;
static struct tc_action_ops act_bpf_ops;
static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
@@ -53,6 +52,8 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
bpf_compute_data_pointers(skb);
filter_res = bpf_prog_run(filter, skb);
}
+ if (unlikely(!skb->tstamp && skb->mono_delivery_time))
+ skb->mono_delivery_time = 0;
if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK)
skb_orphan(skb);
@@ -278,7 +279,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, bpf_net_id);
+ struct tc_action_net *tn = net_generic(net, act_bpf_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
struct tcf_chain *goto_ch = NULL;
@@ -332,7 +333,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
is_ebpf = tb[TCA_ACT_BPF_FD];
- if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) {
+ if (is_bpf == is_ebpf) {
ret = -EINVAL;
goto put_chain;
}
@@ -388,23 +389,6 @@ static void tcf_bpf_cleanup(struct tc_action *act)
tcf_bpf_cfg_cleanup(&tmp);
}
-static int tcf_bpf_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, bpf_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
-static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, bpf_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static struct tc_action_ops act_bpf_ops __read_mostly = {
.kind = "bpf",
.id = TCA_ID_BPF,
@@ -413,27 +397,25 @@ static struct tc_action_ops act_bpf_ops __read_mostly = {
.dump = tcf_bpf_dump,
.cleanup = tcf_bpf_cleanup,
.init = tcf_bpf_init,
- .walk = tcf_bpf_walker,
- .lookup = tcf_bpf_search,
.size = sizeof(struct tcf_bpf),
};
static __net_init int bpf_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, bpf_net_id);
+ struct tc_action_net *tn = net_generic(net, act_bpf_ops.net_id);
return tc_action_net_init(net, tn, &act_bpf_ops);
}
static void __net_exit bpf_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, bpf_net_id);
+ tc_action_net_exit(net_list, act_bpf_ops.net_id);
}
static struct pernet_operations bpf_net_ops = {
.init = bpf_init_net,
.exit_batch = bpf_exit_net,
- .id = &bpf_net_id,
+ .id = &act_bpf_ops.net_id,
.size = sizeof(struct tc_action_net),
};
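
The act_bpf conversion above is the template for the connmark, csum and ct cleanups that follow: the module-private *_net_id variable disappears, pernet registration writes the id straight into the ops table, and the per-action walk/lookup wrappers go away in favour of the core fallbacks. The registration half, sketched with a hypothetical action:

    #include <net/act_api.h>
    #include <net/netns/generic.h>

    static struct tc_action_ops act_demo_ops;  /* hypothetical action */

    static __net_init int demo_init_net(struct net *net)
    {
            struct tc_action_net *tn = net_generic(net, act_demo_ops.net_id);

            return tc_action_net_init(net, tn, &act_demo_ops);
    }

    static struct pernet_operations demo_net_ops = {
            .init   = demo_init_net,
            /* register_pernet_subsys() fills in *id; the same field then
             * feeds every net_generic() lookup for this action.
             */
            .id     = &act_demo_ops.net_id,
            .size   = sizeof(struct tc_action_net),
    };
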
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 09e2aafc8943..66b143bb04ac 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -25,7 +25,6 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
-static unsigned int connmark_net_id;
static struct tc_action_ops act_connmark_ops;
static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a,
@@ -99,7 +98,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, connmark_net_id);
+ struct tc_action_net *tn = net_generic(net, act_connmark_ops.net_id);
struct nlattr *tb[TCA_CONNMARK_MAX + 1];
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct tcf_chain *goto_ch = NULL;
@@ -200,23 +199,6 @@ nla_put_failure:
return -1;
}
-static int tcf_connmark_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, connmark_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
-static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, connmark_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static struct tc_action_ops act_connmark_ops = {
.kind = "connmark",
.id = TCA_ID_CONNMARK,
@@ -224,27 +206,25 @@ static struct tc_action_ops act_connmark_ops = {
.act = tcf_connmark_act,
.dump = tcf_connmark_dump,
.init = tcf_connmark_init,
- .walk = tcf_connmark_walker,
- .lookup = tcf_connmark_search,
.size = sizeof(struct tcf_connmark_info),
};
static __net_init int connmark_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, connmark_net_id);
+ struct tc_action_net *tn = net_generic(net, act_connmark_ops.net_id);
return tc_action_net_init(net, tn, &act_connmark_ops);
}
static void __net_exit connmark_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, connmark_net_id);
+ tc_action_net_exit(net_list, act_connmark_ops.net_id);
}
static struct pernet_operations connmark_net_ops = {
.init = connmark_init_net,
.exit_batch = connmark_exit_net,
- .id = &connmark_net_id,
+ .id = &act_connmark_ops.net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index e0f515b774ca..1366adf9b909 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -37,7 +37,6 @@ static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
[TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
};
-static unsigned int csum_net_id;
static struct tc_action_ops act_csum_ops;
static int tcf_csum_init(struct net *net, struct nlattr *nla,
@@ -45,7 +44,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, csum_net_id);
+ struct tc_action_net *tn = net_generic(net, act_csum_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct tcf_csum_params *params_new;
struct nlattr *tb[TCA_CSUM_MAX + 1];
@@ -673,30 +672,14 @@ static void tcf_csum_cleanup(struct tc_action *a)
kfree_rcu(params, rcu);
}
-static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, csum_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
-static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, csum_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static size_t tcf_csum_get_fill_size(const struct tc_action *act)
{
return nla_total_size(sizeof(struct tc_csum));
}
static int tcf_csum_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -721,8 +704,6 @@ static struct tc_action_ops act_csum_ops = {
.dump = tcf_csum_dump,
.init = tcf_csum_init,
.cleanup = tcf_csum_cleanup,
- .walk = tcf_csum_walker,
- .lookup = tcf_csum_search,
.get_fill_size = tcf_csum_get_fill_size,
.offload_act_setup = tcf_csum_offload_act_setup,
.size = sizeof(struct tcf_csum),
@@ -730,20 +711,20 @@ static struct tc_action_ops act_csum_ops = {
static __net_init int csum_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, csum_net_id);
+ struct tc_action_net *tn = net_generic(net, act_csum_ops.net_id);
return tc_action_net_init(net, tn, &act_csum_ops);
}
static void __net_exit csum_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, csum_net_id);
+ tc_action_net_exit(net_list, act_csum_ops.net_id);
}
static struct pernet_operations csum_net_ops = {
.init = csum_init_net,
.exit_batch = csum_exit_net,
- .id = &csum_net_id,
+ .id = &act_csum_ops.net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index f99247fc6468..b38d91d6b249 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -57,12 +57,6 @@ static const struct rhashtable_params zones_params = {
.automatic_shrinking = true,
};
-static struct nf_ct_ext_type act_ct_extend __read_mostly = {
- .len = sizeof(struct nf_conn_act_ct_ext),
- .align = __alignof__(struct nf_conn_act_ct_ext),
- .id = NF_CT_EXT_ACT_CT,
-};
-
static struct flow_action_entry *
tcf_ct_flow_table_flow_action_get_next(struct flow_action *flow_action)
{
@@ -283,7 +277,7 @@ static struct nf_flowtable_type flowtable_ct = {
.owner = THIS_MODULE,
};
-static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
+static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params)
{
struct tcf_ct_flow_table *ct_ft;
int err = -ENOMEM;
@@ -309,6 +303,7 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
err = nf_flow_table_init(&ct_ft->nf_ft);
if (err)
goto err_init;
+ write_pnet(&ct_ft->nf_ft.net, net);
__module_get(THIS_MODULE);
out_unlock:
@@ -361,6 +356,13 @@ static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
}
}
+static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry,
+ struct nf_conn_act_ct_ext *act_ct_ext, u8 dir)
+{
+ entry->tuplehash[dir].tuple.xmit_type = FLOW_OFFLOAD_XMIT_TC;
+ entry->tuplehash[dir].tuple.tc.iifidx = act_ct_ext->ifindex[dir];
+}
+
static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
struct nf_conn *ct,
bool tcp)
@@ -385,10 +387,8 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
act_ct_ext = nf_conn_act_ct_ext_find(ct);
if (act_ct_ext) {
- entry->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
- act_ct_ext->ifindex[IP_CT_DIR_ORIGINAL];
- entry->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
- act_ct_ext->ifindex[IP_CT_DIR_REPLY];
+ tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_ORIGINAL);
+ tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_REPLY);
}
err = flow_offload_add(&ct_ft->nf_ft, entry);
@@ -421,6 +421,19 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
break;
case IPPROTO_UDP:
break;
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ case IPPROTO_GRE: {
+ struct nf_conntrack_tuple *tuple;
+
+ if (ct->status & IPS_NAT_MASK)
+ return;
+ tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ /* No support for GRE v1 */
+ if (tuple->src.u.gre.key || tuple->dst.u.gre.key)
+ return;
+ break;
+ }
+#endif
default:
return;
}
@@ -440,6 +453,8 @@ tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb,
struct flow_ports *ports;
unsigned int thoff;
struct iphdr *iph;
+ size_t hdrsize;
+ u8 ipproto;
if (!pskb_network_may_pull(skb, sizeof(*iph)))
return false;
@@ -451,29 +466,54 @@ tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb,
unlikely(thoff != sizeof(struct iphdr)))
return false;
- if (iph->protocol != IPPROTO_TCP &&
- iph->protocol != IPPROTO_UDP)
+ ipproto = iph->protocol;
+ switch (ipproto) {
+ case IPPROTO_TCP:
+ hdrsize = sizeof(struct tcphdr);
+ break;
+ case IPPROTO_UDP:
+ hdrsize = sizeof(*ports);
+ break;
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ case IPPROTO_GRE:
+ hdrsize = sizeof(struct gre_base_hdr);
+ break;
+#endif
+ default:
return false;
+ }
if (iph->ttl <= 1)
return false;
- if (!pskb_network_may_pull(skb, iph->protocol == IPPROTO_TCP ?
- thoff + sizeof(struct tcphdr) :
- thoff + sizeof(*ports)))
+ if (!pskb_network_may_pull(skb, thoff + hdrsize))
return false;
- iph = ip_hdr(skb);
- if (iph->protocol == IPPROTO_TCP)
+ switch (ipproto) {
+ case IPPROTO_TCP:
*tcph = (void *)(skb_network_header(skb) + thoff);
+ fallthrough;
+ case IPPROTO_UDP:
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ break;
+ case IPPROTO_GRE: {
+ struct gre_base_hdr *greh;
+
+ greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
+ if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
+ return false;
+ break;
+ }
+ }
+
+ iph = ip_hdr(skb);
- ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v4.s_addr = iph->saddr;
tuple->dst_v4.s_addr = iph->daddr;
- tuple->src_port = ports->source;
- tuple->dst_port = ports->dest;
tuple->l3proto = AF_INET;
- tuple->l4proto = iph->protocol;
+ tuple->l4proto = ipproto;
return true;
}
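The refactored tuple fill relies on a layout fact that is easy to miss: the TCP case stores the header pointer and then deliberately falls through into the UDP arm, because TCP and UDP headers both begin with 16-bit source and destination ports, exactly the shape of struct flow_ports. The IPv6 variant below uses the same trick. A compile-time sketch of that invariant (the check function itself is illustrative, not from the tree):

#include <linux/build_bug.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <net/netfilter/nf_flow_table.h>	/* struct flow_ports */

static void flow_ports_layout_check(void)
{
	BUILD_BUG_ON(offsetof(struct tcphdr, source) !=
		     offsetof(struct flow_ports, source));
	BUILD_BUG_ON(offsetof(struct tcphdr, dest) !=
		     offsetof(struct flow_ports, dest));
	BUILD_BUG_ON(offsetof(struct udphdr, source) !=
		     offsetof(struct flow_ports, source));
	BUILD_BUG_ON(offsetof(struct udphdr, dest) !=
		     offsetof(struct flow_ports, dest));
}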
@@ -486,36 +526,63 @@ tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb,
struct flow_ports *ports;
struct ipv6hdr *ip6h;
unsigned int thoff;
+ size_t hdrsize;
+ u8 nexthdr;
if (!pskb_network_may_pull(skb, sizeof(*ip6h)))
return false;
ip6h = ipv6_hdr(skb);
+ thoff = sizeof(*ip6h);
- if (ip6h->nexthdr != IPPROTO_TCP &&
- ip6h->nexthdr != IPPROTO_UDP)
+ nexthdr = ip6h->nexthdr;
+ switch (nexthdr) {
+ case IPPROTO_TCP:
+ hdrsize = sizeof(struct tcphdr);
+ break;
+ case IPPROTO_UDP:
+ hdrsize = sizeof(*ports);
+ break;
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ case IPPROTO_GRE:
+ hdrsize = sizeof(struct gre_base_hdr);
+ break;
+#endif
+ default:
return false;
+ }
if (ip6h->hop_limit <= 1)
return false;
- thoff = sizeof(*ip6h);
- if (!pskb_network_may_pull(skb, ip6h->nexthdr == IPPROTO_TCP ?
- thoff + sizeof(struct tcphdr) :
- thoff + sizeof(*ports)))
+ if (!pskb_network_may_pull(skb, thoff + hdrsize))
return false;
- ip6h = ipv6_hdr(skb);
- if (ip6h->nexthdr == IPPROTO_TCP)
+ switch (nexthdr) {
+ case IPPROTO_TCP:
*tcph = (void *)(skb_network_header(skb) + thoff);
+ fallthrough;
+ case IPPROTO_UDP:
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ break;
+ case IPPROTO_GRE: {
+ struct gre_base_hdr *greh;
+
+ greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
+ if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
+ return false;
+ break;
+ }
+ }
+
+ ip6h = ipv6_hdr(skb);
- ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v6 = ip6h->saddr;
tuple->dst_v6 = ip6h->daddr;
- tuple->src_port = ports->source;
- tuple->dst_port = ports->dest;
tuple->l3proto = AF_INET6;
- tuple->l4proto = ip6h->nexthdr;
+ tuple->l4proto = nexthdr;
return true;
}
@@ -533,11 +600,6 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
struct nf_conn *ct;
u8 dir;
- /* Previously seen or loopback */
- ct = nf_ct_get(skb, &ctinfo);
- if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
- return false;
-
switch (family) {
case NFPROTO_IPV4:
if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph))
@@ -587,7 +649,6 @@ static void tcf_ct_flow_tables_uninit(void)
}
static struct tc_action_ops act_ct_ops;
-static unsigned int ct_net_id;
struct tc_ct_action_net {
struct tc_action_net tn; /* Must be first */
@@ -605,22 +666,25 @@ static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb,
if (!ct)
return false;
if (!net_eq(net, read_pnet(&ct->ct_net)))
- return false;
+ goto drop_ct;
if (nf_ct_zone(ct)->id != zone_id)
- return false;
+ goto drop_ct;
/* Force conntrack entry direction. */
if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
if (nf_ct_is_confirmed(ct))
nf_ct_kill(ct);
- nf_ct_put(ct);
- nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
-
- return false;
+ goto drop_ct;
}
return true;
+
+drop_ct:
+ nf_ct_put(ct);
+ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+
+ return false;
}
/* Trim the skb to the length specified by the IP/IPv6 header,
@@ -632,7 +696,6 @@ static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb,
static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family)
{
unsigned int len;
- int err;
switch (family) {
case NFPROTO_IPV4:
@@ -646,9 +709,7 @@ static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family)
len = skb->len;
}
- err = pskb_trim_rcsum(skb, len);
-
- return err;
+ return pskb_trim_rcsum(skb, len);
}
static u8 tcf_ct_skb_nf_family(struct sk_buff *skb)
@@ -1190,7 +1251,7 @@ static int tcf_ct_fill_params(struct net *net,
struct nlattr **tb,
struct netlink_ext_ack *extack)
{
- struct tc_ct_action_net *tn = net_generic(net, ct_net_id);
+ struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id);
struct nf_conntrack_zone zone;
struct nf_conn *tmpl;
int err;
@@ -1265,7 +1326,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, ct_net_id);
+ struct tc_action_net *tn = net_generic(net, act_ct_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct tcf_ct_params *params = NULL;
struct nlattr *tb[TCA_CT_MAX + 1];
@@ -1327,9 +1388,9 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
if (err)
goto cleanup;
- err = tcf_ct_flow_table_get(params);
+ err = tcf_ct_flow_table_get(net, params);
if (err)
- goto cleanup;
+ goto cleanup_params;
spin_lock_bh(&c->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
@@ -1344,6 +1405,9 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
return res;
+cleanup_params:
+ if (params->tmpl)
+ nf_ct_put(params->tmpl);
cleanup:
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
@@ -1493,23 +1557,6 @@ nla_put_failure:
return -1;
}
-static int tcf_ct_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, ct_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
-static int tcf_ct_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, ct_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets,
u64 drops, u64 lastuse, bool hw)
{
@@ -1520,7 +1567,8 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets,
}
static int tcf_ct_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -1547,8 +1595,6 @@ static struct tc_action_ops act_ct_ops = {
.dump = tcf_ct_dump,
.init = tcf_ct_init,
.cleanup = tcf_ct_cleanup,
- .walk = tcf_ct_walker,
- .lookup = tcf_ct_search,
.stats_update = tcf_stats_update,
.offload_act_setup = tcf_ct_offload_act_setup,
.size = sizeof(struct tcf_ct),
@@ -1557,7 +1603,7 @@ static struct tc_action_ops act_ct_ops = {
static __net_init int ct_init_net(struct net *net)
{
unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8;
- struct tc_ct_action_net *tn = net_generic(net, ct_net_id);
+ struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id);
if (nf_connlabels_get(net, n_bits - 1)) {
tn->labels = false;
@@ -1575,20 +1621,20 @@ static void __net_exit ct_exit_net(struct list_head *net_list)
rtnl_lock();
list_for_each_entry(net, net_list, exit_list) {
- struct tc_ct_action_net *tn = net_generic(net, ct_net_id);
+ struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id);
if (tn->labels)
nf_connlabels_put(net);
}
rtnl_unlock();
- tc_action_net_exit(net_list, ct_net_id);
+ tc_action_net_exit(net_list, act_ct_ops.net_id);
}
static struct pernet_operations ct_net_ops = {
.init = ct_init_net,
.exit_batch = ct_exit_net,
- .id = &ct_net_id,
+ .id = &act_ct_ops.net_id,
.size = sizeof(struct tc_ct_action_net),
};
@@ -1608,16 +1654,10 @@ static int __init ct_init_module(void)
if (err)
goto err_register;
- err = nf_ct_extend_register(&act_ct_extend);
- if (err)
- goto err_register_extend;
-
static_branch_inc(&tcf_frag_xmit_count);
return 0;
-err_register_extend:
- tcf_unregister_action(&act_ct_ops, &ct_net_ops);
err_register:
tcf_ct_flow_tables_uninit();
err_tbl_init:
@@ -1628,7 +1668,6 @@ err_tbl_init:
static void __exit ct_cleanup_module(void)
{
static_branch_dec(&tcf_frag_xmit_count);
- nf_ct_extend_unregister(&act_ct_extend);
tcf_unregister_action(&act_ct_ops, &ct_net_ops);
tcf_ct_flow_tables_uninit();
destroy_workqueue(act_ct_wq);
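Two independent cleanups meet in the act_ct changes above. The nf_ct_ext_type registration disappears because conntrack extensions are now declared statically in the conntrack core, so the module has nothing to register or unregister; callers simply find or add the extension on an entry. Separately, the new cleanup_params label plugs a leak: when tcf_ct_flow_table_get() fails, the conntrack template taken by tcf_ct_fill_params() is now released. A minimal sketch of extension usage after the change (the function name is illustrative):

#include <net/netfilter/nf_conntrack_act_ct.h>

static void foo_touch_act_ct_ext(struct nf_conn *ct)
{
	struct nf_conn_act_ct_ext *ext;

	ext = nf_conn_act_ct_ext_find(ct);
	if (!ext)
		ext = nf_conn_act_ct_ext_add(ct);
}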
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 0281e45987a4..d4102f0a9abd 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -25,7 +25,6 @@
#include <net/netfilter/nf_conntrack_zones.h>
static struct tc_action_ops act_ctinfo_ops;
-static unsigned int ctinfo_net_id;
static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
struct tcf_ctinfo_params *cp,
@@ -157,7 +156,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+ struct tc_action_net *tn = net_generic(net, act_ctinfo_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
u32 dscpmask = 0, dscpstatemask, index;
struct nlattr *tb[TCA_CTINFO_MAX + 1];
@@ -342,23 +341,6 @@ nla_put_failure:
return -1;
}
-static int tcf_ctinfo_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
-static int tcf_ctinfo_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static void tcf_ctinfo_cleanup(struct tc_action *a)
{
struct tcf_ctinfo *ci = to_ctinfo(a);
@@ -377,27 +359,25 @@ static struct tc_action_ops act_ctinfo_ops = {
.dump = tcf_ctinfo_dump,
.init = tcf_ctinfo_init,
.cleanup = tcf_ctinfo_cleanup,
- .walk = tcf_ctinfo_walker,
- .lookup = tcf_ctinfo_search,
.size = sizeof(struct tcf_ctinfo),
};
static __net_init int ctinfo_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+ struct tc_action_net *tn = net_generic(net, act_ctinfo_ops.net_id);
return tc_action_net_init(net, tn, &act_ctinfo_ops);
}
static void __net_exit ctinfo_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, ctinfo_net_id);
+ tc_action_net_exit(net_list, act_ctinfo_ops.net_id);
}
static struct pernet_operations ctinfo_net_ops = {
.init = ctinfo_init_net,
.exit_batch = ctinfo_exit_net,
- .id = &ctinfo_net_id,
+ .id = &act_ctinfo_ops.net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index bde6a6c01e64..62d682b96b88 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -19,14 +19,13 @@
#include <linux/tc_act/tc_gact.h>
#include <net/tc_act/tc_gact.h>
-static unsigned int gact_net_id;
static struct tc_action_ops act_gact_ops;
#ifdef CONFIG_GACT_PROB
static int gact_net_rand(struct tcf_gact *gact)
{
smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */
- if (prandom_u32() % gact->tcfg_pval)
+ if (prandom_u32_max(gact->tcfg_pval))
return gact->tcf_action;
return gact->tcfg_paction;
}
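The conversion from prandom_u32() % gact->tcfg_pval to prandom_u32_max(gact->tcfg_pval) keeps the branch probability intact (the condition is true with probability (pval - 1) / pval either way) while avoiding a division. The helper is roughly the following, a sketch of the include/linux/prandom.h definition of this era; act_sample further down gets the same conversion:

static inline u32 prandom_u32_max(u32 ep_ro)
{
	/* scale a 32-bit random value into [0, ep_ro) without a modulo */
	return (u32)(((u64)prandom_u32() * ep_ro) >> 32);
}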
@@ -55,7 +54,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, gact_net_id);
+ struct tc_action_net *tn = net_generic(net, act_gact_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_GACT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
@@ -222,23 +221,6 @@ nla_put_failure:
return -1;
}
-static int tcf_gact_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, gact_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
-static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, gact_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static size_t tcf_gact_get_fill_size(const struct tc_action *act)
{
size_t sz = nla_total_size(sizeof(struct tc_gact)); /* TCA_GACT_PARMS */
@@ -253,7 +235,8 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act)
}
static int tcf_gact_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -267,7 +250,17 @@ static int tcf_gact_offload_act_setup(struct tc_action *act, void *entry_data,
} else if (is_tcf_gact_goto_chain(act)) {
entry->id = FLOW_ACTION_GOTO;
entry->chain_index = tcf_gact_goto_chain_index(act);
+ } else if (is_tcf_gact_continue(act)) {
+ NL_SET_ERR_MSG_MOD(extack, "Offload of \"continue\" action is not supported");
+ return -EOPNOTSUPP;
+ } else if (is_tcf_gact_reclassify(act)) {
+ NL_SET_ERR_MSG_MOD(extack, "Offload of \"reclassify\" action is not supported");
+ return -EOPNOTSUPP;
+ } else if (is_tcf_gact_pipe(act)) {
+ NL_SET_ERR_MSG_MOD(extack, "Offload of \"pipe\" action is not supported");
+ return -EOPNOTSUPP;
} else {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported generic action offload");
return -EOPNOTSUPP;
}
*index_inc = 1;
@@ -297,8 +290,6 @@ static struct tc_action_ops act_gact_ops = {
.stats_update = tcf_gact_stats_update,
.dump = tcf_gact_dump,
.init = tcf_gact_init,
- .walk = tcf_gact_walker,
- .lookup = tcf_gact_search,
.get_fill_size = tcf_gact_get_fill_size,
.offload_act_setup = tcf_gact_offload_act_setup,
.size = sizeof(struct tcf_gact),
@@ -306,20 +297,20 @@ static struct tc_action_ops act_gact_ops = {
static __net_init int gact_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, gact_net_id);
+ struct tc_action_net *tn = net_generic(net, act_gact_ops.net_id);
return tc_action_net_init(net, tn, &act_gact_ops);
}
static void __net_exit gact_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, gact_net_id);
+ tc_action_net_exit(net_list, act_gact_ops.net_id);
}
static struct pernet_operations gact_net_ops = {
.init = gact_init_net,
.exit_batch = gact_exit_net,
- .id = &gact_net_id,
+ .id = &act_gact_ops.net_id,
.size = sizeof(struct tc_action_net),
};
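The other cross-cutting change, first fully visible in act_gact above and repeated for every action that implements it: offload_act_setup() gains a struct netlink_ext_ack * parameter, so an action that cannot be expressed in hardware can report the reason instead of returning a bare -EOPNOTSUPP. A hedged minimal example of the callback shape, for a hypothetical action that is never offloadable:

static int tcf_foo_offload_act_setup(struct tc_action *act, void *entry_data,
				     u32 *index_inc, bool bind,
				     struct netlink_ext_ack *extack)
{
	if (!bind)
		return 0;	/* nothing to report for the stats/update path */

	NL_SET_ERR_MSG_MOD(extack, "Offload of \"foo\" action is not supported");
	return -EOPNOTSUPP;
}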
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index d56e73843a4b..3049878e7315 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -15,7 +15,6 @@
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gate.h>
-static unsigned int gate_net_id;
static struct tc_action_ops act_gate_ops;
static ktime_t gate_get_time(struct tcf_gate *gact)
@@ -298,7 +297,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, gate_net_id);
+ struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id);
enum tk_offsets tk_offset = TK_OFFS_TAI;
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_GATE_MAX + 1];
@@ -565,16 +564,6 @@ nla_put_failure:
return -1;
}
-static int tcf_gate_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, gate_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u64 packets,
u64 drops, u64 lastuse, bool hw)
{
@@ -585,13 +574,6 @@ static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u64 packets,
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
-static int tcf_gate_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, gate_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static size_t tcf_gate_get_fill_size(const struct tc_action *act)
{
return nla_total_size(sizeof(struct tc_gate));
@@ -619,7 +601,8 @@ static int tcf_gate_get_entries(struct flow_action_entry *entry,
}
static int tcf_gate_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
int err;
@@ -653,30 +636,28 @@ static struct tc_action_ops act_gate_ops = {
.dump = tcf_gate_dump,
.init = tcf_gate_init,
.cleanup = tcf_gate_cleanup,
- .walk = tcf_gate_walker,
.stats_update = tcf_gate_stats_update,
.get_fill_size = tcf_gate_get_fill_size,
- .lookup = tcf_gate_search,
.offload_act_setup = tcf_gate_offload_act_setup,
.size = sizeof(struct tcf_gate),
};
static __net_init int gate_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, gate_net_id);
+ struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id);
return tc_action_net_init(net, tn, &act_gate_ops);
}
static void __net_exit gate_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, gate_net_id);
+ tc_action_net_exit(net_list, act_gate_ops.net_id);
}
static struct pernet_operations gate_net_ops = {
.init = gate_init_net,
.exit_batch = gate_exit_net,
- .id = &gate_net_id,
+ .id = &act_gate_ops.net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 41ba55e60b1b..41d63b33461d 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -30,7 +30,6 @@
#include <linux/etherdevice.h>
#include <net/ife.h>
-static unsigned int ife_net_id;
static int max_metacnt = IFE_META_MAX + 1;
static struct tc_action_ops act_ife_ops;
@@ -482,7 +481,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, ife_net_id);
+ struct tc_action_net *tn = net_generic(net, act_ife_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_IFE_MAX + 1];
struct nlattr *tb2[IFE_META_MAX + 1];
@@ -878,23 +877,6 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
return tcf_ife_decode(skb, a, res);
}
-static int tcf_ife_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, ife_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
-static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, ife_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static struct tc_action_ops act_ife_ops = {
.kind = "ife",
.id = TCA_ID_IFE,
@@ -903,27 +885,25 @@ static struct tc_action_ops act_ife_ops = {
.dump = tcf_ife_dump,
.cleanup = tcf_ife_cleanup,
.init = tcf_ife_init,
- .walk = tcf_ife_walker,
- .lookup = tcf_ife_search,
.size = sizeof(struct tcf_ife_info),
};
static __net_init int ife_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, ife_net_id);
+ struct tc_action_net *tn = net_generic(net, act_ife_ops.net_id);
return tc_action_net_init(net, tn, &act_ife_ops);
}
static void __net_exit ife_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, ife_net_id);
+ tc_action_net_exit(net_list, act_ife_ops.net_id);
}
static struct pernet_operations ife_net_ops = {
.init = ife_init_net,
.exit_batch = ife_exit_net,
- .id = &ife_net_id,
+ .id = &act_ife_ops.net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 2f3d507c24a1..1625e1037416 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -24,10 +24,7 @@
#include <linux/netfilter_ipv4/ip_tables.h>
-static unsigned int ipt_net_id;
static struct tc_action_ops act_ipt_ops;
-
-static unsigned int xt_net_id;
static struct tc_action_ops act_xt_ops;
static int ipt_init_target(struct net *net, struct xt_entry_target *t,
@@ -206,8 +203,8 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
- return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops,
- tp, flags);
+ return __tcf_ipt_init(net, act_ipt_ops.net_id, nla, est,
+ a, &act_ipt_ops, tp, flags);
}
static int tcf_xt_init(struct net *net, struct nlattr *nla,
@@ -215,8 +212,8 @@ static int tcf_xt_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
- return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops,
- tp, flags);
+ return __tcf_ipt_init(net, act_xt_ops.net_id, nla, est,
+ a, &act_xt_ops, tp, flags);
}
static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a,
@@ -316,23 +313,6 @@ nla_put_failure:
return -1;
}
-static int tcf_ipt_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, ipt_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
-static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, ipt_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static struct tc_action_ops act_ipt_ops = {
.kind = "ipt",
.id = TCA_ID_IPT,
@@ -341,47 +321,28 @@ static struct tc_action_ops act_ipt_ops = {
.dump = tcf_ipt_dump,
.cleanup = tcf_ipt_release,
.init = tcf_ipt_init,
- .walk = tcf_ipt_walker,
- .lookup = tcf_ipt_search,
.size = sizeof(struct tcf_ipt),
};
static __net_init int ipt_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, ipt_net_id);
+ struct tc_action_net *tn = net_generic(net, act_ipt_ops.net_id);
return tc_action_net_init(net, tn, &act_ipt_ops);
}
static void __net_exit ipt_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, ipt_net_id);
+ tc_action_net_exit(net_list, act_ipt_ops.net_id);
}
static struct pernet_operations ipt_net_ops = {
.init = ipt_init_net,
.exit_batch = ipt_exit_net,
- .id = &ipt_net_id,
+ .id = &act_ipt_ops.net_id,
.size = sizeof(struct tc_action_net),
};
-static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, xt_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
-static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, xt_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static struct tc_action_ops act_xt_ops = {
.kind = "xt",
.id = TCA_ID_XT,
@@ -390,27 +351,25 @@ static struct tc_action_ops act_xt_ops = {
.dump = tcf_ipt_dump,
.cleanup = tcf_ipt_release,
.init = tcf_xt_init,
- .walk = tcf_xt_walker,
- .lookup = tcf_xt_search,
.size = sizeof(struct tcf_ipt),
};
static __net_init int xt_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, xt_net_id);
+ struct tc_action_net *tn = net_generic(net, act_xt_ops.net_id);
return tc_action_net_init(net, tn, &act_xt_ops);
}
static void __net_exit xt_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, xt_net_id);
+ tc_action_net_exit(net_list, act_xt_ops.net_id);
}
static struct pernet_operations xt_net_ops = {
.init = xt_init_net,
.exit_batch = xt_exit_net,
- .id = &xt_net_id,
+ .id = &act_xt_ops.net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 39acd1d18609..b8ad6ae282c0 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -79,14 +79,13 @@ static void tcf_mirred_release(struct tc_action *a)
/* last reference to action, no need to lock */
dev = rcu_dereference_protected(m->tcfm_dev, 1);
- dev_put_track(dev, &m->tcfm_dev_tracker);
+ netdev_put(dev, &m->tcfm_dev_tracker);
}
static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
[TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) },
};
-static unsigned int mirred_net_id;
static struct tc_action_ops act_mirred_ops;
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
@@ -94,7 +93,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, mirred_net_id);
+ struct tc_action_net *tn = net_generic(net, act_mirred_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_MIRRED_MAX + 1];
struct tcf_chain *goto_ch = NULL;
@@ -181,7 +180,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
mac_header_xmit = dev_is_mac_header_xmit(ndev);
odev = rcu_replace_pointer(m->tcfm_dev, ndev,
lockdep_is_held(&m->tcf_lock));
- dev_put_track(odev, &m->tcfm_dev_tracker);
+ netdev_put(odev, &m->tcfm_dev_tracker);
netdev_tracker_alloc(ndev, &m->tcfm_dev_tracker, GFP_ATOMIC);
m->tcfm_mac_header_xmit = mac_header_xmit;
}
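dev_put_track() was renamed netdev_put() tree-wide; the semantics are unchanged. It is the tracking-aware release that pairs with netdev_hold(), or with netdev_tracker_alloc() when the plain reference was taken elsewhere, as in the hunk above. Sketch of the paired form, assuming the caller owns a netdevice_tracker:

#include <linux/netdevice.h>

static void foo_use_dev(struct net_device *dev, netdevice_tracker *tracker)
{
	netdev_hold(dev, tracker, GFP_KERNEL);
	/* ... use dev ... */
	netdev_put(dev, tracker);
}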
@@ -306,8 +305,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
/* let the caller reinsert the packet, if possible */
if (use_reinsert) {
- res->ingress = want_ingress;
- err = tcf_mirred_forward(res->ingress, skb);
+ err = tcf_mirred_forward(want_ingress, skb);
if (err)
tcf_action_inc_overlimit_qstats(&m->common);
__this_cpu_dec(mirred_rec_level);
@@ -373,23 +371,6 @@ nla_put_failure:
return -1;
}
-static int tcf_mirred_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, mirred_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
-static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, mirred_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static int mirred_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
@@ -402,7 +383,7 @@ static int mirred_device_event(struct notifier_block *unused,
list_for_each_entry(m, &mirred_list, tcfm_list) {
spin_lock_bh(&m->tcf_lock);
if (tcf_mirred_dev_dereference(m) == dev) {
- dev_put_track(dev, &m->tcfm_dev_tracker);
+ netdev_put(dev, &m->tcfm_dev_tracker);
/* Note: no RCU grace period necessary, as
* net_device objects are already RCU protected.
*/
@@ -460,7 +441,8 @@ static void tcf_offload_mirred_get_dev(struct flow_action_entry *entry,
}
static int tcf_mirred_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -478,6 +460,7 @@ static int tcf_mirred_offload_act_setup(struct tc_action *act, void *entry_data,
entry->id = FLOW_ACTION_MIRRED_INGRESS;
tcf_offload_mirred_get_dev(entry, act);
} else {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported mirred offload");
return -EOPNOTSUPP;
}
*index_inc = 1;
@@ -508,8 +491,6 @@ static struct tc_action_ops act_mirred_ops = {
.dump = tcf_mirred_dump,
.cleanup = tcf_mirred_release,
.init = tcf_mirred_init,
- .walk = tcf_mirred_walker,
- .lookup = tcf_mirred_search,
.get_fill_size = tcf_mirred_get_fill_size,
.offload_act_setup = tcf_mirred_offload_act_setup,
.size = sizeof(struct tcf_mirred),
@@ -518,20 +499,20 @@ static struct tc_action_ops act_mirred_ops = {
static __net_init int mirred_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, mirred_net_id);
+ struct tc_action_net *tn = net_generic(net, act_mirred_ops.net_id);
return tc_action_net_init(net, tn, &act_mirred_ops);
}
static void __net_exit mirred_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, mirred_net_id);
+ tc_action_net_exit(net_list, act_mirred_ops.net_id);
}
static struct pernet_operations mirred_net_ops = {
.init = mirred_init_net,
.exit_batch = mirred_exit_net,
- .id = &mirred_net_id,
+ .id = &act_mirred_ops.net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index b9ff3459fdab..8ad25cc8ccd5 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -15,7 +15,6 @@
#include <net/pkt_cls.h>
#include <net/tc_act/tc_mpls.h>
-static unsigned int mpls_net_id;
static struct tc_action_ops act_mpls_ops;
#define ACT_MPLS_TTL_DEFAULT 255
@@ -155,7 +154,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, mpls_net_id);
+ struct tc_action_net *tn = net_generic(net, act_mpls_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_MPLS_MAX + 1];
struct tcf_chain *goto_ch = NULL;
@@ -367,25 +366,9 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int tcf_mpls_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, mpls_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
-static int tcf_mpls_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, mpls_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static int tcf_mpls_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -410,7 +393,14 @@ static int tcf_mpls_offload_act_setup(struct tc_action *act, void *entry_data,
entry->mpls_mangle.bos = tcf_mpls_bos(act);
entry->mpls_mangle.ttl = tcf_mpls_ttl(act);
break;
+ case TCA_MPLS_ACT_DEC_TTL:
+ NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"dec_ttl\" option is used");
+ return -EOPNOTSUPP;
+ case TCA_MPLS_ACT_MAC_PUSH:
+ NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"mac_push\" option is used");
+ return -EOPNOTSUPP;
default:
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported MPLS mode offload");
return -EOPNOTSUPP;
}
*index_inc = 1;
@@ -443,28 +433,26 @@ static struct tc_action_ops act_mpls_ops = {
.dump = tcf_mpls_dump,
.init = tcf_mpls_init,
.cleanup = tcf_mpls_cleanup,
- .walk = tcf_mpls_walker,
- .lookup = tcf_mpls_search,
.offload_act_setup = tcf_mpls_offload_act_setup,
.size = sizeof(struct tcf_mpls),
};
static __net_init int mpls_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, mpls_net_id);
+ struct tc_action_net *tn = net_generic(net, act_mpls_ops.net_id);
return tc_action_net_init(net, tn, &act_mpls_ops);
}
static void __net_exit mpls_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, mpls_net_id);
+ tc_action_net_exit(net_list, act_mpls_ops.net_id);
}
static struct pernet_operations mpls_net_ops = {
.init = mpls_init_net,
.exit_batch = mpls_exit_net,
- .id = &mpls_net_id,
+ .id = &act_mpls_ops.net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 2a39b3729e84..9265145f1040 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -26,7 +26,6 @@
#include <net/udp.h>
-static unsigned int nat_net_id;
static struct tc_action_ops act_nat_ops;
static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
@@ -37,7 +36,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
struct tc_action **a, struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, nat_net_id);
+ struct tc_action_net *tn = net_generic(net, act_nat_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_NAT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
@@ -289,23 +288,6 @@ nla_put_failure:
return -1;
}
-static int tcf_nat_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, nat_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
-static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, nat_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static struct tc_action_ops act_nat_ops = {
.kind = "nat",
.id = TCA_ID_NAT,
@@ -313,27 +295,25 @@ static struct tc_action_ops act_nat_ops = {
.act = tcf_nat_act,
.dump = tcf_nat_dump,
.init = tcf_nat_init,
- .walk = tcf_nat_walker,
- .lookup = tcf_nat_search,
.size = sizeof(struct tcf_nat),
};
static __net_init int nat_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, nat_net_id);
+ struct tc_action_net *tn = net_generic(net, act_nat_ops.net_id);
return tc_action_net_init(net, tn, &act_nat_ops);
}
static void __net_exit nat_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, nat_net_id);
+ tc_action_net_exit(net_list, act_nat_ops.net_id);
}
static struct pernet_operations nat_net_ops = {
.init = nat_init_net,
.exit_batch = nat_exit_net,
- .id = &nat_net_id,
+ .id = &act_nat_ops.net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 31fcd279c177..94ed5857ce67 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -21,7 +21,6 @@
#include <uapi/linux/tc_act/tc_pedit.h>
#include <net/pkt_cls.h>
-static unsigned int pedit_net_id;
static struct tc_action_ops act_pedit_ops;
static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
@@ -139,7 +138,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, pedit_net_id);
+ struct tc_action_net *tn = net_generic(net, act_pedit_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_PEDIT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
@@ -149,7 +148,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *pattr;
struct tcf_pedit *p;
int ret = 0, err;
- int ksize;
+ int i, ksize;
u32 index;
if (!nla) {
@@ -228,6 +227,22 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
p->tcfp_nkeys = parm->nkeys;
}
memcpy(p->tcfp_keys, parm->keys, ksize);
+ p->tcfp_off_max_hint = 0;
+ for (i = 0; i < p->tcfp_nkeys; ++i) {
+ u32 cur = p->tcfp_keys[i].off;
+
+ /* sanitize the shift value for any later use */
+ p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1,
+ p->tcfp_keys[i].shift);
+
+ /* The AT option can read a single byte, so we can bound the
+ * actual value with uchar max.
+ */
+ cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift;
+
+ /* Each key touches 4 bytes starting from the computed offset */
+ p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4);
+ }
p->tcfp_flags = parm->flags;
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
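The loop above precomputes, at configuration time, how far into the packet the key set can possibly write. Worked example with a hypothetical key: off = 14, offmask = 0xff and shift = 4 give an AT-derived addend of at most 0xff >> 4 = 15, and since every pedit key writes 4 bytes, that key needs the skb writable up to 14 + 15 + 4 = 33 bytes past the chosen base header. The per-key bound as a sketch:

#include <linux/tc_act/tc_pedit.h>

static u32 pedit_key_reach(const struct tc_pedit_key *k)
{
	/* the AT option can add at most this much to the offset */
	u32 at_max = (0xff & k->offmask) >> k->shift;

	return k->off + at_max + 4;	/* each key writes 4 bytes */
}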
@@ -308,13 +323,18 @@ static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_pedit *p = to_pedit(a);
+ u32 max_offset;
int i;
- if (skb_unclone(skb, GFP_ATOMIC))
- return p->tcf_action;
-
spin_lock(&p->tcf_lock);
+ max_offset = (skb_transport_header_was_set(skb) ?
+ skb_transport_offset(skb) :
+ skb_network_offset(skb)) +
+ p->tcfp_off_max_hint;
+ if (skb_ensure_writable(skb, min(skb->len, max_offset)))
+ goto unlock;
+
tcf_lastuse_update(&p->tcf_tm);
if (p->tcfp_nkeys > 0) {
@@ -403,6 +423,7 @@ bad:
p->tcf_qstats.overlimits++;
done:
bstats_update(&p->tcf_bstats, skb);
+unlock:
spin_unlock(&p->tcf_lock);
return p->tcf_action;
}
@@ -470,25 +491,9 @@ nla_put_failure:
return -1;
}
-static int tcf_pedit_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, pedit_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
-static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, pedit_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static int tcf_pedit_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -503,6 +508,7 @@ static int tcf_pedit_offload_act_setup(struct tc_action *act, void *entry_data,
entry->id = FLOW_ACTION_ADD;
break;
default:
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported pedit command offload");
return -EOPNOTSUPP;
}
entry->mangle.htype = tcf_pedit_htype(act, k);
@@ -529,28 +535,26 @@ static struct tc_action_ops act_pedit_ops = {
.dump = tcf_pedit_dump,
.cleanup = tcf_pedit_cleanup,
.init = tcf_pedit_init,
- .walk = tcf_pedit_walker,
- .lookup = tcf_pedit_search,
.offload_act_setup = tcf_pedit_offload_act_setup,
.size = sizeof(struct tcf_pedit),
};
static __net_init int pedit_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, pedit_net_id);
+ struct tc_action_net *tn = net_generic(net, act_pedit_ops.net_id);
return tc_action_net_init(net, tn, &act_pedit_ops);
}
static void __net_exit pedit_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, pedit_net_id);
+ tc_action_net_exit(net_list, act_pedit_ops.net_id);
}
static struct pernet_operations pedit_net_ops = {
.init = pedit_init_net,
.exit_batch = pedit_exit_net,
- .id = &pedit_net_id,
+ .id = &act_pedit_ops.net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 0923aa2b8f8a..0adb26e366a7 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -22,19 +22,8 @@
/* Each policer is serialized by its individual spinlock */
-static unsigned int police_net_id;
static struct tc_action_ops act_police_ops;
-static int tcf_police_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, police_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
[TCA_POLICE_RATE] = { .len = TC_RTAB_SIZE },
[TCA_POLICE_PEAKRATE] = { .len = TC_RTAB_SIZE },
@@ -58,7 +47,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
struct tc_police *parm;
struct tcf_police *police;
struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
- struct tc_action_net *tn = net_generic(net, police_net_id);
+ struct tc_action_net *tn = net_generic(net, act_police_ops.net_id);
struct tcf_police_params *new;
bool exists = false;
u32 index;
@@ -239,6 +228,20 @@ release_idr:
return err;
}
+static bool tcf_police_mtu_check(struct sk_buff *skb, u32 limit)
+{
+ u32 len;
+
+ if (skb_is_gso(skb))
+ return skb_gso_validate_mac_len(skb, limit);
+
+ len = qdisc_pkt_len(skb);
+ if (skb_at_tc_ingress(skb))
+ len += skb->mac_len;
+
+ return len <= limit;
+}
+
static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -261,7 +264,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
goto inc_overlimits;
}
- if (qdisc_pkt_len(skb) <= p->tcfp_mtu) {
+ if (tcf_police_mtu_check(skb, p->tcfp_mtu)) {
if (!p->rate_present && !p->pps_present) {
ret = p->tcfp_result;
goto end;
@@ -398,27 +401,78 @@ nla_put_failure:
return -1;
}
-static int tcf_police_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_police_act_to_flow_act(int tc_act, u32 *extval,
+ struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, police_net_id);
+ int act_id = -EOPNOTSUPP;
+
+ if (!TC_ACT_EXT_OPCODE(tc_act)) {
+ if (tc_act == TC_ACT_OK)
+ act_id = FLOW_ACTION_ACCEPT;
+ else if (tc_act == TC_ACT_SHOT)
+ act_id = FLOW_ACTION_DROP;
+ else if (tc_act == TC_ACT_PIPE)
+ act_id = FLOW_ACTION_PIPE;
+ else if (tc_act == TC_ACT_RECLASSIFY)
+ NL_SET_ERR_MSG_MOD(extack, "Offload not supported when conform/exceed action is \"reclassify\"");
+ else
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported conform/exceed action offload");
+ } else if (TC_ACT_EXT_CMP(tc_act, TC_ACT_GOTO_CHAIN)) {
+ act_id = FLOW_ACTION_GOTO;
+ *extval = tc_act & TC_ACT_EXT_VAL_MASK;
+ } else if (TC_ACT_EXT_CMP(tc_act, TC_ACT_JUMP)) {
+ act_id = FLOW_ACTION_JUMP;
+ *extval = tc_act & TC_ACT_EXT_VAL_MASK;
+ } else if (tc_act == TC_ACT_UNSPEC) {
+ act_id = FLOW_ACTION_CONTINUE;
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported conform/exceed action offload");
+ }
- return tcf_idr_search(tn, a, index);
+ return act_id;
}
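The helper above decodes TC's extended verdict encoding: plain verdicts such as TC_ACT_OK and TC_ACT_SHOT carry no opcode bits, while goto-chain and jump pack an opcode into the high bits and the chain index or jump offset into TC_ACT_EXT_VAL_MASK. A small round-trip sketch of that encoding (illustrative function, macros from the pkt_cls uapi header):

#include <linux/pkt_cls.h>

static u32 example_goto_chain_target(u32 chain)
{
	int tc_act = TC_ACT_GOTO_CHAIN | (chain & TC_ACT_EXT_VAL_MASK);

	/* TC_ACT_EXT_CMP tests only the opcode bits */
	if (TC_ACT_EXT_CMP(tc_act, TC_ACT_GOTO_CHAIN))
		return tc_act & TC_ACT_EXT_VAL_MASK;	/* == chain */
	return 0;
}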
static int tcf_police_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
+ struct tcf_police *police = to_police(act);
+ struct tcf_police_params *p;
+ int act_id;
+
+ p = rcu_dereference_protected(police->params,
+ lockdep_is_held(&police->tcf_lock));
entry->id = FLOW_ACTION_POLICE;
entry->police.burst = tcf_police_burst(act);
entry->police.rate_bytes_ps =
tcf_police_rate_bytes_ps(act);
+ entry->police.peakrate_bytes_ps = tcf_police_peakrate_bytes_ps(act);
+ entry->police.avrate = tcf_police_tcfp_ewma_rate(act);
+ entry->police.overhead = tcf_police_rate_overhead(act);
entry->police.burst_pkt = tcf_police_burst_pkt(act);
entry->police.rate_pkt_ps =
tcf_police_rate_pkt_ps(act);
entry->police.mtu = tcf_police_tcfp_mtu(act);
+
+ act_id = tcf_police_act_to_flow_act(police->tcf_action,
+ &entry->police.exceed.extval,
+ extack);
+ if (act_id < 0)
+ return act_id;
+
+ entry->police.exceed.act_id = act_id;
+
+ act_id = tcf_police_act_to_flow_act(p->tcfp_result,
+ &entry->police.notexceed.extval,
+ extack);
+ if (act_id < 0)
+ return act_id;
+
+ entry->police.notexceed.act_id = act_id;
+
*index_inc = 1;
} else {
struct flow_offload_action *fl_action = entry_data;
@@ -441,8 +495,6 @@ static struct tc_action_ops act_police_ops = {
.act = tcf_police_act,
.dump = tcf_police_dump,
.init = tcf_police_init,
- .walk = tcf_police_walker,
- .lookup = tcf_police_search,
.cleanup = tcf_police_cleanup,
.offload_act_setup = tcf_police_offload_act_setup,
.size = sizeof(struct tcf_police),
@@ -450,20 +502,20 @@ static struct tc_action_ops act_police_ops = {
static __net_init int police_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, police_net_id);
+ struct tc_action_net *tn = net_generic(net, act_police_ops.net_id);
return tc_action_net_init(net, tn, &act_police_ops);
}
static void __net_exit police_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, police_net_id);
+ tc_action_net_exit(net_list, act_police_ops.net_id);
}
static struct pernet_operations police_net_ops = {
.init = police_init_net,
.exit_batch = police_exit_net,
- .id = &police_net_id,
+ .id = &act_police_ops.net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 9a22cdda6bbd..7a25477f5d99 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -23,7 +23,6 @@
#include <linux/if_arp.h>
-static unsigned int sample_net_id;
static struct tc_action_ops act_sample_ops;
static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
@@ -38,7 +37,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, sample_net_id);
+ struct tc_action_net *tn = net_generic(net, act_sample_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_SAMPLE_MAX + 1];
struct psample_group *psample_group;
@@ -169,7 +168,7 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
psample_group = rcu_dereference_bh(s->psample_group);
/* randomly sample packets according to rate */
- if (psample_group && (prandom_u32() % s->rate == 0)) {
+ if (psample_group && (prandom_u32_max(s->rate) == 0)) {
if (!skb_at_tc_ingress(skb)) {
md.in_ifindex = skb->skb_iif;
md.out_ifindex = skb->dev->ifindex;
@@ -241,23 +240,6 @@ nla_put_failure:
return -1;
}
-static int tcf_sample_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, sample_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
-static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, sample_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static void tcf_psample_group_put(void *priv)
{
struct psample_group *group = priv;
@@ -291,7 +273,8 @@ static void tcf_offload_sample_get_group(struct flow_action_entry *entry,
}
static int tcf_sample_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -320,8 +303,6 @@ static struct tc_action_ops act_sample_ops = {
.dump = tcf_sample_dump,
.init = tcf_sample_init,
.cleanup = tcf_sample_cleanup,
- .walk = tcf_sample_walker,
- .lookup = tcf_sample_search,
.get_psample_group = tcf_sample_get_group,
.offload_act_setup = tcf_sample_offload_act_setup,
.size = sizeof(struct tcf_sample),
@@ -329,20 +310,20 @@ static struct tc_action_ops act_sample_ops = {
static __net_init int sample_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, sample_net_id);
+ struct tc_action_net *tn = net_generic(net, act_sample_ops.net_id);
return tc_action_net_init(net, tn, &act_sample_ops);
}
static void __net_exit sample_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, sample_net_id);
+ tc_action_net_exit(net_list, act_sample_ops.net_id);
}
static struct pernet_operations sample_net_ops = {
.init = sample_init_net,
.exit_batch = sample_exit_net,
- .id = &sample_net_id,
+ .id = &act_sample_ops.net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 8c1d60bde93e..18d376135461 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -18,7 +18,6 @@
#include <linux/tc_act/tc_defact.h>
#include <net/tc_act/tc_defact.h>
-static unsigned int simp_net_id;
static struct tc_action_ops act_simp_ops;
#define SIMP_MAX_DATA 32
@@ -89,7 +88,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, simp_net_id);
+ struct tc_action_net *tn = net_generic(net, act_simp_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_DEF_MAX + 1];
struct tcf_chain *goto_ch = NULL;
@@ -198,23 +197,6 @@ nla_put_failure:
return -1;
}
-static int tcf_simp_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, simp_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
-static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, simp_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static struct tc_action_ops act_simp_ops = {
.kind = "simple",
.id = TCA_ID_SIMP,
@@ -223,27 +205,25 @@ static struct tc_action_ops act_simp_ops = {
.dump = tcf_simp_dump,
.cleanup = tcf_simp_release,
.init = tcf_simp_init,
- .walk = tcf_simp_walker,
- .lookup = tcf_simp_search,
.size = sizeof(struct tcf_defact),
};
static __net_init int simp_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, simp_net_id);
+ struct tc_action_net *tn = net_generic(net, act_simp_ops.net_id);
return tc_action_net_init(net, tn, &act_simp_ops);
}
static void __net_exit simp_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, simp_net_id);
+ tc_action_net_exit(net_list, act_simp_ops.net_id);
}
static struct pernet_operations simp_net_ops = {
.init = simp_init_net,
.exit_batch = simp_exit_net,
- .id = &simp_net_id,
+ .id = &act_simp_ops.net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index ceba11b198bb..7f598784fd30 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -20,9 +20,22 @@
#include <linux/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_skbedit.h>
-static unsigned int skbedit_net_id;
static struct tc_action_ops act_skbedit_ops;
+static u16 tcf_skbedit_hash(struct tcf_skbedit_params *params,
+ struct sk_buff *skb)
+{
+ u16 queue_mapping = params->queue_mapping;
+
+ if (params->flags & SKBEDIT_F_TXQ_SKBHASH) {
+ u32 hash = skb_get_hash(skb);
+
+ queue_mapping += hash % params->mapping_mod;
+ }
+
+ return netdev_cap_txqueue(skb->dev, queue_mapping);
+}
+
static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -58,8 +71,12 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
}
}
if (params->flags & SKBEDIT_F_QUEUE_MAPPING &&
- skb->dev->real_num_tx_queues > params->queue_mapping)
- skb_set_queue_mapping(skb, params->queue_mapping);
+ skb->dev->real_num_tx_queues > params->queue_mapping) {
+#ifdef CONFIG_NET_EGRESS
+ netdev_xmit_skip_txqueue(true);
+#endif
+ skb_set_queue_mapping(skb, tcf_skbedit_hash(params, skb));
+ }
if (params->flags & SKBEDIT_F_MARK) {
skb->mark &= ~params->mask;
skb->mark |= params->mark & params->mask;
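With the new SKBEDIT_F_TXQ_SKBHASH flag, the transmit queue is no longer one fixed value but a window selected by flow hash: mapping_mod is queue_mapping_max - queue_mapping + 1 and the skb hash indexes into that window, so packets of one flow stay on one queue. Worked example with hypothetical numbers: queue_mapping = 2 and queue_mapping_max = 5 give mapping_mod = 4, and a flow whose skb hash is 11 lands on txq 2 + (11 % 4) = 5. The arithmetic as a sketch:

static u16 skbedit_pick_txq(u16 queue_mapping, u16 mapping_mod, u32 hash)
{
	/* same hash, hence same flow, always picks the same queue */
	return queue_mapping + hash % mapping_mod;
}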
@@ -92,6 +109,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
[TCA_SKBEDIT_PTYPE] = { .len = sizeof(u16) },
[TCA_SKBEDIT_MASK] = { .len = sizeof(u32) },
[TCA_SKBEDIT_FLAGS] = { .len = sizeof(u64) },
+ [TCA_SKBEDIT_QUEUE_MAPPING_MAX] = { .len = sizeof(u16) },
};
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
@@ -99,7 +117,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp, u32 act_flags,
struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+ struct tc_action_net *tn = net_generic(net, act_skbedit_ops.net_id);
bool bind = act_flags & TCA_ACT_FLAGS_BIND;
struct tcf_skbedit_params *params_new;
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
@@ -108,6 +126,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct tcf_skbedit *d;
u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL;
u16 *queue_mapping = NULL, *ptype = NULL;
+ u16 mapping_mod = 1;
bool exists = false;
int ret = 0, err;
u32 index;
@@ -153,6 +172,25 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
if (tb[TCA_SKBEDIT_FLAGS] != NULL) {
u64 *pure_flags = nla_data(tb[TCA_SKBEDIT_FLAGS]);
+ if (*pure_flags & SKBEDIT_F_TXQ_SKBHASH) {
+ u16 *queue_mapping_max;
+
+ if (!tb[TCA_SKBEDIT_QUEUE_MAPPING] ||
+ !tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing required range of queue_mapping.");
+ return -EINVAL;
+ }
+
+ queue_mapping_max =
+ nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]);
+ if (*queue_mapping_max < *queue_mapping) {
+ NL_SET_ERR_MSG_MOD(extack, "The range of queue_mapping is invalid, max < min.");
+ return -EINVAL;
+ }
+
+ mapping_mod = *queue_mapping_max - *queue_mapping + 1;
+ flags |= SKBEDIT_F_TXQ_SKBHASH;
+ }
if (*pure_flags & SKBEDIT_F_INHERITDSFIELD)
flags |= SKBEDIT_F_INHERITDSFIELD;
}
@@ -204,8 +242,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
params_new->flags = flags;
if (flags & SKBEDIT_F_PRIORITY)
params_new->priority = *priority;
- if (flags & SKBEDIT_F_QUEUE_MAPPING)
+ if (flags & SKBEDIT_F_QUEUE_MAPPING) {
params_new->queue_mapping = *queue_mapping;
+ params_new->mapping_mod = mapping_mod;
+ }
if (flags & SKBEDIT_F_MARK)
params_new->mark = *mark;
if (flags & SKBEDIT_F_PTYPE)
@@ -272,6 +312,13 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
goto nla_put_failure;
if (params->flags & SKBEDIT_F_INHERITDSFIELD)
pure_flags |= SKBEDIT_F_INHERITDSFIELD;
+ if (params->flags & SKBEDIT_F_TXQ_SKBHASH) {
+ if (nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING_MAX,
+ params->queue_mapping + params->mapping_mod - 1))
+ goto nla_put_failure;
+
+ pure_flags |= SKBEDIT_F_TXQ_SKBHASH;
+ }
if (pure_flags != 0 &&
nla_put(skb, TCA_SKBEDIT_FLAGS, sizeof(pure_flags), &pure_flags))
goto nla_put_failure;
@@ -299,28 +346,12 @@ static void tcf_skbedit_cleanup(struct tc_action *a)
kfree_rcu(params, rcu);
}
-static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, skbedit_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
-static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, skbedit_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static size_t tcf_skbedit_get_fill_size(const struct tc_action *act)
{
return nla_total_size(sizeof(struct tc_skbedit))
+ nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_PRIORITY */
+ nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING */
+ + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING_MAX */
+ nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MARK */
+ nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_PTYPE */
+ nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MASK */
@@ -328,7 +359,8 @@ static size_t tcf_skbedit_get_fill_size(const struct tc_action *act)
}
static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -342,7 +374,14 @@ static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data
} else if (is_tcf_skbedit_priority(act)) {
entry->id = FLOW_ACTION_PRIORITY;
entry->priority = tcf_skbedit_priority(act);
+ } else if (is_tcf_skbedit_queue_mapping(act)) {
+ NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"queue_mapping\" option is used");
+ return -EOPNOTSUPP;
+ } else if (is_tcf_skbedit_inheritdsfield(act)) {
+ NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"inheritdsfield\" option is used");
+ return -EOPNOTSUPP;
} else {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported skbedit option offload");
return -EOPNOTSUPP;
}
*index_inc = 1;
@@ -371,29 +410,27 @@ static struct tc_action_ops act_skbedit_ops = {
.dump = tcf_skbedit_dump,
.init = tcf_skbedit_init,
.cleanup = tcf_skbedit_cleanup,
- .walk = tcf_skbedit_walker,
.get_fill_size = tcf_skbedit_get_fill_size,
- .lookup = tcf_skbedit_search,
.offload_act_setup = tcf_skbedit_offload_act_setup,
.size = sizeof(struct tcf_skbedit),
};
static __net_init int skbedit_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+ struct tc_action_net *tn = net_generic(net, act_skbedit_ops.net_id);
return tc_action_net_init(net, tn, &act_skbedit_ops);
}
static void __net_exit skbedit_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, skbedit_net_id);
+ tc_action_net_exit(net_list, act_skbedit_ops.net_id);
}
static struct pernet_operations skbedit_net_ops = {
.init = skbedit_init_net,
.exit_batch = skbedit_exit_net,
- .id = &skbedit_net_id,
+ .id = &act_skbedit_ops.net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 2083612d8780..d98758a63934 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -19,7 +19,6 @@
#include <linux/tc_act/tc_skbmod.h>
#include <net/tc_act/tc_skbmod.h>
-static unsigned int skbmod_net_id;
static struct tc_action_ops act_skbmod_ops;
static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
@@ -103,7 +102,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+ struct tc_action_net *tn = net_generic(net, act_skbmod_ops.net_id);
bool ovr = flags & TCA_ACT_FLAGS_REPLACE;
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_SKBMOD_MAX + 1];
@@ -276,23 +275,6 @@ nla_put_failure:
return -1;
}
-static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, skbmod_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
-static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, skbmod_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static struct tc_action_ops act_skbmod_ops = {
.kind = "skbmod",
.id = TCA_ACT_SKBMOD,
@@ -301,27 +283,25 @@ static struct tc_action_ops act_skbmod_ops = {
.dump = tcf_skbmod_dump,
.init = tcf_skbmod_init,
.cleanup = tcf_skbmod_cleanup,
- .walk = tcf_skbmod_walker,
- .lookup = tcf_skbmod_search,
.size = sizeof(struct tcf_skbmod),
};
static __net_init int skbmod_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+ struct tc_action_net *tn = net_generic(net, act_skbmod_ops.net_id);
return tc_action_net_init(net, tn, &act_skbmod_ops);
}
static void __net_exit skbmod_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, skbmod_net_id);
+ tc_action_net_exit(net_list, act_skbmod_ops.net_id);
}
static struct pernet_operations skbmod_net_ops = {
.init = skbmod_init_net,
.exit_batch = skbmod_exit_net,
- .id = &skbmod_net_id,
+ .id = &act_skbmod_ops.net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 23aba03d26a8..2691a3d8e451 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -20,7 +20,6 @@
#include <linux/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_tunnel_key.h>
-static unsigned int tunnel_key_net_id;
static struct tc_action_ops act_tunnel_key_ops;
static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
@@ -358,7 +357,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp, u32 act_flags,
struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+ struct tc_action_net *tn = net_generic(net, act_tunnel_key_ops.net_id);
bool bind = act_flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
struct tcf_tunnel_key_params *params_new;
@@ -770,23 +769,6 @@ nla_put_failure:
return -1;
}
-static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
-static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static void tcf_tunnel_encap_put_tunnel(void *priv)
{
struct ip_tunnel_info *tunnel = priv;
@@ -808,7 +790,8 @@ static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry,
static int tcf_tunnel_key_offload_act_setup(struct tc_action *act,
void *entry_data,
u32 *index_inc,
- bool bind)
+ bool bind,
+ struct netlink_ext_ack *extack)
{
int err;
@@ -823,6 +806,7 @@ static int tcf_tunnel_key_offload_act_setup(struct tc_action *act,
} else if (is_tcf_tunnel_release(act)) {
entry->id = FLOW_ACTION_TUNNEL_DECAP;
} else {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel key mode offload");
return -EOPNOTSUPP;
}
*index_inc = 1;
@@ -848,28 +832,26 @@ static struct tc_action_ops act_tunnel_key_ops = {
.dump = tunnel_key_dump,
.init = tunnel_key_init,
.cleanup = tunnel_key_release,
- .walk = tunnel_key_walker,
- .lookup = tunnel_key_search,
.offload_act_setup = tcf_tunnel_key_offload_act_setup,
.size = sizeof(struct tcf_tunnel_key),
};
static __net_init int tunnel_key_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+ struct tc_action_net *tn = net_generic(net, act_tunnel_key_ops.net_id);
return tc_action_net_init(net, tn, &act_tunnel_key_ops);
}
static void __net_exit tunnel_key_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, tunnel_key_net_id);
+ tc_action_net_exit(net_list, act_tunnel_key_ops.net_id);
}
static struct pernet_operations tunnel_key_net_ops = {
.init = tunnel_key_init_net,
.exit_batch = tunnel_key_exit_net,
- .id = &tunnel_key_net_id,
+ .id = &act_tunnel_key_ops.net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 756e2dcde1cd..7b24e898a3e6 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -16,7 +16,6 @@
#include <linux/tc_act/tc_vlan.h>
#include <net/tc_act/tc_vlan.h>
-static unsigned int vlan_net_id;
static struct tc_action_ops act_vlan_ops;
static int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a,
@@ -117,7 +116,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
- struct tc_action_net *tn = net_generic(net, vlan_net_id);
+ struct tc_action_net *tn = net_generic(net, act_vlan_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_VLAN_MAX + 1];
struct tcf_chain *goto_ch = NULL;
@@ -333,16 +332,6 @@ nla_put_failure:
return -1;
}
-static int tcf_vlan_walker(struct net *net, struct sk_buff *skb,
- struct netlink_callback *cb, int type,
- const struct tc_action_ops *ops,
- struct netlink_ext_ack *extack)
-{
- struct tc_action_net *tn = net_generic(net, vlan_net_id);
-
- return tcf_generic_walker(tn, skb, cb, type, ops, extack);
-}
-
static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u64 packets,
u64 drops, u64 lastuse, bool hw)
{
@@ -353,13 +342,6 @@ static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u64 packets,
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
-static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
-{
- struct tc_action_net *tn = net_generic(net, vlan_net_id);
-
- return tcf_idr_search(tn, a, index);
-}
-
static size_t tcf_vlan_get_fill_size(const struct tc_action *act)
{
return nla_total_size(sizeof(struct tc_vlan))
@@ -369,7 +351,8 @@ static size_t tcf_vlan_get_fill_size(const struct tc_action *act)
}
static int tcf_vlan_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -390,7 +373,15 @@ static int tcf_vlan_offload_act_setup(struct tc_action *act, void *entry_data,
entry->vlan.proto = tcf_vlan_push_proto(act);
entry->vlan.prio = tcf_vlan_push_prio(act);
break;
+ case TCA_VLAN_ACT_POP_ETH:
+ entry->id = FLOW_ACTION_VLAN_POP_ETH;
+ break;
+ case TCA_VLAN_ACT_PUSH_ETH:
+ entry->id = FLOW_ACTION_VLAN_PUSH_ETH;
+ tcf_vlan_push_eth(entry->vlan_push_eth.src, entry->vlan_push_eth.dst, act);
+ break;
default:
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported vlan action mode offload");
return -EOPNOTSUPP;
}
*index_inc = 1;
@@ -407,6 +398,12 @@ static int tcf_vlan_offload_act_setup(struct tc_action *act, void *entry_data,
case TCA_VLAN_ACT_MODIFY:
fl_action->id = FLOW_ACTION_VLAN_MANGLE;
break;
+ case TCA_VLAN_ACT_POP_ETH:
+ fl_action->id = FLOW_ACTION_VLAN_POP_ETH;
+ break;
+ case TCA_VLAN_ACT_PUSH_ETH:
+ fl_action->id = FLOW_ACTION_VLAN_PUSH_ETH;
+ break;
default:
return -EOPNOTSUPP;
}
@@ -423,30 +420,28 @@ static struct tc_action_ops act_vlan_ops = {
.dump = tcf_vlan_dump,
.init = tcf_vlan_init,
.cleanup = tcf_vlan_cleanup,
- .walk = tcf_vlan_walker,
.stats_update = tcf_vlan_stats_update,
.get_fill_size = tcf_vlan_get_fill_size,
- .lookup = tcf_vlan_search,
.offload_act_setup = tcf_vlan_offload_act_setup,
.size = sizeof(struct tcf_vlan),
};
static __net_init int vlan_init_net(struct net *net)
{
- struct tc_action_net *tn = net_generic(net, vlan_net_id);
+ struct tc_action_net *tn = net_generic(net, act_vlan_ops.net_id);
return tc_action_net_init(net, tn, &act_vlan_ops);
}
static void __net_exit vlan_exit_net(struct list_head *net_list)
{
- tc_action_net_exit(net_list, vlan_net_id);
+ tc_action_net_exit(net_list, act_vlan_ops.net_id);
}
static struct pernet_operations vlan_net_ops = {
.init = vlan_init_net,
.exit_batch = vlan_exit_net,
- .id = &vlan_net_id,
+ .id = &act_vlan_ops.net_id,
.size = sizeof(struct tc_action_net),
};
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index d4e27c679123..50566db45949 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -49,6 +49,23 @@ static LIST_HEAD(tcf_proto_base);
/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);
+#ifdef CONFIG_NET_CLS_ACT
+DEFINE_STATIC_KEY_FALSE(tc_skb_ext_tc);
+EXPORT_SYMBOL(tc_skb_ext_tc);
+
+void tc_skb_ext_tc_enable(void)
+{
+ static_branch_inc(&tc_skb_ext_tc);
+}
+EXPORT_SYMBOL(tc_skb_ext_tc_enable);
+
+void tc_skb_ext_tc_disable(void)
+{
+ static_branch_dec(&tc_skb_ext_tc);
+}
+EXPORT_SYMBOL(tc_skb_ext_tc_disable);
+#endif
+
static u32 destroy_obj_hashfn(const struct tcf_proto *tp)
{
return jhash_3words(tp->chain->index, tp->prio,
@@ -177,7 +194,7 @@ EXPORT_SYMBOL(register_tcf_proto_ops);
static struct workqueue_struct *tc_filter_wq;
-int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
+void unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
struct tcf_proto_ops *t;
int rc = -ENOENT;
@@ -197,7 +214,8 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
}
}
write_unlock(&cls_mod_lock);
- return rc;
+
+ WARN(rc, "unregister tc filter kind(%s) failed %d\n", ops->kind, rc);
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);
@@ -1044,7 +1062,7 @@ static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
/* Find qdisc */
if (!*parent) {
- *q = dev->qdisc;
+ *q = rcu_dereference(dev->qdisc);
*parent = (*q)->handle;
} else {
*q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
@@ -1615,19 +1633,21 @@ int tcf_classify(struct sk_buff *skb,
ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
&last_executed_chain);
- /* If we missed on some chain */
- if (ret == TC_ACT_UNSPEC && last_executed_chain) {
- struct tc_skb_cb *cb = tc_skb_cb(skb);
+ if (tc_skb_ext_tc_enabled()) {
+ /* If we missed on some chain */
+ if (ret == TC_ACT_UNSPEC && last_executed_chain) {
+ struct tc_skb_cb *cb = tc_skb_cb(skb);
- ext = tc_skb_ext_alloc(skb);
- if (WARN_ON_ONCE(!ext))
- return TC_ACT_SHOT;
- ext->chain = last_executed_chain;
- ext->mru = cb->mru;
- ext->post_ct = cb->post_ct;
- ext->post_ct_snat = cb->post_ct_snat;
- ext->post_ct_dnat = cb->post_ct_dnat;
- ext->zone = cb->zone;
+ ext = tc_skb_ext_alloc(skb);
+ if (WARN_ON_ONCE(!ext))
+ return TC_ACT_SHOT;
+ ext->chain = last_executed_chain;
+ ext->mru = cb->mru;
+ ext->post_ct = cb->post_ct;
+ ext->post_ct_snat = cb->post_ct_snat;
+ ext->post_ct_dnat = cb->post_ct_dnat;
+ ext->zone = cb->zone;
+ }
}
return ret;
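The tcf_classify() change wraps the miss-handling block in tc_skb_ext_tc_enabled(), so the skb extension is only allocated while at least one consumer holds the new static key; with no consumers, the cost is a single patched-out branch. A hedged sketch of the enable/disable pairing a consumer would follow (say, an offload driver that wants the last-executed-chain metadata); the functions surrounding the calls are illustrative:

static int example_consumer_start(void)         /* hypothetical consumer */
{
        tc_skb_ext_tc_enable();   /* static_branch_inc(&tc_skb_ext_tc) */
        return 0;
}

static void example_consumer_stop(void)
{
        tc_skb_ext_tc_disable();  /* static_branch_dec(&tc_skb_ext_tc) */
}

Because the key is a counter rather than a boolean, enable/disable pairs from several independent consumers compose correctly.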
@@ -1653,10 +1673,10 @@ static int tcf_chain_tp_insert(struct tcf_chain *chain,
if (chain->flushing)
return -EAGAIN;
+ RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
if (*chain_info->pprev == chain->filter_chain)
tcf_chain0_head_change(chain, tp);
tcf_proto_get(tp);
- RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
rcu_assign_pointer(*chain_info->pprev, tp);
return 0;
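The one-line move in tcf_chain_tp_insert() enforces the usual RCU publication rule: initialize every field of the new node, its next link included, before rcu_assign_pointer() makes it reachable; otherwise a concurrent reader can walk through an uninitialized pointer. The rule in isolation, with illustrative types:

struct item {
        struct item __rcu *next;
        int payload;
};

static void publish_item(struct item __rcu **head, struct item *it,
                         struct item *next)
{
        it->payload = 42;                       /* initialize payload */
        RCU_INIT_POINTER(it->next, next);       /* link before publishing */
        rcu_assign_pointer(*head, it);          /* readers may see it now */
}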
@@ -1945,9 +1965,9 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
bool prio_allocate;
u32 parent;
u32 chain_index;
- struct Qdisc *q = NULL;
+ struct Qdisc *q;
struct tcf_chain_info chain_info;
- struct tcf_chain *chain = NULL;
+ struct tcf_chain *chain;
struct tcf_block *block;
struct tcf_proto *tp;
unsigned long cl;
@@ -1957,9 +1977,6 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
bool rtnl_held = false;
u32 flags;
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
replay:
tp_created = 0;
@@ -1976,6 +1993,8 @@ replay:
tp = NULL;
cl = 0;
block = NULL;
+ q = NULL;
+ chain = NULL;
flags = 0;
if (prio == 0) {
@@ -2115,6 +2134,7 @@ replay:
}
if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
+ tfilter_put(tp, fh);
NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
err = -EINVAL;
goto errout;
@@ -2186,9 +2206,6 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
int err;
bool rtnl_held = false;
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
rtm_tca_policy, extack);
if (err < 0)
@@ -2585,7 +2602,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
parent = tcm->tcm_parent;
if (!parent)
- q = dev->qdisc;
+ q = rtnl_dereference(dev->qdisc);
else
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
if (!q)
@@ -2798,17 +2815,14 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
struct tcmsg *t;
u32 parent;
u32 chain_index;
- struct Qdisc *q = NULL;
- struct tcf_chain *chain = NULL;
+ struct Qdisc *q;
+ struct tcf_chain *chain;
struct tcf_block *block;
unsigned long cl;
int err;
- if (n->nlmsg_type != RTM_GETCHAIN &&
- !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
replay:
+ q = NULL;
err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
rtm_tca_policy, extack);
if (err < 0)
@@ -2959,7 +2973,7 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
if (!tcm->tcm_parent)
- q = dev->qdisc;
+ q = rtnl_dereference(dev->qdisc);
else
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
@@ -3491,22 +3505,27 @@ EXPORT_SYMBOL(tc_cleanup_offload_action);
static int tc_setup_offload_act(struct tc_action *act,
struct flow_action_entry *entry,
- u32 *index_inc)
+ u32 *index_inc,
+ struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
- if (act->ops->offload_act_setup)
- return act->ops->offload_act_setup(act, entry, index_inc, true);
- else
+ if (act->ops->offload_act_setup) {
+ return act->ops->offload_act_setup(act, entry, index_inc, true,
+ extack);
+ } else {
+ NL_SET_ERR_MSG(extack, "Action does not support offload");
return -EOPNOTSUPP;
+ }
#else
return 0;
#endif
}
int tc_setup_action(struct flow_action *flow_action,
- struct tc_action *actions[])
+ struct tc_action *actions[],
+ struct netlink_ext_ack *extack)
{
- int i, j, index, err = 0;
+ int i, j, k, index, err = 0;
struct tc_action *act;
BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY);
@@ -3526,14 +3545,18 @@ int tc_setup_action(struct flow_action *flow_action,
if (err)
goto err_out_locked;
- entry->hw_stats = tc_act_hw_stats(act->hw_stats);
- entry->hw_index = act->tcfa_index;
index = 0;
- err = tc_setup_offload_act(act, entry, &index);
- if (!err)
- j += index;
- else
+ err = tc_setup_offload_act(act, entry, &index, extack);
+ if (err)
goto err_out_locked;
+
+ for (k = 0; k < index; k++) {
+ entry[k].hw_stats = tc_act_hw_stats(act->hw_stats);
+ entry[k].hw_index = act->tcfa_index;
+ }
+
+ j += index;
+
spin_unlock_bh(&act->tcfa_lock);
}
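After this rework a single tc action may expand into several consecutive flow_action entries: offload_act_setup() reports the number of slots it filled through *index_inc, and tc_setup_action() then stamps hw_stats and hw_index on each of them instead of only the first. A hedged sketch of a callback that expands to two entries; the specific entry ids are illustrative, not how the in-tree vlan action behaves:

static int example_offload_act_setup(struct tc_action *act, void *entry_data,
                                     u32 *index_inc, bool bind,
                                     struct netlink_ext_ack *extack)
{
        struct flow_action_entry *entry = entry_data;

        if (!bind)
                return 0;

        entry[0].id = FLOW_ACTION_VLAN_POP_ETH;         /* first slot */
        entry[1].id = FLOW_ACTION_VLAN_PUSH_ETH;        /* second slot */
        *index_inc = 2;         /* tc_setup_action() advances j by this */
        return 0;
}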
@@ -3548,13 +3571,14 @@ err_out_locked:
}
int tc_setup_offload_action(struct flow_action *flow_action,
- const struct tcf_exts *exts)
+ const struct tcf_exts *exts,
+ struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
if (!exts)
return 0;
- return tc_setup_action(flow_action, exts->actions);
+ return tc_setup_action(flow_action, exts->actions, extack);
#else
return 0;
#endif
@@ -3606,9 +3630,6 @@ int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
if (err)
return err;
- if (!block_index)
- return 0;
-
qe->info.binder_type = binder_type;
qe->info.chain_head_change = tcf_chain_head_change_dflt;
qe->info.chain_head_change_priv = &qe->filter_chain;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 8158fc9ee1ab..d229ce99e554 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -251,15 +251,8 @@ static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg,
struct basic_filter *f;
list_for_each_entry(f, &head->flist, link) {
- if (arg->count < arg->skip)
- goto skip;
-
- if (arg->fn(tp, f, arg) < 0) {
- arg->stop = 1;
+ if (!tc_cls_stats_dump(tp, arg, f))
break;
- }
-skip:
- arg->count++;
}
}
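basic_walk() is the first of many walkers in this series converted to tc_cls_stats_dump(), which folds the repeated skip/count/stop bookkeeping into one helper. A sketch consistent with how the converted call sites use it (break or return when it yields false), not necessarily the exact in-tree body:

static inline bool tc_cls_stats_dump(struct tcf_proto *tp,
                                     struct tcf_walker *arg,
                                     void *filter)
{
        if (arg->count >= arg->skip && arg->fn(tp, filter, arg) < 0) {
                arg->stop = 1;
                return false;   /* caller breaks out of its loop */
        }

        arg->count++;
        return true;            /* keep walking */
}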
@@ -268,12 +261,7 @@ static void basic_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
{
struct basic_filter *f = fh;
- if (f && f->res.classid == classid) {
- if (cl)
- __tcf_bind_filter(q, &f->res, base);
- else
- __tcf_unbind_filter(q, &f->res);
- }
+ tc_cls_bind_class(classid, cl, q, &f->res, base);
}
static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
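The bind_class conversion is the same de-duplication applied to the other repeated block: every classifier's classid check collapses into tc_cls_bind_class(). The filter NULL check itself moves to the lone caller (see the tcf_node_bind() hunk in sch_api.c further down), so the helper only needs the result pointer. A sketch matching the converted call sites:

static inline void tc_cls_bind_class(u32 classid, unsigned long cl,
                                     void *q, struct tcf_result *res,
                                     unsigned long base)
{
        if (res->classid != classid)
                return;

        if (cl)
                __tcf_bind_filter(q, res, base);
        else
                __tcf_unbind_filter(q, res);
}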
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index df19a847829e..bc317b3eac12 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -102,6 +102,8 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
bpf_compute_data_pointers(skb);
filter_res = bpf_prog_run(prog->filter, skb);
}
+ if (unlikely(!skb->tstamp && skb->mono_delivery_time))
+ skb->mono_delivery_time = 0;
if (prog->exts_integrated) {
res->class = 0;
@@ -633,12 +635,7 @@ static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl,
{
struct cls_bpf_prog *prog = fh;
- if (prog && prog->res.classid == classid) {
- if (cl)
- __tcf_bind_filter(q, &prog->res, base);
- else
- __tcf_unbind_filter(q, &prog->res);
- }
+ tc_cls_bind_class(classid, cl, q, &prog->res, base);
}
static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg,
@@ -648,14 +645,8 @@ static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg,
struct cls_bpf_prog *prog;
list_for_each_entry(prog, &head->plist, link) {
- if (arg->count < arg->skip)
- goto skip;
- if (arg->fn(tp, prog, arg) < 0) {
- arg->stop = 1;
+ if (!tc_cls_stats_dump(tp, arg, prog))
break;
- }
-skip:
- arg->count++;
}
}
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 972303aa8edd..014cd3de7b5d 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -683,14 +683,8 @@ static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg,
struct flow_filter *f;
list_for_each_entry(f, &head->filters, list) {
- if (arg->count < arg->skip)
- goto skip;
- if (arg->fn(tp, f, arg) < 0) {
- arg->stop = 1;
+ if (!tc_cls_stats_dump(tp, arg, f))
break;
- }
-skip:
- arg->count++;
}
}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 1a9b1f140f9e..25bc57ee6ea1 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -16,6 +16,7 @@
#include <linux/in6.h>
#include <linux/ip.h>
#include <linux/mpls.h>
+#include <linux/ppp_defs.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
@@ -25,6 +26,7 @@
#include <net/geneve.h>
#include <net/vxlan.h>
#include <net/erspan.h>
+#include <net/gtp.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
@@ -62,15 +64,12 @@ struct fl_flow_key {
struct flow_dissector_key_ip ip;
struct flow_dissector_key_ip enc_ip;
struct flow_dissector_key_enc_opts enc_opts;
- union {
- struct flow_dissector_key_ports tp;
- struct {
- struct flow_dissector_key_ports tp_min;
- struct flow_dissector_key_ports tp_max;
- };
- } tp_range;
+ struct flow_dissector_key_ports_range tp_range;
struct flow_dissector_key_ct ct;
struct flow_dissector_key_hash hash;
+ struct flow_dissector_key_num_of_vlans num_of_vlans;
+ struct flow_dissector_key_pppoe pppoe;
+ struct flow_dissector_key_l2tpv3 l2tpv3;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
struct fl_flow_mask_range {
@@ -463,14 +462,12 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
cls_flower.rule->match.key = &f->mkey;
cls_flower.classid = f->res.classid;
- err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts);
+ err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts,
+ cls_flower.common.extack);
if (err) {
kfree(cls_flower.rule);
- if (skip_sw) {
- NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
- return err;
- }
- return 0;
+
+ return skip_sw ? err : 0;
}
err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower,
@@ -713,6 +710,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_FLAGS] = { .type = NLA_U32 },
[TCA_FLOWER_KEY_HASH] = { .type = NLA_U32 },
[TCA_FLOWER_KEY_HASH_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_NUM_OF_VLANS] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_PPPOE_SID] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_PPP_PROTO] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_L2TPV3_SID] = { .type = NLA_U32 },
};
@@ -723,6 +724,7 @@ enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = {
[TCA_FLOWER_KEY_ENC_OPTS_GENEVE] = { .type = NLA_NESTED },
[TCA_FLOWER_KEY_ENC_OPTS_VXLAN] = { .type = NLA_NESTED },
[TCA_FLOWER_KEY_ENC_OPTS_ERSPAN] = { .type = NLA_NESTED },
+ [TCA_FLOWER_KEY_ENC_OPTS_GTP] = { .type = NLA_NESTED },
};
static const struct nla_policy
@@ -747,6 +749,12 @@ erspan_opt_policy[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX + 1] = {
};
static const struct nla_policy
+gtp_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GTP_MAX + 1] = {
+ [TCA_FLOWER_KEY_ENC_OPT_GTP_PDU_TYPE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ENC_OPT_GTP_QFI] = { .type = NLA_U8 },
+};
+
+static const struct nla_policy
mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = {
[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL] = { .type = NLA_U8 },
@@ -1005,6 +1013,7 @@ static int fl_set_key_mpls(struct nlattr **tb,
static void fl_set_key_vlan(struct nlattr **tb,
__be16 ethertype,
int vlan_id_key, int vlan_prio_key,
+ int vlan_next_eth_type_key,
struct flow_dissector_key_vlan *key_val,
struct flow_dissector_key_vlan *key_mask)
{
@@ -1021,8 +1030,59 @@ static void fl_set_key_vlan(struct nlattr **tb,
VLAN_PRIORITY_MASK;
key_mask->vlan_priority = VLAN_PRIORITY_MASK;
}
- key_val->vlan_tpid = ethertype;
- key_mask->vlan_tpid = cpu_to_be16(~0);
+ if (ethertype) {
+ key_val->vlan_tpid = ethertype;
+ key_mask->vlan_tpid = cpu_to_be16(~0);
+ }
+ if (tb[vlan_next_eth_type_key]) {
+ key_val->vlan_eth_type =
+ nla_get_be16(tb[vlan_next_eth_type_key]);
+ key_mask->vlan_eth_type = cpu_to_be16(~0);
+ }
+}
+
+static void fl_set_key_pppoe(struct nlattr **tb,
+ struct flow_dissector_key_pppoe *key_val,
+ struct flow_dissector_key_pppoe *key_mask,
+ struct fl_flow_key *key,
+ struct fl_flow_key *mask)
+{
+ /* key_val::type must be set to ETH_P_PPP_SES
+ * because ETH_P_PPP_SES was stored in basic.n_proto,
+ * which might get overwritten by ppp_proto
+ * or might be set to 0; the role of key_val::type
+ * is similar to vlan_key::tpid
+ */
+ key_val->type = htons(ETH_P_PPP_SES);
+ key_mask->type = cpu_to_be16(~0);
+
+ if (tb[TCA_FLOWER_KEY_PPPOE_SID]) {
+ key_val->session_id =
+ nla_get_be16(tb[TCA_FLOWER_KEY_PPPOE_SID]);
+ key_mask->session_id = cpu_to_be16(~0);
+ }
+ if (tb[TCA_FLOWER_KEY_PPP_PROTO]) {
+ key_val->ppp_proto =
+ nla_get_be16(tb[TCA_FLOWER_KEY_PPP_PROTO]);
+ key_mask->ppp_proto = cpu_to_be16(~0);
+
+ if (key_val->ppp_proto == htons(PPP_IP)) {
+ key->basic.n_proto = htons(ETH_P_IP);
+ mask->basic.n_proto = cpu_to_be16(~0);
+ } else if (key_val->ppp_proto == htons(PPP_IPV6)) {
+ key->basic.n_proto = htons(ETH_P_IPV6);
+ mask->basic.n_proto = cpu_to_be16(~0);
+ } else if (key_val->ppp_proto == htons(PPP_MPLS_UC)) {
+ key->basic.n_proto = htons(ETH_P_MPLS_UC);
+ mask->basic.n_proto = cpu_to_be16(~0);
+ } else if (key_val->ppp_proto == htons(PPP_MPLS_MC)) {
+ key->basic.n_proto = htons(ETH_P_MPLS_MC);
+ mask->basic.n_proto = cpu_to_be16(~0);
+ }
+ } else {
+ key->basic.n_proto = 0;
+ mask->basic.n_proto = cpu_to_be16(0);
+ }
}
static void fl_set_key_flag(u32 flower_key, u32 flower_mask,
@@ -1262,6 +1322,49 @@ static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key,
return sizeof(*md);
}
+static int fl_set_gtp_opt(const struct nlattr *nla, struct fl_flow_key *key,
+ int depth, int option_len,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_GTP_MAX + 1];
+ struct gtp_pdu_session_info *sinfo;
+ u8 len = key->enc_opts.len;
+ int err;
+
+ sinfo = (struct gtp_pdu_session_info *)&key->enc_opts.data[len];
+ memset(sinfo, 0xff, option_len);
+
+ if (!depth)
+ return sizeof(*sinfo);
+
+ if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_GTP) {
+ NL_SET_ERR_MSG_MOD(extack, "Non-gtp option type for mask");
+ return -EINVAL;
+ }
+
+ err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_GTP_MAX, nla,
+ gtp_opt_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!option_len &&
+ (!tb[TCA_FLOWER_KEY_ENC_OPT_GTP_PDU_TYPE] ||
+ !tb[TCA_FLOWER_KEY_ENC_OPT_GTP_QFI])) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Missing tunnel key gtp option pdu type or qfi");
+ return -EINVAL;
+ }
+
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_GTP_PDU_TYPE])
+ sinfo->pdu_type =
+ nla_get_u8(tb[TCA_FLOWER_KEY_ENC_OPT_GTP_PDU_TYPE]);
+
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_GTP_QFI])
+ sinfo->qfi = nla_get_u8(tb[TCA_FLOWER_KEY_ENC_OPT_GTP_QFI]);
+
+ return sizeof(*sinfo);
+}
+
static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
struct fl_flow_key *mask,
struct netlink_ext_ack *extack)
@@ -1386,6 +1489,38 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
return -EINVAL;
}
break;
+ case TCA_FLOWER_KEY_ENC_OPTS_GTP:
+ if (key->enc_opts.dst_opt_type) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Duplicate type for gtp options");
+ return -EINVAL;
+ }
+ option_len = 0;
+ key->enc_opts.dst_opt_type = TUNNEL_GTP_OPT;
+ option_len = fl_set_gtp_opt(nla_opt_key, key,
+ key_depth, option_len,
+ extack);
+ if (option_len < 0)
+ return option_len;
+
+ key->enc_opts.len += option_len;
+ /* At the same time we need to parse through the mask
+ * in order to verify exact and mask attribute lengths.
+ */
+ mask->enc_opts.dst_opt_type = TUNNEL_GTP_OPT;
+ option_len = fl_set_gtp_opt(nla_opt_msk, mask,
+ msk_depth, option_len,
+ extack);
+ if (option_len < 0)
+ return option_len;
+
+ mask->enc_opts.len += option_len;
+ if (key->enc_opts.len != mask->enc_opts.len) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Key and mask miss aligned");
+ return -EINVAL;
+ }
+ break;
default:
NL_SET_ERR_MSG(extack, "Unknown tunnel option type");
return -EINVAL;
@@ -1492,6 +1627,26 @@ static int fl_set_key_ct(struct nlattr **tb,
return 0;
}
+static bool is_vlan_key(struct nlattr *tb, __be16 *ethertype,
+ struct fl_flow_key *key, struct fl_flow_key *mask,
+ int vthresh)
+{
+ const bool good_num_of_vlans = key->num_of_vlans.num_of_vlans > vthresh;
+
+ if (!tb) {
+ *ethertype = 0;
+ return good_num_of_vlans;
+ }
+
+ *ethertype = nla_get_be16(tb);
+ if (good_num_of_vlans || eth_type_vlan(*ethertype))
+ return true;
+
+ key->basic.n_proto = *ethertype;
+ mask->basic.n_proto = cpu_to_be16(~0);
+ return false;
+}
+
static int fl_set_key(struct net *net, struct nlattr **tb,
struct fl_flow_key *key, struct fl_flow_key *mask,
struct netlink_ext_ack *extack)
@@ -1513,38 +1668,36 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
sizeof(key->eth.src));
-
- if (tb[TCA_FLOWER_KEY_ETH_TYPE]) {
- ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]);
-
- if (eth_type_vlan(ethertype)) {
- fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID,
- TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan,
- &mask->vlan);
-
- if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) {
- ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]);
- if (eth_type_vlan(ethertype)) {
- fl_set_key_vlan(tb, ethertype,
- TCA_FLOWER_KEY_CVLAN_ID,
- TCA_FLOWER_KEY_CVLAN_PRIO,
- &key->cvlan, &mask->cvlan);
- fl_set_key_val(tb, &key->basic.n_proto,
- TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
- &mask->basic.n_proto,
- TCA_FLOWER_UNSPEC,
- sizeof(key->basic.n_proto));
- } else {
- key->basic.n_proto = ethertype;
- mask->basic.n_proto = cpu_to_be16(~0);
- }
- }
- } else {
- key->basic.n_proto = ethertype;
- mask->basic.n_proto = cpu_to_be16(~0);
+ fl_set_key_val(tb, &key->num_of_vlans,
+ TCA_FLOWER_KEY_NUM_OF_VLANS,
+ &mask->num_of_vlans,
+ TCA_FLOWER_UNSPEC,
+ sizeof(key->num_of_vlans));
+
+ if (is_vlan_key(tb[TCA_FLOWER_KEY_ETH_TYPE], &ethertype, key, mask, 0)) {
+ fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID,
+ TCA_FLOWER_KEY_VLAN_PRIO,
+ TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+ &key->vlan, &mask->vlan);
+
+ if (is_vlan_key(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE],
+ &ethertype, key, mask, 1)) {
+ fl_set_key_vlan(tb, ethertype,
+ TCA_FLOWER_KEY_CVLAN_ID,
+ TCA_FLOWER_KEY_CVLAN_PRIO,
+ TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
+ &key->cvlan, &mask->cvlan);
+ fl_set_key_val(tb, &key->basic.n_proto,
+ TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
+ &mask->basic.n_proto,
+ TCA_FLOWER_UNSPEC,
+ sizeof(key->basic.n_proto));
}
}
+ if (key->basic.n_proto == htons(ETH_P_PPP_SES))
+ fl_set_key_pppoe(tb, &key->pppoe, &mask->pppoe, key, mask);
+
if (key->basic.n_proto == htons(ETH_P_IP) ||
key->basic.n_proto == htons(ETH_P_IPV6)) {
fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
@@ -1639,6 +1792,11 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
fl_set_key_val(tb, key->arp.tha, TCA_FLOWER_KEY_ARP_THA,
mask->arp.tha, TCA_FLOWER_KEY_ARP_THA_MASK,
sizeof(key->arp.tha));
+ } else if (key->basic.ip_proto == IPPROTO_L2TP) {
+ fl_set_key_val(tb, &key->l2tpv3.session_id,
+ TCA_FLOWER_KEY_L2TPV3_SID,
+ &mask->l2tpv3.session_id, TCA_FLOWER_UNSPEC,
+ sizeof(key->l2tpv3.session_id));
}
if (key->basic.ip_proto == IPPROTO_TCP ||
@@ -1815,6 +1973,12 @@ static void fl_init_dissector(struct flow_dissector *dissector,
FLOW_DISSECTOR_KEY_CT, ct);
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_HASH, hash);
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_NUM_OF_VLANS, num_of_vlans);
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_PPPOE, pppoe);
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_L2TPV3, l2tpv3);
skb_flow_dissector_init(dissector, keys, cnt);
}
@@ -2271,11 +2435,11 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
cls_flower.rule->match.mask = &f->mask->key;
cls_flower.rule->match.key = &f->mkey;
- err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts);
+ err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts,
+ cls_flower.common.extack);
if (err) {
kfree(cls_flower.rule);
if (tc_skip_sw(f->flags)) {
- NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
__fl_put(f);
return err;
}
@@ -2761,6 +2925,34 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int fl_dump_key_gtp_opt(struct sk_buff *skb,
+ struct flow_dissector_key_enc_opts *enc_opts)
+{
+ struct gtp_pdu_session_info *session_info;
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, TCA_FLOWER_KEY_ENC_OPTS_GTP);
+ if (!nest)
+ goto nla_put_failure;
+
+ session_info = (struct gtp_pdu_session_info *)&enc_opts->data[0];
+
+ if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_GTP_PDU_TYPE,
+ session_info->pdu_type))
+ goto nla_put_failure;
+
+ if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_GTP_QFI, session_info->qfi))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
static int fl_dump_key_ct(struct sk_buff *skb,
struct flow_dissector_key_ct *key,
struct flow_dissector_key_ct *mask)
@@ -2824,6 +3016,11 @@ static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type,
if (err)
goto nla_put_failure;
break;
+ case TUNNEL_GTP_OPT:
+ err = fl_dump_key_gtp_opt(skb, enc_opts);
+ if (err)
+ goto nla_put_failure;
+ break;
default:
goto nla_put_failure;
}
@@ -2870,6 +3067,11 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
sizeof(key->basic.n_proto)))
goto nla_put_failure;
+ if (mask->num_of_vlans.num_of_vlans) {
+ if (nla_put_u8(skb, TCA_FLOWER_KEY_NUM_OF_VLANS, key->num_of_vlans.num_of_vlans))
+ goto nla_put_failure;
+ }
+
if (fl_dump_key_mpls(skb, &key->mpls, &mask->mpls))
goto nla_put_failure;
@@ -2886,13 +3088,13 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
goto nla_put_failure;
if (mask->basic.n_proto) {
- if (mask->cvlan.vlan_tpid) {
+ if (mask->cvlan.vlan_eth_type) {
if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
key->basic.n_proto))
goto nla_put_failure;
- } else if (mask->vlan.vlan_tpid) {
+ } else if (mask->vlan.vlan_eth_type) {
if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
- key->basic.n_proto))
+ key->vlan.vlan_eth_type))
goto nla_put_failure;
}
}
@@ -2905,6 +3107,17 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
fl_dump_key_ip(skb, false, &key->ip, &mask->ip)))
goto nla_put_failure;
+ if (mask->pppoe.session_id) {
+ if (nla_put_be16(skb, TCA_FLOWER_KEY_PPPOE_SID,
+ key->pppoe.session_id))
+ goto nla_put_failure;
+ }
+ if (mask->basic.n_proto && mask->pppoe.ppp_proto) {
+ if (nla_put_be16(skb, TCA_FLOWER_KEY_PPP_PROTO,
+ key->pppoe.ppp_proto))
+ goto nla_put_failure;
+ }
+
if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
(fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
&mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
@@ -2992,6 +3205,13 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
mask->arp.tha, TCA_FLOWER_KEY_ARP_THA_MASK,
sizeof(key->arp.tha))))
goto nla_put_failure;
+ else if (key->basic.ip_proto == IPPROTO_L2TP &&
+ fl_dump_key_val(skb, &key->l2tpv3.session_id,
+ TCA_FLOWER_KEY_L2TPV3_SID,
+ &mask->l2tpv3.session_id,
+ TCA_FLOWER_UNSPEC,
+ sizeof(key->l2tpv3.session_id)))
+ goto nla_put_failure;
if ((key->basic.ip_proto == IPPROTO_TCP ||
key->basic.ip_proto == IPPROTO_UDP ||
@@ -3185,12 +3405,7 @@ static void fl_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
{
struct cls_fl_filter *f = fh;
- if (f && f->res.classid == classid) {
- if (cl)
- __tcf_bind_filter(q, &f->res, base);
- else
- __tcf_unbind_filter(q, &f->res);
- }
+ tc_cls_bind_class(classid, cl, q, &f->res, base);
}
static bool fl_delete_empty(struct tcf_proto *tp)
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 8654b0ce997c..a32351da968c 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -358,15 +358,8 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg,
for (f = rtnl_dereference(head->ht[h]); f;
f = rtnl_dereference(f->next)) {
- if (arg->count < arg->skip) {
- arg->count++;
- continue;
- }
- if (arg->fn(tp, f, arg) < 0) {
- arg->stop = 1;
+ if (!tc_cls_stats_dump(tp, arg, f))
return;
- }
- arg->count++;
}
}
}
@@ -423,12 +416,7 @@ static void fw_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
{
struct fw_filter *f = fh;
- if (f && f->res.classid == classid) {
- if (cl)
- __tcf_bind_filter(q, &f->res, base);
- else
- __tcf_unbind_filter(q, &f->res);
- }
+ tc_cls_bind_class(classid, cl, q, &f->res, base);
}
static struct tcf_proto_ops cls_fw_ops __read_mostly = {
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index ca5670fd5228..39a5d9c170de 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -97,16 +97,13 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
cls_mall.command = TC_CLSMATCHALL_REPLACE;
cls_mall.cookie = cookie;
- err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts);
+ err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts,
+ cls_mall.common.extack);
if (err) {
kfree(cls_mall.rule);
mall_destroy_hw_filter(tp, head, cookie, NULL);
- if (skip_sw)
- NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
- else
- err = 0;
- return err;
+ return skip_sw ? err : 0;
}
err = tc_setup_cb_add(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall,
@@ -302,14 +299,12 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY;
cls_mall.cookie = (unsigned long)head;
- err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts);
+ err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts,
+ cls_mall.common.extack);
if (err) {
kfree(cls_mall.rule);
- if (add && tc_skip_sw(head->flags)) {
- NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
- return err;
- }
- return 0;
+
+ return add && tc_skip_sw(head->flags) ? err : 0;
}
err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSMATCHALL,
@@ -318,10 +313,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
tc_cleanup_offload_action(&cls_mall.rule->action);
kfree(cls_mall.rule);
- if (err)
- return err;
-
- return 0;
+ return err;
}
static void mall_stats_hw_filter(struct tcf_proto *tp,
@@ -402,12 +394,7 @@ static void mall_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
{
struct cls_mall_head *head = fh;
- if (head && head->res.classid == classid) {
- if (cl)
- __tcf_bind_filter(q, &head->res, base);
- else
- __tcf_unbind_filter(q, &head->res);
- }
+ tc_cls_bind_class(classid, cl, q, &head->res, base);
}
static struct tcf_proto_ops cls_mall_ops __read_mostly = {
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index a35ab8c27866..9e43b929d4ca 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -424,6 +424,11 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
return -EINVAL;
}
+ if (!nhandle) {
+ NL_SET_ERR_MSG(extack, "Replacing with handle of 0 is invalid");
+ return -EINVAL;
+ }
+
h1 = to_hash(nhandle);
b = rtnl_dereference(head->table[h1]);
if (!b) {
@@ -477,8 +482,13 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
int err;
bool new = true;
+ if (!handle) {
+ NL_SET_ERR_MSG(extack, "Creating with handle of 0 is invalid");
+ return -EINVAL;
+ }
+
if (opt == NULL)
- return handle ? -EINVAL : 0;
+ return -EINVAL;
err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, opt,
route4_policy, NULL);
@@ -486,7 +496,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
return err;
fold = *arg;
- if (fold && handle && fold->handle != handle)
+ if (fold && fold->handle != handle)
return -EINVAL;
err = -ENOBUFS;
@@ -526,7 +536,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
rcu_assign_pointer(f->next, f1);
rcu_assign_pointer(*fp, f);
- if (fold && fold->handle && f->handle != fold->handle) {
+ if (fold) {
th = to_hash(fold->handle);
h = from_hash(fold->handle >> 16);
b = rtnl_dereference(head->table[th]);
@@ -577,15 +587,8 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg,
for (f = rtnl_dereference(b->ht[h1]);
f;
f = rtnl_dereference(f->next)) {
- if (arg->count < arg->skip) {
- arg->count++;
- continue;
- }
- if (arg->fn(tp, f, arg) < 0) {
- arg->stop = 1;
+ if (!tc_cls_stats_dump(tp, arg, f))
return;
- }
- arg->count++;
}
}
}
@@ -646,12 +649,7 @@ static void route4_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
{
struct route4_filter *f = fh;
- if (f && f->res.classid == classid) {
- if (cl)
- __tcf_bind_filter(q, &f->res, base);
- else
- __tcf_unbind_filter(q, &f->res);
- }
+ tc_cls_bind_class(classid, cl, q, &f->res, base);
}
static struct tcf_proto_ops cls_route4_ops __read_mostly = {
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 5cd9d6b143c4..b00a7dbd0587 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -671,15 +671,8 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg,
for (f = rtnl_dereference(s->ht[h1]); f;
f = rtnl_dereference(f->next)) {
- if (arg->count < arg->skip) {
- arg->count++;
- continue;
- }
- if (arg->fn(tp, f, arg) < 0) {
- arg->stop = 1;
+ if (!tc_cls_stats_dump(tp, arg, f))
return;
- }
- arg->count++;
}
}
}
@@ -740,12 +733,7 @@ static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
{
struct rsvp_filter *f = fh;
- if (f && f->res.classid == classid) {
- if (cl)
- __tcf_bind_filter(q, &f->res, base);
- else
- __tcf_unbind_filter(q, &f->res);
- }
+ tc_cls_bind_class(classid, cl, q, &f->res, base);
}
static struct tcf_proto_ops RSVP_OPS __read_mostly = {
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 742c7d49a958..1c9eeb98d826 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -566,13 +566,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
for (i = 0; i < p->hash; i++) {
if (!p->perfect[i].res.class)
continue;
- if (walker->count >= walker->skip) {
- if (walker->fn(tp, p->perfect + i, walker) < 0) {
- walker->stop = 1;
- return;
- }
- }
- walker->count++;
+ if (!tc_cls_stats_dump(tp, walker, p->perfect + i))
+ return;
}
}
if (!p->h)
@@ -580,13 +575,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
for (i = 0; i < p->hash; i++) {
for (f = rtnl_dereference(p->h[i]); f; f = next) {
next = rtnl_dereference(f->next);
- if (walker->count >= walker->skip) {
- if (walker->fn(tp, &f->result, walker) < 0) {
- walker->stop = 1;
- return;
- }
- }
- walker->count++;
+ if (!tc_cls_stats_dump(tp, walker, &f->result))
+ return;
}
}
}
@@ -701,12 +691,7 @@ static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl,
{
struct tcindex_filter_result *r = fh;
- if (r && r->res.classid == classid) {
- if (cl)
- __tcf_bind_filter(q, &r->res, base);
- else
- __tcf_unbind_filter(q, &r->res);
- }
+ tc_cls_bind_class(classid, cl, q, &r->res, base);
}
static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index cf5649292ee0..34d25f7a0687 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -386,14 +386,19 @@ static int u32_init(struct tcf_proto *tp)
return 0;
}
-static int u32_destroy_key(struct tc_u_knode *n, bool free_pf)
+static void __u32_destroy_key(struct tc_u_knode *n)
{
struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
tcf_exts_destroy(&n->exts);
- tcf_exts_put_net(&n->exts);
if (ht && --ht->refcnt == 0)
kfree(ht);
+ kfree(n);
+}
+
+static void u32_destroy_key(struct tc_u_knode *n, bool free_pf)
+{
+ tcf_exts_put_net(&n->exts);
#ifdef CONFIG_CLS_U32_PERF
if (free_pf)
free_percpu(n->pf);
@@ -402,8 +407,7 @@ static int u32_destroy_key(struct tc_u_knode *n, bool free_pf)
if (free_pf)
free_percpu(n->pcpu_success);
#endif
- kfree(n);
- return 0;
+ __u32_destroy_key(n);
}
/* u32_delete_key_rcu should be called when freeing a copied
@@ -811,10 +815,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
new->flags = n->flags;
RCU_INIT_POINTER(new->ht_down, ht);
- /* bump reference count as long as we hold pointer to structure */
- if (ht)
- ht->refcnt++;
-
#ifdef CONFIG_CLS_U32_PERF
/* Statistics may be incremented by readers during update
* so we must keep them intact. When the node is later destroyed
@@ -836,6 +836,10 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
return NULL;
}
+ /* bump reference count as long as we hold pointer to structure */
+ if (ht)
+ ht->refcnt++;
+
return new;
}
@@ -900,13 +904,13 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
extack);
if (err) {
- u32_destroy_key(new, false);
+ __u32_destroy_key(new);
return err;
}
err = u32_replace_hw_knode(tp, new, flags, extack);
if (err) {
- u32_destroy_key(new, false);
+ __u32_destroy_key(new);
return err;
}
@@ -1036,7 +1040,11 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
}
#endif
- memcpy(&n->sel, s, sel_size);
+ unsafe_memcpy(&n->sel, s, sel_size,
+ /* A composite flex-array structure destination,
+ * which was correctly sized with struct_size(),
+ * bounds-checked against nla_len(), and allocated
+ * above. */);
RCU_INIT_POINTER(n->ht_up, ht);
n->handle = handle;
n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
@@ -1121,26 +1129,16 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
ht = rtnl_dereference(ht->next)) {
if (ht->prio != tp->prio)
continue;
- if (arg->count >= arg->skip) {
- if (arg->fn(tp, ht, arg) < 0) {
- arg->stop = 1;
- return;
- }
- }
- arg->count++;
+
+ if (!tc_cls_stats_dump(tp, arg, ht))
+ return;
+
for (h = 0; h <= ht->divisor; h++) {
for (n = rtnl_dereference(ht->ht[h]);
n;
n = rtnl_dereference(n->next)) {
- if (arg->count < arg->skip) {
- arg->count++;
- continue;
- }
- if (arg->fn(tp, n, arg) < 0) {
- arg->stop = 1;
+ if (!tc_cls_stats_dump(tp, arg, n))
return;
- }
- arg->count++;
}
}
}
@@ -1252,12 +1250,7 @@ static void u32_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
{
struct tc_u_knode *n = fh;
- if (n && n->res.classid == classid) {
- if (cl)
- __tcf_bind_filter(q, &n->res, base);
- else
- __tcf_unbind_filter(q, &n->res);
- }
+ tc_cls_bind_class(classid, cl, q, &n->res, base);
}
static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 0a04468b7314..49bae3d5006b 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -311,12 +311,15 @@ META_COLLECTOR(int_sk_bound_if)
META_COLLECTOR(var_sk_bound_if)
{
+ int bound_dev_if;
+
if (skip_nonlocal(skb)) {
*err = -1;
return;
}
- if (skb->sk->sk_bound_dev_if == 0) {
+ bound_dev_if = READ_ONCE(skb->sk->sk_bound_dev_if);
+ if (bound_dev_if == 0) {
dst->value = (unsigned long) "any";
dst->len = 3;
} else {
@@ -324,7 +327,7 @@ META_COLLECTOR(var_sk_bound_if)
rcu_read_lock();
dev = dev_get_by_index_rcu(sock_net(skb->sk),
- skb->sk->sk_bound_dev_if);
+ bound_dev_if);
*err = var_dev(dev, dst);
rcu_read_unlock();
}
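The em_meta fix is a classic once-read pattern: sk_bound_dev_if can change concurrently, so the value is loaded once with READ_ONCE() and both the zero test and the device lookup use that same snapshot, rather than re-reading a field that may have moved under them. Sketched in isolation (the caller is assumed to hold rcu_read_lock()):

static struct net_device *sk_bound_dev(struct net *net, struct sock *sk)
{
        int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); /* single load */

        if (!bound_dev_if)
                return NULL;

        /* the lookup sees exactly the value the test validated */
        return dev_get_by_index_rcu(net, bound_dev_if);
}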
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c9c6f49f9c28..4a27dfb1ba0f 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -171,7 +171,7 @@ out_einval:
}
EXPORT_SYMBOL(register_qdisc);
-int unregister_qdisc(struct Qdisc_ops *qops)
+void unregister_qdisc(struct Qdisc_ops *qops)
{
struct Qdisc_ops *q, **qp;
int err = -ENOENT;
@@ -186,7 +186,8 @@ int unregister_qdisc(struct Qdisc_ops *qops)
err = 0;
}
write_unlock(&qdisc_mod_lock);
- return err;
+
+ WARN(err, "unregister qdisc(%s) failed\n", qops->id);
}
EXPORT_SYMBOL(unregister_qdisc);
@@ -194,7 +195,7 @@ EXPORT_SYMBOL(unregister_qdisc);
void qdisc_get_default(char *name, size_t len)
{
read_lock(&qdisc_mod_lock);
- strlcpy(name, default_qdisc_ops->id, len);
+ strscpy(name, default_qdisc_ops->id, len);
read_unlock(&qdisc_mod_lock);
}
@@ -301,7 +302,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
if (!handle)
return NULL;
- q = qdisc_match_from_root(dev->qdisc, handle);
+ q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
if (q)
goto out;
@@ -320,7 +321,7 @@ struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
if (!handle)
return NULL;
- q = qdisc_match_from_root(dev->qdisc, handle);
+ q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
if (q)
goto out;
@@ -867,6 +868,23 @@ void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
}
EXPORT_SYMBOL(qdisc_offload_graft_helper);
+void qdisc_offload_query_caps(struct net_device *dev,
+ enum tc_setup_type type,
+ void *caps, size_t caps_len)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct tc_query_caps_base base = {
+ .type = type,
+ .caps = caps,
+ };
+
+ memset(caps, 0, caps_len);
+
+ if (ops->ndo_setup_tc)
+ ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base);
+}
+EXPORT_SYMBOL(qdisc_offload_query_caps);
+
static void qdisc_offload_graft_root(struct net_device *dev,
struct Qdisc *new, struct Qdisc *old,
struct netlink_ext_ack *extack)
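qdisc_offload_query_caps() gives qdiscs a uniform way to ask the driver about offload capabilities before programming it: the caps buffer is zeroed first, so a device without ndo_setup_tc simply reports no capabilities. A hedged usage sketch follows; the caps structure here is hypothetical and stands in for whatever type/caps pairing a given qdisc and its drivers agree on:

struct example_qdisc_caps {             /* hypothetical caps layout */
        bool supports_foo;
};

static void example_probe(struct net_device *dev)
{
        struct example_qdisc_caps caps;

        qdisc_offload_query_caps(dev, TC_SETUP_QDISC_TAPRIO, /* assumed type */
                                 &caps, sizeof(caps));

        if (caps.supports_foo) {
                /* program the offload accordingly */
        }
}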
@@ -1062,7 +1080,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
qdisc_offload_graft_root(dev, new, old, extack);
- if (new && new->ops->attach)
+ if (new && new->ops->attach && !ingress)
goto skip;
for (i = 0; i < num_q; i++) {
@@ -1081,11 +1099,12 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
skip:
if (!ingress) {
- notify_and_destroy(net, skb, n, classid,
- dev->qdisc, new);
+ old = rtnl_dereference(dev->qdisc);
if (new && !new->ops->attach)
qdisc_refcount_inc(new);
- dev->qdisc = new ? : &noop_qdisc;
+ rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);
+
+ notify_and_destroy(net, skb, n, classid, old, new);
if (new && new->ops->attach)
new->ops->attach(new);
@@ -1163,7 +1182,7 @@ static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
static struct Qdisc *qdisc_create(struct net_device *dev,
struct netdev_queue *dev_queue,
- struct Qdisc *p, u32 parent, u32 handle,
+ u32 parent, u32 handle,
struct nlattr **tca, int *errp,
struct netlink_ext_ack *extack)
{
@@ -1204,7 +1223,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
err = -ENOENT;
if (!ops) {
- NL_SET_ERR_MSG(extack, "Specified qdisc not found");
+ NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
goto err_out;
}
@@ -1292,7 +1311,7 @@ err_out5:
if (ops->destroy)
ops->destroy(sch);
err_out3:
- dev_put_track(dev, &sch->dev_tracker);
+ netdev_put(dev, &sch->dev_tracker);
qdisc_free(sch);
err_out2:
module_put(ops->owner);
@@ -1424,10 +1443,6 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
struct Qdisc *p = NULL;
int err;
- if ((n->nlmsg_type != RTM_GETQDISC) &&
- !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
rtm_tca_policy, extack);
if (err < 0)
@@ -1451,7 +1466,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
q = dev_ingress_queue(dev)->qdisc_sleeping;
}
} else {
- q = dev->qdisc;
+ q = rtnl_dereference(dev->qdisc);
}
if (!q) {
NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
@@ -1508,9 +1523,6 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
struct Qdisc *q, *p;
int err;
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
replay:
/* Reinit, just in case something touches this. */
err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
@@ -1540,7 +1552,7 @@ replay:
q = dev_ingress_queue(dev)->qdisc_sleeping;
}
} else {
- q = dev->qdisc;
+ q = rtnl_dereference(dev->qdisc);
}
/* It may be default qdisc, ignore it */
@@ -1640,7 +1652,7 @@ create_n_graft:
}
if (clid == TC_H_INGRESS) {
if (dev_ingress_queue(dev)) {
- q = qdisc_create(dev, dev_ingress_queue(dev), p,
+ q = qdisc_create(dev, dev_ingress_queue(dev),
tcm->tcm_parent, tcm->tcm_parent,
tca, &err, extack);
} else {
@@ -1657,7 +1669,7 @@ create_n_graft:
else
dev_queue = netdev_get_tx_queue(dev, 0);
- q = qdisc_create(dev, dev_queue, p,
+ q = qdisc_create(dev, dev_queue,
tcm->tcm_parent, tcm->tcm_handle,
tca, &err, extack);
}
@@ -1762,7 +1774,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
s_q_idx = 0;
q_idx = 0;
- if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
+ if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
+ skb, cb, &q_idx, s_q_idx,
true, tca[TCA_DUMP_INVISIBLE]) < 0)
goto done;
@@ -1903,7 +1916,7 @@ static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
struct tcf_bind_args *a = (void *)arg;
- if (tp->ops->bind_class) {
+ if (n && tp->ops->bind_class) {
struct Qdisc *q = tcf_block_q(tp->chain->block);
sch_tree_lock(q);
@@ -1991,10 +2004,6 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
u32 qid;
int err;
- if ((n->nlmsg_type != RTM_GETTCLASS) &&
- !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
rtm_tca_policy, extack);
if (err < 0)
@@ -2033,7 +2042,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
} else if (qid1) {
qid = qid1;
} else if (qid == 0)
- qid = dev->qdisc->handle;
+ qid = rtnl_dereference(dev->qdisc)->handle;
/* Now qid is genuine qdisc handle consistent
* both with parent and child.
@@ -2044,7 +2053,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
portid = TC_H_MAKE(qid, portid);
} else {
if (qid == 0)
- qid = dev->qdisc->handle;
+ qid = rtnl_dereference(dev->qdisc)->handle;
}
/* OK. Locate qdisc */
@@ -2205,7 +2214,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
s_t = cb->args[0];
t = 0;
- if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0)
+ if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
+ skb, tcm, cb, &t, s_t, true) < 0)
goto done;
dev_queue = dev_ingress_queue(dev);
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 4c8e994cf0a5..f52255fea652 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -354,12 +354,8 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
if (walker->stop)
return;
list_for_each_entry(flow, &p->flows, list) {
- if (walker->count >= walker->skip &&
- walker->fn(sch, (unsigned long)flow, walker) < 0) {
- walker->stop = 1;
+ if (!tc_qdisc_stats_dump(sch, (unsigned long)flow, walker))
break;
- }
- walker->count++;
}
}
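atm_tc_walk() shows the qdisc-side twin of the classifier helper: tc_qdisc_stats_dump() carries the same skip/count/stop protocol for class walks, so every sch_*.c walker below reduces to one call. A sketch consistent with the converted call sites, not necessarily the exact in-tree body:

static inline bool tc_qdisc_stats_dump(struct Qdisc *sch,
                                       unsigned long cl,
                                       struct qdisc_walker *arg)
{
        if (arg->count >= arg->skip && arg->fn(sch, cl, arg) < 0) {
                arg->stop = 1;
                return false;
        }

        arg->count++;
        return true;
}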
@@ -577,7 +573,6 @@ static void atm_tc_reset(struct Qdisc *sch)
pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
list_for_each_entry(flow, &p->flows, list)
qdisc_reset(flow->q);
- sch->q.qlen = 0;
}
static void atm_tc_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index a43a58a73d09..3ed0c3342189 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -573,7 +573,7 @@ static bool cobalt_should_drop(struct cobalt_vars *vars,
/* Simple BLUE implementation. Lack of ECN is deliberate. */
if (vars->p_drop)
- drop |= (prandom_u32() < vars->p_drop);
+ drop |= (get_random_u32() < vars->p_drop);
/* Overload the drop_next field as an activity timeout */
if (!vars->count)
@@ -2092,11 +2092,11 @@ retry:
WARN_ON(host_load > CAKE_QUEUES);
- /* The shifted prandom_u32() is a way to apply dithering to
- * avoid accumulating roundoff errors
+ /* The get_random_u16() is a way to apply dithering to avoid
+ * accumulating roundoff errors
*/
flow->deficit += (b->flow_quantum * quantum_div[host_load] +
- (prandom_u32() >> 16)) >> 16;
+ get_random_u16()) >> 16;
list_move_tail(&flow->flowchain, &b->old_flows);
goto retry;
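The cake hunk swaps the shifted prandom_u32() for get_random_u16() without changing the idea: adding uniform noise in [0, 65535] before the >> 16 turns plain truncation into unbiased stochastic rounding, so quantum fractions neither accumulate nor vanish on average. The trick in isolation (the helper name is illustrative):

/* Expected value of the result equals value / 2^16 exactly,
 * instead of being biased low by truncation.
 */
static u32 dither_shift16(u64 value)
{
        return (value + get_random_u16()) >> 16;
}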
@@ -2224,8 +2224,12 @@ retry:
static void cake_reset(struct Qdisc *sch)
{
+ struct cake_sched_data *q = qdisc_priv(sch);
u32 c;
+ if (!q->tins)
+ return;
+
for (c = 0; c < CAKE_MAX_TINS; c++)
cake_clear_tin(sch, c);
}
@@ -2569,9 +2573,6 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
struct nlattr *tb[TCA_CAKE_MAX + 1];
int err;
- if (!opt)
- return -EINVAL;
-
err = nla_parse_nested_deprecated(tb, TCA_CAKE_MAX, opt, cake_policy,
extack);
if (err < 0)
@@ -3064,16 +3065,13 @@ static void cake_walk(struct Qdisc *sch, struct qdisc_walker *arg)
struct cake_tin_data *b = &q->tins[q->tin_order[i]];
for (j = 0; j < CAKE_QUEUES; j++) {
- if (list_empty(&b->flows[j].flowchain) ||
- arg->count < arg->skip) {
+ if (list_empty(&b->flows[j].flowchain)) {
arg->count++;
continue;
}
- if (arg->fn(sch, i * CAKE_QUEUES + j + 1, arg) < 0) {
- arg->stop = 1;
+ if (!tc_qdisc_stats_dump(sch, i * CAKE_QUEUES + j + 1,
+ arg))
break;
- }
- arg->count++;
}
}
}
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 02d9f0dfe356..6568e17c4c63 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -149,7 +149,6 @@ struct cbq_sched_data {
psched_time_t now; /* Cached timestamp */
unsigned int pmask;
- struct hrtimer delay_timer;
struct qdisc_watchdog watchdog; /* Watchdog timer,
started when CBQ has
backlog, but cannot
@@ -441,81 +440,6 @@ static void cbq_overlimit(struct cbq_class *cl)
}
}
-static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
- psched_time_t now)
-{
- struct cbq_class *cl;
- struct cbq_class *cl_prev = q->active[prio];
- psched_time_t sched = now;
-
- if (cl_prev == NULL)
- return 0;
-
- do {
- cl = cl_prev->next_alive;
- if (now - cl->penalized > 0) {
- cl_prev->next_alive = cl->next_alive;
- cl->next_alive = NULL;
- cl->cpriority = cl->priority;
- cl->delayed = 0;
- cbq_activate_class(cl);
-
- if (cl == q->active[prio]) {
- q->active[prio] = cl_prev;
- if (cl == q->active[prio]) {
- q->active[prio] = NULL;
- return 0;
- }
- }
-
- cl = cl_prev->next_alive;
- } else if (sched - cl->penalized > 0)
- sched = cl->penalized;
- } while ((cl_prev = cl) != q->active[prio]);
-
- return sched - now;
-}
-
-static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
-{
- struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
- delay_timer);
- struct Qdisc *sch = q->watchdog.qdisc;
- psched_time_t now;
- psched_tdiff_t delay = 0;
- unsigned int pmask;
-
- now = psched_get_time();
-
- pmask = q->pmask;
- q->pmask = 0;
-
- while (pmask) {
- int prio = ffz(~pmask);
- psched_tdiff_t tmp;
-
- pmask &= ~(1<<prio);
-
- tmp = cbq_undelay_prio(q, prio, now);
- if (tmp > 0) {
- q->pmask |= 1<<prio;
- if (tmp < delay || delay == 0)
- delay = tmp;
- }
- }
-
- if (delay) {
- ktime_t time;
-
- time = 0;
- time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
- hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS_PINNED);
- }
-
- __netif_schedule(qdisc_root(sch));
- return HRTIMER_NORESTART;
-}
-
/*
* It is a mission-critical procedure.
*
@@ -1034,7 +958,6 @@ cbq_reset(struct Qdisc *sch)
q->tx_class = NULL;
q->tx_borrowed = NULL;
qdisc_watchdog_cancel(&q->watchdog);
- hrtimer_cancel(&q->delay_timer);
q->toplevel = TC_CBQ_MAXLEVEL;
q->now = psched_get_time();
@@ -1052,11 +975,10 @@ cbq_reset(struct Qdisc *sch)
cl->cpriority = cl->priority;
}
}
- sch->q.qlen = 0;
}
-static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
+static void cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
if (lss->change & TCF_CBQ_LSS_FLAGS) {
cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
@@ -1074,7 +996,6 @@ static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
}
if (lss->change & TCF_CBQ_LSS_OFFTIME)
cl->offtime = lss->offtime;
- return 0;
}
static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
@@ -1162,8 +1083,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt,
int err;
qdisc_watchdog_init(&q->watchdog, sch);
- hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
- q->delay_timer.function = cbq_undelay;
err = cbq_opt_parse(tb, opt, extack);
if (err < 0)
@@ -1757,15 +1676,8 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
for (h = 0; h < q->clhash.hashsize; h++) {
hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
- if (arg->count < arg->skip) {
- arg->count++;
- continue;
- }
- if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
- arg->stop = 1;
+ if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg))
return;
- }
- arg->count++;
}
}
}
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index 459cc240eda9..cac870eb7897 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -520,13 +520,7 @@ static unsigned long cbs_find(struct Qdisc *sch, u32 classid)
static void cbs_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
if (!walker->stop) {
- if (walker->count >= walker->skip) {
- if (walker->fn(sch, 1, walker) < 0) {
- walker->stop = 1;
- return;
- }
- }
- walker->count++;
+ tc_qdisc_stats_dump(sch, 1, walker);
}
}
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 2adbd945bf15..3ac3e5c80b6f 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -60,7 +60,6 @@ struct choke_sched_data {
u32 forced_drop; /* Forced drops, qavg > max_thresh */
u32 forced_mark; /* Forced marks, qavg > max_thresh */
u32 pdrop; /* Drops due to queue limits */
- u32 other; /* Drops due to drop() calls */
u32 matched; /* Drops to flow match */
} stats;
@@ -315,8 +314,6 @@ static void choke_reset(struct Qdisc *sch)
rtnl_qdisc_drop(skb, sch);
}
- sch->q.qlen = 0;
- sch->qstats.backlog = 0;
if (q->tab)
memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
q->head = q->tail = 0;
@@ -466,7 +463,6 @@ static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
.early = q->stats.prob_drop + q->stats.forced_drop,
.marked = q->stats.prob_mark + q->stats.forced_mark,
.pdrop = q->stats.pdrop,
- .other = q->stats.other,
.matched = q->stats.matched,
};
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 30169b3adbbb..d7a4874543de 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -138,9 +138,6 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt,
unsigned int qlen, dropped = 0;
int err;
- if (!opt)
- return -EINVAL;
-
err = nla_parse_nested_deprecated(tb, TCA_CODEL_MAX, opt,
codel_policy, NULL);
if (err < 0)
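
The `if (!opt) return -EINVAL;` guards removed here (and the equivalents dropped from cake above and from ets, fq, fq_codel, fq_pie, gred, hhf, netem, pie, plug and red below) are dead code provided the core only reaches the ->change() callback with a non-NULL attribute, and the ->init() implementations only forward a non-NULL opt. The caller-side guarantee in net/sched/sch_api.c looks roughly like this (sketch from memory, not verbatim):

    static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
                            struct netlink_ext_ack *extack)
    {
        int err = 0;

        if (tca[TCA_OPTIONS]) {
            if (!sch->ops->change) {
                NL_SET_ERR_MSG(extack, "qdisc does not support change");
                return -EINVAL;
            }
            err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
        }
        return err;
    }
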
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 18e4f7a0b291..e35a4e90f4e6 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -284,15 +284,8 @@ static void drr_walk(struct Qdisc *sch, struct qdisc_walker *arg)
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
- if (arg->count < arg->skip) {
- arg->count++;
- continue;
- }
- if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
- arg->stop = 1;
+ if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg))
return;
- }
- arg->count++;
}
}
}
@@ -441,8 +434,6 @@ static void drr_reset_qdisc(struct Qdisc *sch)
qdisc_reset(cl->qdisc);
}
}
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static void drr_destroy_qdisc(struct Qdisc *sch)
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 4c100d105269..401ffaf87d62 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -176,16 +176,12 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
return;
for (i = 0; i < p->indices; i++) {
- if (p->mv[i].mask == 0xff && !p->mv[i].value)
- goto ignore;
- if (walker->count >= walker->skip) {
- if (walker->fn(sch, i + 1, walker) < 0) {
- walker->stop = 1;
- break;
- }
+ if (p->mv[i].mask == 0xff && !p->mv[i].value) {
+ walker->count++;
+ continue;
}
-ignore:
- walker->count++;
+ if (!tc_qdisc_stats_dump(sch, i + 1, walker))
+ break;
}
}
@@ -409,8 +405,6 @@ static void dsmark_reset(struct Qdisc *sch)
pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
if (p->q)
qdisc_reset(p->q);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static void dsmark_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index c48f91075b5c..61d1f0e32cf3 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -323,9 +323,6 @@ static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
struct tc_etf_qopt_offload etf = { };
int err;
- if (q->offload)
- return 0;
-
if (!ops->ndo_setup_tc) {
NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
return -EOPNOTSUPP;
@@ -445,9 +442,6 @@ static void etf_reset(struct Qdisc *sch)
timesortedlist_clear(sch);
__qdisc_reset_queue(&sch->q);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
-
q->last = 0;
}
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index d73393493553..b10efeaf0629 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -341,15 +341,8 @@ static void ets_qdisc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
return;
for (i = 0; i < q->nbands; i++) {
- if (arg->count < arg->skip) {
- arg->count++;
- continue;
- }
- if (arg->fn(sch, i + 1, arg) < 0) {
- arg->stop = 1;
+ if (!tc_qdisc_stats_dump(sch, i + 1, arg))
break;
- }
- arg->count++;
}
}
@@ -594,11 +587,6 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
unsigned int i;
int err;
- if (!opt) {
- NL_SET_ERR_MSG(extack, "ETS options are required for this operation");
- return -EINVAL;
- }
-
err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_policy, extack);
if (err < 0)
return err;
@@ -727,8 +715,6 @@ static void ets_qdisc_reset(struct Qdisc *sch)
}
for (band = 0; band < q->nbands; band++)
qdisc_reset(q->classes[band].qdisc);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static void ets_qdisc_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 2fb76fc0cc31..48d14fb90ba0 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -808,9 +808,6 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
unsigned drop_len = 0;
u32 fq_log;
- if (!opt)
- return -EINVAL;
-
err = nla_parse_nested_deprecated(tb, TCA_FQ_MAX, opt, fq_policy,
NULL);
if (err < 0)
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 839e1235db05..8c4fee063436 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -347,8 +347,6 @@ static void fq_codel_reset(struct Qdisc *sch)
codel_vars_init(&flow->cvars);
}
memset(q->backlogs, 0, q->flows_cnt * sizeof(u32));
- sch->q.qlen = 0;
- sch->qstats.backlog = 0;
q->memory_usage = 0;
}
@@ -374,9 +372,6 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
u32 quantum = 0;
int err;
- if (!opt)
- return -EINVAL;
-
err = nla_parse_nested_deprecated(tb, TCA_FQ_CODEL_MAX, opt,
fq_codel_policy, NULL);
if (err < 0)
@@ -687,16 +682,12 @@ static void fq_codel_walk(struct Qdisc *sch, struct qdisc_walker *arg)
return;
for (i = 0; i < q->flows_cnt; i++) {
- if (list_empty(&q->flows[i].flowchain) ||
- arg->count < arg->skip) {
+ if (list_empty(&q->flows[i].flowchain)) {
arg->count++;
continue;
}
- if (arg->fn(sch, i + 1, arg) < 0) {
- arg->stop = 1;
+ if (!tc_qdisc_stats_dump(sch, i + 1, arg))
break;
- }
- arg->count++;
}
}
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index d6aba6edd16e..6980796d435d 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -283,9 +283,6 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
unsigned int num_dropped = 0;
int err;
- if (!opt)
- return -EINVAL;
-
err = nla_parse_nested(tb, TCA_FQ_PIE_MAX, opt, fq_pie_policy, extack);
if (err < 0)
return err;
@@ -521,9 +518,6 @@ static void fq_pie_reset(struct Qdisc *sch)
INIT_LIST_HEAD(&flow->flowchain);
pie_vars_init(&flow->vars);
}
-
- sch->q.qlen = 0;
- sch->qstats.backlog = 0;
}
static void fq_pie_destroy(struct Qdisc *sch)
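
Likewise, the `sch->q.qlen = 0;` / `sch->qstats.backlog = 0;` stores deleted from the various ->reset() implementations in this series are redundant rather than lost: the generic qdisc_reset() clears both counters itself after calling into the qdisc, as the sch_generic.c hunk below shows in context. Post-patch the core path is essentially:

    void qdisc_reset(struct Qdisc *qdisc)
    {
        const struct Qdisc_ops *ops = qdisc->ops;

        trace_qdisc_reset(qdisc);

        if (ops->reset)
            ops->reset(qdisc);      /* per-qdisc reset runs first */

        __skb_queue_purge(&qdisc->gso_skb);
        __skb_queue_purge(&qdisc->skb_bad_txq);

        qdisc->q.qlen = 0;          /* then the core zeroes the counters */
        qdisc->qstats.backlog = 0;
    }
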
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index b07bd1c7330f..a9aadc4e6858 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -409,7 +409,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets)
void __qdisc_run(struct Qdisc *q)
{
- int quota = dev_tx_weight;
+ int quota = READ_ONCE(dev_tx_weight);
int packets;
while (qdisc_restart(q, &packets)) {
@@ -427,14 +427,10 @@ void __qdisc_run(struct Qdisc *q)
unsigned long dev_trans_start(struct net_device *dev)
{
- unsigned long val, res;
+ unsigned long res = READ_ONCE(netdev_get_tx_queue(dev, 0)->trans_start);
+ unsigned long val;
unsigned int i;
- if (is_vlan_dev(dev))
- dev = vlan_dev_real_dev(dev);
- else if (netif_is_macvlan(dev))
- dev = macvlan_dev_real_dev(dev);
- res = READ_ONCE(netdev_get_tx_queue(dev, 0)->trans_start);
for (i = 1; i < dev->num_tx_queues; i++) {
val = READ_ONCE(netdev_get_tx_queue(dev, i)->trans_start);
if (val && time_after(val, res))
@@ -541,7 +537,7 @@ static void dev_watchdog(struct timer_list *t)
spin_unlock(&dev->tx_global_lock);
if (release)
- dev_put_track(dev, &dev->watchdog_dev_tracker);
+ netdev_put(dev, &dev->watchdog_dev_tracker);
}
void __netdev_watchdog_up(struct net_device *dev)
@@ -551,7 +547,8 @@ void __netdev_watchdog_up(struct net_device *dev)
dev->watchdog_timeo = 5*HZ;
if (!mod_timer(&dev->watchdog_timer,
round_jiffies(jiffies + dev->watchdog_timeo)))
- dev_hold_track(dev, &dev->watchdog_dev_tracker, GFP_ATOMIC);
+ netdev_hold(dev, &dev->watchdog_dev_tracker,
+ GFP_ATOMIC);
}
}
EXPORT_SYMBOL_GPL(__netdev_watchdog_up);
@@ -565,7 +562,7 @@ static void dev_watchdog_down(struct net_device *dev)
{
netif_tx_lock_bh(dev);
if (del_timer(&dev->watchdog_timer))
- dev_put_track(dev, &dev->watchdog_dev_tracker);
+ netdev_put(dev, &dev->watchdog_dev_tracker);
netif_tx_unlock_bh(dev);
}
@@ -944,7 +941,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
goto errout;
__skb_queue_head_init(&sch->gso_skb);
__skb_queue_head_init(&sch->skb_bad_txq);
- qdisc_skb_head_init(&sch->q);
gnet_stats_basic_sync_init(&sch->bstats);
spin_lock_init(&sch->q.lock);
@@ -975,7 +971,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
sch->dev_queue = dev_queue;
- dev_hold_track(dev, &sch->dev_tracker, GFP_KERNEL);
+ netdev_hold(dev, &sch->dev_tracker, GFP_KERNEL);
refcount_set(&sch->refcnt, 1);
return sch;
@@ -1019,22 +1015,14 @@ EXPORT_SYMBOL(qdisc_create_dflt);
void qdisc_reset(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
- struct sk_buff *skb, *tmp;
trace_qdisc_reset(qdisc);
if (ops->reset)
ops->reset(qdisc);
- skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
- __skb_unlink(skb, &qdisc->gso_skb);
- kfree_skb_list(skb);
- }
-
- skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
- __skb_unlink(skb, &qdisc->skb_bad_txq);
- kfree_skb_list(skb);
- }
+ __skb_queue_purge(&qdisc->gso_skb);
+ __skb_queue_purge(&qdisc->skb_bad_txq);
qdisc->q.qlen = 0;
qdisc->qstats.backlog = 0;
@@ -1075,7 +1063,7 @@ static void qdisc_destroy(struct Qdisc *qdisc)
ops->destroy(qdisc);
module_put(ops->owner);
- dev_put_track(qdisc_dev(qdisc), &qdisc->dev_tracker);
+ netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker);
trace_qdisc_destroy(qdisc);
@@ -1133,6 +1121,21 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
}
EXPORT_SYMBOL(dev_graft_qdisc);
+static void shutdown_scheduler_queue(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_qdisc_default)
+{
+ struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+ struct Qdisc *qdisc_default = _qdisc_default;
+
+ if (qdisc) {
+ rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
+ dev_queue->qdisc_sleeping = qdisc_default;
+
+ qdisc_put(qdisc);
+ }
+}
+
static void attach_one_default_qdisc(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_unused)
@@ -1164,30 +1167,34 @@ static void attach_default_qdiscs(struct net_device *dev)
if (!netif_is_multiqueue(dev) ||
dev->priv_flags & IFF_NO_QUEUE) {
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
- dev->qdisc = txq->qdisc_sleeping;
- qdisc_refcount_inc(dev->qdisc);
+ qdisc = txq->qdisc_sleeping;
+ rcu_assign_pointer(dev->qdisc, qdisc);
+ qdisc_refcount_inc(qdisc);
} else {
qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL);
if (qdisc) {
- dev->qdisc = qdisc;
+ rcu_assign_pointer(dev->qdisc, qdisc);
qdisc->ops->attach(qdisc);
}
}
+ qdisc = rtnl_dereference(dev->qdisc);
/* Detect default qdisc setup/init failed and fallback to "noqueue" */
- if (dev->qdisc == &noop_qdisc) {
+ if (qdisc == &noop_qdisc) {
netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n",
default_qdisc_ops->id, noqueue_qdisc_ops.id);
+ netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
dev->priv_flags |= IFF_NO_QUEUE;
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
- dev->qdisc = txq->qdisc_sleeping;
- qdisc_refcount_inc(dev->qdisc);
+ qdisc = txq->qdisc_sleeping;
+ rcu_assign_pointer(dev->qdisc, qdisc);
+ qdisc_refcount_inc(qdisc);
dev->priv_flags ^= IFF_NO_QUEUE;
}
#ifdef CONFIG_NET_SCHED
- if (dev->qdisc != &noop_qdisc)
- qdisc_hash_add(dev->qdisc, false);
+ if (qdisc != &noop_qdisc)
+ qdisc_hash_add(qdisc, false);
#endif
}
@@ -1217,7 +1224,7 @@ void dev_activate(struct net_device *dev)
* and noqueue_qdisc for virtual interfaces
*/
- if (dev->qdisc == &noop_qdisc)
+ if (rtnl_dereference(dev->qdisc) == &noop_qdisc)
attach_default_qdiscs(dev);
if (!netif_carrier_ok(dev))
@@ -1383,7 +1390,7 @@ static int qdisc_change_tx_queue_len(struct net_device *dev,
void dev_qdisc_change_real_num_tx(struct net_device *dev,
unsigned int new_real_tx)
{
- struct Qdisc *qdisc = dev->qdisc;
+ struct Qdisc *qdisc = rtnl_dereference(dev->qdisc);
if (qdisc->ops->change_real_num_tx)
qdisc->ops->change_real_num_tx(qdisc, new_real_tx);
@@ -1447,7 +1454,7 @@ static void dev_init_scheduler_queue(struct net_device *dev,
void dev_init_scheduler(struct net_device *dev)
{
- dev->qdisc = &noop_qdisc;
+ rcu_assign_pointer(dev->qdisc, &noop_qdisc);
netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
if (dev_ingress_queue(dev))
dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
@@ -1455,28 +1462,13 @@ void dev_init_scheduler(struct net_device *dev)
timer_setup(&dev->watchdog_timer, dev_watchdog, 0);
}
-static void shutdown_scheduler_queue(struct net_device *dev,
- struct netdev_queue *dev_queue,
- void *_qdisc_default)
-{
- struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
- struct Qdisc *qdisc_default = _qdisc_default;
-
- if (qdisc) {
- rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
- dev_queue->qdisc_sleeping = qdisc_default;
-
- qdisc_put(qdisc);
- }
-}
-
void dev_shutdown(struct net_device *dev)
{
netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
if (dev_ingress_queue(dev))
shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
- qdisc_put(dev->qdisc);
- dev->qdisc = &noop_qdisc;
+ qdisc_put(rtnl_dereference(dev->qdisc));
+ rcu_assign_pointer(dev->qdisc, &noop_qdisc);
WARN_ON(timer_pending(&dev->watchdog_timer));
}
@@ -1529,6 +1521,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
{
memset(r, 0, sizeof(*r));
r->overhead = conf->overhead;
+ r->mpu = conf->mpu;
r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
psched_ratecfg_precompute__(r->rate_bytes_ps, &r->mult, &r->shift);
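
The sch_generic.c changes above convert every access to dev->qdisc into rcu_assign_pointer()/rtnl_dereference() pairs, which presumes the field is now declared `struct Qdisc __rcu *qdisc;` in netdevice.h. The resulting access pattern, as a minimal sketch (example_set_root is a hypothetical name):

    /* Control-path writers and readers both hold RTNL, so the matching
     * read accessor is rtnl_dereference(); data-path readers would use
     * rcu_dereference() under rcu_read_lock() instead.
     */
    static void example_set_root(struct net_device *dev, struct Qdisc *new)
    {
        struct Qdisc *old;

        ASSERT_RTNL();
        old = rtnl_dereference(dev->qdisc);
        rcu_assign_pointer(dev->qdisc, new);
        if (old)
            qdisc_put(old);
    }
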
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 1073c76d05c4..a661b062cca8 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -648,9 +648,6 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt,
u32 max_P;
struct gred_sched_data *prealloc;
- if (opt == NULL)
- return -EINVAL;
-
err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy,
extack);
if (err < 0)
@@ -829,7 +826,6 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.Wlog = q->parms.Wlog;
opt.Plog = q->parms.Plog;
opt.Scell_log = q->parms.Scell_log;
- opt.other = q->stats.other;
opt.early = q->stats.prob_drop;
opt.forced = q->stats.forced_drop;
opt.pdrop = q->stats.pdrop;
@@ -895,8 +891,6 @@ append_opt:
goto nla_put_failure;
if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PDROP, q->stats.pdrop))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_GRED_VQ_STAT_OTHER, q->stats.other))
- goto nla_put_failure;
nla_nest_end(skb, vq);
}
@@ -914,10 +908,9 @@ static void gred_destroy(struct Qdisc *sch)
struct gred_sched *table = qdisc_priv(sch);
int i;
- for (i = 0; i < table->DPs; i++) {
- if (table->tab[i])
- gred_destroy_vq(table->tab[i]);
- }
+ for (i = 0; i < table->DPs; i++)
+ gred_destroy_vq(table->tab[i]);
+
gred_offload(sch, TC_GRED_DESTROY);
kfree(table->opt);
}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index d3979a6000e7..70b0c5873d32 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1349,15 +1349,8 @@ hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry(cl, &q->clhash.hash[i],
cl_common.hnode) {
- if (arg->count < arg->skip) {
- arg->count++;
- continue;
- }
- if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
- arg->stop = 1;
+ if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg))
return;
- }
- arg->count++;
}
}
}
@@ -1430,7 +1423,7 @@ hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt,
struct hfsc_sched *q = qdisc_priv(sch);
struct tc_hfsc_qopt *qopt;
- if (opt == NULL || nla_len(opt) < sizeof(*qopt))
+ if (nla_len(opt) < sizeof(*qopt))
return -EINVAL;
qopt = nla_data(opt);
@@ -1484,8 +1477,6 @@ hfsc_reset_qdisc(struct Qdisc *sch)
}
q->eligible = RB_ROOT;
qdisc_watchdog_cancel(&q->watchdog);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static void
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 420ede875322..d26cd436cbe3 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -516,9 +516,6 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt,
u32 new_quantum = q->quantum;
u32 new_hhf_non_hh_weight = q->hhf_non_hh_weight;
- if (!opt)
- return -EINVAL;
-
err = nla_parse_nested_deprecated(tb, TCA_HHF_MAX, opt, hhf_policy,
NULL);
if (err < 0)
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 9267922ea9c3..e5b4bbf3ce3d 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1008,8 +1008,6 @@ static void htb_reset(struct Qdisc *sch)
}
qdisc_watchdog_cancel(&q->watchdog);
__qdisc_reset_queue(&q->direct_queue);
- sch->q.qlen = 0;
- sch->qstats.backlog = 0;
memset(q->hlevel, 0, sizeof(q->hlevel));
memset(q->row_mask, 0, sizeof(q->row_mask));
}
@@ -1104,9 +1102,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
err = qdisc_class_hash_init(&q->clhash);
if (err < 0)
- goto err_free_direct_qdiscs;
-
- qdisc_skb_head_init(&q->direct_queue);
+ return err;
if (tb[TCA_HTB_DIRECT_QLEN])
q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
@@ -1127,8 +1123,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
TC_H_MAKE(sch->handle, 0), extack);
if (!qdisc) {
- err = -ENOMEM;
- goto err_free_qdiscs;
+ return -ENOMEM;
}
htb_set_lockdep_class_child(qdisc);
@@ -1146,7 +1141,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
};
err = htb_offload(dev, &offload_opt);
if (err)
- goto err_free_qdiscs;
+ return err;
/* Defer this assignment, so that htb_destroy skips offload-related
* parts (especially calling ndo_setup_tc) on errors.
@@ -1154,22 +1149,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
q->offload = true;
return 0;
-
-err_free_qdiscs:
- for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
- ntx++)
- qdisc_put(q->direct_qdiscs[ntx]);
-
- qdisc_class_hash_destroy(&q->clhash);
- /* Prevent use-after-free and double-free when htb_destroy gets called.
- */
- q->clhash.hash = NULL;
- q->clhash.hashsize = 0;
-
-err_free_direct_qdiscs:
- kfree(q->direct_qdiscs);
- q->direct_qdiscs = NULL;
- return err;
}
static void htb_attach_offload(struct Qdisc *sch)
@@ -1692,13 +1671,12 @@ static void htb_destroy(struct Qdisc *sch)
qdisc_class_hash_destroy(&q->clhash);
__qdisc_reset_queue(&q->direct_queue);
- if (!q->offload)
- return;
-
- offload_opt = (struct tc_htb_qopt_offload) {
- .command = TC_HTB_DESTROY,
- };
- htb_offload(dev, &offload_opt);
+ if (q->offload) {
+ offload_opt = (struct tc_htb_qopt_offload) {
+ .command = TC_HTB_DESTROY,
+ };
+ htb_offload(dev, &offload_opt);
+ }
if (!q->direct_qdiscs)
return;
@@ -1810,6 +1788,26 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (!hopt->rate.rate || !hopt->ceil.rate)
goto failure;
+ if (q->offload) {
+ /* Options not supported by the offload. */
+ if (hopt->rate.overhead || hopt->ceil.overhead) {
+ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
+ goto failure;
+ }
+ if (hopt->rate.mpu || hopt->ceil.mpu) {
+ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
+ goto failure;
+ }
+ if (hopt->quantum) {
+ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
+ goto failure;
+ }
+ if (hopt->prio) {
+ NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter");
+ goto failure;
+ }
+ }
+
/* Keeping backward compatible with rate_table based iproute2 tc */
if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
@@ -2121,15 +2119,8 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
- if (arg->count < arg->skip) {
- arg->count++;
- continue;
- }
- if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
- arg->stop = 1;
+ if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg))
return;
- }
- arg->count++;
}
}
}
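
The htb_init() error labels can go because qdisc_create() invokes ->destroy() whenever ->init() fails, and htb_destroy() (also touched above) is now safe on a partially initialised private area. The convention, sketched with hypothetical names:

    /* With destroy-based cleanup, init paths return the error directly
     * and rely on ->destroy() tolerating partial state.
     */
    static int example_init(struct Qdisc *sch, struct nlattr *opt,
                            struct netlink_ext_ack *extack)
    {
        struct example_sched *q = qdisc_priv(sch);  /* hypothetical priv */
        int err;

        err = qdisc_class_hash_init(&q->clhash);
        if (err < 0)
            return err;             /* example_destroy() runs next */

        q->child = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
                                     TC_H_MAKE(sch->handle, 0), extack);
        if (!q->child)
            return -ENOMEM;         /* ditto: no manual unwind */

        return 0;
    }
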
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 83d2e54bf303..d0bc660d7401 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -247,11 +247,8 @@ static void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
arg->count = arg->skip;
for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
- if (arg->fn(sch, ntx + 1, arg) < 0) {
- arg->stop = 1;
+ if (!tc_qdisc_stats_dump(sch, ntx + 1, arg))
break;
- }
- arg->count++;
}
}
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index b29f3453c6ea..4c68abaa289b 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -558,11 +558,8 @@ static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
/* Walk hierarchy with a virtual class per tc */
arg->count = arg->skip;
for (ntx = arg->skip; ntx < netdev_get_num_tc(dev); ntx++) {
- if (arg->fn(sch, ntx + TC_H_MIN_PRIORITY, arg) < 0) {
- arg->stop = 1;
+ if (!tc_qdisc_stats_dump(sch, ntx + TC_H_MIN_PRIORITY, arg))
return;
- }
- arg->count++;
}
/* Pad the values and skip over unused traffic classes */
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index cd8ab90c4765..75c9c860182b 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -152,7 +152,6 @@ multiq_reset(struct Qdisc *sch)
for (band = 0; band < q->bands; band++)
qdisc_reset(q->queues[band]);
- sch->q.qlen = 0;
q->curband = 0;
}
@@ -354,15 +353,8 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
return;
for (band = 0; band < q->bands; band++) {
- if (arg->count < arg->skip) {
- arg->count++;
- continue;
- }
- if (arg->fn(sch, band + 1, arg) < 0) {
- arg->stop = 1;
+ if (!tc_qdisc_stats_dump(sch, band + 1, arg))
break;
- }
- arg->count++;
}
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index ed4ccef5d6a8..fb00ac40ecb7 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -171,7 +171,7 @@ static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
static void init_crandom(struct crndstate *state, unsigned long rho)
{
state->rho = rho;
- state->last = prandom_u32();
+ state->last = get_random_u32();
}
/* get_crandom - correlated random number generator
@@ -184,9 +184,9 @@ static u32 get_crandom(struct crndstate *state)
unsigned long answer;
if (!state || state->rho == 0) /* no correlation */
- return prandom_u32();
+ return get_random_u32();
- value = prandom_u32();
+ value = get_random_u32();
rho = (u64)state->rho + 1;
answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
state->last = answer;
@@ -200,7 +200,7 @@ static u32 get_crandom(struct crndstate *state)
static bool loss_4state(struct netem_sched_data *q)
{
struct clgstate *clg = &q->clg;
- u32 rnd = prandom_u32();
+ u32 rnd = get_random_u32();
/*
* Makes a comparison between rnd and the transition
@@ -268,15 +268,15 @@ static bool loss_gilb_ell(struct netem_sched_data *q)
switch (clg->state) {
case GOOD_STATE:
- if (prandom_u32() < clg->a1)
+ if (get_random_u32() < clg->a1)
clg->state = BAD_STATE;
- if (prandom_u32() < clg->a4)
+ if (get_random_u32() < clg->a4)
return true;
break;
case BAD_STATE:
- if (prandom_u32() < clg->a2)
+ if (get_random_u32() < clg->a2)
clg->state = GOOD_STATE;
- if (prandom_u32() > clg->a3)
+ if (get_random_u32() > clg->a3)
return true;
}
@@ -513,8 +513,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
goto finish_segs;
}
- skb->data[prandom_u32() % skb_headlen(skb)] ^=
- 1<<(prandom_u32() % 8);
+ skb->data[prandom_u32_max(skb_headlen(skb))] ^=
+ 1<<prandom_u32_max(8);
}
if (unlikely(sch->q.qlen >= sch->limit)) {
@@ -632,7 +632,7 @@ static void get_slot_next(struct netem_sched_data *q, u64 now)
if (!q->slot_dist)
next_delay = q->slot_config.min_delay +
- (prandom_u32() *
+ (get_random_u32() *
(q->slot_config.max_delay -
q->slot_config.min_delay) >> 32);
else
@@ -961,9 +961,6 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
int old_loss_model = CLG_RANDOM;
int ret;
- if (opt == NULL)
- return -EINVAL;
-
qopt = nla_data(opt);
ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
if (ret < 0)
@@ -1146,9 +1143,9 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
struct tc_netem_rate rate;
struct tc_netem_slot slot;
- qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
+ qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency),
UINT_MAX);
- qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
+ qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter),
UINT_MAX);
qopt.limit = q->limit;
qopt.loss = q->loss;
@@ -1254,12 +1251,8 @@ static unsigned long netem_find(struct Qdisc *sch, u32 classid)
static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
if (!walker->stop) {
- if (walker->count >= walker->skip)
- if (walker->fn(sch, 1, walker) < 0) {
- walker->stop = 1;
- return;
- }
- walker->count++;
+ if (!tc_qdisc_stats_dump(sch, 1, walker))
+ return;
}
}
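
Two separate random-API cleanups meet in the netem hunk above: direct prandom_u32() calls become get_random_u32() (the prandom_* wrappers were being retired around this time), and the open-coded `prandom_u32() % n` becomes prandom_u32_max(n), which reduces into the bound with a widening multiply instead of a division. The helper is approximately:

    /* Scales a 32-bit random value into [0, ep_ro) with one multiply
     * and a shift; cheaper than a modulo on most machines.
     */
    static inline u32 prandom_u32_max(u32 ep_ro)
    {
        return (u32)(((u64)get_random_u32() * ep_ro) >> 32);
    }
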
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 5a457ff61acd..265c238047a4 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -72,7 +72,7 @@ bool pie_drop_early(struct Qdisc *sch, struct pie_params *params,
if (vars->accu_prob >= (MAX_PROB / 2) * 17)
return true;
- prandom_bytes(&rnd, 8);
+ get_random_bytes(&rnd, 8);
if ((rnd >> BITS_PER_BYTE) < local_prob) {
vars->accu_prob = 0;
return true;
@@ -143,9 +143,6 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt,
unsigned int qlen, dropped = 0;
int err;
- if (!opt)
- return -EINVAL;
-
err = nla_parse_nested_deprecated(tb, TCA_PIE_MAX, opt, pie_policy,
NULL);
if (err < 0)
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
index cbc2ebca4548..ea8c4a7174bb 100644
--- a/net/sched/sch_plug.c
+++ b/net/sched/sch_plug.c
@@ -161,9 +161,6 @@ static int plug_change(struct Qdisc *sch, struct nlattr *opt,
struct plug_sched_data *q = qdisc_priv(sch);
struct tc_plug_qopt *msg;
- if (opt == NULL)
- return -EINVAL;
-
msg = nla_data(opt);
if (nla_len(opt) < sizeof(*msg))
return -EINVAL;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 3b8d7197c06b..fdc5ef52c3ee 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -135,8 +135,6 @@ prio_reset(struct Qdisc *sch)
for (prio = 0; prio < q->bands; prio++)
qdisc_reset(q->queues[prio]);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt)
@@ -187,7 +185,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
return -EINVAL;
qopt = nla_data(opt);
- if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
+ if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < TCQ_MIN_PRIO_BANDS)
return -EINVAL;
for (i = 0; i <= TC_PRIO_MAX; i++) {
@@ -378,15 +376,8 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
return;
for (prio = 0; prio < q->bands; prio++) {
- if (arg->count < arg->skip) {
- arg->count++;
- continue;
- }
- if (arg->fn(sch, prio + 1, arg) < 0) {
- arg->stop = 1;
+ if (!tc_qdisc_stats_dump(sch, prio + 1, arg))
break;
- }
- arg->count++;
}
}
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index d4ce58c90f9f..cf5ebe43b3b4 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -659,15 +659,8 @@ static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
- if (arg->count < arg->skip) {
- arg->count++;
- continue;
- }
- if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
- arg->stop = 1;
+ if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg))
return;
- }
- arg->count++;
}
}
}
@@ -1458,8 +1451,6 @@ static void qfq_reset_qdisc(struct Qdisc *sch)
qdisc_reset(cl->qdisc);
}
}
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static void qfq_destroy_qdisc(struct Qdisc *sch)
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 40adf1f07a82..98129324e157 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -72,6 +72,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
{
struct red_sched_data *q = qdisc_priv(sch);
struct Qdisc *child = q->qdisc;
+ unsigned int len;
int ret;
q->vars.qavg = red_calc_qavg(&q->parms,
@@ -126,9 +127,10 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
break;
}
+ len = qdisc_pkt_len(skb);
ret = qdisc_enqueue(skb, child, to_free);
if (likely(ret == NET_XMIT_SUCCESS)) {
- qdisc_qstats_backlog_inc(sch, skb);
+ sch->qstats.backlog += len;
sch->q.qlen++;
} else if (net_xmit_drop_count(ret)) {
q->stats.pdrop++;
@@ -176,8 +178,6 @@ static void red_reset(struct Qdisc *sch)
struct red_sched_data *q = qdisc_priv(sch);
qdisc_reset(q->qdisc);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
red_restart(&q->vars);
}
@@ -370,9 +370,6 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
struct nlattr *tb[TCA_RED_MAX + 1];
int err;
- if (!opt)
- return -EINVAL;
-
err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
extack);
if (err < 0)
@@ -463,7 +460,6 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
}
st.early = q->stats.prob_drop + q->stats.forced_drop;
st.pdrop = q->stats.pdrop;
- st.other = q->stats.other;
st.marked = q->stats.prob_mark + q->stats.forced_mark;
return gnet_stats_copy_app(d, &st, sizeof(st));
@@ -522,12 +518,7 @@ static unsigned long red_find(struct Qdisc *sch, u32 classid)
static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
if (!walker->stop) {
- if (walker->count >= walker->skip)
- if (walker->fn(sch, 1, walker) < 0) {
- walker->stop = 1;
- return;
- }
- walker->count++;
+ tc_qdisc_stats_dump(sch, 1, walker);
}
}
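
The red_enqueue() change above is a use-after-free fix, not a style change: qdisc_enqueue() may free the skb (or hand it to a child where it can be freed concurrently), so the packet length must be captured before the call and the backlog bumped from the cached value. The safe ordering, in sketch form:

    /* Snapshot everything needed for stats before enqueue; the skb
     * must not be dereferenced once the child owns or has dropped it.
     */
    static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                               struct Qdisc *child, struct sk_buff **to_free)
    {
        unsigned int len = qdisc_pkt_len(skb);
        int ret = qdisc_enqueue(skb, child, to_free);

        if (likely(ret == NET_XMIT_SUCCESS)) {
            sch->qstats.backlog += len;  /* not qdisc_qstats_backlog_inc(sch, skb) */
            sch->q.qlen++;
        }
        return ret;
    }
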
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 3d061a13d7ed..1871a1c0224d 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -135,15 +135,15 @@ static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q)
}
}
-static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q)
+static void increment_qlen(const struct sfb_skb_cb *cb, struct sfb_sched_data *q)
{
u32 sfbhash;
- sfbhash = sfb_hash(skb, 0);
+ sfbhash = cb->hashes[0];
if (sfbhash)
increment_one_qlen(sfbhash, 0, q);
- sfbhash = sfb_hash(skb, 1);
+ sfbhash = cb->hashes[1];
if (sfbhash)
increment_one_qlen(sfbhash, 1, q);
}
@@ -281,8 +281,10 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
{
struct sfb_sched_data *q = qdisc_priv(sch);
+ unsigned int len = qdisc_pkt_len(skb);
struct Qdisc *child = q->qdisc;
struct tcf_proto *fl;
+ struct sfb_skb_cb cb;
int i;
u32 p_min = ~0;
u32 minqlen = ~0;
@@ -377,7 +379,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
goto enqueue;
}
- r = prandom_u32() & SFB_MAX_PROB;
+ r = get_random_u16() & SFB_MAX_PROB;
if (unlikely(r < p_min)) {
if (unlikely(p_min > SFB_MAX_PROB / 2)) {
@@ -399,11 +401,12 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
enqueue:
+ memcpy(&cb, sfb_skb_cb(skb), sizeof(cb));
ret = qdisc_enqueue(skb, child, to_free);
if (likely(ret == NET_XMIT_SUCCESS)) {
- qdisc_qstats_backlog_inc(sch, skb);
+ sch->qstats.backlog += len;
sch->q.qlen++;
- increment_qlen(skb, q);
+ increment_qlen(&cb, q);
} else if (net_xmit_drop_count(ret)) {
q->stats.childdrop++;
qdisc_qstats_drop(sch);
@@ -452,9 +455,8 @@ static void sfb_reset(struct Qdisc *sch)
{
struct sfb_sched_data *q = qdisc_priv(sch);
- qdisc_reset(q->qdisc);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
+ if (likely(q->qdisc))
+ qdisc_reset(q->qdisc);
q->slot = 0;
q->double_buffering = false;
sfb_zero_all_buckets(q);
@@ -658,12 +660,7 @@ static int sfb_delete(struct Qdisc *sch, unsigned long cl,
static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
if (!walker->stop) {
- if (walker->count >= walker->skip)
- if (walker->fn(sch, 1, walker) < 0) {
- walker->stop = 1;
- return;
- }
- walker->count++;
+ tc_qdisc_stats_dump(sch, 1, walker);
}
}
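
sfb_enqueue() gets the same length fix plus one more: the child qdisc may reuse skb->cb, so the hashes SFB stashed there are copied into a local struct sfb_skb_cb before qdisc_enqueue(), and increment_qlen() now consumes the copy. A condensed sketch of the fixed path (example_enqueue is a hypothetical stand-in):

    static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                               struct sk_buff **to_free)
    {
        struct sfb_sched_data *q = qdisc_priv(sch);
        unsigned int len = qdisc_pkt_len(skb);
        struct sfb_skb_cb cb;
        int ret;

        memcpy(&cb, sfb_skb_cb(skb), sizeof(cb));  /* child may clobber skb->cb */
        ret = qdisc_enqueue(skb, q->qdisc, to_free);
        if (likely(ret == NET_XMIT_SUCCESS)) {
            sch->qstats.backlog += len;
            sch->q.qlen++;
            increment_qlen(&cb, q);                /* reads the snapshot, not the skb */
        }
        return ret;
    }
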
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index f8e569f79f13..abd436307d6a 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -888,16 +888,12 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
return;
for (i = 0; i < q->divisor; i++) {
- if (q->ht[i] == SFQ_EMPTY_SLOT ||
- arg->count < arg->skip) {
+ if (q->ht[i] == SFQ_EMPTY_SLOT) {
arg->count++;
continue;
}
- if (arg->fn(sch, i + 1, arg) < 0) {
- arg->stop = 1;
+ if (!tc_qdisc_stats_dump(sch, i + 1, arg))
break;
- }
- arg->count++;
}
}
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
index 7a5e4c454715..5df2dacb7b1a 100644
--- a/net/sched/sch_skbprio.c
+++ b/net/sched/sch_skbprio.c
@@ -213,9 +213,6 @@ static void skbprio_reset(struct Qdisc *sch)
struct skbprio_sched_data *q = qdisc_priv(sch);
int prio;
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
-
for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
__skb_queue_purge(&q->qdiscs[prio]);
@@ -268,15 +265,8 @@ static void skbprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
return;
for (i = 0; i < SKBPRIO_MAX_PRIORITY; i++) {
- if (arg->count < arg->skip) {
- arg->count++;
- continue;
- }
- if (arg->fn(sch, i + 1, arg) < 0) {
- arg->stop = 1;
+ if (!tc_qdisc_stats_dump(sch, i + 1, arg))
break;
- }
- arg->count++;
}
}
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 377f896bdedc..570389f6cdd7 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -18,6 +18,7 @@
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
+#include <linux/time.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
@@ -26,7 +27,6 @@
#include <net/tcp.h>
static LIST_HEAD(taprio_list);
-static DEFINE_SPINLOCK(taprio_list_lock);
#define TAPRIO_ALL_GATES_OPEN -1
@@ -66,6 +66,7 @@ struct taprio_sched {
u32 flags;
enum tk_offsets tk_offset;
int clockid;
+ bool offloaded;
atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
* speeds it's sub-nanoseconds per byte
*/
@@ -77,8 +78,8 @@ struct taprio_sched {
struct sched_gate_list __rcu *admin_sched;
struct hrtimer advance_timer;
struct list_head taprio_list;
- struct sk_buff *(*dequeue)(struct Qdisc *sch);
- struct sk_buff *(*peek)(struct Qdisc *sch);
+ u32 max_frm_len[TC_MAX_QUEUE]; /* for the fast path */
+ u32 max_sdu[TC_MAX_QUEUE]; /* for dump and offloading */
u32 txtime_delay;
};
@@ -176,7 +177,7 @@ static ktime_t get_interval_end_time(struct sched_gate_list *sched,
static int length_to_duration(struct taprio_sched *q, int len)
{
- return div_u64(len * atomic64_read(&q->picos_per_byte), 1000);
+ return div_u64(len * atomic64_read(&q->picos_per_byte), PSEC_PER_NSEC);
}
/* Returns the entry corresponding to next available interval. If
@@ -416,8 +417,12 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
struct Qdisc *child, struct sk_buff **to_free)
{
struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ int prio = skb->priority;
+ u8 tc;
- if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) {
+ /* sk_flags are only safe to use on full sockets. */
+ if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) {
if (!is_valid_interval(skb, sch))
return qdisc_drop(skb, sch, to_free);
} else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
@@ -426,12 +431,20 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
return qdisc_drop(skb, sch, to_free);
}
+ /* Devices with full offload are expected to honor this in hardware */
+ tc = netdev_get_prio_tc_map(dev, prio);
+ if (skb->len > q->max_frm_len[tc])
+ return qdisc_drop(skb, sch, to_free);
+
qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
return qdisc_enqueue(skb, child, to_free);
}
+/* Will not be called in the full offload case, since the TX queues are
+ * attached to the Qdisc created using qdisc_create_dflt()
+ */
static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
@@ -439,11 +452,6 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct Qdisc *child;
int queue;
- if (unlikely(FULL_OFFLOAD_IS_ENABLED(q->flags))) {
- WARN_ONCE(1, "Trying to enqueue skb into the root of a taprio qdisc configured with full offload\n");
- return qdisc_drop(skb, sch, to_free);
- }
-
queue = skb_get_queue_mapping(skb);
child = q->qdiscs[queue];
@@ -452,10 +460,10 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
/* Large packets might not be transmitted when the transmission duration
* exceeds any configured interval. Therefore, segment the skb into
- * smaller chunks. Skip it for the full offload case, as the driver
- * and/or the hardware is expected to handle this.
+ * smaller chunks. Drivers with full offload are expected to handle
+ * this in hardware.
*/
- if (skb_is_gso(skb) && !FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+ if (skb_is_gso(skb)) {
unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb);
netdev_features_t features = netif_skb_features(skb);
struct sk_buff *segs, *nskb;
@@ -489,7 +497,10 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return taprio_enqueue_one(skb, sch, child, to_free);
}
-static struct sk_buff *taprio_peek_soft(struct Qdisc *sch)
+/* Will not be called in the full offload case, since the TX queues are
+ * attached to the Qdisc created using qdisc_create_dflt()
+ */
+static struct sk_buff *taprio_peek(struct Qdisc *sch)
{
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
@@ -533,28 +544,17 @@ static struct sk_buff *taprio_peek_soft(struct Qdisc *sch)
return NULL;
}
-static struct sk_buff *taprio_peek_offload(struct Qdisc *sch)
-{
- WARN_ONCE(1, "Trying to peek into the root of a taprio qdisc configured with full offload\n");
-
- return NULL;
-}
-
-static struct sk_buff *taprio_peek(struct Qdisc *sch)
-{
- struct taprio_sched *q = qdisc_priv(sch);
-
- return q->peek(sch);
-}
-
static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
{
atomic_set(&entry->budget,
- div64_u64((u64)entry->interval * 1000,
+ div64_u64((u64)entry->interval * PSEC_PER_NSEC,
atomic64_read(&q->picos_per_byte)));
}
-static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch)
+/* Will not be called in the full offload case, since the TX queues are
+ * attached to the Qdisc created using qdisc_create_dflt()
+ */
+static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
{
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
@@ -642,20 +642,6 @@ done:
return skb;
}
-static struct sk_buff *taprio_dequeue_offload(struct Qdisc *sch)
-{
- WARN_ONCE(1, "Trying to dequeue from the root of a taprio qdisc configured with full offload\n");
-
- return NULL;
-}
-
-static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
-{
- struct taprio_sched *q = qdisc_priv(sch);
-
- return q->dequeue(sch);
-}
-
static bool should_restart_cycle(const struct sched_gate_list *oper,
const struct sched_entry *entry)
{
@@ -778,6 +764,11 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
[TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 },
};
+static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
+ [TCA_TAPRIO_TC_ENTRY_INDEX] = { .type = NLA_U32 },
+ [TCA_TAPRIO_TC_ENTRY_MAX_SDU] = { .type = NLA_U32 },
+};
+
static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_PRIOMAP] = {
.len = sizeof(struct tc_mqprio_qopt)
@@ -790,6 +781,7 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
[TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 },
[TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 },
+ [TCA_TAPRIO_ATTR_TC_ENTRY] = { .type = NLA_NESTED },
};
static int fill_sched_entry(struct taprio_sched *q, struct nlattr **tb,
@@ -1096,27 +1088,20 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct net_device *qdev;
struct taprio_sched *q;
- bool found = false;
ASSERT_RTNL();
if (event != NETDEV_UP && event != NETDEV_CHANGE)
return NOTIFY_DONE;
- spin_lock(&taprio_list_lock);
list_for_each_entry(q, &taprio_list, taprio_list) {
- qdev = qdisc_dev(q->root);
- if (qdev == dev) {
- found = true;
- break;
- }
- }
- spin_unlock(&taprio_list_lock);
+ if (dev != qdisc_dev(q->root))
+ continue;
- if (found)
taprio_set_picos_per_byte(dev, q);
+ break;
+ }
return NOTIFY_DONE;
}
@@ -1191,16 +1176,10 @@ static void taprio_offload_config_changed(struct taprio_sched *q)
{
struct sched_gate_list *oper, *admin;
- spin_lock(&q->current_entry_lock);
-
- oper = rcu_dereference_protected(q->oper_sched,
- lockdep_is_held(&q->current_entry_lock));
- admin = rcu_dereference_protected(q->admin_sched,
- lockdep_is_held(&q->current_entry_lock));
+ oper = rtnl_dereference(q->oper_sched);
+ admin = rtnl_dereference(q->admin_sched);
switch_schedules(q, &admin, &oper);
-
- spin_unlock(&q->current_entry_lock);
}
static u32 tc_map_to_queue_mask(struct net_device *dev, u32 tc_mask)
@@ -1253,7 +1232,8 @@ static int taprio_enable_offload(struct net_device *dev,
{
const struct net_device_ops *ops = dev->netdev_ops;
struct tc_taprio_qopt_offload *offload;
- int err = 0;
+ struct tc_taprio_caps caps;
+ int tc, err = 0;
if (!ops->ndo_setup_tc) {
NL_SET_ERR_MSG(extack,
@@ -1261,6 +1241,19 @@ static int taprio_enable_offload(struct net_device *dev,
return -EOPNOTSUPP;
}
+ qdisc_offload_query_caps(dev, TC_SETUP_QDISC_TAPRIO,
+ &caps, sizeof(caps));
+
+ if (!caps.supports_queue_max_sdu) {
+ for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
+ if (q->max_sdu[tc]) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Device does not handle queueMaxSDU");
+ return -EOPNOTSUPP;
+ }
+ }
+ }
+
offload = taprio_offload_alloc(sched->num_entries);
if (!offload) {
NL_SET_ERR_MSG(extack,
@@ -1270,6 +1263,9 @@ static int taprio_enable_offload(struct net_device *dev,
offload->enable = 1;
taprio_sched_to_offload(dev, sched, offload);
+ for (tc = 0; tc < TC_MAX_QUEUE; tc++)
+ offload->max_sdu[tc] = q->max_sdu[tc];
+
err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
if (err < 0) {
NL_SET_ERR_MSG(extack,
@@ -1277,6 +1273,8 @@ static int taprio_enable_offload(struct net_device *dev,
goto done;
}
+ q->offloaded = true;
+
done:
taprio_offload_free(offload);
@@ -1291,12 +1289,9 @@ static int taprio_disable_offload(struct net_device *dev,
struct tc_taprio_qopt_offload *offload;
int err;
- if (!FULL_OFFLOAD_IS_ENABLED(q->flags))
+ if (!q->offloaded)
return 0;
- if (!ops->ndo_setup_tc)
- return -EOPNOTSUPP;
-
offload = taprio_offload_alloc(0);
if (!offload) {
NL_SET_ERR_MSG(extack,
@@ -1312,6 +1307,8 @@ static int taprio_disable_offload(struct net_device *dev,
goto out;
}
+ q->offloaded = false;
+
out:
taprio_offload_free(offload);
@@ -1403,6 +1400,89 @@ out:
return err;
}
+static int taprio_parse_tc_entry(struct Qdisc *sch,
+ struct nlattr *opt,
+ u32 max_sdu[TC_QOPT_MAX_QUEUE],
+ unsigned long *seen_tcs,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { };
+ struct net_device *dev = qdisc_dev(sch);
+ u32 val = 0;
+ int err, tc;
+
+ err = nla_parse_nested(tb, TCA_TAPRIO_TC_ENTRY_MAX, opt,
+ taprio_tc_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!tb[TCA_TAPRIO_TC_ENTRY_INDEX]) {
+ NL_SET_ERR_MSG_MOD(extack, "TC entry index missing");
+ return -EINVAL;
+ }
+
+ tc = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_INDEX]);
+ if (tc >= TC_QOPT_MAX_QUEUE) {
+ NL_SET_ERR_MSG_MOD(extack, "TC entry index out of range");
+ return -ERANGE;
+ }
+
+ if (*seen_tcs & BIT(tc)) {
+ NL_SET_ERR_MSG_MOD(extack, "Duplicate TC entry");
+ return -EINVAL;
+ }
+
+ *seen_tcs |= BIT(tc);
+
+ if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU])
+ val = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]);
+
+ if (val > dev->max_mtu) {
+ NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU");
+ return -ERANGE;
+ }
+
+ max_sdu[tc] = val;
+
+ return 0;
+}
+
+static int taprio_parse_tc_entries(struct Qdisc *sch,
+ struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ u32 max_sdu[TC_QOPT_MAX_QUEUE];
+ unsigned long seen_tcs = 0;
+ struct nlattr *n;
+ int tc, rem;
+ int err = 0;
+
+ for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
+ max_sdu[tc] = q->max_sdu[tc];
+
+ nla_for_each_nested(n, opt, rem) {
+ if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY)
+ continue;
+
+ err = taprio_parse_tc_entry(sch, n, max_sdu, &seen_tcs, extack);
+ if (err)
+ goto out;
+ }
+
+ for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
+ q->max_sdu[tc] = max_sdu[tc];
+ if (max_sdu[tc])
+ q->max_frm_len[tc] = max_sdu[tc] + dev->hard_header_len;
+ else
+ q->max_frm_len[tc] = U32_MAX; /* never oversized */
+ }
+
+out:
+ return err;
+}
+
static int taprio_mqprio_cmp(const struct net_device *dev,
const struct tc_mqprio_qopt *mqprio)
{
@@ -1481,6 +1561,10 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
if (err < 0)
return err;
+ err = taprio_parse_tc_entries(sch, opt, extack);
+ if (err)
+ return err;
+
new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL);
if (!new_admin) {
NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule");
@@ -1488,10 +1572,8 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
}
INIT_LIST_HEAD(&new_admin->entries);
- rcu_read_lock();
- oper = rcu_dereference(q->oper_sched);
- admin = rcu_dereference(q->admin_sched);
- rcu_read_unlock();
+ oper = rtnl_dereference(q->oper_sched);
+ admin = rtnl_dereference(q->admin_sched);
/* no changes - no new mqprio settings */
if (!taprio_mqprio_cmp(dev, mqprio))
@@ -1561,17 +1643,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
q->advance_timer.function = advance_sched;
}
- if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
- q->dequeue = taprio_dequeue_offload;
- q->peek = taprio_peek_offload;
- } else {
- /* Be sure to always keep the function pointers
- * in a consistent state.
- */
- q->dequeue = taprio_dequeue_soft;
- q->peek = taprio_peek_soft;
- }
-
err = taprio_get_start_time(sch, new_admin, &start);
if (err < 0) {
NL_SET_ERR_MSG(extack, "Internal error: failed get start time");
@@ -1634,19 +1705,16 @@ static void taprio_reset(struct Qdisc *sch)
if (q->qdiscs[i])
qdisc_reset(q->qdiscs[i]);
}
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
}
static void taprio_destroy(struct Qdisc *sch)
{
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
+ struct sched_gate_list *oper, *admin;
unsigned int i;
- spin_lock(&taprio_list_lock);
list_del(&q->taprio_list);
- spin_unlock(&taprio_list_lock);
/* Note that taprio_reset() might not be called if an error
* happens in qdisc_create(), after taprio_init() has been called.
@@ -1665,11 +1733,14 @@ static void taprio_destroy(struct Qdisc *sch)
netdev_reset_tc(dev);
- if (q->oper_sched)
- call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb);
+ oper = rtnl_dereference(q->oper_sched);
+ admin = rtnl_dereference(q->admin_sched);
- if (q->admin_sched)
- call_rcu(&q->admin_sched->rcu, taprio_free_sched_cb);
+ if (oper)
+ call_rcu(&oper->rcu, taprio_free_sched_cb);
+
+ if (admin)
+ call_rcu(&admin->rcu, taprio_free_sched_cb);
}
static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
@@ -1684,9 +1755,6 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
q->advance_timer.function = advance_sched;
- q->dequeue = taprio_dequeue_soft;
- q->peek = taprio_peek_soft;
-
q->root = sch;
/* We only support static clockids. Use an invalid value as default
@@ -1695,15 +1763,17 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
q->clockid = -1;
q->flags = TAPRIO_FLAGS_INVALID;
- spin_lock(&taprio_list_lock);
list_add(&q->taprio_list, &taprio_list);
- spin_unlock(&taprio_list_lock);
- if (sch->parent != TC_H_ROOT)
+ if (sch->parent != TC_H_ROOT) {
+ NL_SET_ERR_MSG_MOD(extack, "Can only be attached as root qdisc");
return -EOPNOTSUPP;
+ }
- if (!netif_is_multiqueue(dev))
+ if (!netif_is_multiqueue(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Multi-queue device is required");
return -EOPNOTSUPP;
+ }
/* pre-allocate qdisc, attachment can't fail */
q->qdiscs = kcalloc(dev->num_tx_queues,
@@ -1875,6 +1945,33 @@ error_nest:
return -1;
}
+static int taprio_dump_tc_entries(struct taprio_sched *q, struct sk_buff *skb)
+{
+ struct nlattr *n;
+ int tc;
+
+ for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
+ n = nla_nest_start(skb, TCA_TAPRIO_ATTR_TC_ENTRY);
+ if (!n)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_INDEX, tc))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_MAX_SDU,
+ q->max_sdu[tc]))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, n);
+ }
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, n);
+ return -EMSGSIZE;
+}
+
static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct taprio_sched *q = qdisc_priv(sch);
@@ -1884,9 +1981,8 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
struct nlattr *nest, *sched_nest;
unsigned int i;
- rcu_read_lock();
- oper = rcu_dereference(q->oper_sched);
- admin = rcu_dereference(q->admin_sched);
+ oper = rtnl_dereference(q->oper_sched);
+ admin = rtnl_dereference(q->admin_sched);
opt.num_tc = netdev_get_num_tc(dev);
memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
@@ -1914,6 +2010,9 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
goto options_error;
+ if (taprio_dump_tc_entries(q, skb))
+ goto options_error;
+
if (oper && dump_schedule(skb, oper))
goto options_error;
@@ -1930,8 +2029,6 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_nest_end(skb, sched_nest);
done:
- rcu_read_unlock();
-
return nla_nest_end(skb, nest);
admin_error:
@@ -1941,7 +2038,6 @@ options_error:
nla_nest_cancel(skb, nest);
start_error:
- rcu_read_unlock();
return -ENOSPC;
}
@@ -2000,11 +2096,8 @@ static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
arg->count = arg->skip;
for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
- if (arg->fn(sch, ntx + 1, arg) < 0) {
- arg->stop = 1;
+ if (!tc_qdisc_stats_dump(sch, ntx + 1, arg))
break;
- }
- arg->count++;
}
}
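
The queueMaxSDU support added to taprio above splits the work so the per-packet cost is one table lookup and one compare: at change time each traffic class gets a precomputed frame-length ceiling (SDU plus L2 header, or U32_MAX when unlimited), and at enqueue time the packet's priority is mapped to its TC and checked against that ceiling. Reduced to its core:

    /* Configuration time, per TC (0 from userspace means "no limit"): */
    if (max_sdu[tc])
        q->max_frm_len[tc] = max_sdu[tc] + dev->hard_header_len;
    else
        q->max_frm_len[tc] = U32_MAX;

    /* Enqueue time, per packet: */
    tc = netdev_get_prio_tc_map(dev, skb->priority);
    if (skb->len > q->max_frm_len[tc])
        return qdisc_drop(skb, sch, to_free);

With a matching iproute2, this should be reachable as a per-TC "max-sdu" list on `tc qdisc replace ... taprio ...` (assuming the userspace side of this series).
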
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 72102277449e..277ad11f4d61 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -330,8 +330,6 @@ static void tbf_reset(struct Qdisc *sch)
struct tbf_sched_data *q = qdisc_priv(sch);
qdisc_reset(q->qdisc);
- sch->qstats.backlog = 0;
- sch->q.qlen = 0;
q->t_c = ktime_get_ns();
q->tokens = q->buffer;
q->ptokens = q->mtu;
@@ -356,6 +354,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
struct nlattr *tb[TCA_TBF_MAX + 1];
struct tc_tbf_qopt *qopt;
struct Qdisc *child = NULL;
+ struct Qdisc *old = NULL;
struct psched_ratecfg rate;
struct psched_ratecfg peak;
u64 max_size;
@@ -447,7 +446,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
if (child) {
qdisc_tree_flush_backlog(q->qdisc);
- qdisc_put(q->qdisc);
+ old = q->qdisc;
q->qdisc = child;
}
q->limit = qopt->limit;
@@ -467,6 +466,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
sch_tree_unlock(sch);
+ qdisc_put(old);
err = 0;
tbf_offload_change(sch);
@@ -580,12 +580,7 @@ static unsigned long tbf_find(struct Qdisc *sch, u32 classid)
static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
if (!walker->stop) {
- if (walker->count >= walker->skip)
- if (walker->fn(sch, 1, walker) < 0) {
- walker->stop = 1;
- return;
- }
- walker->count++;
+ tc_qdisc_stats_dump(sch, 1, walker);
}
}
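
tbf_change() above stops calling qdisc_put() on the old child while sch_tree_lock() is held: tearing a qdisc down may sleep and may take locks of its own, so the reference is parked in `old` and only dropped after sch_tree_unlock(). The shape of the pattern:

    struct Qdisc *old = NULL;

    sch_tree_lock(sch);
    if (child) {
        qdisc_tree_flush_backlog(q->qdisc);
        old = q->qdisc;         /* keep the reference for now */
        q->qdisc = child;
    }
    /* ... update limits and rates under the lock ... */
    sch_tree_unlock(sch);
    qdisc_put(old);             /* qdisc_put(NULL) is a no-op */
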
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 6af6b95bdb67..16f9238aa51d 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -124,7 +124,6 @@ teql_reset(struct Qdisc *sch)
struct teql_sched_data *dat = qdisc_priv(sch);
skb_queue_purge(&dat->q);
- sch->q.qlen = 0;
}
static void
@@ -492,7 +491,7 @@ static int __init teql_init(void)
master = netdev_priv(dev);
- strlcpy(master->qops.id, dev->name, IFNAMSIZ);
+ strscpy(master->qops.id, dev->name, IFNAMSIZ);
err = register_qdisc(&master->qops);
if (err) {
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index be29da09cc7a..3460abceba44 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -229,9 +229,8 @@ static struct sctp_association *sctp_association_init(
if (!sctp_ulpq_init(&asoc->ulpq, asoc))
goto fail_init;
- if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams,
- 0, gfp))
- goto fail_init;
+ if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp))
+ goto stream_free;
/* Initialize default path MTU. */
asoc->pathmtu = sp->pathmtu;
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index db6b7373d16c..34964145514e 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -863,12 +863,17 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
}
list_del_init(&shkey->key_list);
- sctp_auth_shkey_release(shkey);
list_add(&cur_key->key_list, sh_keys);
- if (asoc && asoc->active_key_id == auth_key->sca_keynumber)
- sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL);
+ if (asoc && asoc->active_key_id == auth_key->sca_keynumber &&
+ sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL)) {
+ list_del_init(&cur_key->key_list);
+ sctp_auth_shkey_release(cur_key);
+ list_add(&shkey->key_list, sh_keys);
+ return -ENOMEM;
+ }
+ sctp_auth_shkey_release(shkey);
return 0;
}
@@ -902,8 +907,13 @@ int sctp_auth_set_active_key(struct sctp_endpoint *ep,
return -EINVAL;
if (asoc) {
+ __u16 active_key_id = asoc->active_key_id;
+
asoc->active_key_id = key_id;
- sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL);
+ if (sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL)) {
+ asoc->active_key_id = active_key_id;
+ return -ENOMEM;
+ }
} else
ep->active_key_id = key_id;
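
Both sctp/auth.c hunks above turn a fire-and-forget call into a checked one: sctp_auth_asoc_init_active_key() allocates, and a failure used to leave the association pointing at a key id with no usable shared key. The fix is the usual save/modify/verify/restore idiom:

    __u16 saved = asoc->active_key_id;

    asoc->active_key_id = key_id;
    if (sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL)) {
        asoc->active_key_id = saved;    /* undo before reporting failure */
        return -ENOMEM;
    }
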
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 034e2c74497d..d9c6d8f30f09 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -61,10 +61,6 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r,
r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
r->idiag_retrans = asoc->rtx_data_chunks;
r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies);
- } else {
- r->idiag_timer = 0;
- r->idiag_retrans = 0;
- r->idiag_expires = 0;
}
}
@@ -144,13 +140,14 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
r = nlmsg_data(nlh);
BUG_ON(!sk_fullsock(sk));
+ r->idiag_timer = 0;
+ r->idiag_retrans = 0;
+ r->idiag_expires = 0;
if (asoc) {
inet_diag_msg_sctpasoc_fill(r, sk, asoc);
} else {
inet_diag_msg_common_fill(r, sk);
r->idiag_state = sk->sk_state;
- r->idiag_timer = 0;
- r->idiag_retrans = 0;
}
if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 90e12bafdd48..4f43afa8678f 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -92,6 +92,7 @@ int sctp_rcv(struct sk_buff *skb)
struct sctp_chunk *chunk;
union sctp_addr src;
union sctp_addr dest;
+ int bound_dev_if;
int family;
struct sctp_af *af;
struct net *net = dev_net(skb->dev);
@@ -169,7 +170,8 @@ int sctp_rcv(struct sk_buff *skb)
* If a frame arrives on an interface and the receiving socket is
* bound to another interface, via SO_BINDTODEVICE, treat it as OOTB
*/
- if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) {
+ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ if (bound_dev_if && (bound_dev_if != af->skb_iif(skb))) {
if (transport) {
sctp_transport_put(transport);
asoc = NULL;
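sk_bound_dev_if can be rewritten concurrently by SO_BINDTODEVICE, so the check now takes a READ_ONCE() snapshot; the test and any later use then see one consistent value instead of two racy loads. The idiom in isolation (treat_as_ootb() is a hypothetical stand-in):

	int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);

	if (bound_dev_if && bound_dev_if != af->skb_iif(skb))
		treat_as_ootb(skb);	/* hypothetical helper */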
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 470dbdc27d58..d081858c2d07 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -926,7 +926,7 @@ static int sctp_inet6_af_supported(sa_family_t family, struct sctp_sock *sp)
return 1;
/* v4-mapped-v6 addresses */
case AF_INET:
- if (!__ipv6_only_sock(sctp_opt2sk(sp)))
+ if (!ipv6_only_sock(sctp_opt2sk(sp)))
return 1;
fallthrough;
default:
@@ -952,7 +952,7 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1,
return 0;
/* If the socket is IPv6 only, v4 addrs will not match */
- if (__ipv6_only_sock(sk) && af1 != af2)
+ if (ipv6_only_sock(sk) && af1 != af2)
return 0;
/* Today, wildcard AF_INET/AF_INET6. */
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 72fe6669c50d..a63df055ac57 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -134,7 +134,8 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
dst_hold(tp->dst);
sk_setup_caps(sk, tp->dst);
}
- packet->max_size = sk_can_gso(sk) ? READ_ONCE(tp->dst->dev->gso_max_size)
+ packet->max_size = sk_can_gso(sk) ? min(READ_ONCE(tp->dst->dev->gso_max_size),
+ GSO_LEGACY_MAX_SIZE)
: asoc->pathmtu;
rcu_read_unlock();
}
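With BIG TCP, a device's gso_max_size may now exceed 64K, so SCTP clamps its packet ceiling to GSO_LEGACY_MAX_SIZE and keeps its segmentation math within the legacy 64K bound. In isolation:

	/* GSO_LEGACY_MAX_SIZE is 65536; the device value may be larger */
	u32 dev_max = READ_ONCE(tp->dst->dev->gso_max_size);
	u32 pkt_max = min(dev_max, (u32)GSO_LEGACY_MAX_SIZE);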
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index a18609f608fb..20831079fb09 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -384,6 +384,7 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
{
struct sctp_outq *q = &asoc->outqueue;
struct sctp_chunk *chk, *temp;
+ struct sctp_stream_out *sout;
q->sched->unsched_all(&asoc->stream);
@@ -398,12 +399,14 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
sctp_sched_dequeue_common(q, chk);
asoc->sent_cnt_removable--;
asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
- if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) {
- struct sctp_stream_out *streamout =
- SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream);
- streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
- }
+ sout = SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream);
+ sout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+
+ /* clear out_curr if all frag chunks are pruned */
+ if (asoc->stream.out_curr == sout &&
+ list_is_last(&chk->frag_list, &chk->msg->chunks))
+ asoc->stream.out_curr = NULL;
msg_len -= chk->skb->truesize + sizeof(struct sctp_chunk);
sctp_chunk_free(chk);
@@ -914,6 +917,7 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx)
ctx->asoc->base.sk->sk_err = -error;
return;
}
+ ctx->asoc->stats.octrlchunks++;
break;
case SCTP_CID_ABORT:
@@ -938,7 +942,10 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx)
case SCTP_CID_HEARTBEAT:
if (chunk->pmtu_probe) {
- sctp_packet_singleton(ctx->transport, chunk, ctx->gfp);
+ error = sctp_packet_singleton(ctx->transport,
+ chunk, ctx->gfp);
+ if (!error)
+ ctx->asoc->stats.octrlchunks++;
break;
}
fallthrough;
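The new out_curr reset relies on list_is_last() to detect that the pruned chunk was the message's final queued fragment. That helper is essentially a one-line tail check:

/* true when entry sits at the tail of the list headed by head */
static inline int list_is_last(const struct list_head *entry,
			       const struct list_head *head)
{
	return entry->next == head;
}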
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 35928fefae33..bcd3384ab07a 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -358,7 +358,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) &&
ret != RTN_LOCAL &&
!sp->inet.freebind &&
- !net->ipv4.sysctl_ip_nonlocal_bind)
+ !READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind))
return 0;
if (ipv6_only_sock(sctp_opt2sk(sp)))
@@ -1523,11 +1523,11 @@ static __init int sctp_init(void)
limit = (sysctl_sctp_mem[1]) << (PAGE_SHIFT - 7);
max_share = min(4UL*1024*1024, limit);
- sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */
+ sysctl_sctp_rmem[0] = PAGE_SIZE; /* give each asoc 1 page min */
sysctl_sctp_rmem[1] = 1500 * SKB_TRUESIZE(1);
sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share);
- sysctl_sctp_wmem[0] = SK_MEM_QUANTUM;
+ sysctl_sctp_wmem[0] = PAGE_SIZE;
sysctl_sctp_wmem[1] = 16*1024;
sysctl_sctp_wmem[2] = max(64*1024, max_share);
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index b3815b568e8e..463c4a58d2c3 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -458,6 +458,10 @@ void sctp_generate_reconf_event(struct timer_list *t)
goto out_unlock;
}
+ /* This happens when the response arrives after the timer is triggered. */
+ if (!asoc->strreset_chunk)
+ goto out_unlock;
+
error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_RECONF),
asoc->state, asoc->ep, asoc,
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index cc544a97c4af..f6ee7f4040c1 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -781,7 +781,7 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
}
}
- if (security_sctp_assoc_request(new_asoc, chunk->skb)) {
+ if (security_sctp_assoc_request(new_asoc, chunk->head_skb ?: chunk->skb)) {
sctp_association_free(new_asoc);
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
@@ -930,6 +930,11 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ /* Set peer label for connection. */
+ if (security_sctp_assoc_established((struct sctp_association *)asoc,
+ chunk->head_skb ?: chunk->skb))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* Verify that the chunk length for the COOKIE-ACK is OK.
* If we don't do this, any bundled chunks may be junked.
*/
@@ -945,9 +950,6 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
*/
sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL());
- /* Set peer label for connection. */
- security_inet_conn_established(ep->base.sk, chunk->skb);
-
/* RFC 2960 5.1 Normal Establishment of an Association
*
* E) Upon reception of the COOKIE ACK, endpoint "A" will move
@@ -2260,7 +2262,7 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook(
}
/* Update socket peer label if first association. */
- if (security_sctp_assoc_request(new_asoc, chunk->skb)) {
+ if (security_sctp_assoc_request(new_asoc, chunk->head_skb ?: chunk->skb)) {
sctp_association_free(new_asoc);
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
@@ -6588,8 +6590,6 @@ static int sctp_eat_data(const struct sctp_association *asoc,
pr_debug("%s: under pressure, reneging for tsn:%u\n",
__func__, tsn);
deliver = SCTP_CMD_RENEGE;
- } else {
- sk_mem_reclaim(sk);
}
}
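The `a ?: b` form used in the security-hook calls above is the GNU conditional with the middle operand omitted: it yields `a` when `a` is non-NULL, evaluating `a` only once. Written out for a plain pointer:

	/* prefer the GRO head skb when one exists */
	struct sk_buff *lsm_skb = chunk->head_skb ? chunk->head_skb
						  : chunk->skb;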
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 3e1a9600be5e..83628c347744 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -93,6 +93,7 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
static unsigned long sctp_memory_pressure;
static atomic_long_t sctp_memory_allocated;
+static DEFINE_PER_CPU(int, sctp_memory_per_cpu_fw_alloc);
struct percpu_counter sctp_sockets_allocated;
static void sctp_enter_memory_pressure(struct sock *sk)
@@ -1823,9 +1824,6 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
if (sctp_wspace(asoc) < (int)msg_len)
sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
- if (sk_under_memory_pressure(sk))
- sk_mem_reclaim(sk);
-
if (sctp_wspace(asoc) <= 0 || !sk_wmem_schedule(sk, msg_len)) {
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
@@ -2084,7 +2082,7 @@ static int sctp_skb_pull(struct sk_buff *skb, int len)
* 5 for complete description of the flags.
*/
static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct sctp_ulpevent *event = NULL;
struct sctp_sock *sp = sctp_sk(sk);
@@ -2093,9 +2091,8 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int err = 0;
int skb_len;
- pr_debug("%s: sk:%p, msghdr:%p, len:%zd, noblock:%d, flags:0x%x, "
- "addr_len:%p)\n", __func__, sk, msg, len, noblock, flags,
- addr_len);
+ pr_debug("%s: sk:%p, msghdr:%p, len:%zd, flags:0x%x, addr_len:%p\n",
+ __func__, sk, msg, len, flags, addr_len);
lock_sock(sk);
@@ -2105,7 +2102,7 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
goto out;
}
- skb = sctp_skb_recv_datagram(sk, flags, noblock, &err);
+ skb = sctp_skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
@@ -2129,7 +2126,7 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
head_skb = event->chunk->head_skb;
else
head_skb = skb;
- sock_recv_ts_and_drops(msg, sk, head_skb);
+ sock_recv_cmsgs(msg, sk, head_skb);
if (sctp_ulpevent_is_notification(event)) {
msg->msg_flags |= MSG_NOTIFICATION;
sp->pf->event_msgname(event, msg->msg_name, addr_len);
@@ -5636,7 +5633,7 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
* Set the daddr and initialize id to something more random and also
* copy over any ip options.
*/
- sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sk);
+ sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sock->sk);
sp->pf->copy_ip_options(sk, sock->sk);
/* Populate the fields of the newsk from the oldsk and migrate the
@@ -8322,7 +8319,7 @@ static int sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
inet_get_local_port_range(net, &low, &high);
remaining = (high - low) + 1;
- rover = prandom_u32() % remaining + low;
+ rover = prandom_u32_max(remaining) + low;
do {
rover++;
@@ -8978,14 +8975,13 @@ out:
* Note: This is pretty much the same routine as in core/datagram.c
* with a few changes to make lksctp work.
*/
-struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
- int noblock, int *err)
+struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, int *err)
{
int error;
struct sk_buff *skb;
long timeo;
- timeo = sock_rcvtimeo(sk, noblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
pr_debug("%s: timeo:%ld, max:%ld\n", __func__, timeo,
MAX_SCHEDULE_TIMEOUT);
@@ -9018,7 +9014,7 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
break;
if (sk_can_busy_loop(sk)) {
- sk_busy_loop(sk, noblock);
+ sk_busy_loop(sk, flags & MSG_DONTWAIT);
if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
continue;
@@ -9196,8 +9192,6 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
goto do_error;
if (signal_pending(current))
goto do_interrupted;
- if (sk_under_memory_pressure(sk))
- sk_mem_reclaim(sk);
if ((int)msg_len <= sctp_wspace(asoc) &&
sk_wmem_schedule(sk, msg_len))
break;
@@ -9454,7 +9448,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
newinet->inet_rcv_saddr = inet->inet_rcv_saddr;
newinet->inet_dport = htons(asoc->peer.port);
newinet->pmtudisc = inet->pmtudisc;
- newinet->inet_id = prandom_u32();
+ newinet->inet_id = get_random_u16();
newinet->uc_ttl = inet->uc_ttl;
newinet->mc_loop = 1;
@@ -9659,7 +9653,10 @@ struct proto sctp_prot = {
.sysctl_wmem = sysctl_sctp_wmem,
.memory_pressure = &sctp_memory_pressure,
.enter_memory_pressure = sctp_enter_memory_pressure,
+
.memory_allocated = &sctp_memory_allocated,
+ .per_cpu_fw_alloc = &sctp_memory_per_cpu_fw_alloc,
+
.sockets_allocated = &sctp_sockets_allocated,
};
@@ -9702,7 +9699,10 @@ struct proto sctpv6_prot = {
.sysctl_wmem = sysctl_sctp_wmem,
.memory_pressure = &sctp_memory_pressure,
.enter_memory_pressure = sctp_enter_memory_pressure,
+
.memory_allocated = &sctp_memory_allocated,
+ .per_cpu_fw_alloc = &sctp_memory_per_cpu_fw_alloc,
+
.sockets_allocated = &sctp_sockets_allocated,
};
#endif /* IS_ENABLED(CONFIG_IPV6) */
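prandom_u32_max() replaces the open-coded `prandom_u32() % remaining`; the kernel of this era implements it as a multiply-and-shift that maps a uniform 32-bit value into [0, range) without a division. Roughly (shape assumed):

static inline u32 prandom_u32_max(u32 ep_ro)
{
	return (u32)(((u64)prandom_u32() * ep_ro) >> 32);
}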
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 6dc95dcc0ff4..ef9fceadef8d 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
ret = sctp_stream_alloc_out(stream, outcnt, gfp);
if (ret)
- goto out_err;
+ return ret;
for (i = 0; i < stream->outcnt; i++)
SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
@@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
handle_in:
sctp_stream_interleave_init(stream);
if (!incnt)
- goto out;
-
- ret = sctp_stream_alloc_in(stream, incnt, gfp);
- if (ret)
- goto in_err;
-
- goto out;
+ return 0;
-in_err:
- sched->free(stream);
- genradix_free(&stream->in);
-out_err:
- genradix_free(&stream->out);
- stream->outcnt = 0;
-out:
- return ret;
+ return sctp_stream_alloc_in(stream, incnt, gfp);
}
int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c
index 6b13f737ebf2..bb22b71df7a3 100644
--- a/net/sctp/stream_interleave.c
+++ b/net/sctp/stream_interleave.c
@@ -979,8 +979,6 @@ static void sctp_renege_events(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
if (freed >= needed && sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0)
sctp_intl_start_pd(ulpq, gfp);
-
- sk_mem_reclaim(asoc->base.sk);
}
static void sctp_intl_stream_abort_pd(struct sctp_ulpq *ulpq, __u16 sid,
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index 99e5f69fbb74..1ad565ed5627 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -146,14 +146,11 @@ int sctp_sched_set_sched(struct sctp_association *asoc,
/* Give the next scheduler a clean slate. */
for (i = 0; i < asoc->stream.outcnt; i++) {
- void *p = SCTP_SO(&asoc->stream, i)->ext;
+ struct sctp_stream_out_ext *ext = SCTP_SO(&asoc->stream, i)->ext;
- if (!p)
+ if (!ext)
continue;
-
- p += offsetofend(struct sctp_stream_out_ext, outq);
- memset(p, 0, sizeof(struct sctp_stream_out_ext) -
- offsetofend(struct sctp_stream_out_ext, outq));
+ memset_after(ext, 0, outq);
}
}
@@ -163,7 +160,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc,
if (!SCTP_SO(&asoc->stream, i)->ext)
continue;
- ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
+ ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC);
if (ret)
goto err;
}
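memset_after() zeroes every byte of an object past a named member, replacing the manual offsetofend() arithmetic deleted above. A condensed sketch of the macro's intent (the real helper wraps this in a statement expression):

#define memset_after(obj, v, member)					\
	memset((u8 *)(obj) + offsetofend(typeof(*(obj)), member), (v),	\
	       sizeof(*(obj)) - offsetofend(typeof(*(obj)), member))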
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 0c3d2b4d7321..8920ca92a011 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -1063,7 +1063,7 @@ void sctp_ulpevent_read_nxtinfo(const struct sctp_ulpevent *event,
struct sk_buff *skb;
int err;
- skb = sctp_skb_recv_datagram(sk, MSG_PEEK, 1, &err);
+ skb = sctp_skb_recv_datagram(sk, MSG_PEEK | MSG_DONTWAIT, &err);
if (skb != NULL) {
__sctp_ulpevent_read_nxtinfo(sctp_skb2event(skb),
msghdr, skb);
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 407fed46931b..0a8510a0c5e6 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1100,12 +1100,8 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
else if (retval == 1)
sctp_ulpq_reasm_drain(ulpq);
}
-
- sk_mem_reclaim(asoc->base.sk);
}
-
-
/* Notify the application if an association is aborted and in
* partial delivery mode. Send up any pending received messages.
*/
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 196fb6f01b14..875efcd126a2 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -5,3 +5,4 @@ obj-$(CONFIG_SMC_DIAG) += smc_diag.o
smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
smc-y += smc_tracepoint.o
+smc-$(CONFIG_SYSCTL) += smc_sysctl.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index aa3bcaaeabf7..e12d4fa5aece 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -51,6 +51,7 @@
#include "smc_close.h"
#include "smc_stats.h"
#include "smc_tracepoint.h"
+#include "smc_sysctl.h"
static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
* creation on server
@@ -59,12 +60,52 @@ static DEFINE_MUTEX(smc_client_lgr_pending); /* serialize link group
* creation on client
*/
+static struct workqueue_struct *smc_tcp_ls_wq; /* wq for tcp listen work */
struct workqueue_struct *smc_hs_wq; /* wq for handshake work */
struct workqueue_struct *smc_close_wq; /* wq for close work */
static void smc_tcp_listen_work(struct work_struct *);
static void smc_connect_work(struct work_struct *);
+int smc_nl_dump_hs_limitation(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
+ void *hdr;
+
+ if (cb_ctx->pos[0])
+ goto out;
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &smc_gen_nl_family, NLM_F_MULTI,
+ SMC_NETLINK_DUMP_HS_LIMITATION);
+ if (!hdr)
+ return -ENOMEM;
+
+ if (nla_put_u8(skb, SMC_NLA_HS_LIMITATION_ENABLED,
+ sock_net(skb->sk)->smc.limit_smc_hs))
+ goto err;
+
+ genlmsg_end(skb, hdr);
+ cb_ctx->pos[0] = 1;
+out:
+ return skb->len;
+err:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct genl_info *info)
+{
+ sock_net(skb->sk)->smc.limit_smc_hs = true;
+ return 0;
+}
+
+int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info)
+{
+ sock_net(skb->sk)->smc.limit_smc_hs = false;
+ return 0;
+}
+
static void smc_set_keepalive(struct sock *sk, int val)
{
struct smc_sock *smc = smc_sk(sk);
@@ -72,6 +113,61 @@ static void smc_set_keepalive(struct sock *sk, int val)
smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}
+static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk,
+ struct sk_buff *skb,
+ struct request_sock *req,
+ struct dst_entry *dst,
+ struct request_sock *req_unhash,
+ bool *own_req)
+{
+ struct smc_sock *smc;
+ struct sock *child;
+
+ smc = smc_clcsock_user_data(sk);
+
+ if (READ_ONCE(sk->sk_ack_backlog) + atomic_read(&smc->queued_smc_hs) >
+ sk->sk_max_ack_backlog)
+ goto drop;
+
+ if (sk_acceptq_is_full(&smc->sk)) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+ goto drop;
+ }
+
+ /* passthrough to original syn recv sock fct */
+ child = smc->ori_af_ops->syn_recv_sock(sk, skb, req, dst, req_unhash,
+ own_req);
+ /* child must not inherit smc or its ops */
+ if (child) {
+ rcu_assign_sk_user_data(child, NULL);
+
+ /* v4-mapped sockets don't inherit parent ops. Don't restore. */
+ if (inet_csk(child)->icsk_af_ops == inet_csk(sk)->icsk_af_ops)
+ inet_csk(child)->icsk_af_ops = smc->ori_af_ops;
+ }
+ return child;
+
+drop:
+ dst_release(dst);
+ tcp_listendrop(sk);
+ return NULL;
+}
+
+static bool smc_hs_congested(const struct sock *sk)
+{
+ const struct smc_sock *smc;
+
+ smc = smc_clcsock_user_data(sk);
+
+ if (!smc)
+ return true;
+
+ if (workqueue_congested(WORK_CPU_UNBOUND, smc_hs_wq))
+ return true;
+
+ return false;
+}
+
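Together, smc_tcp_syn_recv_sock() and smc_hs_congested() implement the handshake limitation: connections beyond the listen backlog (counting in-flight SMC handshakes) are dropped, and a congested handshake workqueue makes new peers fall back to TCP instead of queueing more work. A hypothetical userspace opt-in through the socket option added later in this patch:

	int one = 1;

	if (setsockopt(fd, SOL_SMC, SMC_LIMIT_HS, &one, sizeof(one)))
		perror("setsockopt(SMC_LIMIT_HS)");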
static struct smc_hashinfo smc_v4_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};
@@ -107,12 +203,27 @@ void smc_unhash_sk(struct sock *sk)
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);
+/* Called just before user context releases the sock_lock, to finish
+ * the work that was deferred because BH context found the sock_lock
+ * held by the user.
+ */
+static void smc_release_cb(struct sock *sk)
+{
+ struct smc_sock *smc = smc_sk(sk);
+
+ if (smc->conn.tx_in_release_sock) {
+ smc_tx_pending(&smc->conn);
+ smc->conn.tx_in_release_sock = false;
+ }
+}
+
struct proto smc_proto = {
.name = "SMC",
.owner = THIS_MODULE,
.keepalive = smc_set_keepalive,
.hash = smc_hash_sk,
.unhash = smc_unhash_sk,
+ .release_cb = smc_release_cb,
.obj_size = sizeof(struct smc_sock),
.h.smc_hash = &smc_v4_hashinfo,
.slab_flags = SLAB_TYPESAFE_BY_RCU,
@@ -125,17 +236,34 @@ struct proto smc_proto6 = {
.keepalive = smc_set_keepalive,
.hash = smc_hash_sk,
.unhash = smc_unhash_sk,
+ .release_cb = smc_release_cb,
.obj_size = sizeof(struct smc_sock),
.h.smc_hash = &smc_v6_hashinfo,
.slab_flags = SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);
+static void smc_fback_restore_callbacks(struct smc_sock *smc)
+{
+ struct sock *clcsk = smc->clcsock->sk;
+
+ write_lock_bh(&clcsk->sk_callback_lock);
+ clcsk->sk_user_data = NULL;
+
+ smc_clcsock_restore_cb(&clcsk->sk_state_change, &smc->clcsk_state_change);
+ smc_clcsock_restore_cb(&clcsk->sk_data_ready, &smc->clcsk_data_ready);
+ smc_clcsock_restore_cb(&clcsk->sk_write_space, &smc->clcsk_write_space);
+ smc_clcsock_restore_cb(&clcsk->sk_error_report, &smc->clcsk_error_report);
+
+ write_unlock_bh(&clcsk->sk_callback_lock);
+}
+
static void smc_restore_fallback_changes(struct smc_sock *smc)
{
if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
smc->clcsock->file->private_data = smc->sk.sk_socket;
smc->clcsock->file = NULL;
+ smc_fback_restore_callbacks(smc);
}
}
@@ -183,7 +311,7 @@ static int smc_release(struct socket *sock)
{
struct sock *sk = sock->sk;
struct smc_sock *smc;
- int rc = 0;
+ int old_state, rc = 0;
if (!sk)
goto out;
@@ -191,8 +319,10 @@ static int smc_release(struct socket *sock)
sock_hold(sk); /* sock_put below */
smc = smc_sk(sk);
+ old_state = sk->sk_state;
+
/* cleanup for a dangling non-blocking connect */
- if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
+ if (smc->connect_nonblock && old_state == SMC_INIT)
tcp_abort(smc->clcsock->sk, ECONNABORTED);
if (cancel_work_sync(&smc->connect_work))
@@ -206,6 +336,10 @@ static int smc_release(struct socket *sock)
else
lock_sock(sk);
+ if (old_state == SMC_INIT && sk->sk_state == SMC_ACTIVE &&
+ !smc->use_fallback)
+ smc_close_active_abort(smc);
+
rc = __smc_release(smc);
/* detach socket */
@@ -245,6 +379,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
sk->sk_state = SMC_INIT;
sk->sk_destruct = smc_destruct;
sk->sk_protocol = protocol;
+ WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(net->smc.sysctl_wmem));
+ WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(net->smc.sysctl_rmem));
smc = smc_sk(sk);
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_WORK(&smc->connect_work, smc_connect_work);
@@ -255,6 +391,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
sk->sk_prot->hash(sk);
sk_refcnt_debug_inc(sk);
mutex_init(&smc->clcsock_release_lock);
+ smc_init_saved_callbacks(smc);
return sk;
}
@@ -292,6 +429,7 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
goto out_rel;
smc->clcsock->sk->sk_reuse = sk->sk_reuse;
+ smc->clcsock->sk->sk_reuseport = sk->sk_reuseport;
rc = kernel_bind(smc->clcsock, uaddr, addr_len);
out_rel:
@@ -352,6 +490,29 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
+/* register the new vzalloced sndbuf on all links */
+static int smcr_lgr_reg_sndbufs(struct smc_link *link,
+ struct smc_buf_desc *snd_desc)
+{
+ struct smc_link_group *lgr = link->lgr;
+ int i, rc = 0;
+
+ if (!snd_desc->is_vm)
+ return -EINVAL;
+
+ /* protect against parallel smcr_link_reg_buf() */
+ mutex_lock(&lgr->llc_conf_mutex);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (!smc_link_active(&lgr->lnk[i]))
+ continue;
+ rc = smcr_link_reg_buf(&lgr->lnk[i], snd_desc);
+ if (rc)
+ break;
+ }
+ mutex_unlock(&lgr->llc_conf_mutex);
+ return rc;
+}
+
/* register the new rmb on all links */
static int smcr_lgr_reg_rmbs(struct smc_link *link,
struct smc_buf_desc *rmb_desc)
@@ -363,13 +524,13 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
if (rc)
return rc;
/* protect against parallel smc_llc_cli_rkey_exchange() and
- * parallel smcr_link_reg_rmb()
+ * parallel smcr_link_reg_buf()
*/
mutex_lock(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_active(&lgr->lnk[i]))
continue;
- rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc);
+ rc = smcr_link_reg_buf(&lgr->lnk[i], rmb_desc);
if (rc)
goto out;
}
@@ -415,8 +576,15 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
smc_wr_remember_qp_attr(link);
- if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
- return SMC_CLC_DECL_ERR_REGRMB;
+ /* reg the sndbuf if it was vzalloced */
+ if (smc->conn.sndbuf_desc->is_vm) {
+ if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc))
+ return SMC_CLC_DECL_ERR_REGBUF;
+ }
+
+ /* reg the rmb */
+ if (smcr_link_reg_buf(link, smc->conn.rmb_desc))
+ return SMC_CLC_DECL_ERR_REGBUF;
/* confirm_rkey is implicit on 1st contact */
smc->conn.rmb_desc->is_conf_rkey = true;
@@ -566,11 +734,140 @@ static void smc_stat_fallback(struct smc_sock *smc)
mutex_unlock(&net->smc.mutex_fback_rsn);
}
-static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
+/* must be called under rcu read lock */
+static void smc_fback_wakeup_waitqueue(struct smc_sock *smc, void *key)
+{
+ struct socket_wq *wq;
+ __poll_t flags;
+
+ wq = rcu_dereference(smc->sk.sk_wq);
+ if (!skwq_has_sleeper(wq))
+ return;
+
+ /* wake up smc sk->sk_wq */
+ if (!key) {
+ /* sk_state_change */
+ wake_up_interruptible_all(&wq->wait);
+ } else {
+ flags = key_to_poll(key);
+ if (flags & (EPOLLIN | EPOLLOUT))
+ /* sk_data_ready or sk_write_space */
+ wake_up_interruptible_sync_poll(&wq->wait, flags);
+ else if (flags & EPOLLERR)
+ /* sk_error_report */
+ wake_up_interruptible_poll(&wq->wait, flags);
+ }
+}
+
+static int smc_fback_mark_woken(wait_queue_entry_t *wait,
+ unsigned int mode, int sync, void *key)
+{
+ struct smc_mark_woken *mark =
+ container_of(wait, struct smc_mark_woken, wait_entry);
+
+ mark->woken = true;
+ mark->key = key;
+ return 0;
+}
+
+static void smc_fback_forward_wakeup(struct smc_sock *smc, struct sock *clcsk,
+ void (*clcsock_callback)(struct sock *sk))
+{
+ struct smc_mark_woken mark = { .woken = false };
+ struct socket_wq *wq;
+
+ init_waitqueue_func_entry(&mark.wait_entry,
+ smc_fback_mark_woken);
+ rcu_read_lock();
+ wq = rcu_dereference(clcsk->sk_wq);
+ if (!wq)
+ goto out;
+ add_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
+ clcsock_callback(clcsk);
+ remove_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
+
+ if (mark.woken)
+ smc_fback_wakeup_waitqueue(smc, mark.key);
+out:
+ rcu_read_unlock();
+}
+
+static void smc_fback_state_change(struct sock *clcsk)
+{
+ struct smc_sock *smc;
+
+ read_lock_bh(&clcsk->sk_callback_lock);
+ smc = smc_clcsock_user_data(clcsk);
+ if (smc)
+ smc_fback_forward_wakeup(smc, clcsk,
+ smc->clcsk_state_change);
+ read_unlock_bh(&clcsk->sk_callback_lock);
+}
+
+static void smc_fback_data_ready(struct sock *clcsk)
+{
+ struct smc_sock *smc;
+
+ read_lock_bh(&clcsk->sk_callback_lock);
+ smc = smc_clcsock_user_data(clcsk);
+ if (smc)
+ smc_fback_forward_wakeup(smc, clcsk,
+ smc->clcsk_data_ready);
+ read_unlock_bh(&clcsk->sk_callback_lock);
+}
+
+static void smc_fback_write_space(struct sock *clcsk)
+{
+ struct smc_sock *smc;
+
+ read_lock_bh(&clcsk->sk_callback_lock);
+ smc = smc_clcsock_user_data(clcsk);
+ if (smc)
+ smc_fback_forward_wakeup(smc, clcsk,
+ smc->clcsk_write_space);
+ read_unlock_bh(&clcsk->sk_callback_lock);
+}
+
+static void smc_fback_error_report(struct sock *clcsk)
+{
+ struct smc_sock *smc;
+
+ read_lock_bh(&clcsk->sk_callback_lock);
+ smc = smc_clcsock_user_data(clcsk);
+ if (smc)
+ smc_fback_forward_wakeup(smc, clcsk,
+ smc->clcsk_error_report);
+ read_unlock_bh(&clcsk->sk_callback_lock);
+}
+
+static void smc_fback_replace_callbacks(struct smc_sock *smc)
+{
+ struct sock *clcsk = smc->clcsock->sk;
+
+ write_lock_bh(&clcsk->sk_callback_lock);
+ clcsk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
+
+ smc_clcsock_replace_cb(&clcsk->sk_state_change, smc_fback_state_change,
+ &smc->clcsk_state_change);
+ smc_clcsock_replace_cb(&clcsk->sk_data_ready, smc_fback_data_ready,
+ &smc->clcsk_data_ready);
+ smc_clcsock_replace_cb(&clcsk->sk_write_space, smc_fback_write_space,
+ &smc->clcsk_write_space);
+ smc_clcsock_replace_cb(&clcsk->sk_error_report, smc_fback_error_report,
+ &smc->clcsk_error_report);
+
+ write_unlock_bh(&clcsk->sk_callback_lock);
+}
+
+static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
{
- wait_queue_head_t *smc_wait = sk_sleep(&smc->sk);
- wait_queue_head_t *clc_wait = sk_sleep(smc->clcsock->sk);
- unsigned long flags;
+ int rc = 0;
+
+ mutex_lock(&smc->clcsock_release_lock);
+ if (!smc->clcsock) {
+ rc = -EBADF;
+ goto out;
+ }
smc->use_fallback = true;
smc->fallback_rsn = reason_code;
@@ -582,22 +879,30 @@ static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
smc->clcsock->wq.fasync_list =
smc->sk.sk_socket->wq.fasync_list;
- /* There may be some entries remaining in
- * smc socket->wq, which should be removed
- * to clcsocket->wq during the fallback.
+ /* There might be some wait entries remaining
+ * in smc sk->sk_wq and they should be woken up
+ * as clcsock's wait queue is woken up.
*/
- spin_lock_irqsave(&smc_wait->lock, flags);
- spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING);
- list_splice_init(&smc_wait->head, &clc_wait->head);
- spin_unlock(&clc_wait->lock);
- spin_unlock_irqrestore(&smc_wait->lock, flags);
+ smc_fback_replace_callbacks(smc);
}
+out:
+ mutex_unlock(&smc->clcsock_release_lock);
+ return rc;
}
/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
- smc_switch_to_fallback(smc, reason_code);
+ struct net *net = sock_net(&smc->sk);
+ int rc = 0;
+
+ rc = smc_switch_to_fallback(smc, reason_code);
+ if (rc) { /* fallback fails */
+ this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
+ if (smc->sk.sk_state == SMC_INIT)
+ sock_put(&smc->sk); /* passive closing */
+ return rc;
+ }
smc_copy_sock_settings_to_clc(smc);
smc->connect_nonblock = 0;
if (smc->sk.sk_state == SMC_INIT)
@@ -634,9 +939,13 @@ static void smc_conn_abort(struct smc_sock *smc, int local_first)
{
struct smc_connection *conn = &smc->conn;
struct smc_link_group *lgr = conn->lgr;
+ bool lgr_valid = false;
+
+ if (smc_conn_lgr_valid(conn))
+ lgr_valid = true;
smc_conn_free(conn);
- if (local_first)
+ if (local_first && lgr_valid)
smc_lgr_cleanup_early(lgr);
}
@@ -945,12 +1254,18 @@ static int smc_connect_rdma(struct smc_sock *smc,
goto connect_abort;
}
} else {
+ /* reg sendbufs if they were vzalloced */
+ if (smc->conn.sndbuf_desc->is_vm) {
+ if (smcr_lgr_reg_sndbufs(link, smc->conn.sndbuf_desc)) {
+ reason_code = SMC_CLC_DECL_ERR_REGBUF;
+ goto connect_abort;
+ }
+ }
if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc)) {
- reason_code = SMC_CLC_DECL_ERR_REGRMB;
+ reason_code = SMC_CLC_DECL_ERR_REGBUF;
goto connect_abort;
}
}
- smc_rmb_sync_sg_for_device(&smc->conn);
if (aclc->hdr.version > SMC_V1) {
struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
@@ -1162,8 +1477,14 @@ static int __smc_connect(struct smc_sock *smc)
/* perform CLC handshake */
rc = smc_connect_clc(smc, aclc2, ini);
- if (rc)
+ if (rc) {
+ /* -EAGAIN on timeout, see tcp_recvmsg() */
+ if (rc == -EAGAIN) {
+ rc = -ETIMEDOUT;
+ smc->sk.sk_err = ETIMEDOUT;
+ }
goto vlan_cleanup;
+ }
/* check if smc modes and versions of CLC proposal and accept match */
rc = smc_connect_check_aclc(ini, aclc);
@@ -1222,6 +1543,8 @@ static void smc_connect_work(struct work_struct *work)
smc->sk.sk_state = SMC_CLOSED;
if (rc == -EPIPE || rc == -EAGAIN)
smc->sk.sk_err = EPIPE;
+ else if (rc == -ECONNREFUSED)
+ smc->sk.sk_err = ECONNREFUSED;
else if (signal_pending(current))
smc->sk.sk_err = -sock_intr_errno(timeo);
sock_put(&smc->sk); /* passive closing */
@@ -1260,9 +1583,29 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
goto out_err;
lock_sock(sk);
+ switch (sock->state) {
+ default:
+ rc = -EINVAL;
+ goto out;
+ case SS_CONNECTED:
+ rc = sk->sk_state == SMC_ACTIVE ? -EISCONN : -EINVAL;
+ goto out;
+ case SS_CONNECTING:
+ if (sk->sk_state == SMC_ACTIVE)
+ goto connected;
+ break;
+ case SS_UNCONNECTED:
+ sock->state = SS_CONNECTING;
+ break;
+ }
+
switch (sk->sk_state) {
default:
goto out;
+ case SMC_CLOSED:
+ rc = sock_error(sk) ? : -ECONNABORTED;
+ sock->state = SS_UNCONNECTED;
+ goto out;
case SMC_ACTIVE:
rc = -EISCONN;
goto out;
@@ -1280,21 +1623,25 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
if (rc && rc != -EINPROGRESS)
goto out;
- sock_hold(&smc->sk); /* sock put in passive closing */
- if (smc->use_fallback)
+ if (smc->use_fallback) {
+ sock->state = rc ? SS_CONNECTING : SS_CONNECTED;
goto out;
+ }
+ sock_hold(&smc->sk); /* sock put in passive closing */
if (flags & O_NONBLOCK) {
if (queue_work(smc_hs_wq, &smc->connect_work))
smc->connect_nonblock = 1;
rc = -EINPROGRESS;
+ goto out;
} else {
rc = __smc_connect(smc);
if (rc < 0)
goto out;
- else
- rc = 0; /* success cases including fallback */
}
+connected:
+ rc = 0;
+ sock->state = SS_CONNECTED;
out:
release_sock(sk);
out_err:
@@ -1341,6 +1688,19 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
* function; switch it back to the original sk_data_ready function
*/
new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready;
+
+ /* if new clcsock has also inherited the fallback-specific callback
+ * functions, switch them back to the original ones.
+ */
+ if (lsmc->use_fallback) {
+ if (lsmc->clcsk_state_change)
+ new_clcsock->sk->sk_state_change = lsmc->clcsk_state_change;
+ if (lsmc->clcsk_write_space)
+ new_clcsock->sk->sk_write_space = lsmc->clcsk_write_space;
+ if (lsmc->clcsk_error_report)
+ new_clcsock->sk->sk_error_report = lsmc->clcsk_error_report;
+ }
+
(*new_smc)->clcsock = new_clcsock;
out:
return rc;
@@ -1396,6 +1756,7 @@ struct sock *smc_accept_dequeue(struct sock *parent,
}
if (new_sock) {
sock_graft(new_sk, new_sock);
+ new_sock->state = SS_CONNECTED;
if (isk->use_fallback) {
smc_sk(new_sk)->clcsock->file = new_sock->file;
isk->clcsock->file->private_data = isk->clcsock;
@@ -1428,8 +1789,15 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
struct smc_llc_qentry *qentry;
int rc;
- if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
- return SMC_CLC_DECL_ERR_REGRMB;
+ /* reg the sndbuf if it was vzalloced */
+ if (smc->conn.sndbuf_desc->is_vm) {
+ if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc))
+ return SMC_CLC_DECL_ERR_REGBUF;
+ }
+
+ /* reg the rmb */
+ if (smcr_link_reg_buf(link, smc->conn.rmb_desc))
+ return SMC_CLC_DECL_ERR_REGBUF;
/* send CONFIRM LINK request to client over the RoCE fabric */
rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
@@ -1469,6 +1837,9 @@ static void smc_listen_out(struct smc_sock *new_smc)
struct smc_sock *lsmc = new_smc->listen_smc;
struct sock *newsmcsk = &new_smc->sk;
+ if (tcp_sk(new_smc->clcsock->sk)->syn_smc)
+ atomic_dec(&lsmc->queued_smc_hs);
+
if (lsmc->sk.sk_state == SMC_LISTEN) {
lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
smc_accept_enqueue(&lsmc->sk, newsmcsk);
@@ -1487,7 +1858,6 @@ static void smc_listen_out_connected(struct smc_sock *new_smc)
{
struct sock *newsmcsk = &new_smc->sk;
- sk_refcnt_debug_inc(newsmcsk);
if (newsmcsk->sk_state == SMC_INIT)
newsmcsk->sk_state = SMC_ACTIVE;
@@ -1514,11 +1884,12 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
{
/* RDMA setup failed, switch back to TCP */
smc_conn_abort(new_smc, local_first);
- if (reason_code < 0) { /* error, no fallback possible */
+ if (reason_code < 0 ||
+ smc_switch_to_fallback(new_smc, reason_code)) {
+ /* error, no fallback possible */
smc_listen_out_err(new_smc);
return;
}
- smc_switch_to_fallback(new_smc, reason_code);
if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
if (smc_clc_send_decline(new_smc, reason_code, version) < 0) {
smc_listen_out_err(new_smc);
@@ -1784,10 +2155,15 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
struct smc_connection *conn = &new_smc->conn;
if (!local_first) {
+ /* reg sendbufs if they were vzalloced */
+ if (conn->sndbuf_desc->is_vm) {
+ if (smcr_lgr_reg_sndbufs(conn->lnk,
+ conn->sndbuf_desc))
+ return SMC_CLC_DECL_ERR_REGBUF;
+ }
if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
- return SMC_CLC_DECL_ERR_REGRMB;
+ return SMC_CLC_DECL_ERR_REGBUF;
}
- smc_rmb_sync_sg_for_device(&new_smc->conn);
return 0;
}
@@ -1835,6 +2211,7 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
not_found:
ini->smcr_version &= ~SMC_V2;
+ ini->smcrv2.ib_dev_v2 = NULL;
ini->check_smcrv2 = false;
}
@@ -1960,8 +2337,11 @@ static void smc_listen_work(struct work_struct *work)
/* check if peer is smc capable */
if (!tcp_sk(newclcsock->sk)->syn_smc) {
- smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
- smc_listen_out_connected(new_smc);
+ rc = smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
+ if (rc)
+ smc_listen_out_err(new_smc);
+ else
+ smc_listen_out_connected(new_smc);
return;
}
@@ -2070,6 +2450,9 @@ static void smc_tcp_listen_work(struct work_struct *work)
if (!new_smc)
continue;
+ if (tcp_sk(new_smc->clcsock->sk)->syn_smc)
+ atomic_inc(&lsmc->queued_smc_hs);
+
new_smc->listen_smc = lsmc;
new_smc->use_fallback = lsmc->use_fallback;
new_smc->fallback_rsn = lsmc->fallback_rsn;
@@ -2092,16 +2475,18 @@ static void smc_clcsock_data_ready(struct sock *listen_clcsock)
{
struct smc_sock *lsmc;
- lsmc = (struct smc_sock *)
- ((uintptr_t)listen_clcsock->sk_user_data & ~SK_USER_DATA_NOCOPY);
+ read_lock_bh(&listen_clcsock->sk_callback_lock);
+ lsmc = smc_clcsock_user_data(listen_clcsock);
if (!lsmc)
- return;
+ goto out;
lsmc->clcsk_data_ready(listen_clcsock);
if (lsmc->sk.sk_state == SMC_LISTEN) {
sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */
- if (!queue_work(smc_hs_wq, &lsmc->tcp_listen_work))
+ if (!queue_work(smc_tcp_ls_wq, &lsmc->tcp_listen_work))
sock_put(&lsmc->sk);
}
+out:
+ read_unlock_bh(&listen_clcsock->sk_callback_lock);
}
static int smc_listen(struct socket *sock, int backlog)
@@ -2115,7 +2500,7 @@ static int smc_listen(struct socket *sock, int backlog)
rc = -EINVAL;
if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
- smc->connect_nonblock)
+ smc->connect_nonblock || sock->state != SS_UNCONNECTED)
goto out;
rc = 0;
@@ -2133,13 +2518,31 @@ static int smc_listen(struct socket *sock, int backlog)
/* save original sk_data_ready function and establish
* smc-specific sk_data_ready function
*/
- smc->clcsk_data_ready = smc->clcsock->sk->sk_data_ready;
- smc->clcsock->sk->sk_data_ready = smc_clcsock_data_ready;
+ write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
smc->clcsock->sk->sk_user_data =
(void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
+ smc_clcsock_replace_cb(&smc->clcsock->sk->sk_data_ready,
+ smc_clcsock_data_ready, &smc->clcsk_data_ready);
+ write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
+
+ /* save original ops */
+ smc->ori_af_ops = inet_csk(smc->clcsock->sk)->icsk_af_ops;
+
+ smc->af_ops = *smc->ori_af_ops;
+ smc->af_ops.syn_recv_sock = smc_tcp_syn_recv_sock;
+
+ inet_csk(smc->clcsock->sk)->icsk_af_ops = &smc->af_ops;
+
+ if (smc->limit_smc_hs)
+ tcp_sk(smc->clcsock->sk)->smc_hs_congested = smc_hs_congested;
+
rc = kernel_listen(smc->clcsock, backlog);
if (rc) {
- smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
+ write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
+ smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
+ &smc->clcsk_data_ready);
+ smc->clcsock->sk->sk_user_data = NULL;
+ write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
goto out;
}
sk->sk_max_ack_backlog = backlog;
@@ -2250,7 +2653,9 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (msg->msg_flags & MSG_FASTOPEN) {
if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
- smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
+ rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
+ if (rc)
+ goto out;
} else {
rc = -EINVAL;
goto out;
@@ -2387,6 +2792,17 @@ static int smc_shutdown(struct socket *sock, int how)
lock_sock(sk);
+ if (sock->state == SS_CONNECTING) {
+ if (sk->sk_state == SMC_ACTIVE)
+ sock->state = SS_CONNECTED;
+ else if (sk->sk_state == SMC_PEERCLOSEWAIT1 ||
+ sk->sk_state == SMC_PEERCLOSEWAIT2 ||
+ sk->sk_state == SMC_APPCLOSEWAIT1 ||
+ sk->sk_state == SMC_APPCLOSEWAIT2 ||
+ sk->sk_state == SMC_APPFINCLOSEWAIT)
+ sock->state = SS_DISCONNECTING;
+ }
+
rc = -ENOTCONN;
if ((sk->sk_state != SMC_ACTIVE) &&
(sk->sk_state != SMC_PEERCLOSEWAIT1) &&
@@ -2398,8 +2814,11 @@ static int smc_shutdown(struct socket *sock, int how)
if (smc->use_fallback) {
rc = kernel_sock_shutdown(smc->clcsock, how);
sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
- if (sk->sk_shutdown == SHUTDOWN_MASK)
+ if (sk->sk_shutdown == SHUTDOWN_MASK) {
sk->sk_state = SMC_CLOSED;
+ sk->sk_socket->state = SS_UNCONNECTED;
+ sock_put(sk);
+ }
goto out;
}
switch (how) {
@@ -2423,11 +2842,80 @@ static int smc_shutdown(struct socket *sock, int how)
/* map sock_shutdown_cmd constants to sk_shutdown value range */
sk->sk_shutdown |= how + 1;
+ if (sk->sk_state == SMC_CLOSED)
+ sock->state = SS_UNCONNECTED;
+ else
+ sock->state = SS_DISCONNECTING;
out:
release_sock(sk);
return rc ? rc : rc1;
}
+static int __smc_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ struct smc_sock *smc;
+ int val, len;
+
+ smc = smc_sk(sock->sk);
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ len = min_t(int, len, sizeof(int));
+
+ if (len < 0)
+ return -EINVAL;
+
+ switch (optname) {
+ case SMC_LIMIT_HS:
+ val = smc->limit_smc_hs;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &val, len))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int __smc_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
+{
+ struct sock *sk = sock->sk;
+ struct smc_sock *smc;
+ int val, rc;
+
+ smc = smc_sk(sk);
+
+ lock_sock(sk);
+ switch (optname) {
+ case SMC_LIMIT_HS:
+ if (optlen < sizeof(int)) {
+ rc = -EINVAL;
+ break;
+ }
+ if (copy_from_sockptr(&val, optval, sizeof(int))) {
+ rc = -EFAULT;
+ break;
+ }
+
+ smc->limit_smc_hs = !!val;
+ rc = 0;
+ break;
+ default:
+ rc = -EOPNOTSUPP;
+ break;
+ }
+ release_sock(sk);
+
+ return rc;
+}
+
static int smc_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -2437,12 +2925,19 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
if (level == SOL_TCP && optname == TCP_ULP)
return -EOPNOTSUPP;
+ else if (level == SOL_SMC)
+ return __smc_setsockopt(sock, level, optname, optval, optlen);
smc = smc_sk(sk);
/* generic setsockopts reaching us here always apply to the
* CLC socket
*/
+ mutex_lock(&smc->clcsock_release_lock);
+ if (!smc->clcsock) {
+ mutex_unlock(&smc->clcsock_release_lock);
+ return -EBADF;
+ }
if (unlikely(!smc->clcsock->ops->setsockopt))
rc = -EOPNOTSUPP;
else
@@ -2452,6 +2947,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
sk->sk_err = smc->clcsock->sk->sk_err;
sk_error_report(sk);
}
+ mutex_unlock(&smc->clcsock_release_lock);
if (optlen < sizeof(int))
return -EINVAL;
@@ -2468,7 +2964,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
case TCP_FASTOPEN_NO_COOKIE:
/* option not supported by SMC */
if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
- smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
+ rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
} else {
rc = -EINVAL;
}
@@ -2479,8 +2975,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
sk->sk_state != SMC_CLOSED) {
if (val) {
SMC_STAT_INC(smc, ndly_cnt);
- mod_delayed_work(smc->conn.lgr->tx_wq,
- &smc->conn.tx_work, 0);
+ smc_tx_pending(&smc->conn);
+ cancel_delayed_work(&smc->conn.tx_work);
}
}
break;
@@ -2490,8 +2986,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
sk->sk_state != SMC_CLOSED) {
if (!val) {
SMC_STAT_INC(smc, cork_cnt);
- mod_delayed_work(smc->conn.lgr->tx_wq,
- &smc->conn.tx_work, 0);
+ smc_tx_pending(&smc->conn);
+ cancel_delayed_work(&smc->conn.tx_work);
}
}
break;
@@ -2511,13 +3007,26 @@ static int smc_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
{
struct smc_sock *smc;
+ int rc;
+
+ if (level == SOL_SMC)
+ return __smc_getsockopt(sock, level, optname, optval, optlen);
smc = smc_sk(sock->sk);
+ mutex_lock(&smc->clcsock_release_lock);
+ if (!smc->clcsock) {
+ mutex_unlock(&smc->clcsock_release_lock);
+ return -EBADF;
+ }
/* socket options apply to the CLC socket */
- if (unlikely(!smc->clcsock->ops->getsockopt))
+ if (unlikely(!smc->clcsock->ops->getsockopt)) {
+ mutex_unlock(&smc->clcsock_release_lock);
return -EOPNOTSUPP;
- return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
- optval, optlen);
+ }
+ rc = smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
+ optval, optlen);
+ mutex_unlock(&smc->clcsock_release_lock);
+ return rc;
}
static int smc_ioctl(struct socket *sock, unsigned int cmd,
@@ -2619,8 +3128,10 @@ static ssize_t smc_sendpage(struct socket *sock, struct page *page,
rc = kernel_sendpage(smc->clcsock, page, offset,
size, flags);
} else {
+ lock_sock(sk);
+ rc = smc_tx_sendpage(smc, page, offset, size, flags);
+ release_sock(sk);
SMC_STAT_INC(smc, sendpage_cnt);
- rc = sock_no_sendpage(sock, page, offset, size, flags);
}
out:
@@ -2720,6 +3231,7 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
rc = -ENOBUFS;
sock->ops = &smc_sock_ops;
+ sock->state = SS_UNCONNECTED;
sk = smc_sock_alloc(net, sock, protocol);
if (!sk)
goto out;
@@ -2729,6 +3241,9 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
smc->use_fallback = false; /* assume rdma capability first */
smc->fallback_rsn = 0;
+ /* default behavior from limit_smc_hs in every net namespace */
+ smc->limit_smc_hs = net->smc.limit_smc_hs;
+
rc = 0;
if (!clcsock) {
rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
@@ -2741,9 +3256,6 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
smc->clcsock = clcsock;
}
- smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
- smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
-
out:
return rc;
}
@@ -2822,11 +3334,17 @@ unsigned int smc_net_id;
static __net_init int smc_net_init(struct net *net)
{
+ int rc;
+
+ rc = smc_sysctl_net_init(net);
+ if (rc)
+ return rc;
return smc_pnet_net_init(net);
}
static void __net_exit smc_net_exit(struct net *net)
{
+ smc_sysctl_net_exit(net);
smc_pnet_net_exit(net);
}
@@ -2862,23 +3380,28 @@ static int __init smc_init(void)
rc = register_pernet_subsys(&smc_net_stat_ops);
if (rc)
- return rc;
+ goto out_pernet_subsys;
smc_ism_init();
smc_clc_init();
rc = smc_nl_init();
if (rc)
- goto out_pernet_subsys;
+ goto out_pernet_subsys_stat;
rc = smc_pnet_init();
if (rc)
goto out_nl;
rc = -ENOMEM;
+
+ smc_tcp_ls_wq = alloc_workqueue("smc_tcp_ls_wq", 0, 0);
+ if (!smc_tcp_ls_wq)
+ goto out_pnet;
+
smc_hs_wq = alloc_workqueue("smc_hs_wq", 0, 0);
if (!smc_hs_wq)
- goto out_pnet;
+ goto out_alloc_tcp_ls_wq;
smc_close_wq = alloc_workqueue("smc_close_wq", 0, 0);
if (!smc_close_wq)
@@ -2931,12 +3454,14 @@ static int __init smc_init(void)
rc = tcp_register_ulp(&smc_ulp_ops);
if (rc) {
pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc);
- goto out_sock;
+ goto out_ib;
}
static_branch_enable(&tcp_have_smc);
return 0;
+out_ib:
+ smc_ib_unregister_client();
out_sock:
sock_unregister(PF_SMC);
out_proto6:
@@ -2949,10 +3474,14 @@ out_alloc_wqs:
destroy_workqueue(smc_close_wq);
out_alloc_hs_wq:
destroy_workqueue(smc_hs_wq);
+out_alloc_tcp_ls_wq:
+ destroy_workqueue(smc_tcp_ls_wq);
out_pnet:
smc_pnet_exit();
out_nl:
smc_nl_exit();
+out_pernet_subsys_stat:
+ unregister_pernet_subsys(&smc_net_stat_ops);
out_pernet_subsys:
unregister_pernet_subsys(&smc_net_ops);
@@ -2967,6 +3496,7 @@ static void __exit smc_exit(void)
smc_core_exit();
smc_ib_unregister_client();
destroy_workqueue(smc_close_wq);
+ destroy_workqueue(smc_tcp_ls_wq);
destroy_workqueue(smc_hs_wq);
proto_unregister(&smc_proto6);
proto_unregister(&smc_proto);
@@ -2986,3 +3516,4 @@ MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);
MODULE_ALIAS_TCP_ULP("smc");
+MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 1a4fc1c6c4ab..5ed765ea0c73 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -14,6 +14,7 @@
#include <linux/socket.h>
#include <linux/types.h>
#include <linux/compiler.h> /* __aligned */
+#include <net/genetlink.h>
#include <net/sock.h>
#include "smc_ib.h"
@@ -28,6 +29,7 @@
#define SMC_MAX_ISM_DEVS 8 /* max # of proposed non-native ISM
* devices
*/
+#define SMC_AUTOCORKING_DEFAULT_SIZE 0x10000 /* 64K by default */
extern struct proto smc_proto;
extern struct proto smc_proto6;
@@ -139,6 +141,12 @@ enum smc_urg_state {
SMC_URG_READ = 3, /* data was already read */
};
+struct smc_mark_woken {
+ bool woken;
+ void *key;
+ wait_queue_entry_t wait_entry;
+};
+
struct smc_connection {
struct rb_node alert_node;
struct smc_link_group *lgr; /* link group of connection */
@@ -185,6 +193,7 @@ struct smc_connection {
* - dec on polled tx cqe
*/
wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/
+ atomic_t tx_pushing; /* nr_threads trying tx push */
struct delayed_work tx_work; /* retry of smc_cdc_msg_send */
u32 tx_off; /* base offset in peer rmb */
@@ -204,6 +213,10 @@ struct smc_connection {
* data still pending
*/
char urg_rx_byte; /* urgent byte */
+ bool tx_in_release_sock;
+ /* flush pending tx data in
+ * sock release_cb()
+ */
atomic_t bytes_to_rcv; /* arrived data,
* not yet received
*/
@@ -221,14 +234,21 @@ struct smc_connection {
*/
u64 peer_token; /* SMC-D token of peer */
u8 killed : 1; /* abnormal termination */
+ u8 freed : 1; /* normal termination */
u8 out_of_sync : 1; /* out of sync with peer */
};
struct smc_sock { /* smc sock container */
struct sock sk;
struct socket *clcsock; /* internal tcp socket */
+ void (*clcsk_state_change)(struct sock *sk);
+ /* original state_change fct. */
void (*clcsk_data_ready)(struct sock *sk);
- /* original data_ready fct. **/
+ /* original data_ready fct. */
+ void (*clcsk_write_space)(struct sock *sk);
+ /* original write_space fct. */
+ void (*clcsk_error_report)(struct sock *sk);
+ /* original error_report fct. */
struct smc_connection conn; /* smc connection */
struct smc_sock *listen_smc; /* listen parent */
struct work_struct connect_work; /* handle non-blocking connect*/
@@ -236,9 +256,14 @@ struct smc_sock { /* smc sock container */
struct work_struct smc_listen_work;/* prepare new accept socket */
struct list_head accept_q; /* sockets to be accepted */
spinlock_t accept_q_lock; /* protects accept_q */
+ bool limit_smc_hs; /* put constraint on handshake */
bool use_fallback; /* fallback to tcp */
int fallback_rsn; /* reason for fallback */
u32 peer_diagnosis; /* decline reason from peer */
+ atomic_t queued_smc_hs; /* queued smc handshakes */
+ struct inet_connection_sock_af_ops af_ops;
+ const struct inet_connection_sock_af_ops *ori_af_ops;
+ /* original af ops */
int sockopt_defer_accept;
/* sockopt TCP_DEFER_ACCEPT
* value
@@ -263,6 +288,41 @@ static inline struct smc_sock *smc_sk(const struct sock *sk)
return (struct smc_sock *)sk;
}
+static inline void smc_init_saved_callbacks(struct smc_sock *smc)
+{
+ smc->clcsk_state_change = NULL;
+ smc->clcsk_data_ready = NULL;
+ smc->clcsk_write_space = NULL;
+ smc->clcsk_error_report = NULL;
+}
+
+static inline struct smc_sock *smc_clcsock_user_data(const struct sock *clcsk)
+{
+ return (struct smc_sock *)
+ ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY);
+}
+
+/* save target_cb in saved_cb, and replace target_cb with new_cb */
+static inline void smc_clcsock_replace_cb(void (**target_cb)(struct sock *),
+ void (*new_cb)(struct sock *),
+ void (**saved_cb)(struct sock *))
+{
+ /* only save once */
+ if (!*saved_cb)
+ *saved_cb = *target_cb;
+ *target_cb = new_cb;
+}
+
+/* restore target_cb to saved_cb, and reset saved_cb to NULL */
+static inline void smc_clcsock_restore_cb(void (**target_cb)(struct sock *),
+ void (**saved_cb)(struct sock *))
+{
+ if (!*saved_cb)
+ return;
+ *target_cb = *saved_cb;
+ *saved_cb = NULL;
+}
+
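Callers pair these helpers under sk_callback_lock, as in the af_smc.c hunks above; condensed:

	write_lock_bh(&clcsk->sk_callback_lock);
	smc_clcsock_replace_cb(&clcsk->sk_data_ready, smc_fback_data_ready,
			       &smc->clcsk_data_ready);
	write_unlock_bh(&clcsk->sk_callback_lock);

	/* ... and on teardown ... */
	write_lock_bh(&clcsk->sk_callback_lock);
	smc_clcsock_restore_cb(&clcsk->sk_data_ready,
			       &smc->clcsk_data_ready);
	write_unlock_bh(&clcsk->sk_callback_lock);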
extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */
extern struct workqueue_struct *smc_close_wq; /* wq for close work */
@@ -312,4 +372,9 @@ void smc_fill_gid_list(struct smc_link_group *lgr,
struct smc_gidlist *gidlist,
struct smc_ib_device *known_dev, u8 *known_gid);
+/* smc handshake limitation interface for netlink */
+int smc_nl_dump_hs_limitation(struct sk_buff *skb, struct netlink_callback *cb);
+int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct genl_info *info);
+int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info);
+
#endif /* __SMC_H */
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 84c8a4374fdd..53f63bfbaf5f 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -48,9 +48,19 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
conn->tx_cdc_seq_fin = cdcpend->ctrl_seq;
}
- if (atomic_dec_and_test(&conn->cdc_pend_tx_wr) &&
- unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq)))
- wake_up(&conn->cdc_pend_tx_wq);
+ if (atomic_dec_and_test(&conn->cdc_pend_tx_wr)) {
+ /* If the user owns the sock_lock, mark the connection as needing a
+ * send; user context will retry the send when it releases the lock
+ * in smc_release_cb().
+ */
+ if (sock_owned_by_user(&smc->sk))
+ conn->tx_in_release_sock = true;
+ else
+ smc_tx_pending(conn);
+
+ if (unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq)))
+ wake_up(&conn->cdc_pend_tx_wq);
+ }
WARN_ON(atomic_read(&conn->cdc_pend_tx_wr) < 0);
smc_tx_sndbuf_nonfull(smc);
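This is the producer side of the deferral consumed by smc_release_cb(): BH context cannot take the sock lock, so it flags the pending transmit and lets the lock owner flush it. The core path that eventually runs the callback, sketched from net/core/sock.c:

void release_sock(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);		/* drain backlog first */
	if (sk->sk_prot->release_cb)
		sk->sk_prot->release_cb(sk);	/* smc_release_cb() here */
	/* ... clear owner, wake waiters, unlock ... */
}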
@@ -72,7 +82,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
/* abnormal termination */
if (!rc)
smc_wr_tx_put_slot(link,
- (struct smc_wr_tx_pend_priv *)pend);
+ (struct smc_wr_tx_pend_priv *)(*pend));
rc = -EPIPE;
}
return rc;
@@ -197,7 +207,8 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
{
int rc;
- if (!conn->lgr || (conn->lgr->is_smcd && conn->lgr->peer_shutdown))
+ if (!smc_conn_lgr_valid(conn) ||
+ (conn->lgr->is_smcd && conn->lgr->peer_shutdown))
return -EPIPE;
if (conn->lgr->is_smcd) {
@@ -349,8 +360,12 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
/* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
if ((diff_cons && smc_tx_prepared_sends(conn)) ||
conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
- conn->local_rx_ctrl.prod_flags.urg_data_pending)
- smc_tx_sndbuf_nonempty(conn);
+ conn->local_rx_ctrl.prod_flags.urg_data_pending) {
+ if (!sock_owned_by_user(&smc->sk))
+ smc_tx_pending(conn);
+ else
+ conn->tx_in_release_sock = true;
+ }
if (diff_cons && conn->urg_tx_pend &&
atomic_read(&conn->peer_rmbe_space) == conn->peer_rmbe_size) {
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 6be95a2a7b25..1472f31480d8 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -191,7 +191,8 @@ static int smc_nl_ueid_dumpinfo(struct sk_buff *skb, u32 portid, u32 seq,
flags, SMC_NETLINK_DUMP_UEID);
if (!hdr)
return -ENOMEM;
- snprintf(ueid_str, sizeof(ueid_str), "%s", ueid);
+ memcpy(ueid_str, ueid, SMC_MAX_EID_LEN);
+ ueid_str[SMC_MAX_EID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_EID_TABLE_ENTRY, ueid_str)) {
genlmsg_cancel(skb, hdr);
return -EMSGSIZE;
@@ -252,7 +253,8 @@ int smc_nl_dump_seid(struct sk_buff *skb, struct netlink_callback *cb)
goto end;
smc_ism_get_system_eid(&seid);
- snprintf(seid_str, sizeof(seid_str), "%s", seid);
+ memcpy(seid_str, seid, SMC_MAX_EID_LEN);
+ seid_str[SMC_MAX_EID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_SEID_ENTRY, seid_str))
goto err;
read_lock(&smc_clc_eid_table.lock);
@@ -774,7 +776,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version)
dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX;
dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ?
SMC_FIRST_CONTACT_MASK : 0;
- if ((!smc->conn.lgr || !smc->conn.lgr->is_smcd) &&
+ if ((!smc_conn_lgr_valid(&smc->conn) || !smc->conn.lgr->is_smcd) &&
smc_ib_is_valid_local_systemid())
memcpy(dclc.id_for_peer, local_systemid,
sizeof(local_systemid));
@@ -1032,7 +1034,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
ETH_ALEN);
hton24(clc->r0.qpn, link->roce_qp->qp_num);
clc->r0.rmb_rkey =
- htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
+ htonl(conn->rmb_desc->mr[link->link_idx]->rkey);
clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
clc->r0.rmbe_alert_token = htonl(conn->alert_token_local);
switch (clc->hdr.type) {
@@ -1044,8 +1046,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
break;
}
clc->r0.rmbe_size = conn->rmbe_size_short;
- clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
- (conn->rmb_desc->sgt[link->link_idx].sgl));
+ clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ?
+ cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) :
+ cpu_to_be64((u64)sg_dma_address
+ (conn->rmb_desc->sgt[link->link_idx].sgl));
hton24(clc->r0.psn, link->psn_initial);
if (version == SMC_V1) {
clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 83f02f131fc0..5fee545c9a10 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -62,7 +62,7 @@
#define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */
#define SMC_CLC_DECL_ERR_RTOK 0x09990001 /* rtoken handling failed */
#define SMC_CLC_DECL_ERR_RDYLNK 0x09990002 /* ib ready link failed */
-#define SMC_CLC_DECL_ERR_REGRMB 0x09990003 /* reg rmb failed */
+#define SMC_CLC_DECL_ERR_REGBUF 0x09990003 /* reg rdma bufs failed */
#define SMC_FIRST_CONTACT_MASK 0b10 /* first contact bit within typev2 */
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 292e4d904ab6..31db7438857c 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -57,6 +57,9 @@ static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
if (!smc_tx_prepared_sends(&smc->conn))
return;
+ /* Send out corked data remaining in sndbuf */
+ smc_tx_pending(&smc->conn);
+
smc->wait_close_tx_prepared = 1;
add_wait_queue(sk_sleep(sk), &wait);
while (!signal_pending(current) && timeout) {
@@ -211,8 +214,11 @@ again:
sk->sk_state = SMC_CLOSED;
sk->sk_state_change(sk); /* wake up accept */
if (smc->clcsock && smc->clcsock->sk) {
- smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
+ write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
+ smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
+ &smc->clcsk_data_ready);
smc->clcsock->sk->sk_user_data = NULL;
+ write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
}
smc_close_cleanup_listen(sk);
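
The close path now swaps the clcsock callback back under sk_callback_lock through a restore helper instead of a bare assignment, so a concurrent user of the callback never observes a half-updated pointer pair. A sketch of the save/restore helper pair this implies (assumed shape, inferred from the call sites in this patch):

static void smc_clcsock_replace_cb(void (**target_cb)(struct sock *),
				   void (*new_cb)(struct sock *),
				   void (**saved_cb)(struct sock *))
{
	/* save the original callback only once */
	if (!*saved_cb)
		*saved_cb = *target_cb;
	*target_cb = new_cb;
}

static void smc_clcsock_restore_cb(void (**target_cb)(struct sock *),
				   void (**saved_cb)(struct sock *))
{
	if (!*saved_cb)
		return;		/* nothing was replaced */
	*target_cb = *saved_cb;
	*saved_cb = NULL;
}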
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 8935ef4811b0..c305d8dd23f8 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -211,14 +211,13 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
- if (!lgr)
+ if (!smc_conn_lgr_valid(conn))
return;
write_lock_bh(&lgr->conns_lock);
if (conn->alert_token_local) {
__smc_lgr_unregister_conn(conn);
}
write_unlock_bh(&lgr->conns_lock);
- conn->lgr = NULL;
}
int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
@@ -348,6 +347,8 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr,
goto errattr;
if (nla_put_u8(skb, SMC_NLA_LGR_R_TYPE, lgr->type))
goto errattr;
+ if (nla_put_u8(skb, SMC_NLA_LGR_R_BUF_TYPE, lgr->buf_type))
+ goto errattr;
if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id))
goto errattr;
if (nla_put_u64_64bit(skb, SMC_NLA_LGR_R_NET_COOKIE,
@@ -749,10 +750,14 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
}
get_device(&lnk->smcibdev->ibdev->dev);
atomic_inc(&lnk->smcibdev->lnk_cnt);
+ refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */
+ lnk->clearing = 0;
lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
lnk->link_id = smcr_next_link_id(lgr);
lnk->lgr = lgr;
+ smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */
lnk->link_idx = link_idx;
+ lnk->wr_rx_id_compl = 0;
smc_ibdev_cnt_inc(lnk);
smcr_copy_dev_info_to_link(lnk);
atomic_set(&lnk->conn_cnt, 0);
@@ -806,6 +811,7 @@ out:
lnk->state = SMC_LNK_UNUSED;
if (!atomic_dec_return(&smcibdev->lnk_cnt))
wake_up(&smcibdev->lnks_deleted);
+ smc_lgr_put(lgr); /* lgr_hold above */
return rc;
}
@@ -844,6 +850,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->terminating = 0;
lgr->freeing = 0;
lgr->vlan_id = ini->vlan_id;
+ refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
mutex_init(&lgr->sndbufs_lock);
mutex_init(&lgr->rmbs_lock);
rwlock_init(&lgr->conns_lock);
@@ -889,7 +896,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
}
memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
SMC_MAX_PNETID_LEN);
- if (smc_wr_alloc_lgr_mem(lgr))
+ rc = smc_wr_alloc_lgr_mem(lgr);
+ if (rc)
goto free_wq;
smc_llc_lgr_init(lgr, smc);
@@ -903,6 +911,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->net = smc_ib_net(lnk->smcibdev);
lgr_list = &smc_lgr_list.list;
lgr_lock = &smc_lgr_list.lock;
+ lgr->buf_type = lgr->net->smc.sysctl_smcr_buf_type;
atomic_inc(&lgr_cnt);
}
smc->conn.lgr = lgr;
@@ -996,8 +1005,12 @@ void smc_switch_link_and_count(struct smc_connection *conn,
struct smc_link *to_lnk)
{
atomic_dec(&conn->lnk->conn_cnt);
+ /* link_hold in smc_conn_create() */
+ smcr_link_put(conn->lnk);
conn->lnk = to_lnk;
atomic_inc(&conn->lnk->conn_cnt);
+ /* link_put in smc_conn_free() */
+ smcr_link_hold(conn->lnk);
}
struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
@@ -1078,34 +1091,37 @@ err_out:
return NULL;
}
-static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
+static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
struct smc_link_group *lgr)
{
+ struct mutex *lock; /* lock buffer list */
int rc;
- if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
+ if (is_rmb && buf_desc->is_conf_rkey && !list_empty(&lgr->list)) {
/* unregister rmb with peer */
rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
if (!rc) {
/* protect against smc_llc_cli_rkey_exchange() */
mutex_lock(&lgr->llc_conf_mutex);
- smc_llc_do_delete_rkey(lgr, rmb_desc);
- rmb_desc->is_conf_rkey = false;
+ smc_llc_do_delete_rkey(lgr, buf_desc);
+ buf_desc->is_conf_rkey = false;
mutex_unlock(&lgr->llc_conf_mutex);
smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
}
}
- if (rmb_desc->is_reg_err) {
+ if (buf_desc->is_reg_err) {
/* buf registration failed, reuse not possible */
- mutex_lock(&lgr->rmbs_lock);
- list_del(&rmb_desc->list);
- mutex_unlock(&lgr->rmbs_lock);
+ lock = is_rmb ? &lgr->rmbs_lock :
+ &lgr->sndbufs_lock;
+ mutex_lock(lock);
+ list_del(&buf_desc->list);
+ mutex_unlock(lock);
- smc_buf_free(lgr, true, rmb_desc);
+ smc_buf_free(lgr, is_rmb, buf_desc);
} else {
- rmb_desc->used = 0;
- memset(rmb_desc->cpu_addr, 0, rmb_desc->len);
+ buf_desc->used = 0;
+ memset(buf_desc->cpu_addr, 0, buf_desc->len);
}
}
@@ -1113,15 +1129,23 @@ static void smc_buf_unuse(struct smc_connection *conn,
struct smc_link_group *lgr)
{
if (conn->sndbuf_desc) {
- conn->sndbuf_desc->used = 0;
- memset(conn->sndbuf_desc->cpu_addr, 0, conn->sndbuf_desc->len);
+ if (!lgr->is_smcd && conn->sndbuf_desc->is_vm) {
+ smcr_buf_unuse(conn->sndbuf_desc, false, lgr);
+ } else {
+ conn->sndbuf_desc->used = 0;
+ memset(conn->sndbuf_desc->cpu_addr, 0,
+ conn->sndbuf_desc->len);
+ }
}
- if (conn->rmb_desc && lgr->is_smcd) {
- conn->rmb_desc->used = 0;
- memset(conn->rmb_desc->cpu_addr, 0, conn->rmb_desc->len +
- sizeof(struct smcd_cdc_msg));
- } else if (conn->rmb_desc) {
- smcr_buf_unuse(conn->rmb_desc, lgr);
+ if (conn->rmb_desc) {
+ if (!lgr->is_smcd) {
+ smcr_buf_unuse(conn->rmb_desc, true, lgr);
+ } else {
+ conn->rmb_desc->used = 0;
+ memset(conn->rmb_desc->cpu_addr, 0,
+ conn->rmb_desc->len +
+ sizeof(struct smcd_cdc_msg));
+ }
}
}
@@ -1130,8 +1154,19 @@ void smc_conn_free(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
- if (!lgr)
+ if (!lgr || conn->freed)
+ /* Connection has never been registered in a
+ * link group, or has already been freed.
+ */
return;
+
+ conn->freed = 1;
+ if (!smc_conn_lgr_valid(conn))
+ /* Connection has already unregistered from
+ * link group.
+ */
+ goto lgr_put;
+
if (lgr->is_smcd) {
if (!list_empty(&lgr->list))
smc_ism_unset_conn(conn);
@@ -1142,32 +1177,37 @@ void smc_conn_free(struct smc_connection *conn)
cancel_work_sync(&conn->abort_work);
}
if (!list_empty(&lgr->list)) {
- smc_lgr_unregister_conn(conn);
smc_buf_unuse(conn, lgr); /* allow buffer reuse */
+ smc_lgr_unregister_conn(conn);
}
if (!lgr->conns_num)
smc_lgr_schedule_free_work(lgr);
+lgr_put:
+ if (!lgr->is_smcd)
+ smcr_link_put(conn->lnk); /* link_hold in smc_conn_create() */
+ smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */
}
/* unregister a link from a buf_desc */
static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
struct smc_link *lnk)
{
- if (is_rmb)
+ if (is_rmb || buf_desc->is_vm)
buf_desc->is_reg_mr[lnk->link_idx] = false;
if (!buf_desc->is_map_ib[lnk->link_idx])
return;
- if (is_rmb) {
- if (buf_desc->mr_rx[lnk->link_idx]) {
- smc_ib_put_memory_region(
- buf_desc->mr_rx[lnk->link_idx]);
- buf_desc->mr_rx[lnk->link_idx] = NULL;
- }
+
+ if ((is_rmb || buf_desc->is_vm) &&
+ buf_desc->mr[lnk->link_idx]) {
+ smc_ib_put_memory_region(buf_desc->mr[lnk->link_idx]);
+ buf_desc->mr[lnk->link_idx] = NULL;
+ }
+ if (is_rmb)
smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
- } else {
+ else
smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
- }
+
sg_free_table(&buf_desc->sgt[lnk->link_idx]);
buf_desc->is_map_ib[lnk->link_idx] = false;
}
@@ -1203,13 +1243,29 @@ static void smcr_rtoken_clear_link(struct smc_link *lnk)
}
}
-/* must be called under lgr->llc_conf_mutex lock */
-void smcr_link_clear(struct smc_link *lnk, bool log)
+static void __smcr_link_clear(struct smc_link *lnk)
{
+ struct smc_link_group *lgr = lnk->lgr;
struct smc_ib_device *smcibdev;
- if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
+ smc_wr_free_link_mem(lnk);
+ smc_ibdev_cnt_dec(lnk);
+ put_device(&lnk->smcibdev->ibdev->dev);
+ smcibdev = lnk->smcibdev;
+ memset(lnk, 0, sizeof(struct smc_link));
+ lnk->state = SMC_LNK_UNUSED;
+ if (!atomic_dec_return(&smcibdev->lnk_cnt))
+ wake_up(&smcibdev->lnks_deleted);
+ smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */
+}
+
+/* must be called under lgr->llc_conf_mutex lock */
+void smcr_link_clear(struct smc_link *lnk, bool log)
+{
+ if (!lnk->lgr || lnk->clearing ||
+ lnk->state == SMC_LNK_UNUSED)
return;
+ lnk->clearing = 1;
lnk->peer_qpn = 0;
smc_llc_link_clear(lnk, log);
smcr_buf_unmap_lgr(lnk);
@@ -1218,14 +1274,18 @@ void smcr_link_clear(struct smc_link *lnk, bool log)
smc_wr_free_link(lnk);
smc_ib_destroy_queue_pair(lnk);
smc_ib_dealloc_protection_domain(lnk);
- smc_wr_free_link_mem(lnk);
- smc_ibdev_cnt_dec(lnk);
- put_device(&lnk->smcibdev->ibdev->dev);
- smcibdev = lnk->smcibdev;
- memset(lnk, 0, sizeof(struct smc_link));
- lnk->state = SMC_LNK_UNUSED;
- if (!atomic_dec_return(&smcibdev->lnk_cnt))
- wake_up(&smcibdev->lnks_deleted);
+ smcr_link_put(lnk); /* theoretically last link_put */
+}
+
+void smcr_link_hold(struct smc_link *lnk)
+{
+ refcount_inc(&lnk->refcnt);
+}
+
+void smcr_link_put(struct smc_link *lnk)
+{
+ if (refcount_dec_and_test(&lnk->refcnt))
+ __smcr_link_clear(lnk);
}
static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
@@ -1236,8 +1296,10 @@ static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);
- if (buf_desc->pages)
+ if (!buf_desc->is_vm && buf_desc->pages)
__free_pages(buf_desc->pages, buf_desc->order);
+ else if (buf_desc->is_vm && buf_desc->cpu_addr)
+ vfree(buf_desc->cpu_addr);
kfree(buf_desc);
}
@@ -1290,6 +1352,21 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
__smc_lgr_free_bufs(lgr, true);
}
+/* won't be freed until no one accesses lgr anymore */
+static void __smc_lgr_free(struct smc_link_group *lgr)
+{
+ smc_lgr_free_bufs(lgr);
+ if (lgr->is_smcd) {
+ if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
+ wake_up(&lgr->smcd->lgrs_deleted);
+ } else {
+ smc_wr_free_lgr_mem(lgr);
+ if (!atomic_dec_return(&lgr_cnt))
+ wake_up(&lgrs_deleted);
+ }
+ kfree(lgr);
+}
+
/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
@@ -1305,19 +1382,23 @@ static void smc_lgr_free(struct smc_link_group *lgr)
smc_llc_lgr_clear(lgr);
}
- smc_lgr_free_bufs(lgr);
destroy_workqueue(lgr->tx_wq);
if (lgr->is_smcd) {
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
put_device(&lgr->smcd->dev);
- if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
- wake_up(&lgr->smcd->lgrs_deleted);
- } else {
- smc_wr_free_lgr_mem(lgr);
- if (!atomic_dec_return(&lgr_cnt))
- wake_up(&lgrs_deleted);
}
- kfree(lgr);
+ smc_lgr_put(lgr); /* theoretically last lgr_put */
+}
+
+void smc_lgr_hold(struct smc_link_group *lgr)
+{
+ refcount_inc(&lgr->refcnt);
+}
+
+void smc_lgr_put(struct smc_link_group *lgr)
+{
+ if (refcount_dec_and_test(&lgr->refcnt))
+ __smc_lgr_free(lgr);
}
static void smc_sk_wake_ups(struct smc_sock *smc)
@@ -1469,16 +1550,11 @@ void smc_smcd_terminate_all(struct smcd_dev *smcd)
/* Called when an SMCR device is removed or the smc module is unloaded.
* If smcibdev is given, all SMCR link groups using this device are terminated.
* If smcibdev is NULL, all SMCR link groups are terminated.
- *
- * We must wait here for QPs been destroyed before we destroy the CQs,
- * or we won't received any CQEs and cdc_pend_tx_wr cannot reach 0 thus
- * smc_sock cannot be released.
*/
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
struct smc_link_group *lgr, *lg;
LIST_HEAD(lgr_free_list);
- LIST_HEAD(lgr_linkdown_list);
int i;
spin_lock_bh(&smc_lgr_list.lock);
@@ -1490,7 +1566,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (lgr->lnk[i].smcibdev == smcibdev)
- list_move_tail(&lgr->list, &lgr_linkdown_list);
+ smcr_link_down_cond_sched(&lgr->lnk[i]);
}
}
}
@@ -1502,16 +1578,6 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
__smc_lgr_terminate(lgr, false);
}
- list_for_each_entry_safe(lgr, lg, &lgr_linkdown_list, list) {
- for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
- if (lgr->lnk[i].smcibdev == smcibdev) {
- mutex_lock(&lgr->llc_conf_mutex);
- smcr_link_down_cond(&lgr->lnk[i]);
- mutex_unlock(&lgr->llc_conf_mutex);
- }
- }
- }
-
if (smcibdev) {
if (atomic_read(&smcibdev->lnk_cnt))
wait_event(smcibdev->lnks_deleted,
@@ -1817,7 +1883,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
(ini->smcd_version == SMC_V2 ||
lgr->vlan_id == ini->vlan_id) &&
(role == SMC_CLNT || ini->is_smcd ||
- lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
+ (lgr->conns_num < SMC_RMBS_PER_LGR_MAX &&
+ !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) {
/* link group found */
ini->first_contact_local = 0;
conn->lgr = lgr;
@@ -1856,6 +1923,10 @@ create:
goto out;
}
}
+ smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */
+ if (!conn->lgr->is_smcd)
+ smcr_link_hold(conn->lnk); /* link_put in smc_conn_free() */
+ conn->freed = 0;
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
conn->urg_state = SMC_URG_READ;
@@ -1937,42 +2008,72 @@ static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
*/
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
- return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
+ return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
-/* map an rmb buf to a link */
+/* map a buf to a link */
static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
struct smc_link *lnk)
{
- int rc;
+ int rc, i, nents, offset, buf_size, size, access_flags;
+ struct scatterlist *sg;
+ void *buf;
if (buf_desc->is_map_ib[lnk->link_idx])
return 0;
- rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
+ if (buf_desc->is_vm) {
+ buf = buf_desc->cpu_addr;
+ buf_size = buf_desc->len;
+ offset = offset_in_page(buf_desc->cpu_addr);
+ nents = PAGE_ALIGN(buf_size + offset) / PAGE_SIZE;
+ } else {
+ nents = 1;
+ }
+
+ rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], nents, GFP_KERNEL);
if (rc)
return rc;
- sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
- buf_desc->cpu_addr, buf_desc->len);
+
+ if (buf_desc->is_vm) {
+ /* virtually contiguous buffer */
+ for_each_sg(buf_desc->sgt[lnk->link_idx].sgl, sg, nents, i) {
+ size = min_t(int, PAGE_SIZE - offset, buf_size);
+ sg_set_page(sg, vmalloc_to_page(buf), size, offset);
+ buf += size / sizeof(*buf);
+ buf_size -= size;
+ offset = 0;
+ }
+ } else {
+ /* physically contiguous buffer */
+ sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
+ buf_desc->cpu_addr, buf_desc->len);
+ }
/* map sg table to DMA address */
rc = smc_ib_buf_map_sg(lnk, buf_desc,
is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
/* SMC protocol depends on mapping to one DMA address only */
- if (rc != 1) {
+ if (rc != nents) {
rc = -EAGAIN;
goto free_table;
}
- /* create a new memory region for the RMB */
- if (is_rmb) {
- rc = smc_ib_get_memory_region(lnk->roce_pd,
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_LOCAL_WRITE,
+ buf_desc->is_dma_need_sync |=
+ smc_ib_is_sg_need_sync(lnk, buf_desc) << lnk->link_idx;
+
+ if (is_rmb || buf_desc->is_vm) {
+ /* create a new memory region for the RMB or vzalloced sndbuf */
+ access_flags = is_rmb ?
+ IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
+ IB_ACCESS_LOCAL_WRITE;
+
+ rc = smc_ib_get_memory_region(lnk->roce_pd, access_flags,
buf_desc, lnk->link_idx);
if (rc)
goto buf_unmap;
- smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
+ smc_ib_sync_sg_for_device(lnk, buf_desc,
+ is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
}
buf_desc->is_map_ib[lnk->link_idx] = true;
return 0;
@@ -1985,20 +2086,23 @@ free_table:
return rc;
}
-/* register a new rmb on IB device,
+/* register a new buf on IB device, rmb or vzalloced sndbuf
* must be called under lgr->llc_conf_mutex lock
*/
-int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
+int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *buf_desc)
{
if (list_empty(&link->lgr->list))
return -ENOLINK;
- if (!rmb_desc->is_reg_mr[link->link_idx]) {
- /* register memory region for new rmb */
- if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
- rmb_desc->is_reg_err = true;
+ if (!buf_desc->is_reg_mr[link->link_idx]) {
+ /* register memory region for new buf */
+ if (buf_desc->is_vm)
+ buf_desc->mr[link->link_idx]->iova =
+ (uintptr_t)buf_desc->cpu_addr;
+ if (smc_wr_reg_send(link, buf_desc->mr[link->link_idx])) {
+ buf_desc->is_reg_err = true;
return -EFAULT;
}
- rmb_desc->is_reg_mr[link->link_idx] = true;
+ buf_desc->is_reg_mr[link->link_idx] = true;
}
return 0;
}
@@ -2050,18 +2154,38 @@ int smcr_buf_reg_lgr(struct smc_link *lnk)
struct smc_buf_desc *buf_desc, *bf;
int i, rc = 0;
+ /* reg all RMBs for a new link */
mutex_lock(&lgr->rmbs_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
if (!buf_desc->used)
continue;
- rc = smcr_link_reg_rmb(lnk, buf_desc);
- if (rc)
- goto out;
+ rc = smcr_link_reg_buf(lnk, buf_desc);
+ if (rc) {
+ mutex_unlock(&lgr->rmbs_lock);
+ return rc;
+ }
}
}
-out:
mutex_unlock(&lgr->rmbs_lock);
+
+ if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
+ return rc;
+
+ /* reg all vzalloced sndbufs for a new link */
+ mutex_lock(&lgr->sndbufs_lock);
+ for (i = 0; i < SMC_RMBE_SIZES; i++) {
+ list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list) {
+ if (!buf_desc->used || !buf_desc->is_vm)
+ continue;
+ rc = smcr_link_reg_buf(lnk, buf_desc);
+ if (rc) {
+ mutex_unlock(&lgr->sndbufs_lock);
+ return rc;
+ }
+ }
+ }
+ mutex_unlock(&lgr->sndbufs_lock);
return rc;
}
@@ -2075,18 +2199,39 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
if (!buf_desc)
return ERR_PTR(-ENOMEM);
- buf_desc->order = get_order(bufsize);
- buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
- __GFP_NOMEMALLOC | __GFP_COMP |
- __GFP_NORETRY | __GFP_ZERO,
- buf_desc->order);
- if (!buf_desc->pages) {
- kfree(buf_desc);
- return ERR_PTR(-EAGAIN);
- }
- buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
- buf_desc->len = bufsize;
+ switch (lgr->buf_type) {
+ case SMCR_PHYS_CONT_BUFS:
+ case SMCR_MIXED_BUFS:
+ buf_desc->order = get_order(bufsize);
+ buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
+ __GFP_NOMEMALLOC | __GFP_COMP |
+ __GFP_NORETRY | __GFP_ZERO,
+ buf_desc->order);
+ if (buf_desc->pages) {
+ buf_desc->cpu_addr =
+ (void *)page_address(buf_desc->pages);
+ buf_desc->len = bufsize;
+ buf_desc->is_vm = false;
+ break;
+ }
+ if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
+ goto out;
+ fallthrough; // try virtually contiguous buf
+ case SMCR_VIRT_CONT_BUFS:
+ buf_desc->order = get_order(bufsize);
+ buf_desc->cpu_addr = vzalloc(PAGE_SIZE << buf_desc->order);
+ if (!buf_desc->cpu_addr)
+ goto out;
+ buf_desc->pages = NULL;
+ buf_desc->len = bufsize;
+ buf_desc->is_vm = true;
+ break;
+ }
return buf_desc;
+
+out:
+ kfree(buf_desc);
+ return ERR_PTR(-EAGAIN);
}
/* map buf_desc on all usable links,
@@ -2095,7 +2240,7 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
struct smc_buf_desc *buf_desc, bool is_rmb)
{
- int i, rc = 0;
+ int i, rc = 0, cnt = 0;
/* protect against parallel link reconfiguration */
mutex_lock(&lgr->llc_conf_mutex);
@@ -2108,9 +2253,12 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
rc = -ENOMEM;
goto out;
}
+ cnt++;
}
out:
mutex_unlock(&lgr->llc_conf_mutex);
+ if (!rc && !cnt)
+ rc = -EINVAL;
return rc;
}
@@ -2163,10 +2311,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (is_rmb)
/* use socket recv buffer size (w/o overhead) as start value */
- sk_buf_size = smc->sk.sk_rcvbuf / 2;
+ sk_buf_size = smc->sk.sk_rcvbuf;
else
/* use socket send buffer size (w/o overhead) as start value */
- sk_buf_size = smc->sk.sk_sndbuf / 2;
+ sk_buf_size = smc->sk.sk_sndbuf;
for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
bufsize_short >= 0; bufsize_short--) {
@@ -2182,6 +2330,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
/* check for reusable slot in the link group */
buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
if (buf_desc) {
+ buf_desc->is_dma_need_sync = 0;
SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
break; /* found reusable slot */
@@ -2216,7 +2365,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (!is_smcd) {
if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
- smcr_buf_unuse(buf_desc, lgr);
+ smcr_buf_unuse(buf_desc, is_rmb, lgr);
return -ENOMEM;
}
}
@@ -2224,7 +2373,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (is_rmb) {
conn->rmb_desc = buf_desc;
conn->rmbe_size_short = bufsize_short;
- smc->sk.sk_rcvbuf = bufsize * 2;
+ smc->sk.sk_rcvbuf = bufsize;
atomic_set(&conn->bytes_to_rcv, 0);
conn->rmbe_update_limit =
smc_rmb_wnd_update_limit(buf_desc->len);
@@ -2232,22 +2381,18 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
} else {
conn->sndbuf_desc = buf_desc;
- smc->sk.sk_sndbuf = bufsize * 2;
+ smc->sk.sk_sndbuf = bufsize;
atomic_set(&conn->sndbuf_space, bufsize);
}
return 0;
}
-void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
-{
- if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
- return;
- smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
-}
-
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
- if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
+ if (!conn->sndbuf_desc->is_dma_need_sync)
+ return;
+ if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
+ !smc_link_active(conn->lnk))
return;
smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}
@@ -2256,7 +2401,9 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
int i;
- if (!conn->lgr || conn->lgr->is_smcd)
+ if (!conn->rmb_desc->is_dma_need_sync)
+ return;
+ if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
return;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_active(&conn->lgr->lnk[i]))
@@ -2266,20 +2413,6 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
}
}
-void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
-{
- int i;
-
- if (!conn->lgr || conn->lgr->is_smcd)
- return;
- for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
- if (!smc_link_active(&conn->lgr->lnk[i]))
- continue;
- smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
- DMA_FROM_DEVICE);
- }
-}
-
/* create the send and receive buffer for an SMC socket;
* receive buffers are called RMBs;
* (even though the SMC protocol allows more than one RMB-element per RMB,
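
Taken together, the smc_core.c hunks introduce a reference-count lifetime for both link groups and links. How the holds and puts pair up across this patch (an illustrative summary, not code from the patch):

/*
 * smc_lgr_create()    refcount_set(&lgr->refcnt, 1)
 * smcr_link_init()    smc_lgr_hold(lgr); refcount_set(&lnk->refcnt, 1)
 * smc_conn_create()   smc_lgr_hold(lgr); smcr_link_hold(conn->lnk)
 * smc_conn_free()     smcr_link_put(conn->lnk); smc_lgr_put(lgr)
 * smcr_link_clear()   smcr_link_put(lnk)  ->  __smcr_link_clear(),
 *                                             which drops the lgr ref
 * smc_lgr_free()      smc_lgr_put(lgr)    ->  __smc_lgr_free()
 */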
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 521c64a3d8d3..285f9bd8e232 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -115,8 +115,10 @@ struct smc_link {
dma_addr_t wr_rx_dma_addr; /* DMA address of wr_rx_bufs */
dma_addr_t wr_rx_v2_dma_addr; /* DMA address of v2 rx buf*/
u64 wr_rx_id; /* seq # of last recv WR */
+ u64 wr_rx_id_compl; /* seq # of last completed WR */
u32 wr_rx_cnt; /* number of WR recv buffers */
unsigned long wr_rx_tstamp; /* jiffies when last buf rx */
+ wait_queue_head_t wr_rx_empty_wait; /* wait for RQ empty */
struct ib_reg_wr wr_reg; /* WR register memory region */
wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */
@@ -137,6 +139,8 @@ struct smc_link {
u8 peer_link_uid[SMC_LGR_ID_SIZE]; /* peer uid */
u8 link_idx; /* index in lgr link array */
u8 link_is_asym; /* is link asymmetric? */
+ u8 clearing : 1; /* link is being cleared */
+ refcount_t refcnt; /* link reference count */
struct smc_link_group *lgr; /* parent link group */
struct work_struct link_down_wrk; /* wrk to bring link down */
char ibname[IB_DEVICE_NAME_MAX]; /* ib device name */
@@ -166,9 +170,11 @@ struct smc_buf_desc {
struct { /* SMC-R */
struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];
/* virtual buffer */
- struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
- /* for rmb only: memory region
+ struct ib_mr *mr[SMC_LINKS_PER_LGR_MAX];
+ /* memory region: for rmb and
+ * vzalloced sndbuf
* incl. rkey provided to peer
+ * and lkey provided to local
*/
u32 order; /* allocation order */
@@ -178,8 +184,11 @@ struct smc_buf_desc {
/* mem region registered */
u8 is_map_ib[SMC_LINKS_PER_LGR_MAX];
/* mem region mapped to lnk */
+ u8 is_dma_need_sync;
u8 is_reg_err;
/* buffer registration err */
+ u8 is_vm;
+ /* virtually contiguous */
};
struct { /* SMC-D */
unsigned short sba_idx;
@@ -214,6 +223,12 @@ enum smc_lgr_type { /* redundancy state of lgr */
SMC_LGR_ASYMMETRIC_LOCAL, /* local has 1, peer 2 active RNICs */
};
+enum smcr_buf_type { /* types of SMC-R sndbufs and RMBs */
+ SMCR_PHYS_CONT_BUFS = 0,
+ SMCR_VIRT_CONT_BUFS = 1,
+ SMCR_MIXED_BUFS = 2,
+};
+
enum smc_llc_flowtype {
SMC_LLC_FLOW_NONE = 0,
SMC_LLC_FLOW_ADD_LINK = 2,
@@ -249,6 +264,7 @@ struct smc_link_group {
u8 terminating : 1;/* lgr is terminating */
u8 freeing : 1; /* lgr is being freed */
+ refcount_t refcnt; /* lgr reference count */
bool is_smcd; /* SMC-R or SMC-D */
u8 smc_version;
u8 negotiated_eid[SMC_MAX_EID_LEN];
@@ -274,6 +290,7 @@ struct smc_link_group {
/* used rtoken elements */
u8 next_link_id;
enum smc_lgr_type type;
+ enum smcr_buf_type buf_type;
/* redundancy state */
u8 pnet_id[SMC_MAX_PNETID_LEN + 1];
/* pnet id of this lgr */
@@ -409,6 +426,11 @@ static inline struct smc_connection *smc_lgr_find_conn(
return res;
}
+static inline bool smc_conn_lgr_valid(struct smc_connection *conn)
+{
+ return conn->lgr && conn->alert_token_local;
+}
+
/*
* Returns true if the specified link is usable.
*
@@ -487,6 +509,8 @@ struct smc_clc_msg_accept_confirm;
void smc_lgr_cleanup_early(struct smc_link_group *lgr);
void smc_lgr_terminate_sched(struct smc_link_group *lgr);
+void smc_lgr_hold(struct smc_link_group *lgr);
+void smc_lgr_put(struct smc_link_group *lgr);
void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport);
void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport);
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
@@ -503,10 +527,8 @@ void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
__be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey);
void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
__be64 nw_vaddr, __be32 nw_rkey);
-void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
-void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini);
void smc_conn_free(struct smc_connection *conn);
@@ -518,6 +540,8 @@ void smc_core_exit(void);
int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
u8 link_idx, struct smc_init_info *ini);
void smcr_link_clear(struct smc_link *lnk, bool log);
+void smcr_link_hold(struct smc_link *lnk);
+void smcr_link_put(struct smc_link *lnk);
void smc_switch_link_and_count(struct smc_connection *conn,
struct smc_link *to_lnk);
int smcr_buf_map_lgr(struct smc_link *lnk);
@@ -525,7 +549,7 @@ int smcr_buf_reg_lgr(struct smc_link *lnk);
void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type);
void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
enum smc_lgr_type new_type, int asym_lnk_idx);
-int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc);
+int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *rmb_desc);
struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
struct smc_link *from_lnk, bool is_dev_err);
void smcr_link_down_cond(struct smc_link *lnk);
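
smc_conn_lgr_valid() exists because smc_lgr_unregister_conn() no longer NULLs conn->lgr: the pointer must stay set for the lgr_put in smc_conn_free(), so alert_token_local becomes the "still registered" marker. A minimal caller sketch, assuming a connection pointer conn (illustrative, mirroring the smc_cdc.c hunk above):

	/* conn->lgr may be non-NULL yet already unregistered;
	 * test registration through the helper before using it
	 */
	if (!smc_conn_lgr_valid(conn))
		return -EPIPE;
	if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
		return -EPIPE;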
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 7c8dad28c18d..80ea7d954ece 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -89,7 +89,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
r->diag_state = sk->sk_state;
if (smc->use_fallback)
r->diag_mode = SMC_DIAG_MODE_FALLBACK_TCP;
- else if (smc->conn.lgr && smc->conn.lgr->is_smcd)
+ else if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd)
r->diag_mode = SMC_DIAG_MODE_SMCD;
else
r->diag_mode = SMC_DIAG_MODE_SMCR;
@@ -142,17 +142,15 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
goto errout;
}
- if (smc->conn.lgr && !smc->conn.lgr->is_smcd &&
+ if (smc_conn_lgr_valid(&smc->conn) && !smc->conn.lgr->is_smcd &&
(req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) &&
!list_empty(&smc->conn.lgr->list)) {
struct smc_link *link = smc->conn.lnk;
- struct net *net = read_pnet(&link->smcibdev->ibdev->coredev.rdma_net);
struct smc_diag_lgrinfo linfo = {
.role = smc->conn.lgr->role,
.lnk[0].ibport = link->ibport,
.lnk[0].link_id = link->link_id,
- .lnk[0].net_cookie = net->net_cookie,
};
memcpy(linfo.lnk[0].ibname,
@@ -164,7 +162,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0)
goto errout;
}
- if (smc->conn.lgr && smc->conn.lgr->is_smcd &&
+ if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd &&
(req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) &&
!list_empty(&smc->conn.lgr->list)) {
struct smc_connection *conn = &smc->conn;
@@ -270,3 +268,4 @@ module_init(smc_diag_init);
module_exit(smc_diag_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 43 /* AF_SMC */);
+MODULE_ALIAS_GENL_FAMILY(SMCR_GENL_FAMILY_NAME);
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index a3e2d3b89568..854772dd52fd 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -671,6 +671,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
.max_recv_wr = SMC_WR_BUF_CNT * 3,
.max_send_sge = SMC_IB_MAX_SEND_SGE,
.max_recv_sge = sges_per_buf,
+ .max_inline_data = 0,
},
.sq_sig_type = IB_SIGNAL_REQ_WR,
.qp_type = IB_QPT_RC,
@@ -697,7 +698,7 @@ static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot, u8 link_idx)
int sg_num;
/* map the largest prefix of a dma mapped SG list */
- sg_num = ib_map_mr_sg(buf_slot->mr_rx[link_idx],
+ sg_num = ib_map_mr_sg(buf_slot->mr[link_idx],
buf_slot->sgt[link_idx].sgl,
buf_slot->sgt[link_idx].orig_nents,
&offset, PAGE_SIZE);
@@ -709,25 +710,49 @@ static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot, u8 link_idx)
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
struct smc_buf_desc *buf_slot, u8 link_idx)
{
- if (buf_slot->mr_rx[link_idx])
+ if (buf_slot->mr[link_idx])
return 0; /* already done */
- buf_slot->mr_rx[link_idx] =
+ buf_slot->mr[link_idx] =
ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order);
- if (IS_ERR(buf_slot->mr_rx[link_idx])) {
+ if (IS_ERR(buf_slot->mr[link_idx])) {
int rc;
- rc = PTR_ERR(buf_slot->mr_rx[link_idx]);
- buf_slot->mr_rx[link_idx] = NULL;
+ rc = PTR_ERR(buf_slot->mr[link_idx]);
+ buf_slot->mr[link_idx] = NULL;
return rc;
}
- if (smc_ib_map_mr_sg(buf_slot, link_idx) != 1)
+ if (smc_ib_map_mr_sg(buf_slot, link_idx) !=
+ buf_slot->sgt[link_idx].orig_nents)
return -EINVAL;
return 0;
}
+bool smc_ib_is_sg_need_sync(struct smc_link *lnk,
+ struct smc_buf_desc *buf_slot)
+{
+ struct scatterlist *sg;
+ unsigned int i;
+ bool ret = false;
+
+ /* for now there is just one DMA address */
+ for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
+ buf_slot->sgt[lnk->link_idx].nents, i) {
+ if (!sg_dma_len(sg))
+ break;
+ if (dma_need_sync(lnk->smcibdev->ibdev->dma_device,
+ sg_dma_address(sg))) {
+ ret = true;
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
+
/* synchronize buffer usage for cpu access */
void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
@@ -736,6 +761,9 @@ void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
struct scatterlist *sg;
unsigned int i;
+ if (!(buf_slot->is_dma_need_sync & (1U << lnk->link_idx)))
+ return;
+
/* for now there is just one DMA address */
for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
buf_slot->sgt[lnk->link_idx].nents, i) {
@@ -756,6 +784,9 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk,
struct scatterlist *sg;
unsigned int i;
+ if (!(buf_slot->is_dma_need_sync & (1U << lnk->link_idx)))
+ return;
+
/* for now there is just one DMA address */
for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
buf_slot->sgt[lnk->link_idx].nents, i) {
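
smc_ib_is_sg_need_sync() probes at map time whether the device/address pair uses coherent DMA, so the per-I/O sync_sg calls can be skipped entirely on coherent hardware. A self-contained sketch of the same probe, assuming a generic dma_dev and an already-mapped scatterlist (illustrative):

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

static bool sgl_needs_sync(struct device *dma_dev, struct scatterlist *sgl,
			   unsigned int nents)
{
	struct scatterlist *sg;
	unsigned int i;

	for_each_sg(sgl, sg, nents, i) {
		if (!sg_dma_len(sg))
			break;
		/* true when streaming DMA requires explicit cpu/device
		 * syncs for this address on this device
		 */
		if (dma_need_sync(dma_dev, sg_dma_address(sg)))
			return true;
	}
	return false;
}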
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 5d8b49c57f50..034295676e88 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -102,6 +102,8 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
struct smc_buf_desc *buf_slot, u8 link_idx);
void smc_ib_put_memory_region(struct ib_mr *mr);
+bool smc_ib_is_sg_need_sync(struct smc_link *lnk,
+ struct smc_buf_desc *buf_slot);
void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index a2084ecdb97e..911fe08bc54b 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -33,17 +33,6 @@ int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd)
vlan_id);
}
-int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos,
- void *data, size_t len)
-{
- int rc;
-
- rc = smcd->ops->move_data(smcd, pos->token, pos->index, pos->signal,
- pos->offset, data, len);
-
- return rc < 0 ? rc : 0;
-}
-
void smc_ism_get_system_eid(u8 **eid)
{
if (!smc_ism_v2_capable)
@@ -440,7 +429,7 @@ int smcd_register_dev(struct smcd_dev *smcd)
if (list_empty(&smcd_dev_list.list)) {
u8 *system_eid = NULL;
- smcd->ops->get_system_eid(smcd, &system_eid);
+ system_eid = smcd->ops->get_system_eid();
if (system_eid[24] != '0' || system_eid[28] != '0') {
smc_ism_v2_capable = true;
memcpy(smc_ism_v2_system_eid, system_eid,
@@ -519,13 +508,13 @@ void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event)
EXPORT_SYMBOL_GPL(smcd_handle_event);
/* SMCD Device interrupt handler. Called from ISM device interrupt handler.
- * Parameters are smcd device pointer and DMB number. Find the connection and
- * schedule the tasklet for this connection.
+ * Parameters are smcd device pointer, DMB number, and the DMBE bitmask.
+ * Find the connection and schedule the tasklet for this connection.
*
* Context:
* - Function called in IRQ context from ISM device driver IRQ handler.
*/
-void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno)
+void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno, u16 dmbemask)
{
struct smc_connection *conn = NULL;
unsigned long flags;
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index 004b22a13ffa..d6b2db604fe8 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -28,13 +28,6 @@ struct smc_ism_vlanid { /* VLAN id set on ISM device */
refcount_t refcnt; /* Reference count */
};
-struct smc_ism_position { /* ISM device position to write to */
- u64 token; /* Token of DMB */
- u32 offset; /* Offset into DMBE */
- u8 index; /* Index of DMBE */
- u8 signal; /* Generate interrupt on owner side */
-};
-
struct smcd_dev;
int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *dev);
@@ -45,12 +38,21 @@ int smc_ism_put_vlan(struct smcd_dev *dev, unsigned short vlan_id);
int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size,
struct smc_buf_desc *dmb_desc);
int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc);
-int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos,
- void *data, size_t len);
int smc_ism_signal_shutdown(struct smc_link_group *lgr);
void smc_ism_get_system_eid(u8 **eid);
u16 smc_ism_get_chid(struct smcd_dev *dev);
bool smc_ism_is_v2_capable(void);
void smc_ism_init(void);
int smcd_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
+
+static inline int smc_ism_write(struct smcd_dev *smcd, u64 dmb_tok,
+ unsigned int idx, bool sf, unsigned int offset,
+ void *data, size_t len)
+{
+ int rc;
+
+ rc = smcd->ops->move_data(smcd, dmb_tok, idx, sf, offset, data, len);
+ return rc < 0 ? rc : 0;
+}
+
#endif
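
With struct smc_ism_position gone, callers pass the former struct fields (token, DMBE index, signal flag, offset) straight into the inline wrapper. A usage sketch with assumed local variable names (illustrative):

	int rc;

	/* write len bytes of data at dmbe_off into the peer DMB
	 * identified by dmb_tok/dmbe_idx; sf requests a signal
	 */
	rc = smc_ism_write(smcd, dmb_tok, dmbe_idx, sf, dmbe_off, data, len);
	if (rc)
		return rc;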
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index c4d057b2941d..524649d0ab65 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -505,19 +505,22 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
if (smc_link_active(link) && link != send_link) {
rkeyllc->rtoken[rtok_ix].link_id = link->link_id;
rkeyllc->rtoken[rtok_ix].rmb_key =
- htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
- rkeyllc->rtoken[rtok_ix].rmb_vaddr = cpu_to_be64(
- (u64)sg_dma_address(
- rmb_desc->sgt[link->link_idx].sgl));
+ htonl(rmb_desc->mr[link->link_idx]->rkey);
+ rkeyllc->rtoken[rtok_ix].rmb_vaddr = rmb_desc->is_vm ?
+ cpu_to_be64((uintptr_t)rmb_desc->cpu_addr) :
+ cpu_to_be64((u64)sg_dma_address
+ (rmb_desc->sgt[link->link_idx].sgl));
rtok_ix++;
}
}
/* rkey of send_link is in rtoken[0] */
rkeyllc->rtoken[0].num_rkeys = rtok_ix - 1;
rkeyllc->rtoken[0].rmb_key =
- htonl(rmb_desc->mr_rx[send_link->link_idx]->rkey);
- rkeyllc->rtoken[0].rmb_vaddr = cpu_to_be64(
- (u64)sg_dma_address(rmb_desc->sgt[send_link->link_idx].sgl));
+ htonl(rmb_desc->mr[send_link->link_idx]->rkey);
+ rkeyllc->rtoken[0].rmb_vaddr = rmb_desc->is_vm ?
+ cpu_to_be64((uintptr_t)rmb_desc->cpu_addr) :
+ cpu_to_be64((u64)sg_dma_address
+ (rmb_desc->sgt[send_link->link_idx].sgl));
/* send llc message */
rc = smc_wr_tx_send(send_link, pend);
put_out:
@@ -544,7 +547,7 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
rkeyllc->hd.common.llc_type = SMC_LLC_DELETE_RKEY;
smc_llc_init_msg_hdr(&rkeyllc->hd, link->lgr, sizeof(*rkeyllc));
rkeyllc->num_rkeys = 1;
- rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
+ rkeyllc->rkey[0] = htonl(rmb_desc->mr[link->link_idx]->rkey);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
put_out:
@@ -614,9 +617,10 @@ static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext,
if (!buf_pos)
break;
rmb = buf_pos;
- ext->rt[i].rmb_key = htonl(rmb->mr_rx[prim_lnk_idx]->rkey);
- ext->rt[i].rmb_key_new = htonl(rmb->mr_rx[lnk_idx]->rkey);
- ext->rt[i].rmb_vaddr_new =
+ ext->rt[i].rmb_key = htonl(rmb->mr[prim_lnk_idx]->rkey);
+ ext->rt[i].rmb_key_new = htonl(rmb->mr[lnk_idx]->rkey);
+ ext->rt[i].rmb_vaddr_new = rmb->is_vm ?
+ cpu_to_be64((uintptr_t)rmb->cpu_addr) :
cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl));
buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos);
while (buf_pos && !(buf_pos)->used)
@@ -852,9 +856,10 @@ static int smc_llc_add_link_cont(struct smc_link *link,
}
rmb = *buf_pos;
- addc_llc->rt[i].rmb_key = htonl(rmb->mr_rx[prim_lnk_idx]->rkey);
- addc_llc->rt[i].rmb_key_new = htonl(rmb->mr_rx[lnk_idx]->rkey);
- addc_llc->rt[i].rmb_vaddr_new =
+ addc_llc->rt[i].rmb_key = htonl(rmb->mr[prim_lnk_idx]->rkey);
+ addc_llc->rt[i].rmb_key_new = htonl(rmb->mr[lnk_idx]->rkey);
+ addc_llc->rt[i].rmb_vaddr_new = rmb->is_vm ?
+ cpu_to_be64((uintptr_t)rmb->cpu_addr) :
cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl));
(*num_rkeys_todo)--;
@@ -2122,7 +2127,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
init_waitqueue_head(&lgr->llc_flow_waiter);
init_waitqueue_head(&lgr->llc_msg_waiter);
mutex_init(&lgr->llc_conf_mutex);
- lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time;
+ lgr->llc_testlink_time = READ_ONCE(net->smc.sysctl_smcr_testlink_time);
}
/* called after lgr was removed from lgr_list */
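
Each rkey/vaddr hunk above picks the advertised RMB virtual address by buffer type: a vzalloc'ed RMB advertises its kernel virtual address (which smcr_link_reg_buf() also programs as the MR iova), while a physically contiguous RMB keeps advertising the DMA address of its single SG entry. The recurring selection as a standalone sketch (same expression as in the hunks):

	__be64 rmb_vaddr = rmb->is_vm ?
		cpu_to_be64((uintptr_t)rmb->cpu_addr) :
		cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl));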
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 4404e52b3346..7e7a3162c68b 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -19,6 +19,7 @@
#define SMC_LLC_WAIT_FIRST_TIME (5 * HZ)
#define SMC_LLC_WAIT_TIME (2 * HZ)
+#define SMC_LLC_TESTLINK_DEFAULT_TIME (30 * HZ)
enum smc_llc_reqresp {
SMC_LLC_REQ,
diff --git a/net/smc/smc_netlink.c b/net/smc/smc_netlink.c
index f13ab0661ed5..621c46c70073 100644
--- a/net/smc/smc_netlink.c
+++ b/net/smc/smc_netlink.c
@@ -111,6 +111,21 @@ static const struct genl_ops smc_gen_nl_ops[] = {
.flags = GENL_ADMIN_PERM,
.doit = smc_nl_disable_seid,
},
+ {
+ .cmd = SMC_NETLINK_DUMP_HS_LIMITATION,
+ /* can be retrieved by unprivileged users */
+ .dumpit = smc_nl_dump_hs_limitation,
+ },
+ {
+ .cmd = SMC_NETLINK_ENABLE_HS_LIMITATION,
+ .flags = GENL_ADMIN_PERM,
+ .doit = smc_nl_enable_hs_limitation,
+ },
+ {
+ .cmd = SMC_NETLINK_DISABLE_HS_LIMITATION,
+ .flags = GENL_ADMIN_PERM,
+ .doit = smc_nl_disable_hs_limitation,
+ },
};
static const struct nla_policy smc_gen_nl_policy[2] = {
@@ -127,7 +142,8 @@ struct genl_family smc_gen_nl_family __ro_after_init = {
.netnsok = true,
.module = THIS_MODULE,
.ops = smc_gen_nl_ops,
- .n_ops = ARRAY_SIZE(smc_gen_nl_ops)
+ .n_ops = ARRAY_SIZE(smc_gen_nl_ops),
+ .resv_start_op = SMC_NETLINK_DISABLE_HS_LIMITATION + 1,
};
int __init smc_nl_init(void)
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index db9825c01e0a..25fb2fd186e2 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -113,14 +113,15 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
pnettable = &sn->pnettable;
/* remove table entry */
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist,
list) {
if (!pnet_name ||
smc_pnet_match(pnetelem->pnet_name, pnet_name)) {
list_del(&pnetelem->list);
if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) {
- dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker);
+ netdev_put(pnetelem->ndev,
+ &pnetelem->dev_tracker);
pr_warn_ratelimited("smc: net device %s "
"erased user defined "
"pnetid %.16s\n",
@@ -131,7 +132,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
rc = 0;
}
}
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
/* if this is not the initial namespace, stop here */
if (net != &init_net)
@@ -192,11 +193,11 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev)
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev &&
!strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) {
- dev_hold_track(ndev, &pnetelem->dev_tracker, GFP_ATOMIC);
+ netdev_hold(ndev, &pnetelem->dev_tracker, GFP_ATOMIC);
pnetelem->ndev = ndev;
rc = 0;
pr_warn_ratelimited("smc: adding net device %s with "
@@ -206,7 +207,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev)
break;
}
}
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
@@ -224,10 +225,10 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) {
- dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker);
+ netdev_put(pnetelem->ndev, &pnetelem->dev_tracker);
pnetelem->ndev = NULL;
rc = 0;
pr_warn_ratelimited("smc: removing net device %s with "
@@ -237,7 +238,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
break;
}
}
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
@@ -311,8 +312,9 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
if (!strncmp(ibdev->ibdev->name, ib_name,
sizeof(ibdev->ibdev->name)) ||
- !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name,
- IB_DEVICE_NAME_MAX - 1)) {
+ (ibdev->ibdev->dev.parent &&
+ !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name,
+ IB_DEVICE_NAME_MAX - 1))) {
goto out;
}
}
@@ -368,11 +370,9 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
new_pe->type = SMC_PNET_ETH;
memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
strncpy(new_pe->eth_name, eth_name, IFNAMSIZ);
- new_pe->ndev = ndev;
- netdev_tracker_alloc(ndev, &new_pe->dev_tracker, GFP_KERNEL);
rc = -EEXIST;
new_netdev = true;
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
if (tmp_pe->type == SMC_PNET_ETH &&
!strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) {
@@ -381,10 +381,15 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
}
}
if (new_netdev) {
+ if (ndev) {
+ new_pe->ndev = ndev;
+ netdev_tracker_alloc(ndev, &new_pe->dev_tracker,
+ GFP_ATOMIC);
+ }
list_add_tail(&new_pe->list, &pnettable->pnetlist);
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
} else {
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
kfree(new_pe);
goto out_put;
}
@@ -445,7 +450,7 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
new_pe->ib_port = ib_port;
new_ibdev = true;
- write_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
if (tmp_pe->type == SMC_PNET_IB &&
!strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
@@ -455,9 +460,9 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
}
if (new_ibdev) {
list_add_tail(&new_pe->list, &pnettable->pnetlist);
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
} else {
- write_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
kfree(new_pe);
}
return (new_ibdev) ? 0 : -EEXIST;
@@ -602,7 +607,7 @@ static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
pnettable = &sn->pnettable;
/* dump pnettable entries */
- read_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid))
continue;
@@ -617,7 +622,7 @@ static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
break;
}
}
- read_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return idx;
}
@@ -710,7 +715,8 @@ static struct genl_family smc_pnet_nl_family __ro_after_init = {
.netnsok = true,
.module = THIS_MODULE,
.ops = smc_pnet_ops,
- .n_ops = ARRAY_SIZE(smc_pnet_ops)
+ .n_ops = ARRAY_SIZE(smc_pnet_ops),
+ .resv_start_op = SMC_PNETID_FLUSH + 1,
};
bool smc_pnet_is_ndev_pnetid(struct net *net, u8 *pnetid)
@@ -861,12 +867,15 @@ int smc_pnet_net_init(struct net *net)
struct smc_pnetids_ndev *pnetids_ndev = &sn->pnetids_ndev;
INIT_LIST_HEAD(&pnettable->pnetlist);
- rwlock_init(&pnettable->lock);
+ mutex_init(&pnettable->lock);
INIT_LIST_HEAD(&pnetids_ndev->list);
rwlock_init(&pnetids_ndev->lock);
smc_pnet_create_pnetids_list(net);
+ /* disable handshake limitation by default */
+ net->smc.limit_smc_hs = 0;
+
return 0;
}
@@ -941,7 +950,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
- read_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) {
/* get pnetid of netdev device */
@@ -950,7 +959,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
break;
}
}
- read_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
@@ -1153,7 +1162,7 @@ int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port)
sn = net_generic(&init_net, smc_net_id);
pnettable = &sn->pnettable;
- read_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
if (tmp_pe->type == SMC_PNET_IB &&
!strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX) &&
@@ -1163,7 +1172,7 @@ int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port)
break;
}
}
- read_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
@@ -1182,7 +1191,7 @@ int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev)
sn = net_generic(&init_net, smc_net_id);
pnettable = &sn->pnettable;
- read_lock(&pnettable->lock);
+ mutex_lock(&pnettable->lock);
list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
if (tmp_pe->type == SMC_PNET_IB &&
!strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
@@ -1191,7 +1200,7 @@ int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev)
break;
}
}
- read_unlock(&pnettable->lock);
+ mutex_unlock(&pnettable->lock);
return rc;
}
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index 14039272f7e4..80a88eea4949 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -29,7 +29,7 @@ struct smc_link_group;
* @pnetlist: List of PNETIDs
*/
struct smc_pnettable {
- rwlock_t lock;
+ struct mutex lock;
struct list_head pnetlist;
};
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 51e8eb2933ff..17c5aee7ee4f 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -145,35 +145,93 @@ static void smc_rx_spd_release(struct splice_pipe_desc *spd,
static int smc_rx_splice(struct pipe_inode_info *pipe, char *src, size_t len,
struct smc_sock *smc)
{
+ struct smc_link_group *lgr = smc->conn.lgr;
+ int offset = offset_in_page(src);
+ struct partial_page *partial;
struct splice_pipe_desc spd;
- struct partial_page partial;
- struct smc_spd_priv *priv;
- int bytes;
+ struct smc_spd_priv **priv;
+ struct page **pages;
+ int bytes, nr_pages;
+ int i;
- priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ nr_pages = !lgr->is_smcd && smc->conn.rmb_desc->is_vm ?
+ PAGE_ALIGN(len + offset) / PAGE_SIZE : 1;
+
+ pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ goto out;
+ partial = kcalloc(nr_pages, sizeof(*partial), GFP_KERNEL);
+ if (!partial)
+ goto out_page;
+ priv = kcalloc(nr_pages, sizeof(*priv), GFP_KERNEL);
if (!priv)
- return -ENOMEM;
- priv->len = len;
- priv->smc = smc;
- partial.offset = src - (char *)smc->conn.rmb_desc->cpu_addr;
- partial.len = len;
- partial.private = (unsigned long)priv;
-
- spd.nr_pages_max = 1;
- spd.nr_pages = 1;
- spd.pages = &smc->conn.rmb_desc->pages;
- spd.partial = &partial;
+ goto out_part;
+ for (i = 0; i < nr_pages; i++) {
+ priv[i] = kzalloc(sizeof(**priv), GFP_KERNEL);
+ if (!priv[i])
+ goto out_priv;
+ }
+
+ if (lgr->is_smcd ||
+ (!lgr->is_smcd && !smc->conn.rmb_desc->is_vm)) {
+ /* smcd or smcr that uses physically contiguous RMBs */
+ priv[0]->len = len;
+ priv[0]->smc = smc;
+ partial[0].offset = src - (char *)smc->conn.rmb_desc->cpu_addr;
+ partial[0].len = len;
+ partial[0].private = (unsigned long)priv[0];
+ pages[0] = smc->conn.rmb_desc->pages;
+ } else {
+ int size, left = len;
+ void *buf = src;
+ /* smcr that uses virtually contiguous RMBs */
+ for (i = 0; i < nr_pages; i++) {
+ size = min_t(int, PAGE_SIZE - offset, left);
+ priv[i]->len = size;
+ priv[i]->smc = smc;
+ pages[i] = vmalloc_to_page(buf);
+ partial[i].offset = offset;
+ partial[i].len = size;
+ partial[i].private = (unsigned long)priv[i];
+ buf += size / sizeof(*buf);
+ left -= size;
+ offset = 0;
+ }
+ }
+ spd.nr_pages_max = nr_pages;
+ spd.nr_pages = nr_pages;
+ spd.pages = pages;
+ spd.partial = partial;
spd.ops = &smc_pipe_ops;
spd.spd_release = smc_rx_spd_release;
bytes = splice_to_pipe(pipe, &spd);
if (bytes > 0) {
sock_hold(&smc->sk);
- get_page(smc->conn.rmb_desc->pages);
+ if (!lgr->is_smcd && smc->conn.rmb_desc->is_vm) {
+ for (i = 0; i < PAGE_ALIGN(bytes + offset) / PAGE_SIZE; i++)
+ get_page(pages[i]);
+ } else {
+ get_page(smc->conn.rmb_desc->pages);
+ }
atomic_add(bytes, &smc->conn.splice_pending);
}
+ kfree(priv);
+ kfree(partial);
+ kfree(pages);
return bytes;
+
+out_priv:
+ for (i = (i - 1); i >= 0; i--)
+ kfree(priv[i]);
+ kfree(priv);
+out_part:
+ kfree(partial);
+out_page:
+ kfree(pages);
+out:
+ return -ENOMEM;
}
static int smc_rx_data_available_and_no_splice_pend(struct smc_connection *conn)
@@ -355,12 +413,12 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
}
break;
}
+ if (!timeo)
+ return -EAGAIN;
if (signal_pending(current)) {
read_done = sock_intr_errno(timeo);
break;
}
- if (!timeo)
- return -EAGAIN;
}
if (!smc_rx_data_available(conn)) {
@@ -413,7 +471,6 @@ copy:
if (rc < 0) {
if (!read_done)
read_done = -EFAULT;
- smc_rmb_sync_sg_for_device(conn);
goto out;
}
}
@@ -427,7 +484,6 @@ copy:
chunk_len_sum += chunk_len;
chunk_off = 0; /* modulo offset in recv ring buffer */
}
- smc_rmb_sync_sg_for_device(conn);
/* update cursors */
if (!(flags & MSG_PEEK)) {
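
Splicing from a virtually contiguous RMB cannot hand the pipe a single struct page, so the new code walks the vzalloc'ed region page by page with vmalloc_to_page(). The core walk as a minimal sketch, assuming char *buf and len describe the spliced region (illustrative):

	int off = offset_in_page(buf);
	int nr_pages = PAGE_ALIGN(len + off) / PAGE_SIZE;
	int i, size, left = len;

	for (i = 0; i < nr_pages; i++) {
		size = min_t(int, PAGE_SIZE - off, left);
		/* resolve each backing page individually; vmalloc
		 * memory has no contiguous struct page range
		 */
		pages[i] = vmalloc_to_page(buf);
		buf += size;
		left -= size;
		off = 0;	/* only the first page may start mid-page */
	}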
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
new file mode 100644
index 000000000000..b6f79fabb9d3
--- /dev/null
+++ b/net/smc/smc_sysctl.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * smc_sysctl.c: sysctl interface to SMC subsystem.
+ *
+ * Copyright (c) 2022, Alibaba Inc.
+ *
+ * Author: Tony Lu <tonylu@linux.alibaba.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/sysctl.h>
+#include <net/net_namespace.h>
+
+#include "smc.h"
+#include "smc_core.h"
+#include "smc_llc.h"
+#include "smc_sysctl.h"
+
+static int min_sndbuf = SMC_BUF_MIN_SIZE;
+static int min_rcvbuf = SMC_BUF_MIN_SIZE;
+
+static struct ctl_table smc_table[] = {
+ {
+ .procname = "autocorking_size",
+ .data = &init_net.smc.sysctl_autocorking_size,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec,
+ },
+ {
+ .procname = "smcr_buf_type",
+ .data = &init_net.smc.sysctl_smcr_buf_type,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+ {
+ .procname = "smcr_testlink_time",
+ .data = &init_net.smc.sysctl_smcr_testlink_time,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "wmem",
+ .data = &init_net.smc.sysctl_wmem,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &min_sndbuf,
+ },
+ {
+ .procname = "rmem",
+ .data = &init_net.smc.sysctl_rmem,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &min_rcvbuf,
+ },
+ { }
+};
+
+int __net_init smc_sysctl_net_init(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = smc_table;
+ if (!net_eq(net, &init_net)) {
+ int i;
+
+ table = kmemdup(table, sizeof(smc_table), GFP_KERNEL);
+ if (!table)
+ goto err_alloc;
+
+ for (i = 0; i < ARRAY_SIZE(smc_table) - 1; i++)
+ table[i].data += (void *)net - (void *)&init_net;
+ }
+
+ net->smc.smc_hdr = register_net_sysctl(net, "net/smc", table);
+ if (!net->smc.smc_hdr)
+ goto err_reg;
+
+ net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
+ net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
+ net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
+ WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
+ WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
+
+ return 0;
+
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(table);
+err_alloc:
+ return -ENOMEM;
+}
+
+void __net_exit smc_sysctl_net_exit(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = net->smc.smc_hdr->ctl_table_arg;
+ unregister_net_sysctl_table(net->smc.smc_hdr);
+ if (!net_eq(net, &init_net))
+ kfree(table);
+}
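
The per-namespace registration above clones the template table and rebases every .data pointer by the address delta between the new netns and init_net, giving each namespace private sysctl storage. The rebase idiom in isolation (a sketch of the lines above):

	/* init_net.smc.sysctl_foo and net->smc.sysctl_foo sit at the
	 * same offset inside struct net, so one pointer delta fixes
	 * up every entry
	 */
	table[i].data += (void *)net - (void *)&init_net;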
diff --git a/net/smc/smc_sysctl.h b/net/smc/smc_sysctl.h
new file mode 100644
index 000000000000..0becc11bd2f4
--- /dev/null
+++ b/net/smc/smc_sysctl.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * smc_sysctl.c: sysctl interface to SMC subsystem.
+ *
+ * Copyright (c) 2022, Alibaba Inc.
+ *
+ * Author: Tony Lu <tonylu@linux.alibaba.com>
+ *
+ */
+
+#ifndef _SMC_SYSCTL_H
+#define _SMC_SYSCTL_H
+
+#ifdef CONFIG_SYSCTL
+
+int __net_init smc_sysctl_net_init(struct net *net);
+void __net_exit smc_sysctl_net_exit(struct net *net);
+
+#else
+
+static inline int smc_sysctl_net_init(struct net *net)
+{
+ net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
+ return 0;
+}
+
+static inline void smc_sysctl_net_exit(struct net *net) { }
+
+#endif /* CONFIG_SYSCTL */
+
+#endif /* _SMC_SYSCTL_H */
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index be241d53020f..64dedffe9d26 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -31,7 +31,6 @@
#include "smc_tracepoint.h"
#define SMC_TX_WORK_DELAY 0
-#define SMC_TX_CORK_DELAY (HZ >> 2) /* 250 ms */
/***************************** sndbuf producer *******************************/
@@ -132,6 +131,51 @@ static bool smc_tx_is_corked(struct smc_sock *smc)
return (tp->nonagle & TCP_NAGLE_CORK) ? true : false;
}
+/* If we have pending CDC messages, do not send:
+ * because the CQE of this CDC message will arrive shortly, it gives
+ * a chance to coalesce future sendmsg() payloads into one RDMA write,
+ * without need for a timer, and with no latency trade-off.
+ * Algorithm here:
+ * 1. The first message should never cork
+ * 2. If we have pending Tx CDC messages, wait for the first CDC
+ *    message's completion
+ * 3. Don't cork too much data in a single RDMA write, to prevent burst
+ *    traffic; the total corked data should not exceed sndbuf/2
+ */
+static bool smc_should_autocork(struct smc_sock *smc)
+{
+ struct smc_connection *conn = &smc->conn;
+ int corking_size;
+
+ corking_size = min_t(unsigned int, conn->sndbuf_desc->len >> 1,
+ sock_net(&smc->sk)->smc.sysctl_autocorking_size);
+
+ if (atomic_read(&conn->cdc_pend_tx_wr) == 0 ||
+ smc_tx_prepared_sends(conn) > corking_size)
+ return false;
+ return true;
+}
+
+static bool smc_tx_should_cork(struct smc_sock *smc, struct msghdr *msg)
+{
+ struct smc_connection *conn = &smc->conn;
+
+ if (smc_should_autocork(smc))
+ return true;
+
+ /* for a corked socket defer the RDMA writes if
+ * sndbuf_space is still available. The application
+ * should know how/when to uncork it.
+ */
+ if ((msg->msg_flags & MSG_MORE ||
+ smc_tx_is_corked(smc) ||
+ msg->msg_flags & MSG_SENDPAGE_NOTLAST) &&
+ atomic_read(&conn->sndbuf_space))
+ return true;
+
+ return false;
+}
+
/* sndbuf producer: main API called by socket layer.
* called under sock lock.
*/
@@ -202,7 +246,6 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
tx_cnt_prep);
chunk_len_sum = chunk_len;
chunk_off = tx_cnt_prep;
- smc_sndbuf_sync_sg_for_cpu(conn);
for (chunk = 0; chunk < 2; chunk++) {
rc = memcpy_from_msg(sndbuf_base + chunk_off,
msg, chunk_len);
@@ -236,15 +279,10 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
*/
if ((msg->msg_flags & MSG_OOB) && !send_remaining)
conn->urg_tx_pend = true;
- if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc)) &&
- (atomic_read(&conn->sndbuf_space) >
- (conn->sndbuf_desc->len >> 1)))
- /* for a corked socket defer the RDMA writes if there
- * is still sufficient sndbuf_space available
- */
- queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work,
- SMC_TX_CORK_DELAY);
- else
+ /* If we need to cork, do nothing and wait for the next
+ * sendmsg() call or a push on tx completion
+ */
+ if (!smc_tx_should_cork(smc, msg))
smc_tx_sndbuf_nonempty(conn);
trace_smc_tx_sendmsg(smc, copylen);
@@ -260,21 +298,33 @@ out_err:
return rc;
}
+int smc_tx_sendpage(struct smc_sock *smc, struct page *page, int offset,
+ size_t size, int flags)
+{
+ struct msghdr msg = {.msg_flags = flags};
+ char *kaddr = kmap(page);
+ struct kvec iov;
+ int rc;
+
+ iov.iov_base = kaddr + offset;
+ iov.iov_len = size;
+ iov_iter_kvec(&msg.msg_iter, WRITE, &iov, 1, size);
+ rc = smc_tx_sendmsg(smc, &msg, size);
+ kunmap(page);
+ return rc;
+}
+
/***************************** sndbuf consumer *******************************/
/* sndbuf consumer: actual data transfer of one target chunk with ISM write */
int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len,
u32 offset, int signal)
{
- struct smc_ism_position pos;
int rc;
- memset(&pos, 0, sizeof(pos));
- pos.token = conn->peer_token;
- pos.index = conn->peer_rmbe_idx;
- pos.offset = conn->tx_off + offset;
- pos.signal = signal;
- rc = smc_ism_write(conn->lgr->smcd, &pos, data, len);
+ rc = smc_ism_write(conn->lgr->smcd, conn->peer_token,
+ conn->peer_rmbe_idx, signal, conn->tx_off + offset,
+ data, len);
if (rc)
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
return rc;
@@ -329,6 +379,7 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
dma_addr_t dma_addr =
sg_dma_address(conn->sndbuf_desc->sgt[link->link_idx].sgl);
+ u64 virt_addr = (uintptr_t)conn->sndbuf_desc->cpu_addr;
int src_len_sum = src_len, dst_len_sum = dst_len;
int sent_count = src_off;
int srcchunk, dstchunk;
@@ -336,13 +387,25 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
int rc;
for (dstchunk = 0; dstchunk < 2; dstchunk++) {
- struct ib_sge *sge =
- wr_rdma_buf->wr_tx_rdma[dstchunk].wr.sg_list;
+ struct ib_rdma_wr *wr = &wr_rdma_buf->wr_tx_rdma[dstchunk];
+ struct ib_sge *sge = wr->wr.sg_list;
+ u64 base_addr = dma_addr;
+
+ if (dst_len < link->qp_attr.cap.max_inline_data) {
+ base_addr = virt_addr;
+ wr->wr.send_flags |= IB_SEND_INLINE;
+ } else {
+ wr->wr.send_flags &= ~IB_SEND_INLINE;
+ }
num_sges = 0;
for (srcchunk = 0; srcchunk < 2; srcchunk++) {
- sge[srcchunk].addr = dma_addr + src_off;
+ sge[srcchunk].addr = conn->sndbuf_desc->is_vm ?
+ (virt_addr + src_off) : (base_addr + src_off);
sge[srcchunk].length = src_len;
+ if (conn->sndbuf_desc->is_vm)
+ sge[srcchunk].lkey =
+ conn->sndbuf_desc->mr[link->link_idx]->lkey;
num_sges++;
src_off += src_len;
@@ -355,8 +418,7 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
src_len = dst_len - src_len; /* remainder */
src_len_sum += src_len;
}
- rc = smc_tx_rdma_write(conn, dst_off, num_sges,
- &wr_rdma_buf->wr_tx_rdma[dstchunk]);
+ rc = smc_tx_rdma_write(conn, dst_off, num_sges, wr);
if (rc)
return rc;
if (dst_len_sum == len)
@@ -576,13 +638,26 @@ static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
return rc;
}
-int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
+static int __smc_tx_sndbuf_nonempty(struct smc_connection *conn)
{
- int rc;
+ struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+ int rc = 0;
+
+ /* No data in the send queue */
+ if (unlikely(smc_tx_prepared_sends(conn) <= 0))
+ goto out;
+
+ /* Peer doesn't have RMBE space */
+ if (unlikely(atomic_read(&conn->peer_rmbe_space) <= 0)) {
+ SMC_STAT_RMB_TX_PEER_FULL(smc, !conn->lnk);
+ goto out;
+ }
if (conn->killed ||
- conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
- return -EPIPE; /* connection being aborted */
+ conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
+ rc = -EPIPE; /* connection being aborted */
+ goto out;
+ }
if (conn->lgr->is_smcd)
rc = smcd_tx_sndbuf_nonempty(conn);
else
@@ -590,34 +665,72 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
if (!rc) {
/* trigger socket release if connection is closing */
- struct smc_sock *smc = container_of(conn, struct smc_sock,
- conn);
smc_close_wake_tx_prepared(smc);
}
+
+out:
+ return rc;
+}
+
+int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
+{
+ int rc;
+
+ /* This makes sure only one sender pushes at a time, to prevent
+ * wasting CPU and CDC slots.
+ * Record whether someone has tried to push while we are pushing.
+ */
+ if (atomic_inc_return(&conn->tx_pushing) > 1)
+ return 0;
+
+again:
+ atomic_set(&conn->tx_pushing, 1);
+ smp_wmb(); /* Make sure tx_pushing is 1 before real send */
+ rc = __smc_tx_sndbuf_nonempty(conn);
+
+ /* We need to check whether someone else has added data into
+ * the send queue and tried to push, but failed after the atomic_set()
+ * while we were pushing.
+ * If so, we need to push again to prevent that data from hanging
+ * in the send queue.
+ */
+ if (unlikely(!atomic_dec_and_test(&conn->tx_pushing)))
+ goto again;
+
return rc;
}
/* Wakeup sndbuf consumers from process context
- * since there is more data to transmit
+ * since there is more data to transmit. The caller
+ * must hold the sock lock.
*/
-void smc_tx_work(struct work_struct *work)
+void smc_tx_pending(struct smc_connection *conn)
{
- struct smc_connection *conn = container_of(to_delayed_work(work),
- struct smc_connection,
- tx_work);
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
int rc;
- lock_sock(&smc->sk);
if (smc->sk.sk_err)
- goto out;
+ return;
rc = smc_tx_sndbuf_nonempty(conn);
if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
!atomic_read(&conn->bytes_to_rcv))
conn->local_rx_ctrl.prod_flags.write_blocked = 0;
+}
-out:
+/* Wakeup sndbuf consumers from process context
+ * since there is more data to transmit; takes and
+ * releases the sock lock itself.
+ */
+void smc_tx_work(struct work_struct *work)
+{
+ struct smc_connection *conn = container_of(to_delayed_work(work),
+ struct smc_connection,
+ tx_work);
+ struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+
+ lock_sock(&smc->sk);
+ smc_tx_pending(conn);
release_sock(&smc->sk);
}
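
The tx_pushing counter introduced above is a lock-free single-pusher handover: the first caller of smc_tx_sndbuf_nonempty() becomes the pusher, concurrent callers merely leave the counter above one and return, and the pusher loops until its decrement observes no late arrivals. A rough user-space model of that protocol with C11 atomics; push_once() is a hypothetical stand-in for __smc_tx_sndbuf_nonempty():

#include <stdatomic.h>
#include <stdio.h>

static atomic_int tx_pushing;

/* Stand-in for __smc_tx_sndbuf_nonempty(): pretend to flush the queue. */
static int push_once(void)
{
	puts("pushing queued data");
	return 0;
}

/* Models smc_tx_sndbuf_nonempty(): only one thread pushes at a time;
 * a concurrent caller leaves a mark by bumping tx_pushing, and the
 * active pusher re-runs until the decrement-and-test shows nobody
 * arrived while it was pushing. */
static int sndbuf_nonempty(void)
{
	int rc;

	/* atomic_inc_return() > 1: someone else is already pushing */
	if (atomic_fetch_add(&tx_pushing, 1) + 1 > 1)
		return 0;
again:
	atomic_store(&tx_pushing, 1);
	rc = push_once();
	/* atomic_dec_and_test(): did anyone try to push meanwhile? */
	if (atomic_fetch_sub(&tx_pushing, 1) - 1 != 0)
		goto again;
	return rc;
}

int main(void)
{
	sndbuf_nonempty();
	return 0;
}

The retry loop is what keeps late sendmsg() data from being stranded: a caller who lost the race still forced the counter above one, so the winner pushes once more on its behalf.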
diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h
index 07e6ad76224a..34b578498b1f 100644
--- a/net/smc/smc_tx.h
+++ b/net/smc/smc_tx.h
@@ -27,9 +27,12 @@ static inline int smc_tx_prepared_sends(struct smc_connection *conn)
return smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep);
}
+void smc_tx_pending(struct smc_connection *conn);
void smc_tx_work(struct work_struct *work);
void smc_tx_init(struct smc_sock *smc);
int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len);
+int smc_tx_sendpage(struct smc_sock *smc, struct page *page, int offset,
+ size_t size, int flags);
int smc_tx_sndbuf_nonempty(struct smc_connection *conn);
void smc_tx_sndbuf_nonfull(struct smc_sock *smc);
void smc_tx_consumer_update(struct smc_connection *conn, bool force);
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 24be1d03fef9..b0678a417e09 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -454,6 +454,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
for (i = 0; i < num; i++) {
link = wc[i].qp->qp_context;
+ link->wr_rx_id_compl = wc[i].wr_id;
if (wc[i].status == IB_WC_SUCCESS) {
link->wr_rx_tstamp = jiffies;
smc_wr_rx_demultiplex(&wc[i]);
@@ -465,6 +466,8 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
case IB_WC_RNR_RETRY_EXC_ERR:
case IB_WC_WR_FLUSH_ERR:
smcr_link_down_cond_sched(link);
+ if (link->wr_rx_id_compl == link->wr_rx_id)
+ wake_up(&link->wr_rx_empty_wait);
break;
default:
smc_wr_rx_post(link); /* refill WR RX */
@@ -554,10 +557,11 @@ void smc_wr_remember_qp_attr(struct smc_link *lnk)
static void smc_wr_init_sge(struct smc_link *lnk)
{
int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1;
+ bool send_inline = (lnk->qp_attr.cap.max_inline_data > SMC_WR_TX_SIZE);
u32 i;
for (i = 0; i < lnk->wr_tx_cnt; i++) {
- lnk->wr_tx_sges[i].addr =
+ lnk->wr_tx_sges[i].addr = send_inline ? (uintptr_t)(&lnk->wr_tx_bufs[i]) :
lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE;
lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE;
lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
@@ -575,6 +579,8 @@ static void smc_wr_init_sge(struct smc_link *lnk)
lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;
lnk->wr_tx_ibs[i].send_flags =
IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ if (send_inline)
+ lnk->wr_tx_ibs[i].send_flags |= IB_SEND_INLINE;
lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE;
lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE;
lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list =
@@ -636,6 +642,7 @@ void smc_wr_free_link(struct smc_link *lnk)
return;
ibdev = lnk->smcibdev->ibdev;
+ smc_wr_drain_cq(lnk);
smc_wr_wakeup_reg_wait(lnk);
smc_wr_wakeup_tx_wait(lnk);
@@ -886,6 +893,7 @@ int smc_wr_create_link(struct smc_link *lnk)
atomic_set(&lnk->wr_tx_refcnt, 0);
init_waitqueue_head(&lnk->wr_reg_wait);
atomic_set(&lnk->wr_reg_refcnt, 0);
+ init_waitqueue_head(&lnk->wr_rx_empty_wait);
return rc;
dma_unmap:
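
The send_inline switch above exploits a verbs property: when a QP's max_inline_data covers the whole buffer, the HCA copies the payload at post time, so the SGE may carry a plain kernel virtual address instead of a DMA-mapped one. A simplified sketch of that address selection, with stand-in structures rather than the real verbs types (buffer size is illustrative):

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the verbs structures involved. */
struct sge { uint64_t addr; uint32_t length; };

struct link {
	uint32_t max_inline_data;	/* qp_attr.cap.max_inline_data */
	unsigned char buf[44];		/* one send buffer */
	uint64_t buf_dma_addr;		/* device-visible DMA address */
};

/* If the whole buffer can be inlined, point the SGE at the CPU
 * virtual address; otherwise use the DMA mapping - the logic behind
 * the send_inline switch in smc_wr_init_sge(). */
static void init_sge(struct link *lnk, struct sge *sge)
{
	int send_inline = lnk->max_inline_data > sizeof(lnk->buf);

	sge->addr = send_inline ? (uintptr_t)lnk->buf : lnk->buf_dma_addr;
	sge->length = sizeof(lnk->buf);
}

int main(void)
{
	struct link lnk = { .max_inline_data = 128, .buf_dma_addr = 0x1000 };
	struct sge sge;

	init_sge(&lnk, &sge);
	printf("sge.addr uses %s address\n",
	       sge.addr == (uintptr_t)lnk.buf ? "virtual" : "DMA");
	return 0;
}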
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 47512ccce5ef..45e9b894d3f8 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -73,6 +73,11 @@ static inline void smc_wr_tx_link_put(struct smc_link *link)
wake_up_all(&link->wr_tx_wait);
}
+static inline void smc_wr_drain_cq(struct smc_link *lnk)
+{
+ wait_event(lnk->wr_rx_empty_wait, lnk->wr_rx_id_compl == lnk->wr_rx_id);
+}
+
static inline void smc_wr_wakeup_tx_wait(struct smc_link *lnk)
{
wake_up_all(&lnk->wr_tx_wait);
@@ -125,10 +130,6 @@ int smc_wr_tx_v2_send(struct smc_link *link,
int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
unsigned long timeout);
void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
-void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
- smc_wr_tx_filter filter,
- smc_wr_tx_dismisser dismisser,
- unsigned long data);
void smc_wr_tx_wait_no_pending_sends(struct smc_link *link);
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler);
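
The new smc_wr_drain_cq() blocks until the receive queue is empty: the CQE handler records the ID of the last completion in wr_rx_id_compl and wakes the waiter once it matches the last posted ID. A rough user-space analogue of this wait_event()/wake_up() pairing, using a mutex and condition variable (names and IDs are illustrative; build with -pthread):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t empty = PTHREAD_COND_INITIALIZER;
static unsigned long posted_id = 3;	/* pretend three receives posted */
static unsigned long completed_id;

/* Completion handler: record the ID and wake the drainer once the
 * last outstanding work request has completed, mirroring the
 * wr_rx_id_compl bookkeeping in smc_wr_rx_process_cqes(). */
static void on_completion(unsigned long wr_id)
{
	pthread_mutex_lock(&lock);
	completed_id = wr_id;
	if (completed_id == posted_id)
		pthread_cond_broadcast(&empty);
	pthread_mutex_unlock(&lock);
}

static void *completer(void *arg)
{
	unsigned long id;

	(void)arg;
	for (id = 1; id <= posted_id; id++)
		on_completion(id);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, completer, NULL);

	/* The condvar version of wait_event(wr_rx_empty_wait, ...):
	 * sleep until every posted request has a completion. */
	pthread_mutex_lock(&lock);
	while (completed_id != posted_id)
		pthread_cond_wait(&empty, &lock);
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	puts("CQ drained");
	return 0;
}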
diff --git a/net/socket.c b/net/socket.c
index 50cf75730fd7..00da9ce3dba0 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -301,7 +301,7 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
{
struct socket_alloc *ei;
- ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
+ ei = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
init_waitqueue_head(&ei->socket.wq.wait);
@@ -355,7 +355,7 @@ static const struct super_operations sockfs_ops = {
*/
static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
{
- return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
+ return dynamic_dname(buffer, buflen, "socket:[%lu]",
d_inode(dentry)->i_ino);
}
@@ -504,7 +504,7 @@ static int sock_map_fd(struct socket *sock, int flags)
struct socket *sock_from_file(struct file *file)
{
if (file->f_op == &socket_file_ops)
- return file->private_data; /* set in sock_map_fd */
+ return file->private_data; /* set in sock_alloc_file */
return NULL;
}
@@ -683,9 +683,18 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
{
u8 flags = *tx_flags;
- if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
+ if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) {
flags |= SKBTX_HW_TSTAMP;
+ /* PTP hardware clocks can provide a free-running cycle counter
+ * as a time base for virtual clocks. Tell the driver to use the
+ * free-running cycle counter for timestamps if the socket is
+ * bound to a virtual clock.
+ */
+ if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
+ flags |= SKBTX_HW_TSTAMP_USE_CYCLES;
+ }
+
if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
flags |= SKBTX_SW_TSTAMP;
@@ -796,7 +805,28 @@ static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
}
-static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
+static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
+{
+ bool cycles = sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC;
+ struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
+ struct net_device *orig_dev;
+ ktime_t hwtstamp;
+
+ rcu_read_lock();
+ orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
+ if (orig_dev) {
+ *if_index = orig_dev->ifindex;
+ hwtstamp = netdev_get_tstamp(orig_dev, shhwtstamps, cycles);
+ } else {
+ hwtstamp = shhwtstamps->hwtstamp;
+ }
+ rcu_read_unlock();
+
+ return hwtstamp;
+}
+
+static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb,
+ int if_index)
{
struct scm_ts_pktinfo ts_pktinfo;
struct net_device *orig_dev;
@@ -806,11 +836,14 @@ static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
- rcu_read_lock();
- orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
- if (orig_dev)
- ts_pktinfo.if_index = orig_dev->ifindex;
- rcu_read_unlock();
+ if (!if_index) {
+ rcu_read_lock();
+ orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
+ if (orig_dev)
+ if_index = orig_dev->ifindex;
+ rcu_read_unlock();
+ }
+ ts_pktinfo.if_index = if_index;
ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
@@ -830,6 +863,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
int empty = 1, false_tstamp = 0;
struct skb_shared_hwtstamps *shhwtstamps =
skb_hwtstamps(skb);
+ int if_index;
ktime_t hwtstamp;
/* Race occurred between timestamp enabling and packet
@@ -878,18 +912,22 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
if (shhwtstamps &&
(sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
!skb_is_swtx_tstamp(skb, false_tstamp)) {
- if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
- hwtstamp = ptp_convert_timestamp(shhwtstamps,
- sk->sk_bind_phc);
+ if_index = 0;
+ if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
+ hwtstamp = get_timestamp(sk, skb, &if_index);
else
hwtstamp = shhwtstamps->hwtstamp;
+ if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
+ hwtstamp = ptp_convert_timestamp(&hwtstamp,
+ sk->sk_bind_phc);
+
if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
empty = 0;
if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
!skb_is_err_queue(skb))
- put_ts_pktinfo(msg, skb);
+ put_ts_pktinfo(msg, skb, if_index);
}
}
if (!empty) {
@@ -930,13 +968,22 @@ static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
}
-void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
- struct sk_buff *skb)
+static void sock_recv_mark(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb)
+{
+ if (sock_flag(sk, SOCK_RCVMARK) && skb)
+ put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32),
+ &skb->mark);
+}
+
+void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb)
{
sock_recv_timestamp(msg, sk, skb);
sock_recv_drops(msg, sk, skb);
+ sock_recv_mark(msg, sk, skb);
}
-EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
+EXPORT_SYMBOL_GPL(__sock_recv_cmsgs);
INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
size_t, int));
@@ -1538,11 +1585,10 @@ int sock_create_kern(struct net *net, int family, int type, int protocol, struct
}
EXPORT_SYMBOL(sock_create_kern);
-int __sys_socket(int family, int type, int protocol)
+static struct socket *__sys_socket_create(int family, int type, int protocol)
{
- int retval;
struct socket *sock;
- int flags;
+ int retval;
/* Check the SOCK_* constants for consistency. */
BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
@@ -1550,17 +1596,50 @@ int __sys_socket(int family, int type, int protocol)
BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
- flags = type & ~SOCK_TYPE_MASK;
- if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
- return -EINVAL;
+ if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+ return ERR_PTR(-EINVAL);
type &= SOCK_TYPE_MASK;
+ retval = sock_create(family, type, protocol, &sock);
+ if (retval < 0)
+ return ERR_PTR(retval);
+
+ return sock;
+}
+
+struct file *__sys_socket_file(int family, int type, int protocol)
+{
+ struct socket *sock;
+ struct file *file;
+ int flags;
+
+ sock = __sys_socket_create(family, type, protocol);
+ if (IS_ERR(sock))
+ return ERR_CAST(sock);
+
+ flags = type & ~SOCK_TYPE_MASK;
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
- retval = sock_create(family, type, protocol, &sock);
- if (retval < 0)
- return retval;
+ file = sock_alloc_file(sock, flags, NULL);
+ if (IS_ERR(file))
+ sock_release(sock);
+
+ return file;
+}
+
+int __sys_socket(int family, int type, int protocol)
+{
+ struct socket *sock;
+ int flags;
+
+ sock = __sys_socket_create(family, type, protocol);
+ if (IS_ERR(sock))
+ return PTR_ERR(sock);
+
+ flags = type & ~SOCK_TYPE_MASK;
+ if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
+ flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
}
@@ -1722,7 +1801,7 @@ int __sys_listen(int fd, int backlog)
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock) {
- somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
+ somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
if ((unsigned int)backlog > somaxconn)
backlog = somaxconn;
@@ -1799,10 +1878,8 @@ out_fd:
return ERR_PTR(err);
}
-int __sys_accept4_file(struct file *file, unsigned file_flags,
- struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen, int flags,
- unsigned long nofile)
+static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_sockaddr,
+ int __user *upeer_addrlen, int flags)
{
struct file *newfile;
int newfd;
@@ -1813,11 +1890,11 @@ int __sys_accept4_file(struct file *file, unsigned file_flags,
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
- newfd = __get_unused_fd_flags(flags, nofile);
+ newfd = get_unused_fd_flags(flags);
if (unlikely(newfd < 0))
return newfd;
- newfile = do_accept(file, file_flags, upeer_sockaddr, upeer_addrlen,
+ newfile = do_accept(file, 0, upeer_sockaddr, upeer_addrlen,
flags);
if (IS_ERR(newfile)) {
put_unused_fd(newfd);
@@ -1847,9 +1924,8 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
f = fdget(fd);
if (f.file) {
- ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
- upeer_addrlen, flags,
- rlimit(RLIMIT_NOFILE));
+ ret = __sys_accept4_file(f.file, upeer_sockaddr,
+ upeer_addrlen, flags);
fdput(f);
}
@@ -2027,6 +2103,7 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_namelen = 0;
+ msg.msg_ubuf = NULL;
if (addr) {
err = move_addr_to_kernel(addr, addr_len, &address);
if (err < 0)
@@ -2070,10 +2147,13 @@ SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
struct sockaddr __user *addr, int __user *addr_len)
{
+ struct sockaddr_storage address;
+ struct msghdr msg = {
+ /* Save some cycles and don't copy the address if not needed */
+ .msg_name = addr ? (struct sockaddr *)&address : NULL,
+ };
struct socket *sock;
struct iovec iov;
- struct msghdr msg;
- struct sockaddr_storage address;
int err, err2;
int fput_needed;
@@ -2084,14 +2164,6 @@ int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
if (!sock)
goto out;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- /* Save some cycles and don't copy the address if not needed */
- msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
- /* We assume all kernel code knows the size of sockaddr_storage */
- msg.msg_namelen = 0;
- msg.msg_iocb = NULL;
- msg.msg_flags = 0;
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
err = sock_recvmsg(sock, &msg, flags);
@@ -2284,24 +2356,20 @@ struct used_address {
unsigned int name_len;
};
-int __copy_msghdr_from_user(struct msghdr *kmsg,
- struct user_msghdr __user *umsg,
- struct sockaddr __user **save_addr,
- struct iovec __user **uiov, size_t *nsegs)
+int __copy_msghdr(struct msghdr *kmsg,
+ struct user_msghdr *msg,
+ struct sockaddr __user **save_addr)
{
- struct user_msghdr msg;
ssize_t err;
- if (copy_from_user(&msg, umsg, sizeof(*umsg)))
- return -EFAULT;
-
kmsg->msg_control_is_user = true;
- kmsg->msg_control_user = msg.msg_control;
- kmsg->msg_controllen = msg.msg_controllen;
- kmsg->msg_flags = msg.msg_flags;
+ kmsg->msg_get_inq = 0;
+ kmsg->msg_control_user = msg->msg_control;
+ kmsg->msg_controllen = msg->msg_controllen;
+ kmsg->msg_flags = msg->msg_flags;
- kmsg->msg_namelen = msg.msg_namelen;
- if (!msg.msg_name)
+ kmsg->msg_namelen = msg->msg_namelen;
+ if (!msg->msg_name)
kmsg->msg_namelen = 0;
if (kmsg->msg_namelen < 0)
@@ -2311,11 +2379,11 @@ int __copy_msghdr_from_user(struct msghdr *kmsg,
kmsg->msg_namelen = sizeof(struct sockaddr_storage);
if (save_addr)
- *save_addr = msg.msg_name;
+ *save_addr = msg->msg_name;
- if (msg.msg_name && kmsg->msg_namelen) {
+ if (msg->msg_name && kmsg->msg_namelen) {
if (!save_addr) {
- err = move_addr_to_kernel(msg.msg_name,
+ err = move_addr_to_kernel(msg->msg_name,
kmsg->msg_namelen,
kmsg->msg_name);
if (err < 0)
@@ -2326,12 +2394,11 @@ int __copy_msghdr_from_user(struct msghdr *kmsg,
kmsg->msg_namelen = 0;
}
- if (msg.msg_iovlen > UIO_MAXIOV)
+ if (msg->msg_iovlen > UIO_MAXIOV)
return -EMSGSIZE;
kmsg->msg_iocb = NULL;
- *uiov = msg.msg_iov;
- *nsegs = msg.msg_iovlen;
+ kmsg->msg_ubuf = NULL;
return 0;
}
@@ -2343,8 +2410,10 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
struct user_msghdr msg;
ssize_t err;
- err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov,
- &msg.msg_iovlen);
+ if (copy_from_user(&msg, umsg, sizeof(*umsg)))
+ return -EFAULT;
+
+ err = __copy_msghdr(kmsg, &msg, save_addr);
if (err)
return err;
@@ -3448,7 +3517,7 @@ EXPORT_SYMBOL(kernel_connect);
* @addr: address holder
*
* Fills the @addr pointer with the address which the socket is bound.
- * Returns 0 or an error code.
+ * Returns the length of the address in bytes or an error code.
*/
int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
@@ -3463,7 +3532,7 @@ EXPORT_SYMBOL(kernel_getsockname);
* @addr: address holder
*
* Fills the @addr pointer with the address which the socket is connected.
- * Returns 0 or an error code.
+ * Returns the length of the address in bytes or an error code.
*/
int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
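
Several of the socket.c changes above lean on the kernel's error-pointer convention: __sys_socket_create() returns either a valid struct socket * or a negative errno encoded in the pointer itself, which callers unpack with IS_ERR()/PTR_ERR(). A self-contained sketch of those helpers (simplified; the kernel reserves the top 4095 address values for this encoding):

#include <stdio.h>

#define MAX_ERRNO 4095

/* Minimal model of the kernel's ERR_PTR/IS_ERR/PTR_ERR helpers:
 * negative errnos are folded into otherwise-invalid pointer values
 * at the very top of the address space. */
static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Hypothetical creator in the style of __sys_socket_create(). */
static void *create_thing(int ok)
{
	static int thing = 42;

	return ok ? (void *)&thing : ERR_PTR(-22 /* -EINVAL */);
}

int main(void)
{
	void *p = create_thing(0);

	if (IS_ERR(p))
		printf("failed: %ld\n", PTR_ERR(p));	/* failed: -22 */

	p = create_thing(1);
	if (!IS_ERR(p))
		printf("got %d\n", *(int *)p);		/* got 42 */
	return 0;
}

The pattern lets one return value carry both the success payload and the error code, which is why the refactored __sys_socket() and __sys_socket_file() can share a single creation helper.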
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 1a72c67afed5..8299ceb3e373 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -533,6 +533,9 @@ EXPORT_SYMBOL_GPL(strp_check_rcv);
static int __init strp_dev_init(void)
{
+ BUILD_BUG_ON(sizeof(struct sk_skb_cb) >
+ sizeof_field(struct sk_buff, cb));
+
strp_wq = create_singlethread_workqueue("kstrp");
if (unlikely(!strp_wq))
return -ENOMEM;
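
The BUILD_BUG_ON() added to strp_dev_init() turns a size invariant - the parser's control block must fit inside skb->cb - into a compile-time failure. In plain C11 the same idea can be expressed with static_assert; the struct names below are stand-ins, not the kernel's:

#include <assert.h>	/* static_assert (C11) */

/* Stand-ins: a 48-byte control buffer and a state struct that must
 * fit inside it, mirroring sk_buff.cb and sk_skb_cb. */
struct fake_skb { unsigned char cb[48]; };
struct parser_state { unsigned long offset, len, flags; };

/* The C11 equivalent of BUILD_BUG_ON(sizeof(a) > sizeof(b)):
 * compilation fails if the state struct outgrows the buffer. */
static_assert(sizeof(struct parser_state) <=
	      sizeof(((struct fake_skb *)0)->cb),
	      "parser state must fit in skb->cb");

int main(void) { return 0; }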
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index a9f0d17fdb0d..fb75a883503f 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -445,7 +445,7 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
* Enforce a 60 second garbage collection moratorium
* Note that the cred_unused list must be time-ordered.
*/
- if (!time_in_range(cred->cr_expire, expired, jiffies))
+ if (time_in_range(cred->cr_expire, expired, jiffies))
continue;
if (!rpcauth_unhash_cred(cred))
continue;
@@ -615,6 +615,8 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
};
struct rpc_cred *ret;
+ if (RPC_IS_ASYNC(task))
+ lookupflags |= RPCAUTH_LOOKUP_ASYNC;
ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
put_cred(acred.cred);
return ret;
@@ -631,6 +633,8 @@ rpcauth_bind_machine_cred(struct rpc_task *task, int lookupflags)
if (!acred.principal)
return NULL;
+ if (RPC_IS_ASYNC(task))
+ lookupflags |= RPCAUTH_LOOKUP_ASYNC;
return auth->au_ops->lookup_cred(auth, &acred, lookupflags);
}
@@ -654,7 +658,7 @@ rpcauth_bindcred(struct rpc_task *task, const struct cred *cred, int flags)
};
if (flags & RPC_TASK_ASYNC)
- lookupflags |= RPCAUTH_LOOKUP_NEW;
+ lookupflags |= RPCAUTH_LOOKUP_NEW | RPCAUTH_LOOKUP_ASYNC;
if (task->tk_op_cred)
/* Task must use exactly this rpc_cred */
new = get_rpccred(task->tk_op_cred);
@@ -666,7 +670,7 @@ rpcauth_bindcred(struct rpc_task *task, const struct cred *cred, int flags)
/* If machine cred couldn't be bound, try a root cred */
if (new)
;
- else if (cred == &machine_cred || (flags & RPC_TASK_ROOTCREDS))
+ else if (cred == &machine_cred)
new = rpcauth_bind_root_cred(task, lookupflags);
else if (flags & RPC_TASK_NULLCREDS)
new = authnull_ops.lookup_cred(NULL, NULL, 0);
@@ -870,7 +874,7 @@ int __init rpcauth_init_module(void)
err = rpc_init_authunix();
if (err < 0)
goto out1;
- err = register_shrinker(&rpc_cred_shrinker);
+ err = register_shrinker(&rpc_cred_shrinker, "sunrpc_cred");
if (err < 0)
goto out2;
return 0;
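
The sense of the time_in_range() test above was inverted: credentials whose expiry still falls inside the 60-second window [expired, jiffies] must be skipped, not reclaimed. These jiffies helpers stay correct across counter wraparound because they compare via signed subtraction; a sketch under the usual definitions:

#include <stdio.h>

/* Wraparound-safe jiffies comparisons, in the style of
 * include/linux/jiffies.h: signed subtraction keeps the test valid
 * even when the counter overflows, as long as the two values are
 * less than half a period apart. */
static int time_after_eq(unsigned long a, unsigned long b)
{
	return (long)(a - b) >= 0;
}

static int time_before_eq(unsigned long a, unsigned long b)
{
	return time_after_eq(b, a);
}

static int time_in_range(unsigned long a, unsigned long b, unsigned long c)
{
	return time_after_eq(a, b) && time_before_eq(a, c);
}

int main(void)
{
	unsigned long jiffies = 5;		/* just wrapped past 0 */
	unsigned long expired = jiffies - 60;	/* 60-tick moratorium */

	/* An expiry 10 ticks ago sits inside [expired, jiffies] even
	 * though the unsigned values straddle the wraparound point. */
	printf("%d\n", time_in_range(jiffies - 10, expired, jiffies)); /* 1 */
	printf("%d\n", time_in_range(jiffies - 90, expired, jiffies)); /* 0 */
	return 0;
}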
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 5f42aa5fc612..7bb247c51e2f 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -72,7 +72,8 @@ struct gss_auth {
struct gss_api_mech *mech;
enum rpc_gss_svc service;
struct rpc_clnt *client;
- struct net *net;
+ struct net *net;
+ netns_tracker ns_tracker;
/*
* There are two upcall pipes; dentry[1], named "gssd", is used
* for the new text-based upcall; dentry[0] is named after the
@@ -145,7 +146,7 @@ gss_alloc_context(void)
{
struct gss_cl_ctx *ctx;
- ctx = kzalloc(sizeof(*ctx), GFP_NOFS);
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (ctx != NULL) {
ctx->gc_proc = RPC_GSS_PROC_DATA;
ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */
@@ -208,7 +209,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
p = ERR_PTR(-EFAULT);
goto err;
}
- ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS);
+ ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_KERNEL);
if (ret < 0) {
trace_rpcgss_import_ctx(ret);
p = ERR_PTR(ret);
@@ -510,7 +511,7 @@ gss_alloc_msg(struct gss_auth *gss_auth,
int vers;
int err = -ENOMEM;
- gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS);
+ gss_msg = kzalloc(sizeof(*gss_msg), GFP_KERNEL);
if (gss_msg == NULL)
goto err;
vers = get_pipe_version(gss_auth->net);
@@ -526,7 +527,7 @@ gss_alloc_msg(struct gss_auth *gss_auth,
gss_msg->auth = gss_auth;
kref_get(&gss_auth->kref);
if (service_name) {
- gss_msg->service_name = kstrdup_const(service_name, GFP_NOFS);
+ gss_msg->service_name = kstrdup_const(service_name, GFP_KERNEL);
if (!gss_msg->service_name) {
err = -ENOMEM;
goto err_put_pipe_version;
@@ -702,7 +703,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
if (mlen > MSG_BUF_MAXSIZE)
goto out;
err = -ENOMEM;
- buf = kmalloc(mlen, GFP_NOFS);
+ buf = kmalloc(mlen, GFP_KERNEL);
if (!buf)
goto out;
@@ -1013,7 +1014,8 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
goto err_free;
}
gss_auth->client = clnt;
- gss_auth->net = get_net(rpc_net_ns(clnt));
+ gss_auth->net = get_net_track(rpc_net_ns(clnt), &gss_auth->ns_tracker,
+ GFP_KERNEL);
err = -EINVAL;
gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
if (!gss_auth->mech)
@@ -1068,7 +1070,7 @@ err_destroy_credcache:
err_put_mech:
gss_mech_put(gss_auth->mech);
err_put_net:
- put_net(gss_auth->net);
+ put_net_track(gss_auth->net, &gss_auth->ns_tracker);
err_free:
kfree(gss_auth->target_name);
kfree(gss_auth);
@@ -1084,7 +1086,7 @@ gss_free(struct gss_auth *gss_auth)
gss_pipe_free(gss_auth->gss_pipe[0]);
gss_pipe_free(gss_auth->gss_pipe[1]);
gss_mech_put(gss_auth->mech);
- put_net(gss_auth->net);
+ put_net_track(gss_auth->net, &gss_auth->ns_tracker);
kfree(gss_auth->target_name);
kfree(gss_auth);
@@ -1218,7 +1220,7 @@ gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
struct gss_cred *new;
/* Make a copy of the cred so that we can reference count it */
- new = kzalloc(sizeof(*gss_cred), GFP_NOFS);
+ new = kzalloc(sizeof(*gss_cred), GFP_KERNEL);
if (new) {
struct auth_cred acred = {
.cred = gss_cred->gc_base.cr_cred,
@@ -1338,10 +1340,11 @@ gss_hash_cred(struct auth_cred *acred, unsigned int hashbits)
/*
* Lookup RPCSEC_GSS cred for the current process
*/
-static struct rpc_cred *
-gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
+static struct rpc_cred *gss_lookup_cred(struct rpc_auth *auth,
+ struct auth_cred *acred, int flags)
{
- return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS);
+ return rpcauth_lookup_credcache(auth, acred, flags,
+ rpc_task_gfp_mask());
}
static struct rpc_cred *
@@ -1667,7 +1670,7 @@ gss_validate(struct rpc_task *task, struct xdr_stream *xdr)
if (!p)
goto validate_failed;
- seq = kmalloc(4, GFP_NOFS);
+ seq = kmalloc(4, GFP_KERNEL);
if (!seq)
goto validate_failed;
*seq = cpu_to_be32(task->tk_rqstp->rq_seqno);
@@ -1777,11 +1780,11 @@ alloc_enc_pages(struct rpc_rqst *rqstp)
rqstp->rq_enc_pages
= kmalloc_array(rqstp->rq_enc_pages_num,
sizeof(struct page *),
- GFP_NOFS);
+ GFP_KERNEL);
if (!rqstp->rq_enc_pages)
goto out;
for (i=0; i < rqstp->rq_enc_pages_num; i++) {
- rqstp->rq_enc_pages[i] = alloc_page(GFP_NOFS);
+ rqstp->rq_enc_pages[i] = alloc_page(GFP_KERNEL);
if (rqstp->rq_enc_pages[i] == NULL)
goto out_free;
}
@@ -1985,8 +1988,8 @@ gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred,
if (offset + len > rcv_buf->len)
goto unwrap_failed;
mic.len = len;
- mic.data = kmalloc(len, GFP_NOFS);
- if (!mic.data)
+ mic.data = kmalloc(len, GFP_KERNEL);
+ if (ZERO_OR_NULL_PTR(mic.data))
goto unwrap_failed;
if (read_bytes_from_xdr_buf(rcv_buf, offset, mic.data, mic.len))
goto unwrap_failed;
diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h
index f6d9631bd9d0..c53b329092d4 100644
--- a/net/sunrpc/auth_gss/auth_gss_internal.h
+++ b/net/sunrpc/auth_gss/auth_gss_internal.h
@@ -35,7 +35,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
if (unlikely(q > end || q < p))
return ERR_PTR(-EFAULT);
if (len) {
- dest->data = kmemdup(p, len, GFP_NOFS);
+ dest->data = kmemdup(p, len, GFP_KERNEL);
if (unlikely(dest->data == NULL))
return ERR_PTR(-ENOMEM);
} else
diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c
index fe97f3106536..4a4082bb22ad 100644
--- a/net/sunrpc/auth_gss/gss_generic_token.c
+++ b/net/sunrpc/auth_gss/gss_generic_token.c
@@ -222,10 +222,8 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size,
if (ret)
return ret;
- if (!ret) {
- *buf_in = buf;
- *body_size = toksize;
- }
+ *buf_in = buf;
+ *body_size = toksize;
return ret;
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 634b6c6e0dcb..3ea58175e159 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -161,7 +161,7 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
return GSS_S_FAILURE;
}
- checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS);
+ checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_KERNEL);
if (checksumdata == NULL)
return GSS_S_FAILURE;
@@ -169,7 +169,7 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
if (IS_ERR(tfm))
goto out_free_cksum;
- req = ahash_request_alloc(tfm, GFP_NOFS);
+ req = ahash_request_alloc(tfm, GFP_KERNEL);
if (!req)
goto out_free_ahash;
@@ -257,7 +257,7 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
return GSS_S_FAILURE;
}
- checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS);
+ checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_KERNEL);
if (!checksumdata)
return GSS_S_FAILURE;
@@ -265,7 +265,7 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
if (IS_ERR(tfm))
goto out_free_cksum;
- req = ahash_request_alloc(tfm, GFP_NOFS);
+ req = ahash_request_alloc(tfm, GFP_KERNEL);
if (!req)
goto out_free_ahash;
@@ -554,7 +554,7 @@ gss_krb5_cts_crypt(struct crypto_sync_skcipher *cipher, struct xdr_buf *buf,
WARN_ON(0);
return -ENOMEM;
}
- data = kmalloc(GSS_KRB5_MAX_BLOCKSIZE * 2, GFP_NOFS);
+ data = kmalloc(GSS_KRB5_MAX_BLOCKSIZE * 2, GFP_KERNEL);
if (!data)
return -ENOMEM;
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index fb117817ff5d..3200b971a814 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -49,7 +49,7 @@ krb5_make_seq_num(struct krb5_ctx *kctx,
unsigned char *plain;
s32 code;
- plain = kmalloc(8, GFP_NOFS);
+ plain = kmalloc(8, GFP_KERNEL);
if (!plain)
return -ENOMEM;
@@ -80,7 +80,7 @@ krb5_get_seq_num(struct krb5_ctx *kctx,
dprintk("RPC: krb5_get_seq_num:\n");
- plain = kmalloc(8, GFP_NOFS);
+ plain = kmalloc(8, GFP_KERNEL);
if (!plain)
return -ENOMEM;
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index e95c009bb869..48337687848c 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -130,8 +130,8 @@ gss_krb5_make_confounder(char *p, u32 conflen)
/* initialize to random value */
if (i == 0) {
- i = prandom_u32();
- i = (i << 32) | prandom_u32();
+ i = get_random_u32();
+ i = (i << 32) | get_random_u32();
}
switch (conflen) {
@@ -409,7 +409,7 @@ static u32
gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
struct xdr_buf *buf, struct page **pages)
{
- u8 *ptr, *plainhdr;
+ u8 *ptr;
time64_t now;
u8 flags = 0x00;
__be16 *be16ptr;
@@ -426,7 +426,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
return GSS_S_FAILURE;
/* construct gss token header */
- ptr = plainhdr = buf->head[0].iov_base + offset;
+ ptr = buf->head[0].iov_base + offset;
*ptr++ = (unsigned char) ((KG2_TOK_WRAP>>8) & 0xff);
*ptr++ = (unsigned char) (KG2_TOK_WRAP & 0xff);
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index 61c276bddaf2..f549e4c05def 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -98,6 +98,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt)
* done without the correct namespace:
*/
.flags = RPC_CLNT_CREATE_NOPING |
+ RPC_CLNT_CREATE_CONNECTED |
RPC_CLNT_CREATE_NO_IDLE_TIMEOUT
};
struct rpc_clnt *clnt;
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index b87565b64928..bcd74dddbe2d 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -900,7 +900,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
* rejecting the server-computed MIC in this somewhat rare case,
* do not use splice with the GSS integrity service.
*/
- clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* Did we already verify the signature on the original pass through? */
if (rqstp->rq_deferred)
@@ -972,7 +972,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
int pad, remaining_len, offset;
u32 rseqno;
- clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
priv_len = svc_getnl(&buf->head[0]);
if (rqstp->rq_deferred) {
@@ -1433,7 +1433,7 @@ static bool use_gss_proxy(struct net *net)
static ssize_t write_gssp(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct net *net = PDE_DATA(file_inode(file));
+ struct net *net = pde_data(file_inode(file));
char tbuf[20];
unsigned long i;
int res;
@@ -1461,7 +1461,7 @@ static ssize_t write_gssp(struct file *file, const char __user *buf,
static ssize_t read_gssp(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
- struct net *net = PDE_DATA(file_inode(file));
+ struct net *net = pde_data(file_inode(file));
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
unsigned long p = *ppos;
char tbuf[10];
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index e7df1f782b2e..1e091d3fa607 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -40,11 +40,19 @@ unx_destroy(struct rpc_auth *auth)
/*
* Lookup AUTH_UNIX creds for current process
*/
-static struct rpc_cred *
-unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
+static struct rpc_cred *unx_lookup_cred(struct rpc_auth *auth,
+ struct auth_cred *acred, int flags)
{
- struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_NOFS);
-
+ struct rpc_cred *ret;
+
+ ret = kmalloc(sizeof(*ret), rpc_task_gfp_mask());
+ if (!ret) {
+ if (!(flags & RPCAUTH_LOOKUP_ASYNC))
+ return ERR_PTR(-ENOMEM);
+ ret = mempool_alloc(unix_pool, GFP_NOWAIT);
+ if (!ret)
+ return ERR_PTR(-ENOMEM);
+ }
rpcauth_init_cred(ret, acred, auth, &unix_credops);
ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
return ret;
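
The reworked unx_lookup_cred() above tries an ordinary allocation first and reserves the emergency mempool for async tasks that must not fail outright. A toy model of that try-then-fallback shape; the one-slot "pool" here is purely illustrative:

#include <stdio.h>
#include <stdlib.h>

struct cred { int id; };

/* A one-slot stand-in for the emergency mempool. */
static struct cred emergency_slot;
static int emergency_in_use;

static struct cred *pool_alloc(void)
{
	if (emergency_in_use)
		return NULL;
	emergency_in_use = 1;
	return &emergency_slot;
}

/* Try the ordinary allocator first; only asynchronous callers that
 * cannot wait are allowed to dip into the reserved pool, mirroring
 * the RPCAUTH_LOOKUP_ASYNC fallback in unx_lookup_cred(). */
static struct cred *lookup_cred(int async)
{
	struct cred *ret = malloc(sizeof(*ret));

	if (!ret) {
		if (!async)
			return NULL;	/* sync caller: report -ENOMEM */
		ret = pool_alloc();	/* async caller: emergency pool */
	}
	return ret;
}

int main(void)
{
	struct cred *c = lookup_cred(1);

	printf("%s\n", c ? "allocated" : "out of memory");
	if (c && c != &emergency_slot)
		free(c);
	return 0;
}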
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 22a2c235abf1..65a6c6429a53 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -1,23 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
(c) 2007 Network Appliance, Inc. All Rights Reserved.
(c) 2009 NetApp. All Rights Reserved.
-NetApp provides this source code under the GPL v2 License.
-The GPL v2 license is available at
-https://opensource.org/licenses/gpl-license.php.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
@@ -64,6 +50,17 @@ static void xprt_free_allocation(struct rpc_rqst *req)
kfree(req);
}
+static void xprt_bc_reinit_xdr_buf(struct xdr_buf *buf)
+{
+ buf->head[0].iov_len = PAGE_SIZE;
+ buf->tail[0].iov_len = 0;
+ buf->pages = NULL;
+ buf->page_len = 0;
+ buf->flags = 0;
+ buf->len = 0;
+ buf->buflen = PAGE_SIZE;
+}
+
static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags)
{
struct page *page;
@@ -75,9 +72,9 @@ static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags)
return 0;
}
-static
-struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags)
+static struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt)
{
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
struct rpc_rqst *req;
/* Pre-allocate one backchannel rpc_rqst */
@@ -154,7 +151,7 @@ int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs)
INIT_LIST_HEAD(&tmp_list);
for (i = 0; i < min_reqs; i++) {
/* Pre-allocate one backchannel rpc_rqst */
- req = xprt_alloc_bc_req(xprt, GFP_KERNEL);
+ req = xprt_alloc_bc_req(xprt);
if (req == NULL) {
printk(KERN_ERR "Failed to create bc rpc_rqst\n");
goto out_free;
@@ -292,6 +289,9 @@ void xprt_free_bc_rqst(struct rpc_rqst *req)
*/
spin_lock_bh(&xprt->bc_pa_lock);
if (xprt_need_to_requeue(xprt)) {
+ xprt_bc_reinit_xdr_buf(&req->rq_snd_buf);
+ xprt_bc_reinit_xdr_buf(&req->rq_rcv_buf);
+ req->rq_rcv_buf.len = PAGE_SIZE;
list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
xprt->bc_alloc_count++;
atomic_inc(&xprt->bc_slot_count);
@@ -343,7 +343,7 @@ found:
break;
} else if (req)
break;
- new = xprt_alloc_bc_req(xprt, GFP_KERNEL);
+ new = xprt_alloc_bc_req(xprt);
} while (new);
return req;
}
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 59641803472c..f075a9fb5ccc 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -33,7 +33,9 @@
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <trace/events/sunrpc.h>
+
#include "netns.h"
+#include "fail.h"
#define RPCDBG_FACILITY RPCDBG_CACHE
@@ -675,7 +677,7 @@ static void cache_limit_defers(void)
/* Consider removing either the first or the last */
if (cache_defer_cnt > DFR_MAX) {
- if (prandom_u32() & 1)
+ if (prandom_u32_max(2))
discard = list_entry(cache_defer_list.next,
struct cache_deferred_req, recent);
else
@@ -688,16 +690,30 @@ static void cache_limit_defers(void)
discard->revisit(discard, 1);
}
+#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
+static inline bool cache_defer_immediately(void)
+{
+ return !fail_sunrpc.ignore_cache_wait &&
+ should_fail(&fail_sunrpc.attr, 1);
+}
+#else
+static inline bool cache_defer_immediately(void)
+{
+ return false;
+}
+#endif
+
/* Return true if and only if a deferred request is queued. */
static bool cache_defer_req(struct cache_req *req, struct cache_head *item)
{
struct cache_deferred_req *dreq;
- if (req->thread_wait) {
+ if (!cache_defer_immediately()) {
cache_wait_req(req, item);
if (!test_bit(CACHE_PENDING, &item->flags))
return false;
}
+
dreq = req->defer(req);
if (dreq == NULL)
return false;
@@ -1536,7 +1552,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
static ssize_t cache_read_procfs(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return cache_read(filp, buf, count, ppos, cd);
}
@@ -1544,14 +1560,14 @@ static ssize_t cache_read_procfs(struct file *filp, char __user *buf,
static ssize_t cache_write_procfs(struct file *filp, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return cache_write(filp, buf, count, ppos, cd);
}
static __poll_t cache_poll_procfs(struct file *filp, poll_table *wait)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return cache_poll(filp, wait, cd);
}
@@ -1560,21 +1576,21 @@ static long cache_ioctl_procfs(struct file *filp,
unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return cache_ioctl(inode, filp, cmd, arg, cd);
}
static int cache_open_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return cache_open(inode, filp, cd);
}
static int cache_release_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return cache_release(inode, filp, cd);
}
@@ -1591,14 +1607,14 @@ static const struct proc_ops cache_channel_proc_ops = {
static int content_open_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return content_open(inode, filp, cd);
}
static int content_release_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return content_release(inode, filp, cd);
}
@@ -1612,14 +1628,14 @@ static const struct proc_ops content_proc_ops = {
static int open_flush_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return open_flush(inode, filp, cd);
}
static int release_flush_procfs(struct inode *inode, struct file *filp)
{
- struct cache_detail *cd = PDE_DATA(inode);
+ struct cache_detail *cd = pde_data(inode);
return release_flush(inode, filp, cd);
}
@@ -1627,7 +1643,7 @@ static int release_flush_procfs(struct inode *inode, struct file *filp)
static ssize_t read_flush_procfs(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return read_flush(filp, buf, count, ppos, cd);
}
@@ -1636,7 +1652,7 @@ static ssize_t write_flush_procfs(struct file *filp,
const char __user *buf,
size_t count, loff_t *ppos)
{
- struct cache_detail *cd = PDE_DATA(file_inode(filp));
+ struct cache_detail *cd = pde_data(file_inode(filp));
return write_flush(filp, buf, count, ppos, cd);
}
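
The prandom_u32_max(2) call above replaces the open-coded prandom_u32() & 1 as the idiomatic way to draw a uniform value in [0, max). The bounded draw is commonly implemented by multiply-and-shift rather than modulo; a sketch with rand() standing in for the kernel's generator:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in entropy source; the kernel would use prandom_u32(). */
static uint32_t rand32(void)
{
	return ((uint32_t)rand() << 16) ^ (uint32_t)rand();
}

/* Bounded random in [0, ceil): take a 32-bit draw, multiply by the
 * bound, and keep the high 32 bits - the usual multiply-and-shift
 * scheme behind helpers like prandom_u32_max(). */
static uint32_t rand32_max(uint32_t ceil)
{
	return (uint32_t)(((uint64_t)rand32() * ceil) >> 32);
}

int main(void)
{
	int heads = 0, i;

	for (i = 0; i < 1000; i++)
		heads += rand32_max(2);	/* coin flip: 0 or 1 */
	printf("heads: %d / 1000\n", heads);
	return 0;
}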
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index a312ea2bc440..993acf38af87 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -76,6 +76,7 @@ static int rpc_encode_header(struct rpc_task *task,
static int rpc_decode_header(struct rpc_task *task,
struct xdr_stream *xdr);
static int rpc_ping(struct rpc_clnt *clnt);
+static int rpc_ping_noreply(struct rpc_clnt *clnt);
static void rpc_check_timeout(struct rpc_task *task);
static void rpc_register_client(struct rpc_clnt *clnt)
@@ -344,7 +345,7 @@ static int rpc_alloc_clid(struct rpc_clnt *clnt)
{
int clid;
- clid = ida_simple_get(&rpc_clids, 0, 0, GFP_KERNEL);
+ clid = ida_alloc(&rpc_clids, GFP_KERNEL);
if (clid < 0)
return clid;
clnt->cl_clid = clid;
@@ -353,7 +354,7 @@ static int rpc_alloc_clid(struct rpc_clnt *clnt)
static void rpc_free_clid(struct rpc_clnt *clnt)
{
- ida_simple_remove(&rpc_clids, clnt->cl_clid);
+ ida_free(&rpc_clids, clnt->cl_clid);
}
static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
@@ -483,6 +484,12 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
rpc_shutdown_client(clnt);
return ERR_PTR(err);
}
+ } else if (args->flags & RPC_CLNT_CREATE_CONNECTED) {
+ int err = rpc_ping_noreply(clnt);
+ if (err != 0) {
+ rpc_shutdown_client(clnt);
+ return ERR_PTR(err);
+ }
}
clnt->cl_softrtry = 1;
@@ -644,6 +651,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
new->cl_discrtry = clnt->cl_discrtry;
new->cl_chatty = clnt->cl_chatty;
new->cl_principal = clnt->cl_principal;
+ new->cl_max_connect = clnt->cl_max_connect;
return new;
out_err:
@@ -778,7 +786,8 @@ out_revert:
EXPORT_SYMBOL_GPL(rpc_switch_client_transport);
static
-int rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi)
+int _rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi,
+ void func(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps))
{
struct rpc_xprt_switch *xps;
@@ -787,11 +796,24 @@ int rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi)
rcu_read_unlock();
if (xps == NULL)
return -EAGAIN;
- xprt_iter_init_listall(xpi, xps);
+ func(xpi, xps);
xprt_switch_put(xps);
return 0;
}
+static
+int rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi)
+{
+ return _rpc_clnt_xprt_iter_init(clnt, xpi, xprt_iter_init_listall);
+}
+
+static
+int rpc_clnt_xprt_iter_offline_init(struct rpc_clnt *clnt,
+ struct rpc_xprt_iter *xpi)
+{
+ return _rpc_clnt_xprt_iter_init(clnt, xpi, xprt_iter_init_listoffline);
+}
+
/**
* rpc_clnt_iterate_for_each_xprt - Apply a function to all transports
* @clnt: pointer to client
@@ -851,6 +873,57 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
}
EXPORT_SYMBOL_GPL(rpc_killall_tasks);
+/**
+ * rpc_cancel_tasks - try to cancel a set of RPC tasks
+ * @clnt: Pointer to RPC client
+ * @error: RPC task error value to set
+ * @fnmatch: Pointer to selector function
+ * @data: User data
+ *
+ * Uses @fnmatch to define a set of RPC tasks that are to be cancelled.
+ * The argument @error must be a negative error value.
+ */
+unsigned long rpc_cancel_tasks(struct rpc_clnt *clnt, int error,
+ bool (*fnmatch)(const struct rpc_task *,
+ const void *),
+ const void *data)
+{
+ struct rpc_task *task;
+ unsigned long count = 0;
+
+ if (list_empty(&clnt->cl_tasks))
+ return 0;
+ /*
+ * Spin lock all_tasks to prevent changes...
+ */
+ spin_lock(&clnt->cl_lock);
+ list_for_each_entry(task, &clnt->cl_tasks, tk_task) {
+ if (!RPC_IS_ACTIVATED(task))
+ continue;
+ if (!fnmatch(task, data))
+ continue;
+ rpc_task_try_cancel(task, error);
+ count++;
+ }
+ spin_unlock(&clnt->cl_lock);
+ return count;
+}
+EXPORT_SYMBOL_GPL(rpc_cancel_tasks);
+
+static int rpc_clnt_disconnect_xprt(struct rpc_clnt *clnt,
+ struct rpc_xprt *xprt, void *dummy)
+{
+ if (xprt_connected(xprt))
+ xprt_force_disconnect(xprt);
+ return 0;
+}
+
+void rpc_clnt_disconnect(struct rpc_clnt *clnt)
+{
+ rpc_clnt_iterate_for_each_xprt(clnt, rpc_clnt_disconnect_xprt, NULL);
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_disconnect);
+
/*
* Properly shut down an RPC client, terminating all outstanding
* requests.
@@ -1065,8 +1138,13 @@ rpc_task_get_next_xprt(struct rpc_clnt *clnt)
static
void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
{
- if (task->tk_xprt)
- return;
+ if (task->tk_xprt) {
+ if (!(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) &&
+ (task->tk_flags & RPC_TASK_MOVEABLE)))
+ return;
+ xprt_release(task);
+ xprt_put(task->tk_xprt);
+ }
if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN)
task->tk_xprt = rpc_task_get_first_xprt(clnt);
else
@@ -1085,8 +1163,6 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
task->tk_flags |= RPC_TASK_TIMEOUT;
if (clnt->cl_noretranstimeo)
task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
- if (atomic_read(&clnt->cl_swapper))
- task->tk_flags |= RPC_TASK_SWAPPER;
/* Add to the client's list of all tasks */
spin_lock(&clnt->cl_lock);
list_add_tail(&task->tk_task, &clnt->cl_tasks);
@@ -1127,6 +1203,8 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
struct rpc_task *task;
task = rpc_new_task(task_setup_data);
+ if (IS_ERR(task))
+ return task;
if (!RPC_IS_ASYNC(task))
task->tk_flags |= RPC_TASK_CRED_NOREF;
@@ -1227,6 +1305,11 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
* Create an rpc_task to send the data
*/
task = rpc_new_task(&task_setup_data);
+ if (IS_ERR(task)) {
+ xprt_free_bc_request(req);
+ return task;
+ }
+
xprt_init_bc_request(req, task);
task->tk_action = call_bc_encode;
@@ -1610,7 +1693,7 @@ static void
__rpc_call_rpcerror(struct rpc_task *task, int tk_status, int rpc_status)
{
trace_rpc_call_rpcerror(task, tk_status, rpc_status);
- task->tk_rpc_status = rpc_status;
+ rpc_task_set_rpc_status(task, rpc_status);
rpc_exit(task, tk_status);
}
@@ -1745,6 +1828,9 @@ call_refreshresult(struct rpc_task *task)
task->tk_cred_retry--;
trace_rpc_retry_refresh_status(task);
return;
+ case -ENOMEM:
+ rpc_delay(task, HZ >> 4);
+ return;
}
trace_rpc_refresh_status(task);
rpc_call_rpcerror(task, status);
@@ -1835,7 +1921,6 @@ rpc_xdr_encode(struct rpc_task *task)
req->rq_snd_buf.head[0].iov_len = 0;
xdr_init_encode(&xdr, &req->rq_snd_buf,
req->rq_snd_buf.head[0].iov_base, req);
- xdr_free_bvec(&req->rq_snd_buf);
if (rpc_encode_header(task, &xdr))
return;
@@ -1855,6 +1940,9 @@ call_encode(struct rpc_task *task)
xprt_request_dequeue_xprt(task);
/* Encode here so that rpcsec_gss can use correct sequence number. */
rpc_xdr_encode(task);
+ /* Add task to reply queue before transmission to avoid races */
+ if (task->tk_status == 0 && rpc_reply_expected(task))
+ task->tk_status = xprt_request_enqueue_receive(task);
/* Did the encode result in an error condition? */
if (task->tk_status != 0) {
/* Was the error nonfatal? */
@@ -1865,7 +1953,7 @@ call_encode(struct rpc_task *task)
break;
case -EKEYEXPIRED:
if (!task->tk_cred_retry) {
- rpc_exit(task, task->tk_status);
+ rpc_call_rpcerror(task, task->tk_status);
} else {
task->tk_action = call_refresh;
task->tk_cred_retry--;
@@ -1878,9 +1966,6 @@ call_encode(struct rpc_task *task)
return;
}
- /* Add task to reply queue before transmission to avoid races */
- if (rpc_reply_expected(task))
- xprt_request_enqueue_receive(task);
xprt_request_enqueue_transmit(task);
out:
task->tk_action = call_transmit;
@@ -2116,7 +2201,8 @@ call_connect_status(struct rpc_task *task)
xprt_release(task);
value = atomic_long_dec_return(&xprt->queuelen);
if (value == 0)
- rpc_xprt_switch_remove_xprt(xps, saved);
+ rpc_xprt_switch_remove_xprt(xps, saved,
+ true);
xprt_put(saved);
task->tk_xprt = NULL;
task->tk_action = call_start;
@@ -2197,6 +2283,7 @@ call_transmit_status(struct rpc_task *task)
* socket just returned a connection error,
* then hold onto the transport lock.
*/
+ case -ENOMEM:
case -ENOBUFS:
rpc_delay(task, HZ>>2);
fallthrough;
@@ -2280,6 +2367,7 @@ call_bc_transmit_status(struct rpc_task *task)
case -ENOTCONN:
case -EPIPE:
break;
+ case -ENOMEM:
case -ENOBUFS:
rpc_delay(task, HZ>>2);
fallthrough;
@@ -2362,6 +2450,11 @@ call_status(struct rpc_task *task)
case -EPIPE:
case -EAGAIN:
break;
+ case -ENFILE:
+ case -ENOBUFS:
+ case -ENOMEM:
+ rpc_delay(task, HZ>>2);
+ break;
case -EIO:
/* shutdown or soft timeout */
goto out_exit;
@@ -2393,10 +2486,8 @@ rpc_check_timeout(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
- if (RPC_SIGNALLED(task)) {
- rpc_call_rpcerror(task, -ERESTARTSYS);
+ if (RPC_SIGNALLED(task))
return;
- }
if (xprt_adjust_timeout(task->tk_rqstp) == 0)
return;
@@ -2622,7 +2713,7 @@ out_unparsable:
out_verifier:
trace_rpc_bad_verifier(task);
- goto out_garbage;
+ goto out_err;
out_msg_denied:
error = -EACCES;
@@ -2689,6 +2780,10 @@ static const struct rpc_procinfo rpcproc_null = {
.p_decode = rpcproc_decode_null,
};
+static const struct rpc_procinfo rpcproc_null_noreply = {
+ .p_encode = rpcproc_encode_null,
+};
+
static void
rpc_null_call_prepare(struct rpc_task *task, void *data)
{
@@ -2742,6 +2837,28 @@ static int rpc_ping(struct rpc_clnt *clnt)
return status;
}
+static int rpc_ping_noreply(struct rpc_clnt *clnt)
+{
+ struct rpc_message msg = {
+ .rpc_proc = &rpcproc_null_noreply,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = clnt,
+ .rpc_message = &msg,
+ .callback_ops = &rpc_null_ops,
+ .flags = RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS,
+ };
+ struct rpc_task *task;
+ int status;
+
+ task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ status = task->tk_status;
+ rpc_put_task(task);
+ return status;
+}
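/*
 * Editorial sketch, not part of the patch: rpcproc_null_noreply has no
 * .p_decode method, so rpc_reply_expected() returns false and the task
 * above completes once the request has been transmitted, without being
 * queued for a receive. A hypothetical caller choosing between the two
 * probes (the condition is illustrative):
 */
static int example_probe(struct rpc_clnt *clnt, bool server_answers_null)
{
	return server_answers_null ? rpc_ping(clnt) : rpc_ping_noreply(clnt);
}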
+
struct rpc_cb_add_xprt_calldata {
struct rpc_xprt_switch *xps;
struct rpc_xprt *xprt;
@@ -2793,7 +2910,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
return -EINVAL;
}
- data = kmalloc(sizeof(*data), GFP_NOFS);
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
data->xps = xprt_switch_get(xps);
@@ -2805,6 +2922,9 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_ASYNC,
&rpc_cb_add_xprt_call_ops, data);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+
data->xps->xps_nunique_destaddr_xprts++;
rpc_put_task(task);
success:
@@ -2812,6 +2932,30 @@ success:
}
EXPORT_SYMBOL_GPL(rpc_clnt_test_and_add_xprt);
+static int rpc_clnt_add_xprt_helper(struct rpc_clnt *clnt,
+ struct rpc_xprt *xprt,
+ struct rpc_add_xprt_test *data)
+{
+ struct rpc_task *task;
+ int status = -EADDRINUSE;
+
+ /* Test the connection */
+ task = rpc_call_null_helper(clnt, xprt, NULL, 0, NULL, NULL);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+
+ status = task->tk_status;
+ rpc_put_task(task);
+
+ if (status < 0)
+ return status;
+
+ /* rpc_xprt_switch and rpc_xprt are dereferenced by add_xprt_test() */
+ data->add_xprt_test(clnt, xprt, data->data);
+
+ return 0;
+}
+
/**
* rpc_clnt_setup_test_and_add_xprt()
*
@@ -2835,8 +2979,6 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt,
struct rpc_xprt *xprt,
void *data)
{
- struct rpc_task *task;
- struct rpc_add_xprt_test *xtest = (struct rpc_add_xprt_test *)data;
int status = -EADDRINUSE;
xprt = xprt_get(xprt);
@@ -2845,31 +2987,19 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt,
if (rpc_xprt_switch_has_addr(xps, (struct sockaddr *)&xprt->addr))
goto out_err;
- /* Test the connection */
- task = rpc_call_null_helper(clnt, xprt, NULL, 0, NULL, NULL);
- if (IS_ERR(task)) {
- status = PTR_ERR(task);
- goto out_err;
- }
- status = task->tk_status;
- rpc_put_task(task);
-
+ status = rpc_clnt_add_xprt_helper(clnt, xprt, data);
if (status < 0)
goto out_err;
- /* rpc_xprt_switch and rpc_xprt are deferrenced by add_xprt_test() */
- xtest->add_xprt_test(clnt, xprt, xtest->data);
-
- xprt_put(xprt);
- xprt_switch_put(xps);
-
- /* so that rpc_clnt_add_xprt does not call rpc_xprt_switch_add_xprt */
- return 1;
+ status = 1;
out_err:
xprt_put(xprt);
xprt_switch_put(xps);
- pr_info("RPC: rpc_clnt_test_xprt failed: %d addr %s not added\n",
- status, xprt->address_strings[RPC_DISPLAY_ADDR]);
+ if (status < 0)
+ pr_info("RPC: rpc_clnt_test_xprt failed: %d addr %s not "
+ "added\n", status,
+ xprt->address_strings[RPC_DISPLAY_ADDR]);
+ /* so that rpc_clnt_add_xprt does not call rpc_xprt_switch_add_xprt */
return status;
}
EXPORT_SYMBOL_GPL(rpc_clnt_setup_test_and_add_xprt);
@@ -2900,7 +3030,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
unsigned long connect_timeout;
unsigned long reconnect_timeout;
unsigned char resvport, reuseport;
- int ret = 0;
+ int ret = 0, ident;
rcu_read_lock();
xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
@@ -2914,8 +3044,11 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
reuseport = xprt->reuseport;
connect_timeout = xprt->connect_timeout;
reconnect_timeout = xprt->max_reconnect_timeout;
+ ident = xprt->xprt_class->ident;
rcu_read_unlock();
+ if (!xprtargs->ident)
+ xprtargs->ident = ident;
xprt = xprt_create_transport(xprtargs);
if (IS_ERR(xprt)) {
ret = PTR_ERR(xprt);
@@ -2943,6 +3076,110 @@ out_put_switch:
}
EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt);
+static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt,
+ struct rpc_xprt *xprt,
+ struct rpc_add_xprt_test *data)
+{
+ struct rpc_xprt_switch *xps;
+ struct rpc_xprt *main_xprt;
+ int status = 0;
+
+ xprt_get(xprt);
+
+ rcu_read_lock();
+ main_xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
+ xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
+ status = rpc_cmp_addr_port((struct sockaddr *)&xprt->addr,
+ (struct sockaddr *)&main_xprt->addr);
+ rcu_read_unlock();
+ xprt_put(main_xprt);
+ if (status || !test_bit(XPRT_OFFLINE, &xprt->state))
+ goto out;
+
+ status = rpc_clnt_add_xprt_helper(clnt, xprt, data);
+out:
+ xprt_put(xprt);
+ xprt_switch_put(xps);
+ return status;
+}
+
+/* rpc_clnt_probe_trunked_xprts -- probe offlined transports for session trunking
+ * @clnt: rpc_clnt structure
+ * @data: rpc_add_xprt_test carrying the per-transport test callback
+ *
+ * For each offlined transport found in the rpc_clnt structure, call
+ * rpc_xprt_probe_trunked(), which determines whether the transport
+ * still belongs to the trunking group.
+ */
+void rpc_clnt_probe_trunked_xprts(struct rpc_clnt *clnt,
+ struct rpc_add_xprt_test *data)
+{
+ struct rpc_xprt_iter xpi;
+ int ret;
+
+ ret = rpc_clnt_xprt_iter_offline_init(clnt, &xpi);
+ if (ret)
+ return;
+ for (;;) {
+ struct rpc_xprt *xprt = xprt_iter_get_next(&xpi);
+
+ if (!xprt)
+ break;
+ ret = rpc_xprt_probe_trunked(clnt, xprt, data);
+ xprt_put(xprt);
+ if (ret < 0)
+ break;
+ xprt_iter_rewind(&xpi);
+ }
+ xprt_iter_destroy(&xpi);
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_probe_trunked_xprts);
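/*
 * Editorial note, not part of the patch: the iterator is rewound after
 * every probe, apparently because a passing trunking test brings the
 * transport back online and removes it from the offline set, which
 * would leave the iterator's position stale mid-walk.
 */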
+
+static int rpc_xprt_offline(struct rpc_clnt *clnt,
+ struct rpc_xprt *xprt,
+ void *data)
+{
+ struct rpc_xprt *main_xprt;
+ struct rpc_xprt_switch *xps;
+ int err = 0;
+
+ xprt_get(xprt);
+
+ rcu_read_lock();
+ main_xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
+ xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
+ err = rpc_cmp_addr_port((struct sockaddr *)&xprt->addr,
+ (struct sockaddr *)&main_xprt->addr);
+ rcu_read_unlock();
+ xprt_put(main_xprt);
+ if (err)
+ goto out;
+
+ if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) {
+ err = -EINTR;
+ goto out;
+ }
+ xprt_set_offline_locked(xprt, xps);
+
+ xprt_release_write(xprt, NULL);
+out:
+ xprt_put(xprt);
+ xprt_switch_put(xps);
+ return err;
+}
+
+/* rpc_clnt_manage_trunked_xprts -- offline trunked transports
+ * @clnt: rpc_clnt structure
+ *
+ * For each active transport found in the rpc_clnt structure, call
+ * rpc_xprt_offline(), which identifies trunked transports and marks
+ * them offline.
+ */
+void rpc_clnt_manage_trunked_xprts(struct rpc_clnt *clnt)
+{
+ rpc_clnt_iterate_for_each_xprt(clnt, rpc_xprt_offline, NULL);
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_manage_trunked_xprts);
+
struct connect_timeout_data {
unsigned long connect_timeout;
unsigned long reconnect_timeout;
@@ -2985,8 +3222,22 @@ void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt)
}
EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_put);
+void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
+{
+ struct rpc_xprt_switch *xps;
+
+ rcu_read_lock();
+ xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
+ rcu_read_unlock();
+ xprt_set_online_locked(xprt, xps);
+}
+
void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
{
+ if (rpc_clnt_xprt_switch_has_addr(clnt,
+ (const struct sockaddr *)&xprt->addr)) {
+ return rpc_clnt_xprt_set_online(clnt, xprt);
+ }
rcu_read_lock();
rpc_xprt_switch_add_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch),
xprt);
@@ -2994,6 +3245,19 @@ void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
}
EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_add_xprt);
+void rpc_clnt_xprt_switch_remove_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
+{
+ struct rpc_xprt_switch *xps;
+
+ rcu_read_lock();
+ xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
+ rpc_xprt_switch_remove_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch),
+ xprt, 0);
+ xps->xps_nunique_destaddr_xprts--;
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_remove_xprt);
+
bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt,
const struct sockaddr *sap)
{
@@ -3065,6 +3329,8 @@ rpc_clnt_swap_activate_callback(struct rpc_clnt *clnt,
int
rpc_clnt_swap_activate(struct rpc_clnt *clnt)
{
+ while (clnt != clnt->cl_parent)
+ clnt = clnt->cl_parent;
if (atomic_inc_return(&clnt->cl_swapper) == 1)
return rpc_clnt_iterate_for_each_xprt(clnt,
rpc_clnt_swap_activate_callback, NULL);
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 7dc9cc929bfd..a176d5a0b0ee 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -262,6 +262,9 @@ static void fail_sunrpc_init(void)
debugfs_create_bool("ignore-server-disconnect", S_IFREG | 0600, dir,
&fail_sunrpc.ignore_server_disconnect);
+
+ debugfs_create_bool("ignore-cache-wait", S_IFREG | 0600, dir,
+ &fail_sunrpc.ignore_cache_wait);
}
#else
static void fail_sunrpc_init(void)
diff --git a/net/sunrpc/fail.h b/net/sunrpc/fail.h
index 69dc30cc44b8..4b4b500df428 100644
--- a/net/sunrpc/fail.h
+++ b/net/sunrpc/fail.h
@@ -14,8 +14,8 @@ struct fail_sunrpc_attr {
struct fault_attr attr;
bool ignore_client_disconnect;
-
bool ignore_server_disconnect;
+ bool ignore_cache_wait;
};
extern struct fail_sunrpc_attr fail_sunrpc;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index ee5336d73fdd..0b6034fab9ab 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -197,7 +197,7 @@ static struct inode *
rpc_alloc_inode(struct super_block *sb)
{
struct rpc_inode *rpci;
- rpci = kmem_cache_alloc(rpc_inode_cachep, GFP_KERNEL);
+ rpci = alloc_inode_sb(sb, rpc_inode_cachep, GFP_KERNEL);
if (!rpci)
return NULL;
return &rpci->vfs_inode;
@@ -600,9 +600,9 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry)
dget(dentry);
ret = simple_rmdir(dir, dentry);
+ d_drop(dentry);
if (!ret)
fsnotify_rmdir(dir, dentry);
- d_delete(dentry);
dput(dentry);
return ret;
}
@@ -613,9 +613,9 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry)
dget(dentry);
ret = simple_unlink(dir, dentry);
+ d_drop(dentry);
if (!ret)
fsnotify_unlink(dir, dentry);
- d_delete(dentry);
dput(dentry);
return ret;
}
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 647b323cc1d5..5a8e6d46809a 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -714,7 +714,7 @@ void rpcb_getport_async(struct rpc_task *task)
goto bailout_nofree;
}
- map = kzalloc(sizeof(struct rpcbind_args), GFP_NOFS);
+ map = kzalloc(sizeof(struct rpcbind_args), rpc_task_gfp_mask());
if (!map) {
status = -ENOMEM;
goto bailout_release_client;
@@ -730,7 +730,7 @@ void rpcb_getport_async(struct rpc_task *task)
case RPCBVERS_4:
case RPCBVERS_3:
map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID];
- map->r_addr = rpc_sockaddr2uaddr(sap, GFP_NOFS);
+ map->r_addr = rpc_sockaddr2uaddr(sap, rpc_task_gfp_mask());
if (!map->r_addr) {
status = -ENOMEM;
goto bailout_free_args;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index e2c835482791..be587a308e05 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -57,6 +57,21 @@ struct workqueue_struct *rpciod_workqueue __read_mostly;
struct workqueue_struct *xprtiod_workqueue __read_mostly;
EXPORT_SYMBOL_GPL(xprtiod_workqueue);
+gfp_t rpc_task_gfp_mask(void)
+{
+ if (current->flags & PF_WQ_WORKER)
+ return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
+ return GFP_KERNEL;
+}
+EXPORT_SYMBOL_GPL(rpc_task_gfp_mask);
+
+bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status)
+{
+ if (cmpxchg(&task->tk_rpc_status, 0, rpc_status) == 0)
+ return true;
+ return false;
+}
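/*
 * Editorial sketch, not part of the patch: the set-once semantics above
 * can be modelled in portable C11: only the first caller to install a
 * nonzero status wins, and later callers learn they lost the race. The
 * kernel's cmpxchg() supplies the same compare-and-swap primitive.
 */
#include <stdatomic.h>
#include <stdbool.h>

static bool set_status_once(_Atomic int *status, int new_status)
{
	int expected = 0;

	/* Succeeds only while *status is still 0; otherwise leaves it alone. */
	return atomic_compare_exchange_strong(status, &expected, new_status);
}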
+
unsigned long
rpc_task_timeout(const struct rpc_task *task)
{
@@ -186,11 +201,6 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
/*
* Add new request to wait queue.
- *
- * Swapper tasks always get inserted at the head of the queue.
- * This should avoid many nasty memory deadlocks and hopefully
- * improve overall performance.
- * Everyone else gets appended to the queue to ensure proper FIFO behavior.
*/
static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
struct rpc_task *task,
@@ -199,8 +209,6 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
INIT_LIST_HEAD(&task->u.tk_wait.timer_list);
if (RPC_IS_PRIORITY(queue))
__rpc_add_wait_queue_priority(queue, task, queue_priority);
- else if (RPC_IS_SWAPPER(task))
- list_add(&task->u.tk_wait.list, &queue->tasks[0]);
else
list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
task->tk_waitqueue = queue;
@@ -268,7 +276,7 @@ EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode)
{
- freezable_schedule_unsafe();
+ schedule();
if (signal_pending_state(mode, current))
return -ERESTARTSYS;
return 0;
@@ -332,14 +340,12 @@ static int rpc_complete_task(struct rpc_task *task)
* to enforce taking of the wq->lock and hence avoid races with
* rpc_complete_task().
*/
-int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *action)
+int rpc_wait_for_completion_task(struct rpc_task *task)
{
- if (action == NULL)
- action = rpc_wait_bit_killable;
return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
- action, TASK_KILLABLE);
+ rpc_wait_bit_killable, TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
}
-EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
+EXPORT_SYMBOL_GPL(rpc_wait_for_completion_task);
/*
* Make an RPC task runnable.
@@ -854,12 +860,25 @@ void rpc_signal_task(struct rpc_task *task)
if (!RPC_IS_ACTIVATED(task))
return;
+ if (!rpc_task_set_rpc_status(task, -ERESTARTSYS))
+ return;
trace_rpc_task_signalled(task, task->tk_action);
set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
smp_mb__after_atomic();
queue = READ_ONCE(task->tk_waitqueue);
if (queue)
- rpc_wake_up_queued_task_set_status(queue, task, -ERESTARTSYS);
+ rpc_wake_up_queued_task(queue, task);
+}
+
+void rpc_task_try_cancel(struct rpc_task *task, int error)
+{
+ struct rpc_wait_queue *queue;
+
+ if (!rpc_task_set_rpc_status(task, error))
+ return;
+ queue = READ_ONCE(task->tk_waitqueue);
+ if (queue)
+ rpc_wake_up_queued_task(queue, task);
}
void rpc_exit(struct rpc_task *task, int status)
@@ -876,6 +895,15 @@ void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
ops->rpc_release(calldata);
}
+static bool xprt_needs_memalloc(struct rpc_xprt *xprt, struct rpc_task *tk)
+{
+ if (!xprt)
+ return false;
+ if (!atomic_read(&xprt->swapper))
+ return false;
+ return test_bit(XPRT_LOCKED, &xprt->state) && xprt->snd_task == tk;
+}
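/*
 * Editorial sketch, not part of the patch: __rpc_execute() below saves
 * the caller's PF_MEMALLOC state, sets the flag while running steps for
 * swap-backed tasks so that allocations may dip into memory reserves,
 * and restores the original state on exit. The pattern in isolation:
 */
static void run_step_with_reserves(struct rpc_task *task,
				   void (*step)(struct rpc_task *))
{
	unsigned long pflags = current->flags;

	if (RPC_IS_SWAPPER(task))
		current->flags |= PF_MEMALLOC;
	step(task);
	current_restore_flags(pflags, PF_MEMALLOC);
}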
+
/*
* This is the RPC `scheduler' (or rather, the finite state machine).
*/
@@ -884,6 +912,7 @@ static void __rpc_execute(struct rpc_task *task)
struct rpc_wait_queue *queue;
int task_is_async = RPC_IS_ASYNC(task);
int status = 0;
+ unsigned long pflags = current->flags;
WARN_ON_ONCE(RPC_IS_QUEUED(task));
if (RPC_IS_QUEUED(task))
@@ -896,16 +925,26 @@ static void __rpc_execute(struct rpc_task *task)
* Perform the next FSM step or a pending callback.
*
* tk_action may be NULL if the task has been killed.
- * In particular, note that rpc_killall_tasks may
- * do this at any time, so beware when dereferencing.
*/
do_action = task->tk_action;
+ /* Tasks with an RPC error status should exit */
+ if (do_action != rpc_exit_task &&
+ (status = READ_ONCE(task->tk_rpc_status)) != 0) {
+ task->tk_status = status;
+ if (do_action != NULL)
+ do_action = rpc_exit_task;
+ }
+ /* Callbacks override all actions */
if (task->tk_callback) {
do_action = task->tk_callback;
task->tk_callback = NULL;
}
if (!do_action)
break;
+ if (RPC_IS_SWAPPER(task) ||
+ xprt_needs_memalloc(task->tk_xprt, task))
+ current->flags |= PF_MEMALLOC;
+
trace_rpc_task_run_action(task, do_action);
do_action(task);
@@ -918,14 +957,6 @@ static void __rpc_execute(struct rpc_task *task)
}
/*
- * Signalled tasks should exit rather than sleep.
- */
- if (RPC_SIGNALLED(task)) {
- task->tk_rpc_status = -ERESTARTSYS;
- rpc_exit(task, -ERESTARTSYS);
- }
-
- /*
* The queue->lock protects against races with
* rpc_make_runnable().
*
@@ -940,16 +971,22 @@ static void __rpc_execute(struct rpc_task *task)
spin_unlock(&queue->lock);
continue;
}
+ /* Wake up any task that has an exit status */
+ if (READ_ONCE(task->tk_rpc_status) != 0) {
+ rpc_wake_up_task_queue_locked(queue, task);
+ spin_unlock(&queue->lock);
+ continue;
+ }
rpc_clear_running(task);
spin_unlock(&queue->lock);
if (task_is_async)
- return;
+ goto out;
/* sync task: sleep here */
trace_rpc_task_sync_sleep(task, task->tk_action);
status = out_of_line_wait_on_bit(&task->tk_runstate,
RPC_TASK_QUEUED, rpc_wait_bit_killable,
- TASK_KILLABLE);
+ TASK_KILLABLE|TASK_FREEZABLE);
if (status < 0) {
/*
* When a sync task receives a signal, it exits with
@@ -957,16 +994,15 @@ static void __rpc_execute(struct rpc_task *task)
* clean up after sleeping on some queue, we don't
* break the loop here, but go around once more.
*/
- trace_rpc_task_signalled(task, task->tk_action);
- set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
- task->tk_rpc_status = -ERESTARTSYS;
- rpc_exit(task, -ERESTARTSYS);
+ rpc_signal_task(task);
}
trace_rpc_task_sync_wake(task, task->tk_action);
}
/* Release all resources associated with the task */
rpc_release_task(task);
+out:
+ current_restore_flags(pflags, PF_MEMALLOC);
}
/*
@@ -1021,15 +1057,15 @@ int rpc_malloc(struct rpc_task *task)
struct rpc_rqst *rqst = task->tk_rqstp;
size_t size = rqst->rq_callsize + rqst->rq_rcvsize;
struct rpc_buffer *buf;
- gfp_t gfp = GFP_NOFS;
-
- if (RPC_IS_SWAPPER(task))
- gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
+ gfp_t gfp = rpc_task_gfp_mask();
size += sizeof(struct rpc_buffer);
- if (size <= RPC_BUFFER_MAXSIZE)
- buf = mempool_alloc(rpc_buffer_mempool, gfp);
- else
+ if (size <= RPC_BUFFER_MAXSIZE) {
+ buf = kmem_cache_alloc(rpc_buffer_slabp, gfp);
+ /* Reach for the mempool if dynamic allocation fails */
+ if (!buf && RPC_IS_ASYNC(task))
+ buf = mempool_alloc(rpc_buffer_mempool, GFP_NOWAIT);
+ } else
buf = kmalloc(size, gfp);
if (!buf)
@@ -1092,10 +1128,14 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
rpc_init_task_statistics(task);
}
-static struct rpc_task *
-rpc_alloc_task(void)
+static struct rpc_task *rpc_alloc_task(void)
{
- return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
+ struct rpc_task *task;
+
+ task = kmem_cache_alloc(rpc_task_slabp, rpc_task_gfp_mask());
+ if (task)
+ return task;
+ return mempool_alloc(rpc_task_mempool, GFP_NOWAIT);
}
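/*
 * Editorial sketch, not part of the patch: the allocation strategy above
 * tries a normal, failable slab allocation first and touches the
 * emergency mempool only on failure, keeping the pool full for genuine
 * memory pressure. In outline (names illustrative):
 */
static void *alloc_preferring_slab(struct kmem_cache *cache,
				   mempool_t *pool, gfp_t gfp)
{
	void *p = kmem_cache_alloc(cache, gfp);

	if (!p)
		p = mempool_alloc(pool, GFP_NOWAIT);
	return p;
}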
/*
@@ -1108,6 +1148,11 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
if (task == NULL) {
task = rpc_alloc_task();
+ if (task == NULL) {
+ rpc_release_calldata(setup_data->callback_ops,
+ setup_data->callback_data);
+ return ERR_PTR(-ENOMEM);
+ }
flags = RPC_TASK_DYNAMIC;
}
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index d52313af82bc..71ba4cf513bc 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -15,6 +15,7 @@
#include <linux/pagemap.h>
#include <linux/udp.h>
#include <linux/sunrpc/msg_prot.h>
+#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/xdr.h>
#include <linux/export.h>
@@ -220,12 +221,6 @@ static int xprt_send_kvec(struct socket *sock, struct msghdr *msg,
static int xprt_send_pagedata(struct socket *sock, struct msghdr *msg,
struct xdr_buf *xdr, size_t base)
{
- int err;
-
- err = xdr_alloc_bvec(xdr, GFP_KERNEL);
- if (err < 0)
- return err;
-
iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec, xdr_buf_pagecount(xdr),
xdr->page_len + xdr->page_base);
return xprt_sendmsg(sock, msg, base + xdr->page_base);
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index c964b48eaaba..52908f9e6eab 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -66,7 +66,7 @@ static int rpc_proc_show(struct seq_file *seq, void *v) {
static int rpc_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, rpc_proc_show, PDE_DATA(inode));
+ return single_open(file, rpc_proc_show, pde_data(inode));
}
static const struct proc_ops rpc_proc_ops = {
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index 2f59464e6524..d4a362c9e4b3 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -1,22 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/******************************************************************************
(c) 2008 NetApp. All Rights Reserved.
-NetApp provides this source code under the GPL v2 License.
-The GPL v2 license is available at
-https://opensource.org/licenses/gpl-license.php.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 4292278a9552..149171774bc6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -37,18 +37,37 @@
static void svc_unregister(const struct svc_serv *serv, struct net *net);
-#define svc_serv_is_pooled(serv) ((serv)->sv_ops->svo_function)
-
#define SVC_POOL_DEFAULT SVC_POOL_GLOBAL
/*
+ * Mode for mapping cpus to pools.
+ */
+enum {
+ SVC_POOL_AUTO = -1, /* choose one of the others */
+ SVC_POOL_GLOBAL, /* no mapping, just a single global pool
+ * (legacy & UP mode) */
+ SVC_POOL_PERCPU, /* one pool per cpu */
+ SVC_POOL_PERNODE /* one pool per numa node */
+};
+
+/*
* Structure for mapping cpus to pools and vice versa.
* Setup once during sunrpc initialisation.
*/
-struct svc_pool_map svc_pool_map = {
+
+struct svc_pool_map {
+ int count; /* How many svc_servs use us */
+ int mode; /* Note: int not enum to avoid
+ * warnings about "enumeration value
+ * not handled in switch" */
+ unsigned int npools;
+ unsigned int *pool_to; /* maps pool id to cpu or node */
+ unsigned int *to_pool; /* maps cpu or node to pool id */
+};
+
+static struct svc_pool_map svc_pool_map = {
.mode = SVC_POOL_DEFAULT
};
-EXPORT_SYMBOL_GPL(svc_pool_map);
static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
@@ -219,10 +238,12 @@ svc_pool_map_init_pernode(struct svc_pool_map *m)
/*
* Add a reference to the global map of cpus to pools (and
- * vice versa). Initialise the map if we're the first user.
- * Returns the number of pools.
+ * vice versa) if pools are in use.
+ * Initialise the map if we're the first user.
+ * Returns the number of pools. If this is '1', no reference
+ * was taken.
*/
-unsigned int
+static unsigned int
svc_pool_map_get(void)
{
struct svc_pool_map *m = &svc_pool_map;
@@ -232,6 +253,7 @@ svc_pool_map_get(void)
if (m->count++) {
mutex_unlock(&svc_pool_map_mutex);
+ WARN_ON_ONCE(m->npools <= 1);
return m->npools;
}
@@ -247,30 +269,36 @@ svc_pool_map_get(void)
break;
}
- if (npools < 0) {
+ if (npools <= 0) {
/* default, or memory allocation failure */
npools = 1;
m->mode = SVC_POOL_GLOBAL;
}
m->npools = npools;
+ if (npools == 1)
+ /* service is unpooled, so doesn't hold a reference */
+ m->count--;
+
mutex_unlock(&svc_pool_map_mutex);
- return m->npools;
+ return npools;
}
-EXPORT_SYMBOL_GPL(svc_pool_map_get);
/*
- * Drop a reference to the global map of cpus to pools.
+ * Drop a reference to the global map of cpus to pools, if
+ * pools were in use, i.e. if npools > 1.
* When the last reference is dropped, the map data is
* freed; this allows the sysadmin to change the pool
* mode using the pool_mode module option without
* rebooting or re-loading sunrpc.ko.
*/
-void
-svc_pool_map_put(void)
+static void
+svc_pool_map_put(int npools)
{
struct svc_pool_map *m = &svc_pool_map;
+ if (npools <= 1)
+ return;
mutex_lock(&svc_pool_map_mutex);
if (!--m->count) {
@@ -283,7 +311,6 @@ svc_pool_map_put(void)
mutex_unlock(&svc_pool_map_mutex);
}
-EXPORT_SYMBOL_GPL(svc_pool_map_put);
static int svc_pool_map_get_node(unsigned int pidx)
{
@@ -329,32 +356,35 @@ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
}
}
-/*
- * Use the mapping mode to choose a pool for a given CPU.
- * Used when enqueueing an incoming RPC. Always returns
- * a non-NULL pool pointer.
+/**
+ * svc_pool_for_cpu - Select pool to run a thread on this cpu
+ * @serv: An RPC service
+ *
+ * Use the active CPU and the svc_pool_map's mode setting to
+ * select the svc thread pool to use. Once initialized, the
+ * svc_pool_map does not change.
+ *
+ * Return value:
+ * A pointer to an svc_pool
*/
-struct svc_pool *
-svc_pool_for_cpu(struct svc_serv *serv, int cpu)
+struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv)
{
struct svc_pool_map *m = &svc_pool_map;
+ int cpu = raw_smp_processor_id();
unsigned int pidx = 0;
- /*
- * An uninitialised map happens in a pure client when
- * lockd is brought up, so silently treat it the
- * same as SVC_POOL_GLOBAL.
- */
- if (svc_serv_is_pooled(serv)) {
- switch (m->mode) {
- case SVC_POOL_PERCPU:
- pidx = m->to_pool[cpu];
- break;
- case SVC_POOL_PERNODE:
- pidx = m->to_pool[cpu_to_node(cpu)];
- break;
- }
+ if (serv->sv_nrpools <= 1)
+ return serv->sv_pools;
+
+ switch (m->mode) {
+ case SVC_POOL_PERCPU:
+ pidx = m->to_pool[cpu];
+ break;
+ case SVC_POOL_PERNODE:
+ pidx = m->to_pool[cpu_to_node(cpu)];
+ break;
}
+
return &serv->sv_pools[pidx % serv->sv_nrpools];
}
@@ -424,7 +454,7 @@ __svc_init_bc(struct svc_serv *serv)
*/
static struct svc_serv *
__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
- const struct svc_serv_ops *ops)
+ int (*threadfn)(void *data))
{
struct svc_serv *serv;
unsigned int vers;
@@ -435,13 +465,13 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
return NULL;
serv->sv_name = prog->pg_name;
serv->sv_program = prog;
- serv->sv_nrthreads = 1;
+ kref_init(&serv->sv_refcnt);
serv->sv_stats = prog->pg_stats;
if (bufsize > RPCSVC_MAXPAYLOAD)
bufsize = RPCSVC_MAXPAYLOAD;
serv->sv_max_payload = bufsize? bufsize : 4096;
serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE);
- serv->sv_ops = ops;
+ serv->sv_threadfn = threadfn;
xdrsize = 0;
while (prog) {
prog->pg_lovers = prog->pg_nvers-1;
@@ -487,59 +517,56 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
return serv;
}
-struct svc_serv *
-svc_create(struct svc_program *prog, unsigned int bufsize,
- const struct svc_serv_ops *ops)
+/**
+ * svc_create - Create an RPC service
+ * @prog: the RPC program the new service will handle
+ * @bufsize: maximum message size for @prog
+ * @threadfn: a function to service RPC requests for @prog
+ *
+ * Returns an instantiated struct svc_serv object or NULL.
+ */
+struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize,
+ int (*threadfn)(void *data))
{
- return __svc_create(prog, bufsize, /*npools*/1, ops);
+ return __svc_create(prog, bufsize, 1, threadfn);
}
EXPORT_SYMBOL_GPL(svc_create);
-struct svc_serv *
-svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
- const struct svc_serv_ops *ops)
+/**
+ * svc_create_pooled - Create an RPC service with pooled threads
+ * @prog: the RPC program the new service will handle
+ * @bufsize: maximum message size for @prog
+ * @threadfn: a function to service RPC requests for @prog
+ *
+ * Returns an instantiated struct svc_serv object or NULL.
+ */
+struct svc_serv *svc_create_pooled(struct svc_program *prog,
+ unsigned int bufsize,
+ int (*threadfn)(void *data))
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
- serv = __svc_create(prog, bufsize, npools, ops);
+ serv = __svc_create(prog, bufsize, npools, threadfn);
if (!serv)
goto out_err;
return serv;
out_err:
- svc_pool_map_put();
+ svc_pool_map_put(npools);
return NULL;
}
EXPORT_SYMBOL_GPL(svc_create_pooled);
-void svc_shutdown_net(struct svc_serv *serv, struct net *net)
-{
- svc_close_net(serv, net);
-
- if (serv->sv_ops->svo_shutdown)
- serv->sv_ops->svo_shutdown(serv, net);
-}
-EXPORT_SYMBOL_GPL(svc_shutdown_net);
-
/*
* Destroy an RPC service. Should be called with appropriate locking to
- * protect the sv_nrthreads, sv_permsocks and sv_tempsocks.
+ * protect sv_permsocks and sv_tempsocks.
*/
void
-svc_destroy(struct svc_serv *serv)
+svc_destroy(struct kref *ref)
{
- dprintk("svc: svc_destroy(%s, %d)\n",
- serv->sv_program->pg_name,
- serv->sv_nrthreads);
-
- if (serv->sv_nrthreads) {
- if (--(serv->sv_nrthreads) != 0) {
- svc_sock_update_bufs(serv);
- return;
- }
- } else
- printk("svc_destroy: no threads for serv=%p!\n", serv);
+ struct svc_serv *serv = container_of(ref, struct svc_serv, sv_refcnt);
+ dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name);
del_timer_sync(&serv->sv_temptimer);
/*
@@ -551,8 +578,7 @@ svc_destroy(struct svc_serv *serv)
cache_clean_deferred(serv);
- if (svc_serv_is_pooled(serv))
- svc_pool_map_put();
+ svc_pool_map_put(serv->sv_nrpools);
kfree(serv->sv_pools);
kfree(serv);
@@ -638,7 +664,7 @@ out_enomem:
}
EXPORT_SYMBOL_GPL(svc_rqst_alloc);
-struct svc_rqst *
+static struct svc_rqst *
svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
{
struct svc_rqst *rqstp;
@@ -647,14 +673,17 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
if (!rqstp)
return ERR_PTR(-ENOMEM);
- serv->sv_nrthreads++;
+ svc_get(serv);
+ spin_lock_bh(&serv->sv_lock);
+ serv->sv_nrthreads += 1;
+ spin_unlock_bh(&serv->sv_lock);
+
spin_lock_bh(&pool->sp_lock);
pool->sp_nrthreads++;
list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads);
spin_unlock_bh(&pool->sp_lock);
return rqstp;
}
-EXPORT_SYMBOL_GPL(svc_prepare_thread);
/*
* Choose a pool in which to create a new thread, for svc_set_num_threads
@@ -728,11 +757,9 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
if (IS_ERR(rqstp))
return PTR_ERR(rqstp);
- __module_get(serv->sv_ops->svo_module);
- task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp,
+ task = kthread_create_on_node(serv->sv_threadfn, rqstp,
node, "%s", serv->sv_name);
if (IS_ERR(task)) {
- module_put(serv->sv_ops->svo_module);
svc_exit_thread(rqstp);
return PTR_ERR(task);
}
@@ -748,59 +775,13 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
return 0;
}
-
-/* destroy old threads */
-static int
-svc_signal_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
-{
- struct task_struct *task;
- unsigned int state = serv->sv_nrthreads-1;
-
- /* destroy old threads */
- do {
- task = choose_victim(serv, pool, &state);
- if (task == NULL)
- break;
- send_sig(SIGINT, task, 1);
- nrservs++;
- } while (nrservs < 0);
-
- return 0;
-}
-
/*
* Create or destroy enough new threads to make the number
* of threads the given number. If `pool' is non-NULL, applies
* only to threads in that pool, otherwise round-robins between
* all pools. Caller must ensure mutual exclusion between this and
* server startup or shutdown.
- *
- * Destroying threads relies on the service threads filling in
- * rqstp->rq_task, which only the nfs ones do. Assumes the serv
- * has been created using svc_create_pooled().
- *
- * Based on code that used to be in nfsd_svc() but tweaked
- * to be pool-aware.
*/
-int
-svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
-{
- if (pool == NULL) {
- /* The -1 assumes caller has done a svc_get() */
- nrservs -= (serv->sv_nrthreads-1);
- } else {
- spin_lock_bh(&pool->sp_lock);
- nrservs -= pool->sp_nrthreads;
- spin_unlock_bh(&pool->sp_lock);
- }
-
- if (nrservs > 0)
- return svc_start_kthreads(serv, pool, nrservs);
- if (nrservs < 0)
- return svc_signal_kthreads(serv, pool, nrservs);
- return 0;
-}
-EXPORT_SYMBOL_GPL(svc_set_num_threads);
/* destroy old threads */
static int
@@ -821,11 +802,10 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
}
int
-svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
if (pool == NULL) {
- /* The -1 assumes caller has done a svc_get() */
- nrservs -= (serv->sv_nrthreads-1);
+ nrservs -= serv->sv_nrthreads;
} else {
spin_lock_bh(&pool->sp_lock);
nrservs -= pool->sp_nrthreads;
@@ -838,7 +818,7 @@ svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrser
return svc_stop_kthreads(serv, pool, nrservs);
return 0;
}
-EXPORT_SYMBOL_GPL(svc_set_num_threads_sync);
+EXPORT_SYMBOL_GPL(svc_set_num_threads);
/**
* svc_rqst_replace_page - Replace one page in rq_pages[]
@@ -890,11 +870,14 @@ svc_exit_thread(struct svc_rqst *rqstp)
list_del_rcu(&rqstp->rq_all);
spin_unlock_bh(&pool->sp_lock);
+ spin_lock_bh(&serv->sv_lock);
+ serv->sv_nrthreads -= 1;
+ spin_unlock_bh(&serv->sv_lock);
+ svc_sock_update_bufs(serv);
+
svc_rqst_free(rqstp);
- /* Release the server */
- if (serv)
- svc_destroy(serv);
+ svc_put(serv);
}
EXPORT_SYMBOL_GPL(svc_exit_thread);
@@ -1222,7 +1205,7 @@ svc_generic_init_request(struct svc_rqst *rqstp,
goto err_bad_proc;
/* Initialize storage for argp and resp */
- memset(rqstp->rq_argp, 0, procp->pc_argsize);
+ memset(rqstp->rq_argp, 0, procp->pc_argzero);
memset(rqstp->rq_resp, 0, procp->pc_ressize);
/* Bump per-procedure stats counter */
@@ -1261,10 +1244,10 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
goto err_short_len;
/* Will be turned off by GSS integrity and privacy services */
- set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ __set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* Will be turned off only when NFSv4 Sessions are used */
- set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
- clear_bit(RQ_DROPME, &rqstp->rq_flags);
+ __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+ __clear_bit(RQ_DROPME, &rqstp->rq_flags);
svc_putu32(resv, rqstp->rq_xid);
@@ -1382,7 +1365,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
svc_authorise(rqstp);
close_xprt:
if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
- svc_close_xprt(rqstp->rq_xprt);
+ svc_xprt_close(rqstp->rq_xprt);
dprintk("svc: svc_process close\n");
return 0;
@@ -1451,8 +1434,7 @@ svc_process(struct svc_rqst *rqstp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
- struct svc_serv *serv = rqstp->rq_server;
- u32 dir;
+ __be32 dir;
#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
if (!fail_sunrpc.ignore_server_disconnect &&
@@ -1467,7 +1449,7 @@ svc_process(struct svc_rqst *rqstp)
rqstp->rq_next_page = &rqstp->rq_respages[1];
resv->iov_base = page_address(rqstp->rq_respages[0]);
resv->iov_len = 0;
- rqstp->rq_res.pages = rqstp->rq_respages + 1;
+ rqstp->rq_res.pages = rqstp->rq_next_page;
rqstp->rq_res.len = 0;
rqstp->rq_res.page_base = 0;
rqstp->rq_res.page_len = 0;
@@ -1475,18 +1457,17 @@ svc_process(struct svc_rqst *rqstp)
rqstp->rq_res.tail[0].iov_base = NULL;
rqstp->rq_res.tail[0].iov_len = 0;
- dir = svc_getnl(argv);
- if (dir != 0) {
- /* direction != CALL */
- svc_printk(rqstp, "bad direction %d, dropping request\n", dir);
- serv->sv_stats->rpcbadfmt++;
+ dir = svc_getu32(argv);
+ if (dir != rpc_call)
+ goto out_baddir;
+ if (!svc_process_common(rqstp, argv, resv))
goto out_drop;
- }
-
- /* Returns 1 for send, 0 for drop */
- if (likely(svc_process_common(rqstp, argv, resv)))
- return svc_send(rqstp);
+ return svc_send(rqstp);
+out_baddir:
+ svc_printk(rqstp, "bad direction 0x%08x, dropping request\n",
+ be32_to_cpu(dir));
+ rqstp->rq_server->sv_stats->rpcbadfmt++;
out_drop:
svc_drop(rqstp);
return 0;
@@ -1573,8 +1554,12 @@ out:
EXPORT_SYMBOL_GPL(bc_svc_process);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-/*
- * Return (transport-specific) limit on the rpc payload.
+/**
+ * svc_max_payload - Return transport-specific limit on the RPC payload
+ * @rqstp: RPC transaction context
+ *
+ * Returns the maximum number of payload bytes the current transport
+ * allows.
*/
u32 svc_max_payload(const struct svc_rqst *rqstp)
{
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 1e99ba1b9d72..2106003645a7 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -6,6 +6,7 @@
*/
#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/errno.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
@@ -161,7 +162,7 @@ static void svc_xprt_free(struct kref *kref)
if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags))
svcauth_unix_info_release(xprt);
put_cred(xprt->xpt_cred);
- put_net(xprt->xpt_net);
+ put_net_track(xprt->xpt_net, &xprt->ns_tracker);
/* See comment on corresponding get in xs_setup_bc_tcp(): */
if (xprt->xpt_bc_xprt)
xprt_put(xprt->xpt_bc_xprt);
@@ -197,7 +198,7 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
mutex_init(&xprt->xpt_mutex);
spin_lock_init(&xprt->xpt_lock);
set_bit(XPT_BUSY, &xprt->xpt_flags);
- xprt->xpt_net = get_net(net);
+ xprt->xpt_net = get_net_track(net, &xprt->ns_tracker, GFP_ATOMIC);
strcpy(xprt->xpt_remotebuf, "uninitialized");
}
EXPORT_SYMBOL_GPL(svc_xprt_init);
@@ -243,7 +244,7 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
xprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
if (IS_ERR(xprt))
trace_svc_xprt_create_err(serv->sv_program->pg_name,
- xcl->xcl_name, sap, xprt);
+ xcl->xcl_name, sap, len, xprt);
return xprt;
}
@@ -264,15 +265,13 @@ void svc_xprt_received(struct svc_xprt *xprt)
return;
}
- trace_svc_xprt_received(xprt);
-
/* As soon as we clear busy, the xprt could be closed and
- * 'put', so we need a reference to call svc_enqueue_xprt with:
+ * 'put', so we need a reference to call svc_xprt_enqueue with:
*/
svc_xprt_get(xprt);
smp_mb__before_atomic();
clear_bit(XPT_BUSY, &xprt->xpt_flags);
- xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt);
+ svc_xprt_enqueue(xprt);
svc_xprt_put(xprt);
}
EXPORT_SYMBOL_GPL(svc_xprt_received);
@@ -286,7 +285,7 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
svc_xprt_received(new);
}
-static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
struct net *net, const int family,
const unsigned short port, int flags,
const struct cred *cred)
@@ -322,21 +321,35 @@ static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
return -EPROTONOSUPPORT;
}
-int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+/**
+ * svc_xprt_create - Add a new listener to @serv
+ * @serv: target RPC service
+ * @xprt_name: transport class name
+ * @net: network namespace
+ * @family: network address family
+ * @port: listener port
+ * @flags: SVC_SOCK flags
+ * @cred: credential to bind to this transport
+ *
+ * Return values:
+ * %0: New listener added successfully
+ * %-EPROTONOSUPPORT: Requested transport type not supported
+ */
+int svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
struct net *net, const int family,
const unsigned short port, int flags,
const struct cred *cred)
{
int err;
- err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
+ err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
if (err == -EPROTONOSUPPORT) {
request_module("svc%s", xprt_name);
- err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
+ err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
}
return err;
}
-EXPORT_SYMBOL_GPL(svc_create_xprt);
+EXPORT_SYMBOL_GPL(svc_xprt_create);
/*
* Copy the local and remote xprt addresses to the rqstp structure
@@ -412,6 +425,8 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
smp_rmb();
xpt_flags = READ_ONCE(xprt->xpt_flags);
+ if (xpt_flags & BIT(XPT_BUSY))
+ return false;
if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
return true;
if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
@@ -424,11 +439,15 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
return false;
}
-void svc_xprt_do_enqueue(struct svc_xprt *xprt)
+/**
+ * svc_xprt_enqueue - Queue a transport on an idle nfsd thread
+ * @xprt: transport with data pending
+ *
+ */
+void svc_xprt_enqueue(struct svc_xprt *xprt)
{
struct svc_pool *pool;
struct svc_rqst *rqstp = NULL;
- int cpu;
if (!svc_xprt_ready(xprt))
return;
@@ -441,8 +460,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
return;
- cpu = get_cpu();
- pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
+ pool = svc_pool_for_cpu(xprt->xpt_server);
atomic_long_inc(&pool->sp_stats.packets);
@@ -465,21 +483,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
rqstp = NULL;
out_unlock:
rcu_read_unlock();
- put_cpu();
- trace_svc_xprt_do_enqueue(xprt, rqstp);
-}
-EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue);
-
-/*
- * Queue up a transport with data pending. If there are idle nfsd
- * processes, wake 'em up.
- *
- */
-void svc_xprt_enqueue(struct svc_xprt *xprt)
-{
- if (test_bit(XPT_BUSY, &xprt->xpt_flags))
- return;
- xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt);
+ trace_svc_xprt_enqueue(xprt, rqstp);
}
EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
@@ -687,8 +691,8 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
set_current_state(TASK_RUNNING);
return -EINTR;
}
- trace_svc_alloc_arg_err(pages);
- schedule_timeout(msecs_to_jiffies(500));
+ trace_svc_alloc_arg_err(pages, ret);
+ memalloc_retry_wait(GFP_KERNEL);
}
rqstp->rq_page_end = &rqstp->rq_pages[pages];
rqstp->rq_pages[pages] = NULL; /* this might be seen in nfsd_splice_actor() */
@@ -842,8 +846,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
} else
svc_xprt_received(xprt);
+
out:
- trace_svc_handle_xprt(xprt, len);
return len;
}
@@ -1061,7 +1065,12 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
svc_xprt_put(xprt);
}
-void svc_close_xprt(struct svc_xprt *xprt)
+/**
+ * svc_xprt_close - Close a client connection
+ * @xprt: transport to disconnect
+ *
+ */
+void svc_xprt_close(struct svc_xprt *xprt)
{
trace_svc_xprt_close(xprt);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
@@ -1076,7 +1085,7 @@ void svc_close_xprt(struct svc_xprt *xprt)
*/
svc_delete_xprt(xprt);
}
-EXPORT_SYMBOL_GPL(svc_close_xprt);
+EXPORT_SYMBOL_GPL(svc_xprt_close);
static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net)
{
@@ -1128,7 +1137,11 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
}
}
-/*
+/**
+ * svc_xprt_destroy_all - Destroy transports associated with @serv
+ * @serv: RPC service to be shut down
+ * @net: target network namespace
+ *
* Server threads may still be running (especially in the case where the
* service is still running in other network namespaces).
*
@@ -1140,7 +1153,7 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
* threads, we may need to wait a little while and then check again to
* see if they're done.
*/
-void svc_close_net(struct svc_serv *serv, struct net *net)
+void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net)
{
int delay = 0;
@@ -1151,6 +1164,7 @@ void svc_close_net(struct svc_serv *serv, struct net *net)
msleep(delay++);
}
}
+EXPORT_SYMBOL_GPL(svc_xprt_destroy_all);
/*
* Handle defer and revisit of requests
@@ -1213,7 +1227,8 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
dr->addrlen = rqstp->rq_addrlen;
dr->daddr = rqstp->rq_daddr;
dr->argslen = rqstp->rq_arg.len >> 2;
- dr->xprt_hlen = rqstp->rq_xprt_hlen;
+ dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
+ rqstp->rq_xprt_ctxt = NULL;
/* back up head to the start of the buffer and copy */
skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
@@ -1223,7 +1238,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
trace_svc_defer(rqstp);
svc_xprt_get(rqstp->rq_xprt);
dr->xprt = rqstp->rq_xprt;
- set_bit(RQ_DROPME, &rqstp->rq_flags);
+ __set_bit(RQ_DROPME, &rqstp->rq_flags);
dr->handle.revisit = svc_revisit;
return &dr->handle;
@@ -1239,21 +1254,21 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
trace_svc_defer_recv(dr);
/* setup iov_base past transport header */
- rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2);
+ rqstp->rq_arg.head[0].iov_base = dr->args;
/* The iov_len does not include the transport header bytes */
- rqstp->rq_arg.head[0].iov_len = (dr->argslen<<2) - dr->xprt_hlen;
+ rqstp->rq_arg.head[0].iov_len = dr->argslen << 2;
rqstp->rq_arg.page_len = 0;
/* The rq_arg.len includes the transport header bytes */
- rqstp->rq_arg.len = dr->argslen<<2;
+ rqstp->rq_arg.len = dr->argslen << 2;
rqstp->rq_prot = dr->prot;
memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen);
rqstp->rq_addrlen = dr->addrlen;
/* Save off transport header len in case we get deferred again */
- rqstp->rq_xprt_hlen = dr->xprt_hlen;
rqstp->rq_daddr = dr->daddr;
rqstp->rq_respages = rqstp->rq_pages;
+ rqstp->rq_xprt_ctxt = dr->xprt_ctxt;
svc_xprt_received(rqstp->rq_xprt);
- return (dr->argslen<<2) - dr->xprt_hlen;
+ return dr->argslen << 2;
}
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 5a8b8e03fdd4..e72ba2f13f6c 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -31,10 +31,12 @@
*/
extern struct auth_ops svcauth_null;
extern struct auth_ops svcauth_unix;
+extern struct auth_ops svcauth_tls;
static struct auth_ops __rcu *authtab[RPC_AUTH_MAXFLAVOR] = {
[RPC_AUTH_NULL] = (struct auth_ops __force __rcu *)&svcauth_null,
[RPC_AUTH_UNIX] = (struct auth_ops __force __rcu *)&svcauth_unix,
+ [RPC_AUTH_TLS] = (struct auth_ops __force __rcu *)&svcauth_tls,
};
static struct auth_ops *
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index d7ed7d49115a..b1efc34db6ed 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -37,6 +37,7 @@ struct unix_domain {
extern struct auth_ops svcauth_null;
extern struct auth_ops svcauth_unix;
+extern struct auth_ops svcauth_tls;
static void svcauth_unix_domain_release_rcu(struct rcu_head *head)
{
@@ -789,6 +790,65 @@ struct auth_ops svcauth_null = {
static int
+svcauth_tls_accept(struct svc_rqst *rqstp)
+{
+ struct svc_cred *cred = &rqstp->rq_cred;
+ struct kvec *argv = rqstp->rq_arg.head;
+ struct kvec *resv = rqstp->rq_res.head;
+
+ if (argv->iov_len < XDR_UNIT * 3)
+ return SVC_GARBAGE;
+
+ /* Call's cred length */
+ if (svc_getu32(argv) != xdr_zero) {
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
+ return SVC_DENIED;
+ }
+
+ /* Call's verifier flavor and its length */
+ if (svc_getu32(argv) != rpc_auth_null ||
+ svc_getu32(argv) != xdr_zero) {
+ rqstp->rq_auth_stat = rpc_autherr_badverf;
+ return SVC_DENIED;
+ }
+
+ /* AUTH_TLS is not valid on non-NULL procedures */
+ if (rqstp->rq_proc != 0) {
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
+ return SVC_DENIED;
+ }
+
+ /* Mapping to nobody uid/gid is required */
+ cred->cr_uid = INVALID_UID;
+ cred->cr_gid = INVALID_GID;
+ cred->cr_group_info = groups_alloc(0);
+ if (cred->cr_group_info == NULL)
+ return SVC_CLOSE; /* kmalloc failure - client must retry */
+
+ /* Reply's verifier */
+ svc_putnl(resv, RPC_AUTH_NULL);
+ if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) {
+ svc_putnl(resv, 8);
+ memcpy(resv->iov_base + resv->iov_len, "STARTTLS", 8);
+ resv->iov_len += 8;
+ } else
+ svc_putnl(resv, 0);
+
+ rqstp->rq_cred.cr_flavor = RPC_AUTH_TLS;
+ return SVC_OK;
+}
+
+struct auth_ops svcauth_tls = {
+ .name = "tls",
+ .owner = THIS_MODULE,
+ .flavour = RPC_AUTH_TLS,
+ .accept = svcauth_tls_accept,
+ .release = svcauth_null_release,
+ .set_client = svcauth_unix_set_client,
+};
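/*
 * Editorial sketch, not part of the patch: the probe accepted above is
 * the RFC 9289 AUTH_TLS NULL call. On the wire, the call's credential
 * and verifier pair looks like this (field names are illustrative; the
 * credential flavor word is consumed before ->accept runs):
 */
struct auth_tls_probe_wire {
	__be32 cred_flavor;	/* RPC_AUTH_TLS */
	__be32 cred_length;	/* 0 */
	__be32 verf_flavor;	/* RPC_AUTH_NULL */
	__be32 verf_length;	/* 0 */
};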
+
+
+static int
svcauth_unix_accept(struct svc_rqst *rqstp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 478f857cdaed..2fc98fea59b4 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -117,15 +117,6 @@ static void svc_reclassify_socket(struct socket *sock)
*/
static void svc_tcp_release_rqst(struct svc_rqst *rqstp)
{
- struct sk_buff *skb = rqstp->rq_xprt_ctxt;
-
- if (skb) {
- struct svc_sock *svsk =
- container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
-
- rqstp->rq_xprt_ctxt = NULL;
- skb_free_datagram_locked(svsk->sk_sk, skb);
- }
}
/**
@@ -259,8 +250,6 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
ssize_t len;
size_t t;
- rqstp->rq_xprt_hlen = 0;
-
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE) {
@@ -309,9 +298,9 @@ static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs)
static void svc_sock_secure_port(struct svc_rqst *rqstp)
{
if (svc_port_is_privileged(svc_addr(rqstp)))
- set_bit(RQ_SECURE, &rqstp->rq_flags);
+ __set_bit(RQ_SECURE, &rqstp->rq_flags);
else
- clear_bit(RQ_SECURE, &rqstp->rq_flags);
+ __clear_bit(RQ_SECURE, &rqstp->rq_flags);
}
/*
@@ -464,7 +453,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
0, 0, MSG_PEEK | MSG_DONTWAIT);
if (err < 0)
goto out_recv_err;
- skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err);
+ skb = skb_recv_udp(svsk->sk_sk, MSG_DONTWAIT, &err);
if (!skb)
goto out_recv_err;
@@ -579,15 +568,18 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
if (svc_xprt_is_dead(xprt))
goto out_notconn;
+ err = xdr_alloc_bvec(xdr, GFP_KERNEL);
+ if (err < 0)
+ goto out_unlock;
+
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
- xdr_free_bvec(xdr);
if (err == -ECONNREFUSED) {
/* ICMP error on earlier request. */
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
- xdr_free_bvec(xdr);
}
+ xdr_free_bvec(xdr);
trace_svcsock_udp_send(xprt, err);
-
+out_unlock:
mutex_unlock(&xprt->xpt_mutex);
if (err < 0)
return err;
@@ -1016,9 +1008,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_xprt_ctxt = NULL;
rqstp->rq_prot = IPPROTO_TCP;
if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags))
- set_bit(RQ_LOCAL, &rqstp->rq_flags);
+ __set_bit(RQ_LOCAL, &rqstp->rq_flags);
else
- clear_bit(RQ_LOCAL, &rqstp->rq_flags);
+ __clear_bit(RQ_LOCAL, &rqstp->rq_flags);
p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
calldir = p[1];
@@ -1096,7 +1088,9 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
int ret;
*sentp = 0;
- xdr_alloc_bvec(xdr, GFP_KERNEL);
+ ret = xdr_alloc_bvec(xdr, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len);
if (ret < 0)
diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
index 2766dd21935b..c1f559892ae8 100644
--- a/net/sunrpc/sysfs.c
+++ b/net/sunrpc/sysfs.c
@@ -93,11 +93,14 @@ static ssize_t rpc_sysfs_xprt_dstaddr_show(struct kobject *kobj,
struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
ssize_t ret;
- if (!xprt)
- return 0;
+ if (!xprt) {
+ ret = sprintf(buf, "<closed>\n");
+ goto out;
+ }
ret = sprintf(buf, "%s\n", xprt->address_strings[RPC_DISPLAY_ADDR]);
xprt_put(xprt);
- return ret + 1;
+out:
+ return ret;
}
static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj,
@@ -105,38 +108,45 @@ static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj,
char *buf)
{
struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
- struct sockaddr_storage saddr;
- struct sock_xprt *sock;
- ssize_t ret = -1;
+ size_t buflen = PAGE_SIZE;
+ ssize_t ret;
if (!xprt || !xprt_connected(xprt)) {
- xprt_put(xprt);
- return -ENOTCONN;
- }
-
- sock = container_of(xprt, struct sock_xprt, xprt);
- if (kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0)
- goto out;
-
- ret = sprintf(buf, "%pISc\n", &saddr);
-out:
+ ret = sprintf(buf, "<closed>\n");
+ } else if (xprt->ops->get_srcaddr) {
+ ret = xprt->ops->get_srcaddr(xprt, buf, buflen);
+ if (ret > 0) {
+ if (ret < buflen - 1) {
+ buf[ret] = '\n';
+ ret++;
+ buf[ret] = '\0';
+ }
+ } else
+ ret = sprintf(buf, "<closed>\n");
+ } else
+ ret = sprintf(buf, "<not a socket>\n");
xprt_put(xprt);
- return ret + 1;
+ return ret;
}
static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj,
- struct kobj_attribute *attr,
- char *buf)
+ struct kobj_attribute *attr, char *buf)
{
struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
+ unsigned short srcport = 0;
+ size_t buflen = PAGE_SIZE;
ssize_t ret;
if (!xprt || !xprt_connected(xprt)) {
- xprt_put(xprt);
- return -ENOTCONN;
+ ret = sprintf(buf, "<closed>\n");
+ goto out;
}
- ret = sprintf(buf, "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n"
+ if (xprt->ops->get_srcport)
+ srcport = xprt->ops->get_srcport(xprt);
+
+ ret = snprintf(buf, buflen,
+ "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n"
"max_num_slots=%u\nmin_num_slots=%u\nnum_reqs=%u\n"
"binding_q_len=%u\nsending_q_len=%u\npending_q_len=%u\n"
"backlog_q_len=%u\nmain_xprt=%d\nsrc_port=%u\n"
@@ -144,14 +154,12 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj,
xprt->last_used, xprt->cong, xprt->cwnd, xprt->max_reqs,
xprt->min_reqs, xprt->num_reqs, xprt->binding.qlen,
xprt->sending.qlen, xprt->pending.qlen,
- xprt->backlog.qlen, xprt->main,
- (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ?
- get_srcport(xprt) : 0,
+ xprt->backlog.qlen, xprt->main, srcport,
atomic_long_read(&xprt->queuelen),
- (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ?
- xprt->address_strings[RPC_DISPLAY_PORT] : "0");
+ xprt->address_strings[RPC_DISPLAY_PORT]);
+out:
xprt_put(xprt);
- return ret + 1;
+ return ret;
}
static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj,
@@ -163,10 +171,7 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj,
int locked, connected, connecting, close_wait, bound, binding,
closing, congested, cwnd_wait, write_space, offline, remove;
- if (!xprt)
- return 0;
-
- if (!xprt->state) {
+ if (!(xprt && xprt->state)) {
ret = sprintf(buf, "state=CLOSED\n");
} else {
locked = test_bit(XPRT_LOCKED, &xprt->state);
@@ -198,7 +203,7 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj,
}
xprt_put(xprt);
- return ret + 1;
+ return ret;
}
static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj,
@@ -217,7 +222,7 @@ static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj,
xprt_switch->xps_nunique_destaddr_xprts,
atomic_long_read(&xprt_switch->xps_queuelen));
xprt_switch_put(xprt_switch);
- return ret + 1;
+ return ret;
}
static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj,
@@ -286,8 +291,10 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
int offline = 0, online = 0, remove = 0;
struct rpc_xprt_switch *xps = rpc_sysfs_xprt_kobj_get_xprt_switch(kobj);
- if (!xprt)
- return 0;
+ if (!xprt || !xps) {
+ count = 0;
+ goto out_put;
+ }
if (!strncmp(buf, "offline", 7))
offline = 1;
@@ -295,8 +302,10 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
online = 1;
else if (!strncmp(buf, "remove", 6))
remove = 1;
- else
- return -EINVAL;
+ else {
+ count = -EINVAL;
+ goto out_put;
+ }
if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) {
count = -EINTR;
@@ -307,29 +316,14 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
goto release_tasks;
}
if (offline) {
- set_bit(XPRT_OFFLINE, &xprt->state);
- spin_lock(&xps->xps_lock);
- xps->xps_nactive--;
- spin_unlock(&xps->xps_lock);
+ xprt_set_offline_locked(xprt, xps);
} else if (online) {
- clear_bit(XPRT_OFFLINE, &xprt->state);
- spin_lock(&xps->xps_lock);
- xps->xps_nactive++;
- spin_unlock(&xps->xps_lock);
+ xprt_set_online_locked(xprt, xps);
} else if (remove) {
- if (test_bit(XPRT_OFFLINE, &xprt->state)) {
- set_bit(XPRT_REMOVE, &xprt->state);
- xprt_force_disconnect(xprt);
- if (test_bit(XPRT_CONNECTED, &xprt->state)) {
- if (!xprt->sending.qlen &&
- !xprt->pending.qlen &&
- !xprt->backlog.qlen &&
- !atomic_long_read(&xprt->queuelen))
- rpc_xprt_switch_remove_xprt(xps, xprt);
- }
- } else {
+ if (test_bit(XPRT_OFFLINE, &xprt->state))
+ xprt_delete_locked(xprt, xps);
+ else
count = -EINVAL;
- }
}
release_tasks:
@@ -422,6 +416,7 @@ static struct attribute *rpc_sysfs_xprt_attrs[] = {
&rpc_sysfs_xprt_change_state.attr,
NULL,
};
+ATTRIBUTE_GROUPS(rpc_sysfs_xprt);
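/*
 * Editorial note, not part of the patch: ATTRIBUTE_GROUPS(rpc_sysfs_xprt)
 * expands to roughly the following, generating the NULL-terminated array
 * that .default_groups takes in place of the removed .default_attrs:
 */
static const struct attribute_group rpc_sysfs_xprt_group = {
	.attrs = rpc_sysfs_xprt_attrs,
};
static const struct attribute_group *rpc_sysfs_xprt_groups[] = {
	&rpc_sysfs_xprt_group,
	NULL,
};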
static struct kobj_attribute rpc_sysfs_xprt_switch_info =
__ATTR(xprt_switch_info, 0444, rpc_sysfs_xprt_switch_info_show, NULL);
@@ -430,6 +425,7 @@ static struct attribute *rpc_sysfs_xprt_switch_attrs[] = {
&rpc_sysfs_xprt_switch_info.attr,
NULL,
};
+ATTRIBUTE_GROUPS(rpc_sysfs_xprt_switch);
static struct kobj_type rpc_sysfs_client_type = {
.release = rpc_sysfs_client_release,
@@ -439,14 +435,14 @@ static struct kobj_type rpc_sysfs_client_type = {
static struct kobj_type rpc_sysfs_xprt_switch_type = {
.release = rpc_sysfs_xprt_switch_release,
- .default_attrs = rpc_sysfs_xprt_switch_attrs,
+ .default_groups = rpc_sysfs_xprt_switch_groups,
.sysfs_ops = &kobj_sysfs_ops,
.namespace = rpc_sysfs_xprt_switch_namespace,
};
static struct kobj_type rpc_sysfs_xprt_type = {
.release = rpc_sysfs_xprt_release,
- .default_attrs = rpc_sysfs_xprt_attrs,
+ .default_groups = rpc_sysfs_xprt_groups,
.sysfs_ops = &kobj_sysfs_ops,
.namespace = rpc_sysfs_xprt_namespace,
};
@@ -522,13 +518,16 @@ void rpc_sysfs_client_setup(struct rpc_clnt *clnt,
struct net *net)
{
struct rpc_sysfs_client *rpc_client;
+ struct rpc_sysfs_xprt_switch *xswitch =
+ (struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs;
+
+ if (!xswitch)
+ return;
rpc_client = rpc_sysfs_client_alloc(rpc_sunrpc_client_kobj,
net, clnt->cl_clid);
if (rpc_client) {
char name[] = "switch";
- struct rpc_sysfs_xprt_switch *xswitch =
- (struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs;
int ret;
clnt->cl_sysfs = rpc_client;
@@ -562,6 +561,8 @@ void rpc_sysfs_xprt_switch_setup(struct rpc_xprt_switch *xprt_switch,
rpc_xprt_switch->xprt_switch = xprt_switch;
rpc_xprt_switch->xprt = xprt;
kobject_uevent(&rpc_xprt_switch->kobject, KOBJ_ADD);
+ } else {
+ xprt_switch->xps_sysfs = NULL;
}
}
@@ -573,6 +574,9 @@ void rpc_sysfs_xprt_setup(struct rpc_xprt_switch *xprt_switch,
struct rpc_sysfs_xprt_switch *switch_obj =
(struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs;
+ if (!switch_obj)
+ return;
+
rpc_xprt = rpc_sysfs_xprt_alloc(&switch_obj->kobject, xprt, gfp_flags);
if (rpc_xprt) {
xprt->xprt_sysfs = rpc_xprt;
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index df194cc07035..336a7c7833e4 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -775,6 +775,34 @@ static void xdr_buf_pages_shift_left(const struct xdr_buf *buf,
xdr_buf_tail_copy_left(buf, 0, len - buf->page_len, shift);
}
+static void xdr_buf_head_shift_left(const struct xdr_buf *buf,
+ unsigned int base, unsigned int len,
+ unsigned int shift)
+{
+ const struct kvec *head = buf->head;
+ unsigned int bytes;
+
+ if (!shift || !len)
+ return;
+
+ if (shift > base) {
+ bytes = (shift - base);
+ if (bytes >= len)
+ return;
+ base += bytes;
+ len -= bytes;
+ }
+
+ if (base < head->iov_len) {
+ bytes = min_t(unsigned int, len, head->iov_len - base);
+ memmove(head->iov_base + (base - shift),
+ head->iov_base + base, bytes);
+ base += bytes;
+ len -= bytes;
+ }
+ xdr_buf_pages_shift_left(buf, base - head->iov_len, len, shift);
+}
+
/**
* xdr_shrink_bufhead
* @buf: xdr_buf
@@ -919,7 +947,29 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
EXPORT_SYMBOL_GPL(xdr_init_encode);
/**
- * xdr_commit_encode - Ensure all data is written to buffer
+ * xdr_init_encode_pages - Initialize an xdr_stream for encoding into pages
+ * @xdr: pointer to xdr_stream struct
+ * @buf: pointer to XDR buffer into which to encode data
+ * @pages: list of pages to encode into
+ * @rqst: pointer to controlling rpc_rqst, for debugging
+ *
+ */
+void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
+ struct page **pages, struct rpc_rqst *rqst)
+{
+ xdr_reset_scratch_buffer(xdr);
+
+ xdr->buf = buf;
+ xdr->page_ptr = pages;
+ xdr->iov = NULL;
+ xdr->p = page_address(*pages);
+ xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE);
+ xdr->rqst = rqst;
+}
+EXPORT_SYMBOL_GPL(xdr_init_encode_pages);
+
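
A minimal, illustrative sketch of a caller (the page array and value are made up; error handling and page freeing are omitted):

	struct page *pages[1] = { alloc_page(GFP_KERNEL) };
	struct xdr_buf buf = {
		.pages	= pages,
		.buflen	= PAGE_SIZE,
	};
	struct xdr_stream xdr;
	__be32 *p;

	xdr_init_encode_pages(&xdr, &buf, pages, NULL);
	p = xdr_reserve_space(&xdr, XDR_UNIT);
	if (p)
		*p = cpu_to_be32(42);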
+/**
+ * __xdr_commit_encode - Ensure all data is written to buffer
* @xdr: pointer to xdr_stream
*
* We handle encoding across page boundaries by giving the caller a
@@ -931,26 +981,29 @@ EXPORT_SYMBOL_GPL(xdr_init_encode);
* required at the end of encoding, or any other time when the xdr_buf
* data might be read.
*/
-inline void xdr_commit_encode(struct xdr_stream *xdr)
+void __xdr_commit_encode(struct xdr_stream *xdr)
{
- int shift = xdr->scratch.iov_len;
+ size_t shift = xdr->scratch.iov_len;
void *page;
- if (shift == 0)
- return;
page = page_address(*xdr->page_ptr);
memcpy(xdr->scratch.iov_base, page, shift);
memmove(page, page + shift, (void *)xdr->p - page);
xdr_reset_scratch_buffer(xdr);
}
-EXPORT_SYMBOL_GPL(xdr_commit_encode);
+EXPORT_SYMBOL_GPL(__xdr_commit_encode);
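
The added underscores only make sense if the zero-length fast path moved into an inline wrapper; presumably include/linux/sunrpc/xdr.h now carries something along these lines, so hot callers skip the function call entirely:

	static inline void xdr_commit_encode(struct xdr_stream *xdr)
	{
		if (unlikely(xdr->scratch.iov_len))
			__xdr_commit_encode(xdr);
	}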
-static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
- size_t nbytes)
+/*
+ * The buffer space to be reserved crosses the boundary between
+ * xdr->buf->head and xdr->buf->pages, or between two pages
+ * in xdr->buf->pages.
+ */
+static noinline __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
+ size_t nbytes)
{
- __be32 *p;
int space_left;
int frag1bytes, frag2bytes;
+ void *p;
if (nbytes > PAGE_SIZE)
goto out_overflow; /* Bigger buffers require special handling */
@@ -964,6 +1017,7 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
xdr->buf->page_len += frag1bytes;
xdr->page_ptr++;
xdr->iov = NULL;
+
/*
* If the last encode didn't end exactly on a page boundary, the
* next one will straddle boundaries. Encode into the next
@@ -972,14 +1026,19 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
* space at the end of the previous buffer:
*/
xdr_set_scratch_buffer(xdr, xdr->p, frag1bytes);
- p = page_address(*xdr->page_ptr);
+
/*
- * Note this is where the next encode will start after we've
- * shifted this one back:
+ * xdr->p is where the next encode will start after
+ * xdr_commit_encode() has shifted this one back:
*/
- xdr->p = (void *)p + frag2bytes;
+ p = page_address(*xdr->page_ptr);
+ xdr->p = p + frag2bytes;
space_left = xdr->buf->buflen - xdr->buf->len;
- xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);
+ if (space_left - frag1bytes >= PAGE_SIZE)
+ xdr->end = p + PAGE_SIZE;
+ else
+ xdr->end = p + space_left - frag1bytes;
+
xdr->buf->page_len += frag2bytes;
xdr->buf->len += nbytes;
return p;
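
The rewritten bound fixes an overshoot in the old min_t(int, space_left, PAGE_SIZE) expression: space_left is measured from the current stream position, but frag1bytes of it were already spent at the tail of the previous page. With space_left = 100 and frag1bytes = 8, for instance, the old code set xdr->end = p + 100 even though only 92 bytes remain past p, letting an encode run beyond buflen; the new branches yield p + 92, and cap xdr->end at p + PAGE_SIZE whenever more space than that remains.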
@@ -1463,71 +1522,35 @@ unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len)
}
EXPORT_SYMBOL_GPL(xdr_read_pages);
-unsigned int xdr_align_data(struct xdr_stream *xdr, unsigned int offset,
- unsigned int length)
-{
- struct xdr_buf *buf = xdr->buf;
- unsigned int from, bytes, len;
- unsigned int shift;
-
- xdr_realign_pages(xdr);
- from = xdr_page_pos(xdr);
-
- if (from >= buf->page_len + buf->tail->iov_len)
- return 0;
- if (from + buf->head->iov_len >= buf->len)
- return 0;
-
- len = buf->len - buf->head->iov_len;
-
- /* We only shift data left! */
- if (WARN_ONCE(from < offset, "SUNRPC: misaligned data src=%u dst=%u\n",
- from, offset))
- return 0;
- if (WARN_ONCE(offset > buf->page_len,
- "SUNRPC: buffer overflow. offset=%u, page_len=%u\n",
- offset, buf->page_len))
- return 0;
-
- /* Move page data to the left */
- shift = from - offset;
- xdr_buf_pages_shift_left(buf, from, len, shift);
-
- bytes = xdr_stream_remaining(xdr);
- if (length > bytes)
- length = bytes;
- bytes -= length;
-
- xdr->buf->len -= shift;
- xdr_set_page(xdr, offset + length, bytes);
- return length;
-}
-EXPORT_SYMBOL_GPL(xdr_align_data);
-
-unsigned int xdr_expand_hole(struct xdr_stream *xdr, unsigned int offset,
- unsigned int length)
+/**
+ * xdr_set_pagelen - Sets the length of the XDR pages
+ * @xdr: pointer to xdr_stream struct
+ * @len: new length of the XDR page data
+ *
+ * Either grows or shrinks the length of the xdr pages by setting pagelen to
+ * @len bytes. When shrinking, any extra data is moved into buf->tail; when
+ * growing, any data beyond the current pointer is likewise moved into the tail.
+ */
+void xdr_set_pagelen(struct xdr_stream *xdr, unsigned int len)
{
struct xdr_buf *buf = xdr->buf;
- unsigned int from, to, shift;
-
- xdr_realign_pages(xdr);
- from = xdr_page_pos(xdr);
- to = xdr_align_size(offset + length);
-
- /* Could the hole be behind us? */
- if (to > from) {
- unsigned int buflen = buf->len - buf->head->iov_len;
- shift = to - from;
- xdr_buf_try_expand(buf, shift);
- xdr_buf_pages_shift_right(buf, from, buflen, shift);
- xdr_set_page(xdr, to, xdr_stream_remaining(xdr));
- } else if (to != from)
- xdr_align_data(xdr, to, 0);
- xdr_buf_pages_zero(buf, offset, length);
+ size_t remaining = xdr_stream_remaining(xdr);
+ size_t base = 0;
- return length;
+ if (len < buf->page_len) {
+ base = buf->page_len - len;
+ xdr_shrink_pagelen(buf, len);
+ } else {
+ xdr_buf_head_shift_right(buf, xdr_stream_pos(xdr),
+ buf->page_len, remaining);
+ if (len > buf->page_len)
+ xdr_buf_try_expand(buf, len - buf->page_len);
+ }
+ xdr_set_tail_base(xdr, base, remaining);
}
-EXPORT_SYMBOL_GPL(xdr_expand_hole);
+EXPORT_SYMBOL_GPL(xdr_set_pagelen);
/**
* xdr_enter_page - decode data from the XDR page
@@ -1574,7 +1597,7 @@ EXPORT_SYMBOL_GPL(xdr_buf_from_iov);
*
* @buf and @subbuf may be pointers to the same struct xdr_buf.
*
- * Returns -1 if base of length are out of bounds.
+ * Returns -1 if base or length are out of bounds.
*/
int xdr_buf_subsegment(const struct xdr_buf *buf, struct xdr_buf *subbuf,
unsigned int base, unsigned int len)
@@ -1672,6 +1695,60 @@ bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf,
EXPORT_SYMBOL_GPL(xdr_stream_subsegment);
/**
+ * xdr_stream_move_subsegment - Move part of a stream to another position
+ * @xdr: the source xdr_stream
+ * @offset: the source offset of the segment
+ * @target: the target offset of the segment
+ * @length: the number of bytes to move
+ *
+ * Moves @length bytes from @offset to @target in the xdr_stream, overwriting
+ * whatever lies in the target range. Returns the number of bytes in the segment.
+ */
+unsigned int xdr_stream_move_subsegment(struct xdr_stream *xdr, unsigned int offset,
+ unsigned int target, unsigned int length)
+{
+ struct xdr_buf buf;
+ unsigned int shift;
+
+ if (offset < target) {
+ shift = target - offset;
+ if (xdr_buf_subsegment(xdr->buf, &buf, offset, shift + length) < 0)
+ return 0;
+ xdr_buf_head_shift_right(&buf, 0, length, shift);
+ } else if (offset > target) {
+ shift = offset - target;
+ if (xdr_buf_subsegment(xdr->buf, &buf, target, shift + length) < 0)
+ return 0;
+ xdr_buf_head_shift_left(&buf, shift, length, shift);
+ }
+ return length;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_move_subsegment);
+
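
An illustrative use of the new helper (the offsets are made up): move the 8 bytes at stream offset 12 forward to offset 16, clobbering whatever was there:

	unsigned int moved;

	moved = xdr_stream_move_subsegment(xdr, 12, 16, 8);
	if (moved != 8)
		return -EIO;	/* the affected range fell outside the xdr_buf */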
+/**
+ * xdr_stream_zero - zero out a portion of an xdr_stream
+ * @xdr: an xdr_stream to zero out
+ * @offset: the starting point in the stream
+ * @length: the number of bytes to zero
+ */
+unsigned int xdr_stream_zero(struct xdr_stream *xdr, unsigned int offset,
+ unsigned int length)
+{
+ struct xdr_buf buf;
+
+ if (xdr_buf_subsegment(xdr->buf, &buf, offset, length) < 0)
+ return 0;
+ if (buf.head[0].iov_len)
+ xdr_buf_iov_zero(buf.head, 0, buf.head[0].iov_len);
+ if (buf.page_len > 0)
+ xdr_buf_pages_zero(&buf, 0, buf.page_len);
+ if (buf.tail[0].iov_len)
+ xdr_buf_iov_zero(buf.tail, 0, buf.tail[0].iov_len);
+ return length;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_zero);
+
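
And a matching sketch for the zeroing helper, assuming a previously initialized stream (offset and length are illustrative):

	if (xdr_stream_zero(xdr, 4, 16) != 16)
		return -EIO;	/* the range fell outside the xdr_buf */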
+/**
* xdr_buf_trim - lop at most "len" bytes off the end of "buf"
* @buf: buf to be trimmed
* @len: number of bytes to reduce "buf" by
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index a02de2bddb28..656cec208371 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -69,10 +69,11 @@
/*
* Local functions
*/
-static void xprt_init(struct rpc_xprt *xprt, struct net *net);
+static void xprt_init(struct rpc_xprt *xprt, struct net *net);
static __be32 xprt_alloc_xid(struct rpc_xprt *xprt);
-static void xprt_destroy(struct rpc_xprt *xprt);
-static void xprt_request_init(struct rpc_task *task);
+static void xprt_destroy(struct rpc_xprt *xprt);
+static void xprt_request_init(struct rpc_task *task);
+static int xprt_request_prepare(struct rpc_rqst *req, struct xdr_buf *buf);
static DEFINE_SPINLOCK(xprt_list_lock);
static LIST_HEAD(xprt_list);
@@ -929,12 +930,7 @@ void xprt_connect(struct rpc_task *task)
if (!xprt_lock_write(xprt, task))
return;
- if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
- trace_xprt_disconnect_cleanup(xprt);
- xprt->ops->close(xprt);
- }
-
- if (!xprt_connected(xprt)) {
+ if (!xprt_connected(xprt) && !test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie;
rpc_sleep_on_timeout(&xprt->pending, task, NULL,
xprt_request_timeout(task->tk_rqstp));
@@ -1143,16 +1139,19 @@ xprt_request_need_enqueue_receive(struct rpc_task *task, struct rpc_rqst *req)
* @task: RPC task
*
*/
-void
+int
xprt_request_enqueue_receive(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
+ int ret;
if (!xprt_request_need_enqueue_receive(task, req))
- return;
+ return 0;
- xprt_request_prepare(task->tk_rqstp);
+ ret = xprt_request_prepare(task->tk_rqstp, &req->rq_rcv_buf);
+ if (ret)
+ return ret;
spin_lock(&xprt->queue_lock);
/* Update the softirq receive buffer */
@@ -1166,6 +1165,7 @@ xprt_request_enqueue_receive(struct rpc_task *task)
/* Turn off autodisconnect */
del_singleshot_timer_sync(&xprt->timer);
+ return 0;
}
/**
@@ -1218,6 +1218,8 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
xprt->stat.recvs++;
+ xdr_free_bvec(&req->rq_rcv_buf);
+ req->rq_private_buf.bvec = NULL;
req->rq_private_buf.len = copied;
/* Ensure all writes are done before we update */
/* req->rq_reply_bytes_recvd */
@@ -1336,8 +1338,14 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
{
struct rpc_rqst *pos, *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
+ int ret;
if (xprt_request_need_enqueue_transmit(task, req)) {
+ ret = xprt_request_prepare(task->tk_rqstp, &req->rq_snd_buf);
+ if (ret) {
+ task->tk_status = ret;
+ return;
+ }
req->rq_bytes_sent = 0;
spin_lock(&xprt->queue_lock);
/*
@@ -1354,17 +1362,6 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
INIT_LIST_HEAD(&req->rq_xmit2);
goto out;
}
- } else if (RPC_IS_SWAPPER(task)) {
- list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
- if (pos->rq_cong || pos->rq_bytes_sent)
- continue;
- if (RPC_IS_SWAPPER(pos->rq_task))
- continue;
- /* Note: req is added _before_ pos */
- list_add_tail(&req->rq_xmit, &pos->rq_xmit);
- INIT_LIST_HEAD(&req->rq_xmit2);
- goto out;
- }
} else if (!req->rq_seqno) {
list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
if (pos->rq_task->tk_owner != task->tk_owner)
@@ -1408,6 +1405,7 @@ xprt_request_dequeue_transmit_locked(struct rpc_task *task)
} else
list_del(&req->rq_xmit2);
atomic_long_dec(&req->rq_xprt->xmit_queuelen);
+ xdr_free_bvec(&req->rq_snd_buf);
}
/**
@@ -1444,8 +1442,6 @@ xprt_request_dequeue_xprt(struct rpc_task *task)
test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) ||
xprt_is_pinned_rqst(req)) {
spin_lock(&xprt->queue_lock);
- xprt_request_dequeue_transmit_locked(task);
- xprt_request_dequeue_receive_locked(task);
while (xprt_is_pinned_rqst(req)) {
set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
spin_unlock(&xprt->queue_lock);
@@ -1453,24 +1449,30 @@ xprt_request_dequeue_xprt(struct rpc_task *task)
spin_lock(&xprt->queue_lock);
clear_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
}
+ xprt_request_dequeue_transmit_locked(task);
+ xprt_request_dequeue_receive_locked(task);
spin_unlock(&xprt->queue_lock);
+ xdr_free_bvec(&req->rq_rcv_buf);
}
}
/**
* xprt_request_prepare - prepare an encoded request for transport
* @req: pointer to rpc_rqst
+ * @buf: pointer to send/rcv xdr_buf
*
* Calls into the transport layer to do whatever is needed to prepare
* the request for transmission or receive.
+ * Returns error, or zero.
*/
-void
-xprt_request_prepare(struct rpc_rqst *req)
+static int
+xprt_request_prepare(struct rpc_rqst *req, struct xdr_buf *buf)
{
struct rpc_xprt *xprt = req->rq_xprt;
if (xprt->ops->prepare_request)
- xprt->ops->prepare_request(req);
+ return xprt->ops->prepare_request(req, buf);
+ return 0;
}
/**
@@ -1503,6 +1505,9 @@ bool xprt_prepare_transmit(struct rpc_task *task)
return false;
}
+ if (atomic_read(&xprt->swapper))
+ /* This will be cleared in __rpc_execute */
+ current->flags |= PF_MEMALLOC;
return true;
}
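
The flag set here is paired with the explicit save/restore idiom the connect workers adopt later in this patch; condensed, the pattern is:

	unsigned int pflags = current->flags;

	if (atomic_read(&xprt->swapper))
		current->flags |= PF_MEMALLOC;
	/* ... work that must not recurse into memory reclaim ... */
	current_restore_flags(pflags, PF_MEMALLOC);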
@@ -1692,7 +1697,7 @@ static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt)
goto out;
++xprt->num_reqs;
spin_unlock(&xprt->reserve_lock);
- req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS);
+ req = kzalloc(sizeof(*req), rpc_task_gfp_mask());
spin_lock(&xprt->reserve_lock);
if (req != NULL)
goto out;
@@ -1783,7 +1788,7 @@ static int xprt_alloc_id(struct rpc_xprt *xprt)
{
int id;
- id = ida_simple_get(&rpc_xprt_ids, 0, 0, GFP_KERNEL);
+ id = ida_alloc(&rpc_xprt_ids, GFP_KERNEL);
if (id < 0)
return id;
@@ -1793,7 +1798,7 @@ static int xprt_alloc_id(struct rpc_xprt *xprt)
static void xprt_free_id(struct rpc_xprt *xprt)
{
- ida_simple_remove(&rpc_xprt_ids, xprt->id);
+ ida_free(&rpc_xprt_ids, xprt->id);
}
struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
@@ -1817,10 +1822,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
goto out_free;
list_add(&req->rq_list, &xprt->free);
}
- if (max_alloc > num_prealloc)
- xprt->max_reqs = max_alloc;
- else
- xprt->max_reqs = num_prealloc;
+ xprt->max_reqs = max_t(unsigned int, max_alloc, num_prealloc);
xprt->min_reqs = num_prealloc;
xprt->num_reqs = num_prealloc;
@@ -1835,7 +1837,7 @@ EXPORT_SYMBOL_GPL(xprt_alloc);
void xprt_free(struct rpc_xprt *xprt)
{
- put_net(xprt->xprt_net);
+ put_net_track(xprt->xprt_net, &xprt->ns_tracker);
xprt_free_all_slots(xprt);
xprt_free_id(xprt);
rpc_sysfs_xprt_destroy(xprt);
@@ -1863,7 +1865,7 @@ xprt_alloc_xid(struct rpc_xprt *xprt)
static void
xprt_init_xid(struct rpc_xprt *xprt)
{
- xprt->xid = prandom_u32();
+ xprt->xid = get_random_u32();
}
static void
@@ -1967,8 +1969,6 @@ void xprt_release(struct rpc_task *task)
spin_unlock(&xprt->transport_lock);
if (req->rq_buffer)
xprt->ops->buf_free(task);
- xdr_free_bvec(&req->rq_rcv_buf);
- xdr_free_bvec(&req->rq_snd_buf);
if (req->rq_cred != NULL)
put_rpccred(req->rq_cred);
if (req->rq_release_snd_buf)
@@ -2027,7 +2027,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
xprt_init_xid(xprt);
- xprt->xprt_net = get_net(net);
+ xprt->xprt_net = get_net_track(net, &xprt->ns_tracker, GFP_KERNEL);
}
/**
@@ -2112,7 +2112,14 @@ static void xprt_destroy(struct rpc_xprt *xprt)
*/
wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE);
+ /*
+ * xprt_schedule_autodisconnect() can run after XPRT_LOCKED
+ * is cleared. We use ->transport_lock to ensure the mod_timer()
+ * can only run *before* del_timer_sync(), never after.
+ */
+ spin_lock(&xprt->transport_lock);
del_timer_sync(&xprt->timer);
+ spin_unlock(&xprt->transport_lock);
/*
* Destroy sockets etc from the system workqueue so they can
@@ -2151,3 +2158,35 @@ void xprt_put(struct rpc_xprt *xprt)
kref_put(&xprt->kref, xprt_destroy_kref);
}
EXPORT_SYMBOL_GPL(xprt_put);
+
+void xprt_set_offline_locked(struct rpc_xprt *xprt, struct rpc_xprt_switch *xps)
+{
+ if (!test_and_set_bit(XPRT_OFFLINE, &xprt->state)) {
+ spin_lock(&xps->xps_lock);
+ xps->xps_nactive--;
+ spin_unlock(&xps->xps_lock);
+ }
+}
+
+void xprt_set_online_locked(struct rpc_xprt *xprt, struct rpc_xprt_switch *xps)
+{
+ if (test_and_clear_bit(XPRT_OFFLINE, &xprt->state)) {
+ spin_lock(&xps->xps_lock);
+ xps->xps_nactive++;
+ spin_unlock(&xps->xps_lock);
+ }
+}
+
+void xprt_delete_locked(struct rpc_xprt *xprt, struct rpc_xprt_switch *xps)
+{
+ if (test_and_set_bit(XPRT_REMOVE, &xprt->state))
+ return;
+
+ xprt_force_disconnect(xprt);
+ if (!test_bit(XPRT_CONNECTED, &xprt->state))
+ return;
+
+ if (!xprt->sending.qlen && !xprt->pending.qlen &&
+ !xprt->backlog.qlen && !atomic_long_read(&xprt->queuelen))
+ rpc_xprt_switch_remove_xprt(xps, xprt, true);
+}
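
Despite the _locked suffix, these helpers take xps->xps_lock themselves; what the caller is expected to hold is XPRT_LOCKED, as rpc_sysfs_xprt_state_change() does above. A sketch of the convention:

	if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE))
		return -EINTR;
	xprt_set_offline_locked(xprt, xps);
	xprt_release_write(xprt, NULL);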
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index 1693f81aae37..701250b305db 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -27,6 +27,7 @@ typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct rpc_xprt_switch *xps,
static const struct rpc_xprt_iter_ops rpc_xprt_iter_singular;
static const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin;
static const struct rpc_xprt_iter_ops rpc_xprt_iter_listall;
+static const struct rpc_xprt_iter_ops rpc_xprt_iter_listoffline;
static void xprt_switch_add_xprt_locked(struct rpc_xprt_switch *xps,
struct rpc_xprt *xprt)
@@ -61,11 +62,11 @@ void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps,
}
static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps,
- struct rpc_xprt *xprt)
+ struct rpc_xprt *xprt, bool offline)
{
if (unlikely(xprt == NULL))
return;
- if (!test_bit(XPRT_OFFLINE, &xprt->state))
+ if (!test_bit(XPRT_OFFLINE, &xprt->state) && offline)
xps->xps_nactive--;
xps->xps_nxprts--;
if (xps->xps_nxprts == 0)
@@ -78,14 +79,15 @@ static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps,
* rpc_xprt_switch_remove_xprt - Removes an rpc_xprt from a rpc_xprt_switch
* @xps: pointer to struct rpc_xprt_switch
* @xprt: pointer to struct rpc_xprt
+ * @offline: indicates if the xprt that's being removed is in an offline state
*
* Removes xprt from the list of struct rpc_xprt in xps.
*/
void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps,
- struct rpc_xprt *xprt)
+ struct rpc_xprt *xprt, bool offline)
{
spin_lock(&xps->xps_lock);
- xprt_switch_remove_xprt_locked(xps, xprt);
+ xprt_switch_remove_xprt_locked(xps, xprt, offline);
spin_unlock(&xps->xps_lock);
xprt_put(xprt);
}
@@ -101,7 +103,7 @@ static int xprt_switch_alloc_id(struct rpc_xprt_switch *xps, gfp_t gfp_flags)
{
int id;
- id = ida_simple_get(&rpc_xprtswitch_ids, 0, 0, gfp_flags);
+ id = ida_alloc(&rpc_xprtswitch_ids, gfp_flags);
if (id < 0)
return id;
@@ -111,7 +113,7 @@ static int xprt_switch_alloc_id(struct rpc_xprt_switch *xps, gfp_t gfp_flags)
static void xprt_switch_free_id(struct rpc_xprt_switch *xps)
{
- ida_simple_remove(&rpc_xprtswitch_ids, xps->xps_id);
+ ida_free(&rpc_xprtswitch_ids, xps->xps_id);
}
/**
@@ -154,7 +156,7 @@ static void xprt_switch_free_entries(struct rpc_xprt_switch *xps)
xprt = list_first_entry(&xps->xps_xprt_list,
struct rpc_xprt, xprt_switch);
- xprt_switch_remove_xprt_locked(xps, xprt);
+ xprt_switch_remove_xprt_locked(xps, xprt, true);
spin_unlock(&xps->xps_lock);
xprt_put(xprt);
spin_lock(&xps->xps_lock);
@@ -249,6 +251,18 @@ struct rpc_xprt *xprt_switch_find_first_entry(struct list_head *head)
}
static
+struct rpc_xprt *xprt_switch_find_first_entry_offline(struct list_head *head)
+{
+ struct rpc_xprt *pos;
+
+ list_for_each_entry_rcu(pos, head, xprt_switch) {
+ if (!xprt_is_active(pos))
+ return pos;
+ }
+ return NULL;
+}
+
+static
struct rpc_xprt *xprt_iter_first_entry(struct rpc_xprt_iter *xpi)
{
struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch);
@@ -259,8 +273,9 @@ struct rpc_xprt *xprt_iter_first_entry(struct rpc_xprt_iter *xpi)
}
static
-struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head,
- const struct rpc_xprt *cur)
+struct rpc_xprt *_xprt_switch_find_current_entry(struct list_head *head,
+ const struct rpc_xprt *cur,
+ bool find_active)
{
struct rpc_xprt *pos;
bool found = false;
@@ -268,14 +283,25 @@ struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head,
list_for_each_entry_rcu(pos, head, xprt_switch) {
if (cur == pos)
found = true;
- if (found && xprt_is_active(pos))
+ if (found && ((find_active && xprt_is_active(pos)) ||
+ (!find_active && !xprt_is_active(pos))))
return pos;
}
return NULL;
}
static
-struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi)
+struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head,
+ const struct rpc_xprt *cur)
+{
+ return _xprt_switch_find_current_entry(head, cur, true);
+}
+
+static
+struct rpc_xprt *_xprt_iter_current_entry(struct rpc_xprt_iter *xpi,
+ struct rpc_xprt *first_entry(struct list_head *head),
+ struct rpc_xprt *current_entry(struct list_head *head,
+ const struct rpc_xprt *cur))
{
struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch);
struct list_head *head;
@@ -284,8 +310,30 @@ struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi)
return NULL;
head = &xps->xps_xprt_list;
if (xpi->xpi_cursor == NULL || xps->xps_nxprts < 2)
- return xprt_switch_find_first_entry(head);
- return xprt_switch_find_current_entry(head, xpi->xpi_cursor);
+ return first_entry(head);
+ return current_entry(head, xpi->xpi_cursor);
+}
+
+static
+struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi)
+{
+ return _xprt_iter_current_entry(xpi, xprt_switch_find_first_entry,
+ xprt_switch_find_current_entry);
+}
+
+static
+struct rpc_xprt *xprt_switch_find_current_entry_offline(struct list_head *head,
+ const struct rpc_xprt *cur)
+{
+ return _xprt_switch_find_current_entry(head, cur, false);
+}
+
+static
+struct rpc_xprt *xprt_iter_current_entry_offline(struct rpc_xprt_iter *xpi)
+{
+ return _xprt_iter_current_entry(xpi,
+ xprt_switch_find_first_entry_offline,
+ xprt_switch_find_current_entry_offline);
}
bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
@@ -310,7 +358,7 @@ bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
static
struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
- const struct rpc_xprt *cur)
+ const struct rpc_xprt *cur, bool check_active)
{
struct rpc_xprt *pos, *prev = NULL;
bool found = false;
@@ -318,7 +366,12 @@ struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
list_for_each_entry_rcu(pos, head, xprt_switch) {
if (cur == prev)
found = true;
- if (found && xprt_is_active(pos))
+ /* When asked for active transports, return only active ones;
+ * when asked for offline transports, return only offline ones.
+ */
+ if (found && ((check_active && xprt_is_active(pos)) ||
+ (!check_active && !xprt_is_active(pos))))
return pos;
prev = pos;
}
@@ -355,7 +408,7 @@ struct rpc_xprt *__xprt_switch_find_next_entry_roundrobin(struct list_head *head
{
struct rpc_xprt *ret;
- ret = xprt_switch_find_next_entry(head, cur);
+ ret = xprt_switch_find_next_entry(head, cur, true);
if (ret != NULL)
return ret;
return xprt_switch_find_first_entry(head);
@@ -397,7 +450,14 @@ static
struct rpc_xprt *xprt_switch_find_next_entry_all(struct rpc_xprt_switch *xps,
const struct rpc_xprt *cur)
{
- return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur);
+ return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur, true);
+}
+
+static
+struct rpc_xprt *xprt_switch_find_next_entry_offline(struct rpc_xprt_switch *xps,
+ const struct rpc_xprt *cur)
+{
+ return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur, false);
}
static
@@ -407,6 +467,13 @@ struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi)
xprt_switch_find_next_entry_all);
}
+static
+struct rpc_xprt *xprt_iter_next_entry_offline(struct rpc_xprt_iter *xpi)
+{
+ return xprt_iter_next_entry_multiple(xpi,
+ xprt_switch_find_next_entry_offline);
+}
+
/*
* xprt_iter_rewind - Resets the xprt iterator
* @xpi: pointer to rpc_xprt_iter
@@ -414,7 +481,6 @@ struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi)
* Resets xpi to ensure that it points to the first entry in the list
* of transports.
*/
-static
void xprt_iter_rewind(struct rpc_xprt_iter *xpi)
{
rcu_read_lock();
@@ -460,6 +526,12 @@ void xprt_iter_init_listall(struct rpc_xprt_iter *xpi,
__xprt_iter_init(xpi, xps, &rpc_xprt_iter_listall);
}
+void xprt_iter_init_listoffline(struct rpc_xprt_iter *xpi,
+ struct rpc_xprt_switch *xps)
+{
+ __xprt_iter_init(xpi, xps, &rpc_xprt_iter_listoffline);
+}
+
/**
* xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch
* @xpi: pointer to rpc_xprt_iter
@@ -574,3 +646,10 @@ const struct rpc_xprt_iter_ops rpc_xprt_iter_listall = {
.xpi_xprt = xprt_iter_current_entry,
.xpi_next = xprt_iter_next_entry_all,
};
+
+static
+const struct rpc_xprt_iter_ops rpc_xprt_iter_listoffline = {
+ .xpi_rewind = xprt_iter_default_rewind,
+ .xpi_xprt = xprt_iter_current_entry_offline,
+ .xpi_next = xprt_iter_next_entry_offline,
+};
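
A sketch of how a caller might walk only the offline transports with the new iterator ops (assumes the caller holds a reference on xps; xprt_iter_get_next() returns a referenced xprt or NULL):

	struct rpc_xprt_iter xpi;
	struct rpc_xprt *xprt;

	xprt_iter_init_listoffline(&xpi, xps);
	while ((xprt = xprt_iter_get_next(&xpi)) != NULL) {
		/* act on one offline transport */
		xprt_put(xprt);
	}
	xprt_iter_destroy(&xpi);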
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 17f174d6ea3b..e4d84a13c566 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -13,10 +13,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
#undef RPCRDMA_BACKCHANNEL_DEBUG
/**
@@ -193,7 +189,7 @@ create_req:
return NULL;
size = min_t(size_t, r_xprt->rx_ep->re_inline_recv, PAGE_SIZE);
- req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL);
+ req = rpcrdma_req_create(r_xprt, size);
if (!req)
return NULL;
if (rpcrdma_req_setup(r_xprt, req)) {
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index ff699307e820..ffbf99894970 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -45,10 +45,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
static void frwr_cid_init(struct rpcrdma_ep *ep,
struct rpcrdma_mr *mr)
{
@@ -128,16 +124,16 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
unsigned int depth = ep->re_max_fr_depth;
struct scatterlist *sg;
struct ib_mr *frmr;
- int rc;
+
+ sg = kcalloc_node(depth, sizeof(*sg), XPRTRDMA_GFP_FLAGS,
+ ibdev_to_node(ep->re_id->device));
+ if (!sg)
+ return -ENOMEM;
frmr = ib_alloc_mr(ep->re_pd, ep->re_mrtype, depth);
if (IS_ERR(frmr))
goto out_mr_err;
- sg = kmalloc_array(depth, sizeof(*sg), GFP_NOFS);
- if (!sg)
- goto out_list_err;
-
mr->mr_xprt = r_xprt;
mr->mr_ibmr = frmr;
mr->mr_device = NULL;
@@ -150,13 +146,9 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
return 0;
out_mr_err:
- rc = PTR_ERR(frmr);
- trace_xprtrdma_frwr_alloc(mr, rc);
- return rc;
-
-out_list_err:
- ib_dereg_mr(frmr);
- return -ENOMEM;
+ kfree(sg);
+ trace_xprtrdma_frwr_alloc(mr, PTR_ERR(frmr));
+ return PTR_ERR(frmr);
}
/**
@@ -199,7 +191,7 @@ int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device)
ep->re_attr.cap.max_recv_sge = 1;
ep->re_mrtype = IB_MR_TYPE_MEM_REG;
- if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
+ if (attrs->kernel_cap_flags & IBK_SG_GAPS_REG)
ep->re_mrtype = IB_MR_TYPE_SG_GAPS;
/* Quirk: Some devices advertise a large max_fast_reg_page_list_len
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 8035a983c8ce..190a4de239c8 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -54,10 +54,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
/* Returns size of largest RPC-over-RDMA header in a Call message
*
* The largest Call header contains a full-size Read list and a
@@ -1125,6 +1121,7 @@ static bool
rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
{
+ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
struct xdr_stream *xdr = &rep->rr_stream;
__be32 *p;
@@ -1148,6 +1145,10 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
if (*p != cpu_to_be32(RPC_CALL))
return false;
+ /* No bc service. */
+ if (xprt->bc_serv == NULL)
+ return false;
+
/* Now that we are sure this is a backchannel call,
* advance to the RPC header.
*/
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 16897fcb659c..aa2227a7e552 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -119,12 +119,12 @@ xprt_rdma_bc_allocate(struct rpc_task *task)
return -EINVAL;
}
- page = alloc_page(RPCRDMA_DEF_GFP);
+ page = alloc_page(GFP_NOIO | __GFP_NOWARN);
if (!page)
return -ENOMEM;
rqst->rq_buffer = page_address(page);
- rqst->rq_rbuffer = kmalloc(rqst->rq_rcvsize, RPCRDMA_DEF_GFP);
+ rqst->rq_rbuffer = kmalloc(rqst->rq_rcvsize, GFP_NOIO | __GFP_NOWARN);
if (!rqst->rq_rbuffer) {
put_page(page);
return -ENOMEM;
@@ -198,7 +198,7 @@ static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
ret = rpcrdma_bc_send_request(rdma, rqst);
if (ret == -ENOTCONN)
- svc_close_xprt(sxprt);
+ svc_xprt_close(sxprt);
return ret;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index cf76a6ad127b..5242ad121450 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -831,7 +831,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
goto out_err;
if (ret == 0)
goto out_drop;
- rqstp->rq_xprt_hlen = ret;
if (svc_rdma_is_reverse_direction_reply(xprt, ctxt))
goto out_backchannel;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 5f0155fdefc7..11cf7c646644 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -478,10 +478,10 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
unsigned int write_len;
u64 offset;
- seg = &info->wi_chunk->ch_segments[info->wi_seg_no];
- if (!seg)
+ if (info->wi_seg_no >= info->wi_chunk->ch_segcount)
goto out_overflow;
+ seg = &info->wi_chunk->ch_segments[info->wi_seg_no];
write_len = min(remaining, seg->rs_length - info->wi_seg_off);
if (!write_len)
goto out_overflow;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 94b20fb47135..199fa012f18a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -602,7 +602,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
static void svc_rdma_secure_port(struct svc_rqst *rqstp)
{
- set_bit(RQ_SECURE, &rqstp->rq_flags);
+ __set_bit(RQ_SECURE, &rqstp->rq_flags);
}
static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 16e5696314a4..10bb2b929c6d 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -60,10 +60,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
/*
* tunables
*/
@@ -239,8 +235,11 @@ xprt_rdma_connect_worker(struct work_struct *work)
struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt,
rx_connect_worker.work);
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+ unsigned int pflags = current->flags;
int rc;
+ if (atomic_read(&xprt->swapper))
+ current->flags |= PF_MEMALLOC;
rc = rpcrdma_xprt_connect(r_xprt);
xprt_clear_connecting(xprt);
if (!rc) {
@@ -254,6 +253,7 @@ xprt_rdma_connect_worker(struct work_struct *work)
rpcrdma_xprt_disconnect(r_xprt);
xprt_unlock_connect(xprt, r_xprt);
xprt_wake_pending_tasks(xprt, rc);
+ current_restore_flags(pflags, PF_MEMALLOC);
}
/**
@@ -494,8 +494,7 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO);
}
trace_xprtrdma_op_connect(r_xprt, delay);
- queue_delayed_work(xprtiod_workqueue, &r_xprt->rx_connect_worker,
- delay);
+ queue_delayed_work(system_long_wq, &r_xprt->rx_connect_worker, delay);
}
/**
@@ -521,7 +520,7 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
return;
out_sleep:
- task->tk_status = -EAGAIN;
+ task->tk_status = -ENOMEM;
xprt_add_backlog(xprt, task);
}
@@ -571,11 +570,7 @@ xprt_rdma_allocate(struct rpc_task *task)
struct rpc_rqst *rqst = task->tk_rqstp;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
- gfp_t flags;
-
- flags = RPCRDMA_DEF_GFP;
- if (RPC_IS_SWAPPER(task))
- flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
+ gfp_t flags = rpc_task_gfp_mask();
if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize,
flags))
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3d3673ba9e1e..44b87e4274b4 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -63,17 +63,6 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-/*
- * Globals/Macros
- */
-
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
-/*
- * internal functions
- */
static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt,
@@ -87,8 +76,7 @@ static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_ep_get(struct rpcrdma_ep *ep);
static int rpcrdma_ep_put(struct rpcrdma_ep *ep);
static struct rpcrdma_regbuf *
-rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
- gfp_t flags);
+rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction);
static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb);
static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb);
@@ -274,8 +262,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
ep->re_connect_status = -ENETUNREACH;
goto wake_connect_worker;
case RDMA_CM_EVENT_REJECTED:
- dprintk("rpcrdma: connection to %pISpc rejected: %s\n",
- sap, rdma_reject_msg(id, event->status));
ep->re_connect_status = -ECONNREFUSED;
if (event->status == IB_CM_REJ_STALE_CONN)
ep->re_connect_status = -ENOTCONN;
@@ -291,8 +277,6 @@ disconnected:
break;
}
- dprintk("RPC: %s: %pISpc on %s/frwr: %s\n", __func__, sap,
- ep->re_id->device->name, rdma_event_msg(event->event));
return 0;
}
@@ -388,7 +372,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_ep *ep;
int rc;
- ep = kzalloc(sizeof(*ep), GFP_NOFS);
+ ep = kzalloc(sizeof(*ep), XPRTRDMA_GFP_FLAGS);
if (!ep)
return -ENOTCONN;
ep->re_xprt = &r_xprt->rx_xprt;
@@ -419,14 +403,6 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
ep->re_attr.qp_type = IB_QPT_RC;
ep->re_attr.port_num = ~0;
- dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
- "iovs: send %d recv %d\n",
- __func__,
- ep->re_attr.cap.max_send_wr,
- ep->re_attr.cap.max_recv_wr,
- ep->re_attr.cap.max_send_sge,
- ep->re_attr.cap.max_recv_sge);
-
ep->re_send_batch = ep->re_max_requests >> 3;
ep->re_send_count = ep->re_send_batch;
init_waitqueue_head(&ep->re_connect_wait);
@@ -436,6 +412,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
IB_POLL_WORKQUEUE);
if (IS_ERR(ep->re_attr.send_cq)) {
rc = PTR_ERR(ep->re_attr.send_cq);
+ ep->re_attr.send_cq = NULL;
goto out_destroy;
}
@@ -444,6 +421,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
IB_POLL_WORKQUEUE);
if (IS_ERR(ep->re_attr.recv_cq)) {
rc = PTR_ERR(ep->re_attr.recv_cq);
+ ep->re_attr.recv_cq = NULL;
goto out_destroy;
}
ep->re_receive_count = 0;
@@ -482,6 +460,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
ep->re_pd = ib_alloc_pd(device, 0);
if (IS_ERR(ep->re_pd)) {
rc = PTR_ERR(ep->re_pd);
+ ep->re_pd = NULL;
goto out_destroy;
}
@@ -626,7 +605,7 @@ static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep)
struct rpcrdma_sendctx *sc;
sc = kzalloc(struct_size(sc, sc_sges, ep->re_attr.cap.max_send_sge),
- GFP_KERNEL);
+ XPRTRDMA_GFP_FLAGS);
if (!sc)
return NULL;
@@ -649,7 +628,7 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
* Sends are posted.
*/
i = r_xprt->rx_ep->re_max_requests + RPCRDMA_MAX_BC_REQUESTS;
- buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL);
+ buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), XPRTRDMA_GFP_FLAGS);
if (!buf->rb_sc_ctxs)
return -ENOMEM;
@@ -760,13 +739,16 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
+ struct ib_device *device = ep->re_id->device;
unsigned int count;
+ /* Try to allocate enough to perform one full-sized I/O */
for (count = 0; count < ep->re_max_rdma_segs; count++) {
struct rpcrdma_mr *mr;
int rc;
- mr = kzalloc(sizeof(*mr), GFP_NOFS);
+ mr = kzalloc_node(sizeof(*mr), XPRTRDMA_GFP_FLAGS,
+ ibdev_to_node(device));
if (!mr)
break;
@@ -811,38 +793,33 @@ void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt)
/* If there is no underlying connection, it's no use
* to wake the refresh worker.
*/
- if (ep->re_connect_status == 1) {
- /* The work is scheduled on a WQ_MEM_RECLAIM
- * workqueue in order to prevent MR allocation
- * from recursing into NFS during direct reclaim.
- */
- queue_work(xprtiod_workqueue, &buf->rb_refresh_worker);
- }
+ if (ep->re_connect_status != 1)
+ return;
+ queue_work(system_highpri_wq, &buf->rb_refresh_worker);
}
/**
* rpcrdma_req_create - Allocate an rpcrdma_req object
* @r_xprt: controlling r_xprt
* @size: initial size, in bytes, of send and receive buffers
- * @flags: GFP flags passed to memory allocators
*
* Returns an allocated and fully initialized rpcrdma_req or NULL.
*/
-struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
- gfp_t flags)
+struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt,
+ size_t size)
{
struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
struct rpcrdma_req *req;
- req = kzalloc(sizeof(*req), flags);
+ req = kzalloc(sizeof(*req), XPRTRDMA_GFP_FLAGS);
if (req == NULL)
goto out1;
- req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, flags);
+ req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE);
if (!req->rl_sendbuf)
goto out2;
- req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, flags);
+ req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE);
if (!req->rl_recvbuf)
goto out3;
@@ -878,7 +855,7 @@ int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
r_xprt->rx_ep->re_max_rdma_segs * rpcrdma_readchunk_maxsz;
maxhdrsize *= sizeof(__be32);
rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize),
- DMA_TO_DEVICE, GFP_KERNEL);
+ DMA_TO_DEVICE);
if (!rb)
goto out;
@@ -949,12 +926,12 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
- rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+ rep = kzalloc(sizeof(*rep), XPRTRDMA_GFP_FLAGS);
if (rep == NULL)
goto out;
rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv,
- DMA_FROM_DEVICE, GFP_KERNEL);
+ DMA_FROM_DEVICE);
if (!rep->rr_rdmabuf)
goto out_free;
@@ -1084,8 +1061,8 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
for (i = 0; i < r_xprt->rx_xprt.max_reqs; i++) {
struct rpcrdma_req *req;
- req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2,
- GFP_KERNEL);
+ req = rpcrdma_req_create(r_xprt,
+ RPCRDMA_V1_DEF_INLINE_SIZE * 2);
if (!req)
goto out;
list_add(&req->rl_list, &buf->rb_send_bufs);
@@ -1255,15 +1232,14 @@ void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
* or Replies they may be registered externally via frwr_map.
*/
static struct rpcrdma_regbuf *
-rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
- gfp_t flags)
+rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction)
{
struct rpcrdma_regbuf *rb;
- rb = kmalloc(sizeof(*rb), flags);
+ rb = kmalloc(sizeof(*rb), XPRTRDMA_GFP_FLAGS);
if (!rb)
return NULL;
- rb->rg_data = kmalloc(size, flags);
+ rb->rg_data = kmalloc(size, XPRTRDMA_GFP_FLAGS);
if (!rb->rg_data) {
kfree(rb);
return NULL;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index c79f92eeda76..5e5ff6784ef5 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -149,7 +149,11 @@ static inline void *rdmab_data(const struct rpcrdma_regbuf *rb)
return rb->rg_data;
}
-#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN)
+/* Do not use emergency memory reserves, and fail quickly if memory
+ * cannot be allocated easily. These flags may be used wherever there
+ * is robust logic to handle a failure to allocate.
+ */
+#define XPRTRDMA_GFP_FLAGS (__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN)
/* To ensure a transport can always make forward progress,
* the number of RDMA segments allowed in header chunk lists
@@ -467,8 +471,8 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp);
/*
* Buffer calls - xprtrdma/verbs.c
*/
-struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
- gfp_t flags);
+struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt,
+ size_t size);
int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
void rpcrdma_req_destroy(struct rpcrdma_req *req);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index d8ee06a9650a..915b9902f673 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -58,6 +58,7 @@
#include "sunrpc.h"
static void xs_close(struct rpc_xprt *xprt);
+static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock);
static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
struct socket *sock);
@@ -260,7 +261,7 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt)
switch (sap->sa_family) {
case AF_LOCAL:
sun = xs_addr_un(xprt);
- strlcpy(buf, sun->sun_path, sizeof(buf));
+ strscpy(buf, sun->sun_path, sizeof(buf));
xprt->address_strings[RPC_DISPLAY_ADDR] =
kstrdup(buf, GFP_KERNEL);
break;
@@ -427,9 +428,9 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
offset += want;
}
- want = xs_alloc_sparse_pages(buf,
- min_t(size_t, count - offset, buf->page_len),
- GFP_KERNEL);
+ want = xs_alloc_sparse_pages(
+ buf, min_t(size_t, count - offset, buf->page_len),
+ GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
if (seek < want) {
ret = xs_read_bvec(sock, msg, flags, buf->bvec,
xdr_buf_pagecount(buf),
@@ -763,12 +764,12 @@ xs_stream_start_connect(struct sock_xprt *transport)
/**
* xs_nospace - handle an incomplete transmit
* @req: pointer to RPC request
+ * @transport: pointer to struct sock_xprt
*
*/
-static int xs_nospace(struct rpc_rqst *req)
+static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport)
{
- struct rpc_xprt *xprt = req->rq_xprt;
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ struct rpc_xprt *xprt = &transport->xprt;
struct sock *sk = transport->inet;
int ret = -EAGAIN;
@@ -780,32 +781,50 @@ static int xs_nospace(struct rpc_rqst *req)
/* Don't race with disconnect */
if (xprt_connected(xprt)) {
/* wait for more buffer space */
+ set_bit(XPRT_SOCK_NOSPACE, &transport->sock_state);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
sk->sk_write_pending++;
xprt_wait_for_buffer_space(xprt);
} else
ret = -ENOTCONN;
spin_unlock(&xprt->transport_lock);
+ return ret;
+}
+
+static int xs_sock_nospace(struct rpc_rqst *req)
+{
+ struct sock_xprt *transport =
+ container_of(req->rq_xprt, struct sock_xprt, xprt);
+ struct sock *sk = transport->inet;
+ int ret = -EAGAIN;
- /* Race breaker in case memory is freed before above code is called */
- if (ret == -EAGAIN) {
- struct socket_wq *wq;
+ lock_sock(sk);
+ if (!sock_writeable(sk))
+ ret = xs_nospace(req, transport);
+ release_sock(sk);
+ return ret;
+}
- rcu_read_lock();
- wq = rcu_dereference(sk->sk_wq);
- set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags);
- rcu_read_unlock();
+static int xs_stream_nospace(struct rpc_rqst *req, bool vm_wait)
+{
+ struct sock_xprt *transport =
+ container_of(req->rq_xprt, struct sock_xprt, xprt);
+ struct sock *sk = transport->inet;
+ int ret = -EAGAIN;
- sk->sk_write_space(sk);
- }
+ if (vm_wait)
+ return -ENOBUFS;
+ lock_sock(sk);
+ if (!sk_stream_memory_free(sk))
+ ret = xs_nospace(req, transport);
+ release_sock(sk);
return ret;
}
-static void
-xs_stream_prepare_request(struct rpc_rqst *req)
+static int xs_stream_prepare_request(struct rpc_rqst *req, struct xdr_buf *buf)
{
- xdr_free_bvec(&req->rq_rcv_buf);
- req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL);
+ return xdr_alloc_bvec(buf, rpc_task_gfp_mask());
}
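
For reference, rpc_task_gfp_mask() lives in net/sunrpc/sched.c and is defined along these lines, relaxing to a fail-fast mask only when running on a workqueue:

	gfp_t rpc_task_gfp_mask(void)
	{
		if (current->flags & PF_WQ_WORKER)
			return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
		return GFP_KERNEL;
	}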
/*
@@ -851,27 +870,27 @@ static int xs_local_send_request(struct rpc_rqst *req)
struct msghdr msg = {
.msg_flags = XS_SENDMSG_FLAGS,
};
+ bool vm_wait;
unsigned int sent;
int status;
/* Close the stream if the previous transmission was incomplete */
if (xs_send_request_was_aborted(transport, req)) {
- xs_close(xprt);
+ xprt_force_disconnect(xprt);
return -ENOTCONN;
}
xs_pktdump("packet data:",
req->rq_svec->iov_base, req->rq_svec->iov_len);
+ vm_wait = sk_stream_is_writeable(transport->inet) ? true : false;
+
req->rq_xtime = ktime_get();
status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
transport->xmit.offset, rm, &sent);
dprintk("RPC: %s(%u) = %d\n",
__func__, xdr->len - transport->xmit.offset, status);
- if (status == -EAGAIN && sock_writeable(transport->inet))
- status = -ENOBUFS;
-
if (likely(sent > 0) || status == 0) {
transport->xmit.offset += sent;
req->rq_bytes_sent = transport->xmit.offset;
@@ -881,20 +900,19 @@ static int xs_local_send_request(struct rpc_rqst *req)
return 0;
}
status = -EAGAIN;
+ vm_wait = false;
}
switch (status) {
- case -ENOBUFS:
- break;
case -EAGAIN:
- status = xs_nospace(req);
+ status = xs_stream_nospace(req, vm_wait);
break;
default:
dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status);
fallthrough;
case -EPIPE:
- xs_close(xprt);
+ xprt_force_disconnect(xprt);
status = -ENOTCONN;
}
@@ -935,6 +953,9 @@ static int xs_udp_send_request(struct rpc_rqst *req)
if (!xprt_request_get_cong(xprt, req))
return -EBADSLT;
+ status = xdr_alloc_bvec(xdr, rpc_task_gfp_mask());
+ if (status < 0)
+ return status;
req->rq_xtime = ktime_get();
status = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, 0, &sent);
@@ -963,7 +984,7 @@ process_status:
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- status = xs_nospace(req);
+ status = xs_sock_nospace(req);
break;
case -ENETUNREACH:
case -ENOBUFS:
@@ -1005,7 +1026,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
struct msghdr msg = {
.msg_flags = XS_SENDMSG_FLAGS,
};
- bool vm_wait = false;
+ bool vm_wait;
unsigned int sent;
int status;
@@ -1025,12 +1046,17 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state))
xs_tcp_set_socket_timeouts(xprt, transport->sock);
+ xs_set_srcport(transport, transport->sock);
+
/* Continue transmitting the packet/record. We must be careful
* to cope with writespace callbacks arriving _after_ we have
* called sendmsg(). */
req->rq_xtime = ktime_get();
tcp_sock_set_cork(transport->inet, true);
- while (1) {
+
+ vm_wait = sk_stream_is_writeable(transport->inet) ? true : false;
+
+ do {
status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
transport->xmit.offset, rm, &sent);
@@ -1051,31 +1077,10 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
WARN_ON_ONCE(sent == 0 && status == 0);
- if (status == -EAGAIN ) {
- /*
- * Return EAGAIN if we're sure we're hitting the
- * socket send buffer limits.
- */
- if (test_bit(SOCK_NOSPACE, &transport->sock->flags))
- break;
- /*
- * Did we hit a memory allocation failure?
- */
- if (sent == 0) {
- status = -ENOBUFS;
- if (vm_wait)
- break;
- /* Retry, knowing now that we're below the
- * socket send buffer limit
- */
- vm_wait = true;
- }
- continue;
- }
- if (status < 0)
- break;
- vm_wait = false;
- }
+ if (sent > 0)
+ vm_wait = false;
+
+ } while (status == 0);
switch (status) {
case -ENOTSOCK:
@@ -1083,7 +1088,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- status = xs_nospace(req);
+ status = xs_stream_nospace(req, vm_wait);
break;
case -ECONNRESET:
case -ECONNREFUSED:
@@ -1124,6 +1129,7 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt)
clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state);
clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state);
clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state);
+ clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state);
}
static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr)
@@ -1179,6 +1185,16 @@ static void xs_reset_transport(struct sock_xprt *transport)
if (sk == NULL)
return;
+ /*
+ * Make sure we're calling this in a context from which it is safe
+ * to call __fput_sync(). In practice that means rpciod and the
+ * system workqueue.
+ */
+ if (!(current->flags & PF_WQ_WORKER)) {
+ WARN_ON_ONCE(1);
+ set_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ return;
+ }
if (atomic_read(&transport->xprt.swapper))
sk_clear_memalloc(sk);
@@ -1202,7 +1218,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
mutex_unlock(&transport->recv_mutex);
trace_rpc_socket_close(xprt, sock);
- fput(filp);
+ __fput_sync(filp);
xprt_disconnect_done(xprt);
}
@@ -1331,7 +1347,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
if (sk == NULL)
goto out;
for (;;) {
- skb = skb_recv_udp(sk, 0, 1, &err);
+ skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);
if (skb == NULL)
break;
xs_udp_data_read_skb(&transport->xprt, sk, skb);
@@ -1354,7 +1370,7 @@ static void xs_udp_data_receive_workfn(struct work_struct *work)
}
/**
- * xs_data_ready - "data ready" callback for UDP sockets
+ * xs_data_ready - "data ready" callback for sockets
* @sk: socket with data to read
*
*/
@@ -1362,11 +1378,13 @@ static void xs_data_ready(struct sock *sk)
{
struct rpc_xprt *xprt;
- dprintk("RPC: xs_data_ready...\n");
xprt = xprt_from_sock(sk);
if (xprt != NULL) {
struct sock_xprt *transport = container_of(xprt,
struct sock_xprt, xprt);
+
+ trace_xs_data_ready(xprt);
+
transport->old_data_ready(sk);
/* Any data means we had a useful conversation, so
* then we don't need to delay the next reconnect
@@ -1395,6 +1413,26 @@ static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
/**
+ * xs_local_state_change - callback to handle AF_LOCAL socket state changes
+ * @sk: socket whose state has changed
+ *
+ */
+static void xs_local_state_change(struct sock *sk)
+{
+ struct rpc_xprt *xprt;
+ struct sock_xprt *transport;
+
+ if (!(xprt = xprt_from_sock(sk)))
+ return;
+ transport = container_of(xprt, struct sock_xprt, xprt);
+ if (sk->sk_shutdown & SHUTDOWN_MASK) {
+ clear_bit(XPRT_CONNECTED, &xprt->state);
+ /* Trigger the socket release */
+ xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
+ }
+}
+
+/**
* xs_tcp_state_change - callback to handle TCP socket state changes
* @sk: socket whose state has changed
*
@@ -1470,7 +1508,6 @@ static void xs_tcp_state_change(struct sock *sk)
static void xs_write_space(struct sock *sk)
{
- struct socket_wq *wq;
struct sock_xprt *transport;
struct rpc_xprt *xprt;
@@ -1481,15 +1518,10 @@ static void xs_write_space(struct sock *sk)
if (unlikely(!(xprt = xprt_from_sock(sk))))
return;
transport = container_of(xprt, struct sock_xprt, xprt);
- rcu_read_lock();
- wq = rcu_dereference(sk->sk_wq);
- if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0)
- goto out;
-
+ if (!test_and_clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state))
+ return;
xs_run_error_worker(transport, XPRT_SOCK_WAKE_WRITE);
sk->sk_write_pending--;
-out:
- rcu_read_unlock();
}
/**
@@ -1587,7 +1619,7 @@ static int xs_get_random_port(void)
if (max < min)
return -EADDRINUSE;
range = max - min + 1;
- rand = (unsigned short) prandom_u32() % range;
+ rand = prandom_u32_max(range);
return rand + min;
}
@@ -1638,12 +1670,35 @@ static int xs_get_srcport(struct sock_xprt *transport)
return port;
}
-unsigned short get_srcport(struct rpc_xprt *xprt)
+static unsigned short xs_sock_srcport(struct rpc_xprt *xprt)
+{
+ struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt);
+ unsigned short ret = 0;
+ mutex_lock(&sock->recv_mutex);
+ if (sock->sock)
+ ret = xs_sock_getport(sock->sock);
+ mutex_unlock(&sock->recv_mutex);
+ return ret;
+}
+
+static int xs_sock_srcaddr(struct rpc_xprt *xprt, char *buf, size_t buflen)
{
struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt);
- return xs_sock_getport(sock->sock);
+ union {
+ struct sockaddr sa;
+ struct sockaddr_storage st;
+ } saddr;
+ int ret = -ENOTCONN;
+
+ mutex_lock(&sock->recv_mutex);
+ if (sock->sock) {
+ ret = kernel_getsockname(sock->sock, &saddr.sa);
+ if (ret >= 0)
+ ret = snprintf(buf, buflen, "%pISc", &saddr.sa);
+ }
+ mutex_unlock(&sock->recv_mutex);
+ return ret;
}
-EXPORT_SYMBOL(get_srcport);
static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port)
{
@@ -1825,7 +1880,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
sk->sk_user_data = xprt;
sk->sk_data_ready = xs_data_ready;
sk->sk_write_space = xs_udp_write_space;
- sock_set_flag(sk, SOCK_FASYNC);
+ sk->sk_state_change = xs_local_state_change;
sk->sk_error_report = xs_error_report;
xprt_clear_connected(xprt);
@@ -1910,7 +1965,10 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
int ret;
- if (RPC_IS_ASYNC(task)) {
+ if (transport->file)
+ goto force_disconnect;
+
+ if (RPC_IS_ASYNC(task)) {
/*
* We want the AF_LOCAL connect to be resolved in the
* filesystem namespace of the process making the rpc
@@ -1920,20 +1978,25 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
* we'll need to figure out how to pass a namespace to
* connect.
*/
- task->tk_rpc_status = -ENOTCONN;
- rpc_exit(task, -ENOTCONN);
- return;
+ rpc_task_set_rpc_status(task, -ENOTCONN);
+ goto out_wake;
}
ret = xs_local_setup_socket(transport);
if (ret && !RPC_IS_SOFTCONN(task))
msleep_interruptible(15000);
+ return;
+force_disconnect:
+ xprt_force_disconnect(xprt);
+out_wake:
+ xprt_clear_connecting(xprt);
+ xprt_wake_pending_tasks(xprt, -ENOTCONN);
}
#if IS_ENABLED(CONFIG_SUNRPC_SWAP)
/*
- * Note that this should be called with XPRT_LOCKED held (or when we otherwise
- * know that we have exclusive access to the socket), to guard against
- * races with xs_reset_transport.
+ * Note that this should be called with XPRT_LOCKED held, or recv_mutex
+ * held, or when we otherwise know that we have exclusive access to the
+ * socket, to guard against races with xs_reset_transport.
*/
static void xs_set_memalloc(struct rpc_xprt *xprt)
{
@@ -1962,13 +2025,11 @@ xs_enable_swap(struct rpc_xprt *xprt)
{
struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt);
- if (atomic_inc_return(&xprt->swapper) != 1)
- return 0;
- if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE))
- return -ERESTARTSYS;
- if (xs->inet)
+ mutex_lock(&xs->recv_mutex);
+ if (atomic_inc_return(&xprt->swapper) == 1 &&
+ xs->inet)
sk_set_memalloc(xs->inet);
- xprt_release_xprt(xprt, NULL);
+ mutex_unlock(&xs->recv_mutex);
return 0;
}
@@ -1984,13 +2045,11 @@ xs_disable_swap(struct rpc_xprt *xprt)
{
struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt);
- if (!atomic_dec_and_test(&xprt->swapper))
- return;
- if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE))
- return;
- if (xs->inet)
+ mutex_lock(&xs->recv_mutex);
+ if (atomic_dec_and_test(&xprt->swapper) &&
+ xs->inet)
sk_clear_memalloc(xs->inet);
- xprt_release_xprt(xprt, NULL);
+ mutex_unlock(&xs->recv_mutex);
}
#else
static void xs_set_memalloc(struct rpc_xprt *xprt)
@@ -2023,7 +2082,6 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_user_data = xprt;
sk->sk_data_ready = xs_data_ready;
sk->sk_write_space = xs_udp_write_space;
- sock_set_flag(sk, SOCK_FASYNC);
xprt_set_connected(xprt);
@@ -2047,7 +2105,10 @@ static void xs_udp_setup_socket(struct work_struct *work)
struct rpc_xprt *xprt = &transport->xprt;
struct socket *sock;
int status = -EIO;
+ unsigned int pflags = current->flags;
+ if (atomic_read(&xprt->swapper))
+ current->flags |= PF_MEMALLOC;
sock = xs_create_sock(xprt, transport,
xs_addr(xprt)->sa_family, SOCK_DGRAM,
IPPROTO_UDP, false);
@@ -2067,6 +2128,7 @@ out:
xprt_clear_connecting(xprt);
xprt_unlock_connect(xprt, transport);
xprt_wake_pending_tasks(xprt, status);
+ current_restore_flags(pflags, PF_MEMALLOC);
}
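
Both socket-setup workers now bracket their work with a PF_MEMALLOC save/restore, so a transport carrying swap traffic can still allocate its socket under memory pressure. A hedged sketch of the bracket in isolation:

        #include <linux/sched.h>

        /* Illustrative worker body: borrow access to memory reserves
         * for the duration of the (re)connect, then restore the flag
         * to whatever the caller-visible state was.
         */
        static void example_setup_work(void)
        {
                unsigned int pflags = current->flags;

                current->flags |= PF_MEMALLOC;

                /* ... create, bind and connect the socket ... */

                current_restore_flags(pflags, PF_MEMALLOC);
        }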
/**
@@ -2186,7 +2248,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_data_ready = xs_data_ready;
sk->sk_state_change = xs_tcp_state_change;
sk->sk_write_space = xs_tcp_write_space;
- sock_set_flag(sk, SOCK_FASYNC);
sk->sk_error_report = xs_error_report;
/* socket options */
@@ -2226,11 +2287,19 @@ static void xs_tcp_setup_socket(struct work_struct *work)
struct socket *sock = transport->sock;
struct rpc_xprt *xprt = &transport->xprt;
int status;
+ unsigned int pflags = current->flags;
+
+ if (atomic_read(&xprt->swapper))
+ current->flags |= PF_MEMALLOC;
- if (!sock) {
- sock = xs_create_sock(xprt, transport,
- xs_addr(xprt)->sa_family, SOCK_STREAM,
- IPPROTO_TCP, true);
+ if (xprt_connected(xprt))
+ goto out;
+ if (test_and_clear_bit(XPRT_SOCK_CONNECT_SENT,
+ &transport->sock_state) ||
+ !sock) {
+ xs_reset_transport(transport);
+ sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family,
+ SOCK_STREAM, IPPROTO_TCP, true);
if (IS_ERR(sock)) {
xprt_wake_pending_tasks(xprt, PTR_ERR(sock));
goto out;
@@ -2250,10 +2319,9 @@ static void xs_tcp_setup_socket(struct work_struct *work)
sock->sk->sk_state);
switch (status) {
case 0:
- xs_set_srcport(transport, sock);
- fallthrough;
case -EINPROGRESS:
/* SYN_SENT! */
+ set_bit(XPRT_SOCK_CONNECT_SENT, &transport->sock_state);
if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
fallthrough;
@@ -2291,6 +2359,7 @@ out:
xprt_clear_connecting(xprt);
out_unlock:
xprt_unlock_connect(xprt, transport);
+ current_restore_flags(pflags, PF_MEMALLOC);
}
/**
@@ -2314,13 +2383,9 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport));
- if (transport->sock != NULL && !xprt_connecting(xprt)) {
+ if (transport->sock != NULL) {
dprintk("RPC: xs_connect delayed xprt %p for %lu "
- "seconds\n",
- xprt, xprt->reestablish_timeout / HZ);
-
- /* Start by resetting any existing state */
- xs_reset_transport(transport);
+ "seconds\n", xprt, xprt->reestablish_timeout / HZ);
delay = xprt_reconnect_delay(xprt);
xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO);
@@ -2482,7 +2547,7 @@ static int bc_malloc(struct rpc_task *task)
return -EINVAL;
}
- page = alloc_page(GFP_KERNEL);
+ page = alloc_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
if (!page)
return -ENOMEM;
@@ -2520,6 +2585,9 @@ static int bc_sendto(struct rpc_rqst *req)
int err;
req->rq_xtime = ktime_get();
+ err = xdr_alloc_bvec(xdr, rpc_task_gfp_mask());
+ if (err < 0)
+ return err;
err = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, marker, &sent);
xdr_free_bvec(xdr);
if (err < 0 || sent != (xdr->len + sizeof(marker)))
@@ -2616,6 +2684,8 @@ static const struct rpc_xprt_ops xs_udp_ops = {
.rpcbind = rpcb_getport_async,
.set_port = xs_set_port,
.connect = xs_connect,
+ .get_srcaddr = xs_sock_srcaddr,
+ .get_srcport = xs_sock_srcport,
.buf_alloc = rpc_malloc,
.buf_free = rpc_free,
.send_request = xs_udp_send_request,
@@ -2638,6 +2708,8 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
.rpcbind = rpcb_getport_async,
.set_port = xs_set_port,
.connect = xs_connect,
+ .get_srcaddr = xs_sock_srcaddr,
+ .get_srcport = xs_sock_srcport,
.buf_alloc = rpc_malloc,
.buf_free = rpc_free,
.prepare_request = xs_stream_prepare_request,
@@ -2796,9 +2868,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
}
xprt_set_bound(xprt);
xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
- ret = ERR_PTR(xs_local_setup_socket(transport));
- if (ret)
- goto out_err;
break;
default:
ret = ERR_PTR(-EAFNOSUPPORT);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index b62565278fac..8cc42aea19c7 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -64,7 +64,7 @@ void switchdev_deferred_process(void)
while ((dfitem = switchdev_deferred_dequeue())) {
dfitem->func(dfitem->dev, dfitem->data);
- dev_put_track(dfitem->dev, &dfitem->dev_tracker);
+ netdev_put(dfitem->dev, &dfitem->dev_tracker);
kfree(dfitem);
}
}
@@ -85,13 +85,13 @@ static int switchdev_deferred_enqueue(struct net_device *dev,
{
struct switchdev_deferred_item *dfitem;
- dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC);
+ dfitem = kmalloc(struct_size(dfitem, data, data_len), GFP_ATOMIC);
if (!dfitem)
return -ENOMEM;
dfitem->dev = dev;
dfitem->func = func;
memcpy(dfitem->data, data, data_len);
- dev_hold_track(dev, &dfitem->dev_tracker, GFP_ATOMIC);
+ netdev_hold(dev, &dfitem->dev_tracker, GFP_ATOMIC);
spin_lock_bh(&deferred_lock);
list_add_tail(&dfitem->list, &deferred);
spin_unlock_bh(&deferred_lock);
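
struct_size() makes the flexible-array allocation overflow-safe: the header size and the payload size are combined with saturating arithmetic, so an oversized data_len makes kmalloc() fail instead of under-allocating. A self-contained sketch with illustrative names:

        #include <linux/list.h>
        #include <linux/overflow.h>
        #include <linux/slab.h>
        #include <linux/string.h>

        struct example_item {
                struct list_head list;
                size_t data_len;
                u8 data[];              /* flexible array member */
        };

        static struct example_item *example_item_alloc(const void *data,
                                                       size_t data_len,
                                                       gfp_t gfp)
        {
                struct example_item *it;

                /* sizeof(*it) + data_len, saturated to SIZE_MAX on overflow */
                it = kmalloc(struct_size(it, data, data_len), gfp);
                if (!it)
                        return NULL;
                it->data_len = data_len;
                memcpy(it->data, data, data_len);
                return it;
        }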
@@ -409,6 +409,27 @@ static int switchdev_lower_dev_walk(struct net_device *lower_dev,
}
static struct net_device *
+switchdev_lower_dev_find_rcu(struct net_device *dev,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev))
+{
+ struct switchdev_nested_priv switchdev_priv = {
+ .check_cb = check_cb,
+ .foreign_dev_check_cb = foreign_dev_check_cb,
+ .dev = dev,
+ .lower_dev = NULL,
+ };
+ struct netdev_nested_priv priv = {
+ .data = &switchdev_priv,
+ };
+
+ netdev_walk_all_lower_dev_rcu(dev, switchdev_lower_dev_walk, &priv);
+
+ return switchdev_priv.lower_dev;
+}
+
+static struct net_device *
switchdev_lower_dev_find(struct net_device *dev,
bool (*check_cb)(const struct net_device *dev),
bool (*foreign_dev_check_cb)(const struct net_device *dev,
@@ -424,7 +445,7 @@ switchdev_lower_dev_find(struct net_device *dev,
.data = &switchdev_priv,
};
- netdev_walk_all_lower_dev_rcu(dev, switchdev_lower_dev_walk, &priv);
+ netdev_walk_all_lower_dev(dev, switchdev_lower_dev_walk, &priv);
return switchdev_priv.lower_dev;
}
@@ -437,63 +458,40 @@ static int __switchdev_handle_fdb_event_to_device(struct net_device *dev,
const struct net_device *foreign_dev),
int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev,
unsigned long event, const void *ctx,
- const struct switchdev_notifier_fdb_info *fdb_info),
- int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev,
- unsigned long event, const void *ctx,
- const struct switchdev_notifier_fdb_info *fdb_info))
+ const struct switchdev_notifier_fdb_info *fdb_info))
{
const struct switchdev_notifier_info *info = &fdb_info->info;
- struct net_device *br, *lower_dev;
+ struct net_device *br, *lower_dev, *switchdev;
struct list_head *iter;
int err = -EOPNOTSUPP;
if (check_cb(dev))
return mod_cb(dev, orig_dev, event, info->ctx, fdb_info);
- if (netif_is_lag_master(dev)) {
- if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
- goto maybe_bridged_with_us;
-
- /* This is a LAG interface that we offload */
- if (!lag_mod_cb)
- return -EOPNOTSUPP;
-
- return lag_mod_cb(dev, orig_dev, event, info->ctx, fdb_info);
- }
-
/* Recurse through lower interfaces in case the FDB entry is pointing
- * towards a bridge device.
+ * towards a bridge or a LAG device.
*/
- if (netif_is_bridge_master(dev)) {
- if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
- return 0;
-
- /* This is a bridge interface that we offload */
- netdev_for_each_lower_dev(dev, lower_dev, iter) {
- /* Do not propagate FDB entries across bridges */
- if (netif_is_bridge_master(lower_dev))
- continue;
-
- /* Bridge ports might be either us, or LAG interfaces
- * that we offload.
- */
- if (!check_cb(lower_dev) &&
- !switchdev_lower_dev_find(lower_dev, check_cb,
- foreign_dev_check_cb))
- continue;
-
- err = __switchdev_handle_fdb_event_to_device(lower_dev, orig_dev,
- event, fdb_info, check_cb,
- foreign_dev_check_cb,
- mod_cb, lag_mod_cb);
- if (err && err != -EOPNOTSUPP)
- return err;
- }
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ /* Do not propagate FDB entries across bridges */
+ if (netif_is_bridge_master(lower_dev))
+ continue;
- return 0;
+ /* Bridge ports might be either us, or LAG interfaces
+ * that we offload.
+ */
+ if (!check_cb(lower_dev) &&
+ !switchdev_lower_dev_find_rcu(lower_dev, check_cb,
+ foreign_dev_check_cb))
+ continue;
+
+ err = __switchdev_handle_fdb_event_to_device(lower_dev, orig_dev,
+ event, fdb_info, check_cb,
+ foreign_dev_check_cb,
+ mod_cb);
+ if (err && err != -EOPNOTSUPP)
+ return err;
}
-maybe_bridged_with_us:
/* Event is neither on a bridge nor a LAG. Check whether it is on an
* interface that is in a bridge with us.
*/
@@ -501,12 +499,16 @@ maybe_bridged_with_us:
if (!br || !netif_is_bridge_master(br))
return 0;
- if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb))
+ switchdev = switchdev_lower_dev_find_rcu(br, check_cb, foreign_dev_check_cb);
+ if (!switchdev)
return 0;
+ if (!foreign_dev_check_cb(switchdev, dev))
+ return err;
+
return __switchdev_handle_fdb_event_to_device(br, orig_dev, event, fdb_info,
check_cb, foreign_dev_check_cb,
- mod_cb, lag_mod_cb);
+ mod_cb);
}
int switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long event,
@@ -516,16 +518,13 @@ int switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long e
const struct net_device *foreign_dev),
int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev,
unsigned long event, const void *ctx,
- const struct switchdev_notifier_fdb_info *fdb_info),
- int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev,
- unsigned long event, const void *ctx,
- const struct switchdev_notifier_fdb_info *fdb_info))
+ const struct switchdev_notifier_fdb_info *fdb_info))
{
int err;
err = __switchdev_handle_fdb_event_to_device(dev, dev, event, fdb_info,
check_cb, foreign_dev_check_cb,
- mod_cb, lag_mod_cb);
+ mod_cb);
if (err == -EOPNOTSUPP)
err = 0;
@@ -536,13 +535,15 @@ EXPORT_SYMBOL_GPL(switchdev_handle_fdb_event_to_device);
static int __switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
int (*add_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack))
{
struct switchdev_notifier_info *info = &port_obj_info->info;
+ struct net_device *br, *lower_dev, *switchdev;
struct netlink_ext_ack *extack;
- struct net_device *lower_dev;
struct list_head *iter;
int err = -EOPNOTSUPP;
@@ -566,15 +567,46 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev,
if (netif_is_bridge_master(lower_dev))
continue;
+ /* When searching for switchdev interfaces that are neighbors
+ * of foreign ones, and @dev is a bridge, do not recurse on the
+ * foreign interface again, it was already visited.
+ */
+ if (foreign_dev_check_cb && !check_cb(lower_dev) &&
+ !switchdev_lower_dev_find(lower_dev, check_cb, foreign_dev_check_cb))
+ continue;
+
err = __switchdev_handle_port_obj_add(lower_dev, port_obj_info,
- check_cb, add_cb);
+ check_cb, foreign_dev_check_cb,
+ add_cb);
if (err && err != -EOPNOTSUPP)
return err;
}
- return err;
+ /* Event is neither on a bridge nor a LAG. Check whether it is on an
+ * interface that is in a bridge with us.
+ */
+ if (!foreign_dev_check_cb)
+ return err;
+
+ br = netdev_master_upper_dev_get(dev);
+ if (!br || !netif_is_bridge_master(br))
+ return err;
+
+ switchdev = switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb);
+ if (!switchdev)
+ return err;
+
+ if (!foreign_dev_check_cb(switchdev, dev))
+ return err;
+
+ return __switchdev_handle_port_obj_add(br, port_obj_info, check_cb,
+ foreign_dev_check_cb, add_cb);
}
+/* Pass through a port object addition, if @dev passes @check_cb, or replicate
+ * it towards all lower interfaces of @dev that pass @check_cb, if @dev is a
+ * bridge or a LAG.
+ */
int switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
@@ -585,21 +617,46 @@ int switchdev_handle_port_obj_add(struct net_device *dev,
int err;
err = __switchdev_handle_port_obj_add(dev, port_obj_info, check_cb,
- add_cb);
+ NULL, add_cb);
if (err == -EOPNOTSUPP)
err = 0;
return err;
}
EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_add);
+/* Same as switchdev_handle_port_obj_add(), except if object is notified on a
+ * @dev that passes @foreign_dev_check_cb, it is replicated towards all devices
+ * that pass @check_cb and are in the same bridge as @dev.
+ */
+int switchdev_handle_port_obj_add_foreign(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*add_cb)(struct net_device *dev, const void *ctx,
+ const struct switchdev_obj *obj,
+ struct netlink_ext_ack *extack))
+{
+ int err;
+
+ err = __switchdev_handle_port_obj_add(dev, port_obj_info, check_cb,
+ foreign_dev_check_cb, add_cb);
+ if (err == -EOPNOTSUPP)
+ err = 0;
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_add_foreign);
+
static int __switchdev_handle_port_obj_del(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
int (*del_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj))
{
struct switchdev_notifier_info *info = &port_obj_info->info;
- struct net_device *lower_dev;
+ struct net_device *br, *lower_dev, *switchdev;
struct list_head *iter;
int err = -EOPNOTSUPP;
@@ -621,15 +678,46 @@ static int __switchdev_handle_port_obj_del(struct net_device *dev,
if (netif_is_bridge_master(lower_dev))
continue;
+ /* When searching for switchdev interfaces that are neighbors
+ * of foreign ones, and @dev is a bridge, do not recurse on the
+ * foreign interface again, it was already visited.
+ */
+ if (foreign_dev_check_cb && !check_cb(lower_dev) &&
+ !switchdev_lower_dev_find(lower_dev, check_cb, foreign_dev_check_cb))
+ continue;
+
err = __switchdev_handle_port_obj_del(lower_dev, port_obj_info,
- check_cb, del_cb);
+ check_cb, foreign_dev_check_cb,
+ del_cb);
if (err && err != -EOPNOTSUPP)
return err;
}
- return err;
+ /* Event is neither on a bridge nor a LAG. Check whether it is on an
+ * interface that is in a bridge with us.
+ */
+ if (!foreign_dev_check_cb)
+ return err;
+
+ br = netdev_master_upper_dev_get(dev);
+ if (!br || !netif_is_bridge_master(br))
+ return err;
+
+ switchdev = switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb);
+ if (!switchdev)
+ return err;
+
+ if (!foreign_dev_check_cb(switchdev, dev))
+ return err;
+
+ return __switchdev_handle_port_obj_del(br, port_obj_info, check_cb,
+ foreign_dev_check_cb, del_cb);
}
+/* Pass through a port object deletion, if @dev passes @check_cb, or replicate
+ * it towards all lower interfaces of @dev that pass @check_cb, if @dev is a
+ * bridge or a LAG.
+ */
int switchdev_handle_port_obj_del(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
@@ -639,13 +727,35 @@ int switchdev_handle_port_obj_del(struct net_device *dev,
int err;
err = __switchdev_handle_port_obj_del(dev, port_obj_info, check_cb,
- del_cb);
+ NULL, del_cb);
if (err == -EOPNOTSUPP)
err = 0;
return err;
}
EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_del);
+/* Same as switchdev_handle_port_obj_del(), except if object is notified on a
+ * @dev that passes @foreign_dev_check_cb, it is replicated towards all devices
+ * that pass @check_cb and are in the same bridge as @dev.
+ */
+int switchdev_handle_port_obj_del_foreign(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*del_cb)(struct net_device *dev, const void *ctx,
+ const struct switchdev_obj *obj))
+{
+ int err;
+
+ err = __switchdev_handle_port_obj_del(dev, port_obj_info, check_cb,
+ foreign_dev_check_cb, del_cb);
+ if (err == -EOPNOTSUPP)
+ err = 0;
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_del_foreign);
+
static int __switchdev_handle_port_attr_set(struct net_device *dev,
struct switchdev_notifier_port_attr_info *port_attr_info,
bool (*check_cb)(const struct net_device *dev),
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 473a790f5894..35cac7733fd3 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -259,9 +259,8 @@ static int tipc_enable_bearer(struct net *net, const char *name,
u32 i;
if (!bearer_name_validate(name, &b_names)) {
- errstr = "illegal name";
NL_SET_ERR_MSG(extack, "Illegal name");
- goto rejected;
+ return res;
}
if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
@@ -352,16 +351,18 @@ static int tipc_enable_bearer(struct net *net, const char *name,
goto rejected;
}
- test_and_set_bit_lock(0, &b->up);
- rcu_assign_pointer(tn->bearer_list[bearer_id], b);
- if (skb)
- tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
-
+ /* Create monitoring data before accepting activate messages */
if (tipc_mon_create(net, bearer_id)) {
bearer_disable(net, b);
+ kfree_skb(skb);
return -ENOMEM;
}
+ test_and_set_bit_lock(0, &b->up);
+ rcu_assign_pointer(tn->bearer_list[bearer_id], b);
+ if (skb)
+ tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
+
pr_info("Enabled bearer <%s>, priority %u\n", name, prio);
return res;
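
The reorder above is the usual publish-after-init rule: the bearer may only become reachable (via rcu_assign_pointer() into bearer_list) once everything a concurrent reader might touch exists, and a failure before publication must also free the pending skb it would otherwise leak. A sketch of the rule with hypothetical names:

        #include <linux/rcupdate.h>
        #include <linux/slab.h>

        struct example_bearer {
                void *monitor;
        };

        struct example_net {
                struct example_bearer __rcu *bearers[8];
        };

        static void *example_mon_create(int id)
        {
                return kzalloc(64, GFP_KERNEL); /* stand-in monitor state */
        }

        static int example_enable(struct example_net *n, int id,
                                  struct example_bearer *b)
        {
                b->monitor = example_mon_create(id);
                if (!b->monitor)
                        return -ENOMEM;         /* nothing published yet */

                rcu_assign_pointer(n->bearers[id], b);  /* readers may look now */
                return 0;
        }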
@@ -768,7 +769,7 @@ void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts)
skb->pkt_type = PACKET_HOST;
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->protocol = eth_type_trans(skb, dev);
- netif_rx_ni(skb);
+ netif_rx(skb);
}
}
@@ -787,7 +788,7 @@ int tipc_attach_loopback(struct net *net)
if (!dev)
return -ENODEV;
- dev_hold_track(dev, &tn->loopback_pt.dev_tracker, GFP_KERNEL);
+ netdev_hold(dev, &tn->loopback_pt.dev_tracker, GFP_KERNEL);
tn->loopback_pt.dev = dev;
tn->loopback_pt.type = htons(ETH_P_TIPC);
tn->loopback_pt.func = tipc_loopback_rcv_pkt;
@@ -800,7 +801,7 @@ void tipc_detach_loopback(struct net *net)
struct tipc_net *tn = tipc_net(net);
dev_remove_pack(&tn->loopback_pt);
- dev_put_track(net->loopback_dev, &tn->loopback_pt.dev_tracker);
+ netdev_put(net->loopback_dev, &tn->loopback_pt.dev_tracker);
}
/* Caller should hold rtnl_lock to protect the bearer */
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 3f4542e0f065..434e70eabe08 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -109,10 +109,9 @@ static void __net_exit tipc_exit_net(struct net *net)
struct tipc_net *tn = tipc_net(net);
tipc_detach_loopback(net);
+ tipc_net_stop(net);
/* Make sure the tipc_net_finalize_work() finished */
cancel_work_sync(&tn->work);
- tipc_net_stop(net);
-
tipc_bcast_stop(net);
tipc_nametbl_stop(net);
tipc_sk_rht_destroy(net);
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index 9325479295b8..f09316a9035f 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -2276,7 +2276,7 @@ static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr)
struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx;
struct tipc_aead_key *skey = NULL;
u16 key_gen = msg_key_gen(hdr);
- u16 size = msg_data_sz(hdr);
+ u32 size = msg_data_sz(hdr);
u8 *data = msg_data(hdr);
unsigned int keylen;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index da69e1abf68f..e8630707901e 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -148,8 +148,8 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
{
struct net *net = d->net;
struct tipc_net *tn = tipc_net(net);
- bool trial = time_before(jiffies, tn->addr_trial_end);
u32 self = tipc_own_addr(net);
+ bool trial = time_before(jiffies, tn->addr_trial_end) && !self;
if (mtyp == DSC_TRIAL_FAIL_MSG) {
if (!trial)
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 8d9e09f48f4c..e260c0d557f5 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2200,7 +2200,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
struct tipc_msg *hdr = buf_msg(skb);
struct tipc_gap_ack_blks *ga = NULL;
bool reply = msg_probe(hdr), retransmitted = false;
- u16 dlen = msg_data_sz(hdr), glen = 0;
+ u32 dlen = msg_data_sz(hdr), glen = 0;
u16 peers_snd_nxt = msg_next_sent(hdr);
u16 peers_tol = msg_link_tolerance(hdr);
u16 peers_prio = msg_linkprio(hdr);
@@ -2214,6 +2214,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
void *data;
trace_tipc_proto_rcv(skb, false, l->name);
+
+ if (dlen > U16_MAX)
+ goto exit;
+
if (tipc_link_is_blocked(l) || !xmitq)
goto exit;
@@ -2282,6 +2286,11 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
break;
case STATE_MSG:
+ /* Validate Gap ACK blocks, drop if invalid */
+ glen = tipc_get_gap_ack_blks(&ga, l, hdr, true);
+ if (glen > dlen)
+ break;
+
l->rcv_nxt_state = msg_seqno(hdr) + 1;
/* Update own tolerance if peer indicates a non-zero value */
@@ -2307,9 +2316,6 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
break;
}
- /* Receive Gap ACK blocks from peer if any */
- glen = tipc_get_gap_ack_blks(&ga, l, hdr, true);
-
tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr,
&l->mon_state, l->bearer_id);
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 407619697292..9618e4429f0f 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -160,7 +160,7 @@ static void map_set(u64 *up_map, int i, unsigned int v)
static int map_get(u64 up_map, int i)
{
- return (up_map & (1 << i)) >> i;
+ return (up_map & (1ULL << i)) >> i;
}
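
The `1 << i` to `1ULL << i` change fixes an undefined 32-bit shift: the map tracks up to 64 peers, and probing members 32..63 with an int-typed mask misreads the map. A tiny userspace reproduction:

        #include <stdint.h>
        #include <stdio.h>

        /* '1 << i' is computed as a 32-bit int, so i >= 32 is undefined
         * behaviour (x86 typically wraps it to 1 << (i % 32)); promoting
         * the constant to 1ULL keeps the whole expression 64-bit.
         */
        static int map_get_fixed(uint64_t up_map, int i)
        {
                return (int)((up_map & (1ULL << i)) >> i);
        }

        int main(void)
        {
                uint64_t up_map = 1ULL << 40;   /* only peer #40 is up */

                printf("peer 40 up: %d\n", map_get_fixed(up_map, 40)); /* 1 */
                printf("peer  8 up: %d\n", map_get_fixed(up_map, 8));  /* 0 */
                return 0;
        }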
static struct tipc_peer *peer_prev(struct tipc_peer *peer)
@@ -496,6 +496,8 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
state->probing = false;
/* Sanity check received domain record */
+ if (new_member_cnt > MAX_MON_DOMAIN)
+ return;
if (dlen < dom_rec_len(arrv_dom, 0))
return;
if (dlen != dom_rec_len(arrv_dom, new_member_cnt))
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 64ae4c4c44f8..c5eec16213d7 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -226,14 +226,6 @@ static inline void msg_set_bits(struct tipc_msg *m, u32 w,
m->hdr[w] |= htonl(val);
}
-static inline void msg_swap_words(struct tipc_msg *msg, u32 a, u32 b)
-{
- u32 temp = msg->hdr[a];
-
- msg->hdr[a] = msg->hdr[b];
- msg->hdr[b] = temp;
-}
-
/*
* Word 0
*/
@@ -480,11 +472,6 @@ static inline void msg_incr_reroute_cnt(struct tipc_msg *m)
msg_set_bits(m, 1, 21, 0xf, msg_reroute_cnt(m) + 1);
}
-static inline void msg_reset_reroute_cnt(struct tipc_msg *m)
-{
- msg_set_bits(m, 1, 21, 0xf, 0);
-}
-
static inline u32 msg_lookup_scope(struct tipc_msg *m)
{
return msg_bits(m, 1, 19, 0x3);
@@ -800,11 +787,6 @@ static inline void msg_set_dest_domain(struct tipc_msg *m, u32 n)
msg_set_word(m, 2, n);
}
-static inline u32 msg_bcgap_after(struct tipc_msg *m)
-{
- return msg_bits(m, 2, 16, 0xffff);
-}
-
static inline void msg_set_bcgap_after(struct tipc_msg *m, u32 n)
{
msg_set_bits(m, 2, 16, 0xffff, n);
@@ -868,11 +850,6 @@ static inline void msg_set_next_sent(struct tipc_msg *m, u16 n)
msg_set_bits(m, 4, 0, 0xffff, n);
}
-static inline void msg_set_long_msgno(struct tipc_msg *m, u32 n)
-{
- msg_set_bits(m, 4, 0, 0xffff, n);
-}
-
static inline u32 msg_bc_netid(struct tipc_msg *m)
{
return msg_word(m, 4);
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index bda902caa814..190b49c5cbc3 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -41,14 +41,6 @@
int sysctl_tipc_named_timeout __read_mostly = 2000;
-struct distr_queue_item {
- struct distr_item i;
- u32 dtype;
- u32 node;
- unsigned long expires;
- struct list_head next;
-};
-
/**
* publ_to_item - add publication info to a publication message
* @p: publication info
@@ -313,7 +305,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
pr_warn_ratelimited("Failed to remove binding %u,%u from %u\n",
ua.sr.type, ua.sr.lower, node);
} else {
- pr_warn("Unrecognized name table message received\n");
+ pr_warn_ratelimited("Unknown name table message received\n");
}
return false;
}
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 01396dd1c899..d1180370fdf4 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -967,7 +967,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
list_for_each_entry(p, &sr->all_publ, all_publ)
if (p->key == *last_key)
break;
- if (p->key != *last_key)
+ if (list_entry_is_head(p, &sr->all_publ, all_publ))
return -EPIPE;
} else {
p = list_first_entry(&sr->all_publ,
@@ -1202,14 +1202,3 @@ void tipc_dest_list_purge(struct list_head *l)
kfree(dst);
}
}
-
-int tipc_dest_list_len(struct list_head *l)
-{
- struct tipc_dest *dst;
- int i = 0;
-
- list_for_each_entry(dst, l, list) {
- i++;
- }
- return i;
-}
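
The list_entry_is_head() conversion above (and the matching one in socket.c further down) hinges on one detail: when list_for_each_entry() terminates without a break, the cursor is the list head re-cast to the entry type, so dereferencing a member (the old `p->key != *last_key` test) reads out of bounds. A sketch of the idiom with illustrative types:

        #include <linux/list.h>
        #include <linux/types.h>

        struct example_publ {
                struct list_head node;
                u32 key;
        };

        static struct example_publ *example_publ_find(struct list_head *head,
                                                      u32 key)
        {
                struct example_publ *p;

                list_for_each_entry(p, head, node)
                        if (p->key == key)
                                break;
                if (list_entry_is_head(p, head, node))
                        return NULL;    /* walked off the end */
                return p;
        }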
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 259f95e3d99c..3bcd9ef8cee3 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -151,6 +151,5 @@ bool tipc_dest_push(struct list_head *l, u32 node, u32 port);
bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port);
bool tipc_dest_del(struct list_head *l, u32 node, u32 port);
void tipc_dest_list_purge(struct list_head *l);
-int tipc_dest_list_len(struct list_head *l);
#endif
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index c447cb5f879e..e8fd257c0e68 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -294,6 +294,7 @@ struct genl_family tipc_genl_family __ro_after_init = {
.module = THIS_MODULE,
.ops = tipc_genl_v2_ops,
.n_ops = ARRAY_SIZE(tipc_genl_v2_ops),
+ .resv_start_op = TIPC_NL_ADDR_LEGACY_GET + 1,
};
int __init tipc_netlink_start(void)
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 0749df80454d..dfea27a906f2 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -880,7 +880,7 @@ static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg)
};
ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req);
- if (TLV_GET_DATA_LEN(msg->req) < sizeof(struct tipc_name_table_query))
+ if (TLV_GET_DATA_LEN(msg->req) < (int)sizeof(struct tipc_name_table_query))
return -EINVAL;
depth = ntohl(ntq->depth);
@@ -1357,6 +1357,7 @@ static struct genl_family tipc_genl_compat_family __ro_after_init = {
.module = THIS_MODULE,
.small_ops = tipc_genl_compat_ops,
.n_small_ops = ARRAY_SIZE(tipc_genl_compat_ops),
+ .resv_start_op = TIPC_GENL_CMD + 1,
};
int __init tipc_netlink_compat_start(void)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9947b7dfe1d2..b48d97cbbe29 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -403,7 +403,7 @@ static void tipc_node_write_unlock(struct tipc_node *n)
u32 flags = n->action_flags;
struct list_head *publ_list;
struct tipc_uaddr ua;
- u32 bearer_id;
+ u32 bearer_id, node;
if (likely(!flags)) {
write_unlock_bh(&n->lock);
@@ -413,7 +413,8 @@ static void tipc_node_write_unlock(struct tipc_node *n)
tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE,
TIPC_LINK_STATE, n->addr, n->addr);
sk.ref = n->link_id;
- sk.node = n->addr;
+ sk.node = tipc_own_addr(net);
+ node = n->addr;
bearer_id = n->link_id & 0xffff;
publ_list = &n->publ_list;
@@ -423,17 +424,17 @@ static void tipc_node_write_unlock(struct tipc_node *n)
write_unlock_bh(&n->lock);
if (flags & TIPC_NOTIFY_NODE_DOWN)
- tipc_publ_notify(net, publ_list, sk.node, n->capabilities);
+ tipc_publ_notify(net, publ_list, node, n->capabilities);
if (flags & TIPC_NOTIFY_NODE_UP)
- tipc_named_node_up(net, sk.node, n->capabilities);
+ tipc_named_node_up(net, node, n->capabilities);
if (flags & TIPC_NOTIFY_LINK_UP) {
- tipc_mon_peer_up(net, sk.node, bearer_id);
+ tipc_mon_peer_up(net, node, bearer_id);
tipc_nametbl_publish(net, &ua, &sk, sk.ref);
}
if (flags & TIPC_NOTIFY_LINK_DOWN) {
- tipc_mon_peer_down(net, sk.node, bearer_id);
+ tipc_mon_peer_down(net, node, bearer_id);
tipc_nametbl_withdraw(net, &ua, &sk, sk.ref);
}
}
@@ -471,8 +472,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id,
bool preliminary)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_link *l, *snd_l = tipc_bc_sndlink(net);
struct tipc_node *n, *temp_node;
- struct tipc_link *l;
unsigned long intv;
int bearer_id;
int i;
@@ -487,6 +488,16 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id,
goto exit;
/* A preliminary node becomes "real" now, refresh its data */
tipc_node_write_lock(n);
+ if (!tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX,
+ tipc_link_min_win(snd_l), tipc_link_max_win(snd_l),
+ n->capabilities, &n->bc_entry.inputq1,
+ &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) {
+ pr_warn("Broadcast rcv link refresh failed, no memory\n");
+ tipc_node_write_unlock_fast(n);
+ tipc_node_put(n);
+ n = NULL;
+ goto exit;
+ }
n->preliminary = false;
n->addr = addr;
hlist_del_rcu(&n->hash);
@@ -566,7 +577,16 @@ update:
n->signature = INVALID_NODE_SIG;
n->active_links[0] = INVALID_BEARER_ID;
n->active_links[1] = INVALID_BEARER_ID;
- n->bc_entry.link = NULL;
+ if (!preliminary &&
+ !tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX,
+ tipc_link_min_win(snd_l), tipc_link_max_win(snd_l),
+ n->capabilities, &n->bc_entry.inputq1,
+ &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) {
+ pr_warn("Broadcast rcv link creation failed, no memory\n");
+ kfree(n);
+ n = NULL;
+ goto exit;
+ }
tipc_node_get(n);
timer_setup(&n->timer, tipc_node_timeout, 0);
/* Start a slow timer anyway, crypto needs it */
@@ -1154,7 +1174,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
bool *respond, bool *dupl_addr)
{
struct tipc_node *n;
- struct tipc_link *l, *snd_l;
+ struct tipc_link *l;
struct tipc_link_entry *le;
bool addr_match = false;
bool sign_match = false;
@@ -1174,22 +1194,6 @@ void tipc_node_check_dest(struct net *net, u32 addr,
return;
tipc_node_write_lock(n);
- if (unlikely(!n->bc_entry.link)) {
- snd_l = tipc_bc_sndlink(net);
- if (!tipc_link_bc_create(net, tipc_own_addr(net),
- addr, peer_id, U16_MAX,
- tipc_link_min_win(snd_l),
- tipc_link_max_win(snd_l),
- n->capabilities,
- &n->bc_entry.inputq1,
- &n->bc_entry.namedq, snd_l,
- &n->bc_entry.link)) {
- pr_warn("Broadcast rcv link creation failed, no mem\n");
- tipc_node_write_unlock_fast(n);
- tipc_node_put(n);
- return;
- }
- }
le = &n->links[b->identity];
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3e63c83e641c..e902b01ea3cb 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -502,6 +502,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
sock_init_data(sock, sk);
tipc_set_sk_state(sk, TIPC_OPEN);
if (tipc_sk_insert(tsk)) {
+ sk_free(sk);
pr_warn("Socket create failed; port number exhausted\n");
return -EINVAL;
}
@@ -516,7 +517,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
sk->sk_shutdown = 0;
sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
- sk->sk_rcvbuf = sysctl_tipc_rmem[1];
+ sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]);
sk->sk_data_ready = tipc_data_ready;
sk->sk_write_space = tipc_write_space;
sk->sk_destruct = tipc_sock_destruct;
@@ -2852,7 +2853,8 @@ static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list)
/* Try again later if dest link is congested */
if (tsk->cong_link_cnt) {
- sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100));
+ sk_reset_timer(sk, &sk->sk_timer,
+ jiffies + msecs_to_jiffies(100));
return;
}
/* Prepare SYN for retransmit */
@@ -3008,7 +3010,7 @@ static int tipc_sk_insert(struct tipc_sock *tsk)
struct net *net = sock_net(sk);
struct tipc_net *tn = net_generic(net, tipc_net_id);
u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1;
- u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT;
+ u32 portid = prandom_u32_max(remaining) + TIPC_MIN_PORT;
while (remaining--) {
portid++;
@@ -3749,7 +3751,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
if (p->key == *last_publ)
break;
}
- if (p->key != *last_publ) {
+ if (list_entry_is_head(p, &tsk->publications, binding_sock)) {
/* We never set seq or call nl_dump_check_consistent()
* this means that setting prev_seq here will cause the
* consistency check to fail in the netlink callback
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 5522865deae9..d92ec92f0b71 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -450,12 +450,19 @@ static void tipc_conn_data_ready(struct sock *sk)
static void tipc_topsrv_accept(struct work_struct *work)
{
struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork);
- struct socket *lsock = srv->listener;
- struct socket *newsock;
+ struct socket *newsock, *lsock;
struct tipc_conn *con;
struct sock *newsk;
int ret;
+ spin_lock_bh(&srv->idr_lock);
+ if (!srv->listener) {
+ spin_unlock_bh(&srv->idr_lock);
+ return;
+ }
+ lsock = srv->listener;
+ spin_unlock_bh(&srv->idr_lock);
+
while (1) {
ret = kernel_accept(lsock, &newsock, O_NONBLOCK);
if (ret < 0)
@@ -489,7 +496,7 @@ static void tipc_topsrv_listener_data_ready(struct sock *sk)
read_lock_bh(&sk->sk_callback_lock);
srv = sk->sk_user_data;
- if (srv->listener)
+ if (srv)
queue_work(srv->rcv_wq, &srv->awork);
read_unlock_bh(&sk->sk_callback_lock);
}
@@ -568,7 +575,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
sub.seq.upper = upper;
sub.timeout = TIPC_WAIT_FOREVER;
sub.filter = filter;
- *(u32 *)&sub.usr_handle = port;
+ *(u64 *)&sub.usr_handle = (u64)port;
con = tipc_conn_alloc(tipc_topsrv(net));
if (IS_ERR(con))
@@ -699,8 +706,9 @@ static void tipc_topsrv_stop(struct net *net)
__module_get(lsock->sk->sk_prot_creator->owner);
srv->listener = NULL;
spin_unlock_bh(&srv->idr_lock);
- sock_release(lsock);
+
tipc_topsrv_work_stop(srv);
+ sock_release(lsock);
idr_destroy(&srv->conn_idr);
kfree(srv);
}
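
The accept worker now snapshots srv->listener under idr_lock, and the stop path releases the socket only after the worker has been flushed; together these close a use-after-free window. A hedged sketch of the shutdown side with made-up names (cancel_work_sync() stands in for the workqueue teardown):

        #include <linux/net.h>
        #include <linux/spinlock.h>
        #include <linux/workqueue.h>

        struct example_srv {
                spinlock_t lock;
                struct socket *listener;
                struct work_struct accept_work;
        };

        static void example_listener_stop(struct example_srv *srv)
        {
                struct socket *lsock;

                spin_lock_bh(&srv->lock);
                lsock = srv->listener;
                srv->listener = NULL;   /* workers re-check under this lock */
                spin_unlock_bh(&srv->lock);

                cancel_work_sync(&srv->accept_work); /* no accept in flight */
                if (lsock)
                        sock_release(lsock);         /* now safe to free */
        }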
diff --git a/net/tls/Makefile b/net/tls/Makefile
index f1ffbfe8968d..e41c800489ac 100644
--- a/net/tls/Makefile
+++ b/net/tls/Makefile
@@ -7,7 +7,7 @@ CFLAGS_trace.o := -I$(src)
obj-$(CONFIG_TLS) += tls.o
-tls-y := tls_main.o tls_sw.o tls_proc.o trace.o
+tls-y := tls_main.o tls_sw.o tls_proc.o trace.o tls_strp.o
tls-$(CONFIG_TLS_TOE) += tls_toe.o
tls-$(CONFIG_TLS_DEVICE) += tls_device.o tls_device_fallback.o
diff --git a/net/tls/tls.h b/net/tls/tls.h
new file mode 100644
index 000000000000..0e840a0c3437
--- /dev/null
+++ b/net/tls/tls.h
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
+ * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _TLS_INT_H
+#define _TLS_INT_H
+
+#include <asm/byteorder.h>
+#include <linux/types.h>
+#include <linux/skmsg.h>
+#include <net/tls.h>
+
+#define TLS_PAGE_ORDER (min_t(unsigned int, PAGE_ALLOC_COSTLY_ORDER, \
+ TLS_MAX_PAYLOAD_SIZE >> PAGE_SHIFT))
+
+#define __TLS_INC_STATS(net, field) \
+ __SNMP_INC_STATS((net)->mib.tls_statistics, field)
+#define TLS_INC_STATS(net, field) \
+ SNMP_INC_STATS((net)->mib.tls_statistics, field)
+#define TLS_DEC_STATS(net, field) \
+ SNMP_DEC_STATS((net)->mib.tls_statistics, field)
+
+/* TLS records are maintained in 'struct tls_rec'. It stores the memory pages
+ * allocated or mapped for each TLS record. After encryption, the records are
+ * stored in a linked list.
+ */
+struct tls_rec {
+ struct list_head list;
+ int tx_ready;
+ int tx_flags;
+
+ struct sk_msg msg_plaintext;
+ struct sk_msg msg_encrypted;
+
+ /* AAD | msg_plaintext.sg.data | sg_tag */
+ struct scatterlist sg_aead_in[2];
+ /* AAD | msg_encrypted.sg.data (data contains overhead for hdr & iv & tag) */
+ struct scatterlist sg_aead_out[2];
+
+ char content_type;
+ struct scatterlist sg_content_type;
+
+ char aad_space[TLS_AAD_SPACE_SIZE];
+ u8 iv_data[MAX_IV_SIZE];
+ struct aead_request aead_req;
+ u8 aead_req_ctx[];
+};
+
+int __net_init tls_proc_init(struct net *net);
+void __net_exit tls_proc_fini(struct net *net);
+
+struct tls_context *tls_ctx_create(struct sock *sk);
+void tls_ctx_free(struct sock *sk, struct tls_context *ctx);
+void update_sk_prot(struct sock *sk, struct tls_context *ctx);
+
+int wait_on_pending_writer(struct sock *sk, long *timeo);
+int tls_sk_query(struct sock *sk, int optname, char __user *optval,
+ int __user *optlen);
+int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
+ unsigned int optlen);
+void tls_err_abort(struct sock *sk, int err);
+
+int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
+void tls_update_rx_zc_capable(struct tls_context *tls_ctx);
+void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx);
+void tls_sw_strparser_done(struct tls_context *tls_ctx);
+int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+int tls_sw_sendpage_locked(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags);
+int tls_sw_sendpage(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags);
+void tls_sw_cancel_work_tx(struct tls_context *tls_ctx);
+void tls_sw_release_resources_tx(struct sock *sk);
+void tls_sw_free_ctx_tx(struct tls_context *tls_ctx);
+void tls_sw_free_resources_rx(struct sock *sk);
+void tls_sw_release_resources_rx(struct sock *sk);
+void tls_sw_free_ctx_rx(struct tls_context *tls_ctx);
+int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int flags, int *addr_len);
+bool tls_sw_sock_is_readable(struct sock *sk);
+ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags);
+
+int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+int tls_device_sendpage(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags);
+int tls_tx_records(struct sock *sk, int flags);
+
+void tls_sw_write_space(struct sock *sk, struct tls_context *ctx);
+void tls_device_write_space(struct sock *sk, struct tls_context *ctx);
+
+int tls_process_cmsg(struct sock *sk, struct msghdr *msg,
+ unsigned char *record_type);
+int decrypt_skb(struct sock *sk, struct scatterlist *sgout);
+
+int tls_sw_fallback_init(struct sock *sk,
+ struct tls_offload_context_tx *offload_ctx,
+ struct tls_crypto_info *crypto_info);
+
+int tls_strp_dev_init(void);
+void tls_strp_dev_exit(void);
+
+void tls_strp_done(struct tls_strparser *strp);
+void tls_strp_stop(struct tls_strparser *strp);
+int tls_strp_init(struct tls_strparser *strp, struct sock *sk);
+void tls_strp_data_ready(struct tls_strparser *strp);
+
+void tls_strp_check_rcv(struct tls_strparser *strp);
+void tls_strp_msg_done(struct tls_strparser *strp);
+
+int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb);
+void tls_rx_msg_ready(struct tls_strparser *strp);
+
+void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh);
+int tls_strp_msg_cow(struct tls_sw_context_rx *ctx);
+struct sk_buff *tls_strp_msg_detach(struct tls_sw_context_rx *ctx);
+int tls_strp_msg_hold(struct tls_strparser *strp, struct sk_buff_head *dst);
+
+static inline struct tls_msg *tls_msg(struct sk_buff *skb)
+{
+ struct sk_skb_cb *scb = (struct sk_skb_cb *)skb->cb;
+
+ return &scb->tls;
+}
+
+static inline struct sk_buff *tls_strp_msg(struct tls_sw_context_rx *ctx)
+{
+ DEBUG_NET_WARN_ON_ONCE(!ctx->strp.msg_ready || !ctx->strp.anchor->len);
+ return ctx->strp.anchor;
+}
+
+static inline bool tls_strp_msg_ready(struct tls_sw_context_rx *ctx)
+{
+ return ctx->strp.msg_ready;
+}
+
+#ifdef CONFIG_TLS_DEVICE
+int tls_device_init(void);
+void tls_device_cleanup(void);
+int tls_set_device_offload(struct sock *sk, struct tls_context *ctx);
+void tls_device_free_resources_tx(struct sock *sk);
+int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx);
+void tls_device_offload_cleanup_rx(struct sock *sk);
+void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq);
+int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx);
+#else
+static inline int tls_device_init(void) { return 0; }
+static inline void tls_device_cleanup(void) {}
+
+static inline int
+tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void tls_device_free_resources_tx(struct sock *sk) {}
+
+static inline int
+tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void tls_device_offload_cleanup_rx(struct sock *sk) {}
+static inline void
+tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) {}
+
+static inline int
+tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx)
+{
+ return 0;
+}
+#endif
+
+int tls_push_sg(struct sock *sk, struct tls_context *ctx,
+ struct scatterlist *sg, u16 first_offset,
+ int flags);
+int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
+ int flags);
+void tls_free_partial_record(struct sock *sk, struct tls_context *ctx);
+
+static inline bool tls_is_partially_sent_record(struct tls_context *ctx)
+{
+ return !!ctx->partially_sent_record;
+}
+
+static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
+{
+ return tls_ctx->pending_open_record_frags;
+}
+
+static inline bool tls_bigint_increment(unsigned char *seq, int len)
+{
+ int i;
+
+ for (i = len - 1; i >= 0; i--) {
+ ++seq[i];
+ if (seq[i] != 0)
+ break;
+ }
+
+ return (i == -1);
+}
+
+static inline void tls_bigint_subtract(unsigned char *seq, int n)
+{
+ u64 rcd_sn;
+ __be64 *p;
+
+ BUILD_BUG_ON(TLS_MAX_REC_SEQ_SIZE != 8);
+
+ p = (__be64 *)seq;
+ rcd_sn = be64_to_cpu(*p);
+ *p = cpu_to_be64(rcd_sn - n);
+}
+
+static inline void
+tls_advance_record_sn(struct sock *sk, struct tls_prot_info *prot,
+ struct cipher_context *ctx)
+{
+ if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size))
+ tls_err_abort(sk, -EBADMSG);
+
+ if (prot->version != TLS_1_3_VERSION &&
+ prot->cipher_type != TLS_CIPHER_CHACHA20_POLY1305)
+ tls_bigint_increment(ctx->iv + prot->salt_size,
+ prot->iv_size);
+}
+
+static inline void
+tls_xor_iv_with_seq(struct tls_prot_info *prot, char *iv, char *seq)
+{
+ int i;
+
+ if (prot->version == TLS_1_3_VERSION ||
+ prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305) {
+ for (i = 0; i < 8; i++)
+ iv[i + 4] ^= seq[i];
+ }
+}
+
+static inline void
+tls_fill_prepend(struct tls_context *ctx, char *buf, size_t plaintext_len,
+ unsigned char record_type)
+{
+ struct tls_prot_info *prot = &ctx->prot_info;
+ size_t pkt_len, iv_size = prot->iv_size;
+
+ pkt_len = plaintext_len + prot->tag_size;
+ if (prot->version != TLS_1_3_VERSION &&
+ prot->cipher_type != TLS_CIPHER_CHACHA20_POLY1305) {
+ pkt_len += iv_size;
+
+ memcpy(buf + TLS_NONCE_OFFSET,
+ ctx->tx.iv + prot->salt_size, iv_size);
+ }
+
+ /* we cover nonce explicit here as well, so buf should be of
+ * size KTLS_DTLS_HEADER_SIZE + KTLS_DTLS_NONCE_EXPLICIT_SIZE
+ */
+ buf[0] = prot->version == TLS_1_3_VERSION ?
+ TLS_RECORD_TYPE_DATA : record_type;
+ /* Note that VERSION must be TLS_1_2 for both TLS1.2 and TLS1.3 */
+ buf[1] = TLS_1_2_VERSION_MINOR;
+ buf[2] = TLS_1_2_VERSION_MAJOR;
+ /* we can use IV for nonce explicit according to spec */
+ buf[3] = pkt_len >> 8;
+ buf[4] = pkt_len & 0xFF;
+}
+
+static inline
+void tls_make_aad(char *buf, size_t size, char *record_sequence,
+ unsigned char record_type, struct tls_prot_info *prot)
+{
+ if (prot->version != TLS_1_3_VERSION) {
+ memcpy(buf, record_sequence, prot->rec_seq_size);
+ buf += 8;
+ } else {
+ size += prot->tag_size;
+ }
+
+ buf[0] = prot->version == TLS_1_3_VERSION ?
+ TLS_RECORD_TYPE_DATA : record_type;
+ buf[1] = TLS_1_2_VERSION_MAJOR;
+ buf[2] = TLS_1_2_VERSION_MINOR;
+ buf[3] = size >> 8;
+ buf[4] = size & 0xFF;
+}
+
+#endif
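
tls_bigint_increment() above implements the TLS record sequence number as a big-endian byte counter; the non-zero return signals wrap-around, which tls_advance_record_sn() turns into a connection abort. A userspace replica showing the carry behaviour:

        #include <stdint.h>
        #include <stdio.h>
        #include <string.h>

        /* Increment a big-endian counter in place; returns 1 when every
         * byte carried over, i.e. the counter wrapped to zero.
         */
        static int bigint_increment(unsigned char *seq, int len)
        {
                int i;

                for (i = len - 1; i >= 0; i--) {
                        if (++seq[i] != 0)
                                break;
                }
                return i == -1;
        }

        int main(void)
        {
                unsigned char seq[8];

                memset(seq, 0, sizeof(seq));
                bigint_increment(seq, sizeof(seq));     /* 00..01, no wrap */
                printf("low byte %02x\n", seq[7]);

                memset(seq, 0xff, sizeof(seq));         /* all-ones counter */
                printf("wrapped  %d\n", bigint_increment(seq, sizeof(seq)));
                return 0;
        }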
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index b932469ee69c..a03d66046ca3 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -38,6 +38,7 @@
#include <net/tcp.h>
#include <net/tls.h>
+#include "tls.h"
#include "trace.h"
/* device_offload_lock is used to synchronize tls_dev_add
@@ -45,10 +46,8 @@
*/
static DECLARE_RWSEM(device_offload_lock);
-static void tls_device_gc_task(struct work_struct *work);
+static struct workqueue_struct *destruct_wq __read_mostly;
-static DECLARE_WORK(tls_device_gc_work, tls_device_gc_task);
-static LIST_HEAD(tls_device_gc_list);
static LIST_HEAD(tls_device_list);
static LIST_HEAD(tls_device_down_list);
static DEFINE_SPINLOCK(tls_device_lock);
@@ -67,44 +66,58 @@ static void tls_device_free_ctx(struct tls_context *ctx)
tls_ctx_free(NULL, ctx);
}
-static void tls_device_gc_task(struct work_struct *work)
+static void tls_device_tx_del_task(struct work_struct *work)
{
- struct tls_context *ctx, *tmp;
- unsigned long flags;
- LIST_HEAD(gc_list);
-
- spin_lock_irqsave(&tls_device_lock, flags);
- list_splice_init(&tls_device_gc_list, &gc_list);
- spin_unlock_irqrestore(&tls_device_lock, flags);
-
- list_for_each_entry_safe(ctx, tmp, &gc_list, list) {
- struct net_device *netdev = ctx->netdev;
+ struct tls_offload_context_tx *offload_ctx =
+ container_of(work, struct tls_offload_context_tx, destruct_work);
+ struct tls_context *ctx = offload_ctx->ctx;
+ struct net_device *netdev;
- if (netdev && ctx->tx_conf == TLS_HW) {
- netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
- TLS_OFFLOAD_CTX_DIR_TX);
- dev_put(netdev);
- ctx->netdev = NULL;
- }
+ /* Safe, because this is the destroy flow, refcount is 0, so
+ * tls_device_down can't store this field in parallel.
+ */
+ netdev = rcu_dereference_protected(ctx->netdev,
+ !refcount_read(&ctx->refcount));
- list_del(&ctx->list);
- tls_device_free_ctx(ctx);
- }
+ netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_TX);
+ dev_put(netdev);
+ ctx->netdev = NULL;
+ tls_device_free_ctx(ctx);
}
static void tls_device_queue_ctx_destruction(struct tls_context *ctx)
{
+ struct net_device *netdev;
unsigned long flags;
+ bool async_cleanup;
spin_lock_irqsave(&tls_device_lock, flags);
- list_move_tail(&ctx->list, &tls_device_gc_list);
+ if (unlikely(!refcount_dec_and_test(&ctx->refcount))) {
+ spin_unlock_irqrestore(&tls_device_lock, flags);
+ return;
+ }
+
+ list_del(&ctx->list); /* Remove from tls_device_list / tls_device_down_list */
- /* schedule_work inside the spinlock
- * to make sure tls_device_down waits for that work.
+ /* Safe, because this is the destroy flow, refcount is 0, so
+ * tls_device_down can't store this field in parallel.
*/
- schedule_work(&tls_device_gc_work);
+ netdev = rcu_dereference_protected(ctx->netdev,
+ !refcount_read(&ctx->refcount));
+ async_cleanup = netdev && ctx->tx_conf == TLS_HW;
+ if (async_cleanup) {
+ struct tls_offload_context_tx *offload_ctx = tls_offload_ctx_tx(ctx);
+
+ /* queue_work inside the spinlock
+ * to make sure tls_device_down waits for that work.
+ */
+ queue_work(destruct_wq, &offload_ctx->destruct_work);
+ }
spin_unlock_irqrestore(&tls_device_lock, flags);
+
+ if (!async_cleanup)
+ tls_device_free_ctx(ctx);
}
/* We assume that the socket is already connected */
@@ -194,8 +207,7 @@ void tls_device_sk_destruct(struct sock *sk)
clean_acked_data_disable(inet_csk(sk));
}
- if (refcount_dec_and_test(&tls_ctx->refcount))
- tls_device_queue_ctx_destruction(tls_ctx);
+ tls_device_queue_ctx_destruction(tls_ctx);
}
EXPORT_SYMBOL_GPL(tls_device_sk_destruct);
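
The destruct path now folds the final refcount_dec_and_test() and the list removal into one critical section, which is what lets tls_device_down() trust any context it finds on the list. The pattern in isolation, with illustrative names:

        #include <linux/list.h>
        #include <linux/refcount.h>
        #include <linux/slab.h>
        #include <linux/spinlock.h>

        struct example_ctx {
                struct list_head list;
                refcount_t refcount;
        };

        static DEFINE_SPINLOCK(example_lock);

        static void example_ctx_free(struct example_ctx *ctx)
        {
                kfree(ctx);     /* stand-in for the real teardown */
        }

        static void example_ctx_put(struct example_ctx *ctx)
        {
                unsigned long flags;

                spin_lock_irqsave(&example_lock, flags);
                if (!refcount_dec_and_test(&ctx->refcount)) {
                        spin_unlock_irqrestore(&example_lock, flags);
                        return;
                }
                list_del(&ctx->list);   /* last ref: unpublish atomically */
                spin_unlock_irqrestore(&example_lock, flags);

                example_ctx_free(ctx);  /* teardown outside the lock */
        }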
@@ -231,7 +243,8 @@ static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx,
trace_tls_device_tx_resync_send(sk, seq, rcd_sn);
down_read(&device_offload_lock);
- netdev = tls_ctx->netdev;
+ netdev = rcu_dereference_protected(tls_ctx->netdev,
+ lockdep_is_held(&device_offload_lock));
if (netdev)
err = netdev->tlsdev_ops->tls_dev_resync(netdev, sk, seq,
rcd_sn,
@@ -411,10 +424,16 @@ static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i)
return 0;
}
+union tls_iter_offset {
+ struct iov_iter *msg_iter;
+ int offset;
+};
+
static int tls_push_data(struct sock *sk,
- struct iov_iter *msg_iter,
+ union tls_iter_offset iter_offset,
size_t size, int flags,
- unsigned char record_type)
+ unsigned char record_type,
+ struct page *zc_page)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_prot_info *prot = &tls_ctx->prot_info;
@@ -480,14 +499,25 @@ handle_error:
}
record = ctx->open_record;
- copy = min_t(size_t, size, (pfrag->size - pfrag->offset));
- copy = min_t(size_t, copy, (max_open_record_len - record->len));
- rc = tls_device_copy_data(page_address(pfrag->page) +
- pfrag->offset, copy, msg_iter);
- if (rc)
- goto handle_error;
- tls_append_frag(record, pfrag, copy);
+ copy = min_t(size_t, size, max_open_record_len - record->len);
+ if (copy && zc_page) {
+ struct page_frag zc_pfrag;
+
+ zc_pfrag.page = zc_page;
+ zc_pfrag.offset = iter_offset.offset;
+ zc_pfrag.size = copy;
+ tls_append_frag(record, &zc_pfrag, copy);
+ } else if (copy) {
+ copy = min_t(size_t, copy, pfrag->size - pfrag->offset);
+
+ rc = tls_device_copy_data(page_address(pfrag->page) +
+ pfrag->offset, copy,
+ iter_offset.msg_iter);
+ if (rc)
+ goto handle_error;
+ tls_append_frag(record, pfrag, copy);
+ }
size -= copy;
if (!size) {
@@ -538,19 +568,20 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
unsigned char record_type = TLS_RECORD_TYPE_DATA;
struct tls_context *tls_ctx = tls_get_ctx(sk);
+ union tls_iter_offset iter;
int rc;
mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);
if (unlikely(msg->msg_controllen)) {
- rc = tls_proccess_cmsg(sk, msg, &record_type);
+ rc = tls_process_cmsg(sk, msg, &record_type);
if (rc)
goto out;
}
- rc = tls_push_data(sk, &msg->msg_iter, size,
- msg->msg_flags, record_type);
+ iter.msg_iter = &msg->msg_iter;
+ rc = tls_push_data(sk, iter, size, msg->msg_flags, record_type, NULL);
out:
release_sock(sk);
@@ -562,7 +593,8 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct iov_iter msg_iter;
+ union tls_iter_offset iter_offset;
+ struct iov_iter msg_iter;
char *kaddr;
struct kvec iov;
int rc;
@@ -578,12 +610,20 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
goto out;
}
+ if (tls_ctx->zerocopy_sendfile) {
+ iter_offset.offset = offset;
+ rc = tls_push_data(sk, iter_offset, size,
+ flags, TLS_RECORD_TYPE_DATA, page);
+ goto out;
+ }
+
kaddr = kmap(page);
iov.iov_base = kaddr + offset;
iov.iov_len = size;
iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size);
- rc = tls_push_data(sk, &msg_iter, size,
- flags, TLS_RECORD_TYPE_DATA);
+ iter_offset.msg_iter = &msg_iter;
+ rc = tls_push_data(sk, iter_offset, size, flags, TLS_RECORD_TYPE_DATA,
+ NULL);
kunmap(page);
out:
@@ -654,10 +694,12 @@ EXPORT_SYMBOL(tls_get_record);
static int tls_device_push_pending_record(struct sock *sk, int flags)
{
- struct iov_iter msg_iter;
+ union tls_iter_offset iter;
+ struct iov_iter msg_iter;
iov_iter_kvec(&msg_iter, WRITE, NULL, 0, 0);
- return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA);
+ iter.msg_iter = &msg_iter;
+ return tls_push_data(sk, iter, 0, flags, TLS_RECORD_TYPE_DATA, NULL);
}
void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
@@ -683,7 +725,7 @@ static void tls_device_resync_rx(struct tls_context *tls_ctx,
trace_tls_device_rx_resync_send(sk, seq, rcd_sn, rx_ctx->resync_type);
rcu_read_lock();
- netdev = READ_ONCE(tls_ctx->netdev);
+ netdev = rcu_dereference(tls_ctx->netdev);
if (netdev)
netdev->tlsdev_ops->tls_dev_resync(netdev, sk, seq, rcd_sn,
TLS_OFFLOAD_CTX_DIR_RX);
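
ctx->netdev is becoming an __rcu pointer (see the RCU_INIT_POINTER() further down), so each reader must state how it stays safe. A sketch of the two accessor shapes this patch uses, on a hypothetical context:

        #include <linux/netdevice.h>
        #include <linux/rcupdate.h>
        #include <linux/refcount.h>

        struct example_ctx {
                struct net_device __rcu *netdev;
                refcount_t refcount;
        };

        /* Lockless reader: the pointer is only valid inside the RCU
         * read-side section, so only a derived fact escapes it.
         */
        static bool example_netdev_present(struct example_ctx *ctx)
        {
                bool present;

                rcu_read_lock();
                present = !!rcu_dereference(ctx->netdev);
                rcu_read_unlock();
                return present;
        }

        /* Destroy path: a zero refcount proves no updater can race us,
         * which lockdep accepts as the protection condition.
         */
        static struct net_device *example_netdev_take(struct example_ctx *ctx)
        {
                return rcu_dereference_protected(ctx->netdev,
                                        !refcount_read(&ctx->refcount));
        }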
@@ -859,43 +901,56 @@ static void tls_device_core_ctrl_rx_resync(struct tls_context *tls_ctx,
}
}
-static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb)
+static int
+tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx)
{
- struct strp_msg *rxm = strp_msg(skb);
- int err = 0, offset = rxm->offset, copy, nsg, data_len, pos;
- struct sk_buff *skb_iter, *unused;
+ struct tls_sw_context_rx *sw_ctx = tls_sw_ctx_rx(tls_ctx);
+ const struct tls_cipher_size_desc *cipher_sz;
+ int err, offset, copy, data_len, pos;
+ struct sk_buff *skb, *skb_iter;
struct scatterlist sg[1];
+ struct strp_msg *rxm;
char *orig_buf, *buf;
- orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE +
- TLS_CIPHER_AES_GCM_128_IV_SIZE, sk->sk_allocation);
+ switch (tls_ctx->crypto_recv.info.cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ case TLS_CIPHER_AES_GCM_256:
+ break;
+ default:
+ return -EINVAL;
+ }
+ cipher_sz = &tls_cipher_size_desc[tls_ctx->crypto_recv.info.cipher_type];
+
+ rxm = strp_msg(tls_strp_msg(sw_ctx));
+ orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE + cipher_sz->iv,
+ sk->sk_allocation);
if (!orig_buf)
return -ENOMEM;
buf = orig_buf;
- nsg = skb_cow_data(skb, 0, &unused);
- if (unlikely(nsg < 0)) {
- err = nsg;
+ err = tls_strp_msg_cow(sw_ctx);
+ if (unlikely(err))
goto free_buf;
- }
+
+ skb = tls_strp_msg(sw_ctx);
+ rxm = strp_msg(skb);
+ offset = rxm->offset;
sg_init_table(sg, 1);
sg_set_buf(&sg[0], buf,
- rxm->full_len + TLS_HEADER_SIZE +
- TLS_CIPHER_AES_GCM_128_IV_SIZE);
- err = skb_copy_bits(skb, offset, buf,
- TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE);
+ rxm->full_len + TLS_HEADER_SIZE + cipher_sz->iv);
+ err = skb_copy_bits(skb, offset, buf, TLS_HEADER_SIZE + cipher_sz->iv);
if (err)
goto free_buf;
/* We are interested only in the decrypted data not the auth */
- err = decrypt_skb(sk, skb, sg);
+ err = decrypt_skb(sk, sg);
if (err != -EBADMSG)
goto free_buf;
else
err = 0;
- data_len = rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE;
+ data_len = rxm->full_len - cipher_sz->tag;
if (skb_pagelen(skb) > offset) {
copy = min_t(int, skb_pagelen(skb) - offset, data_len);
@@ -944,35 +999,41 @@ free_buf:
return err;
}
-int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
- struct sk_buff *skb, struct strp_msg *rxm)
+int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx)
{
struct tls_offload_context_rx *ctx = tls_offload_ctx_rx(tls_ctx);
+ struct tls_sw_context_rx *sw_ctx = tls_sw_ctx_rx(tls_ctx);
+ struct sk_buff *skb = tls_strp_msg(sw_ctx);
+ struct strp_msg *rxm = strp_msg(skb);
int is_decrypted = skb->decrypted;
int is_encrypted = !is_decrypted;
struct sk_buff *skb_iter;
+ int left;
+ left = rxm->full_len - skb->len;
/* Check if all the data is decrypted already */
- skb_walk_frags(skb, skb_iter) {
+ skb_iter = skb_shinfo(skb)->frag_list;
+ while (skb_iter && left > 0) {
is_decrypted &= skb_iter->decrypted;
is_encrypted &= !skb_iter->decrypted;
+
+ left -= skb_iter->len;
+ skb_iter = skb_iter->next;
}
trace_tls_device_decrypted(sk, tcp_sk(sk)->copied_seq - rxm->full_len,
tls_ctx->rx.rec_seq, rxm->full_len,
is_encrypted, is_decrypted);
- ctx->sw.decrypted |= is_decrypted;
-
if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags))) {
if (likely(is_encrypted || is_decrypted))
- return 0;
+ return is_decrypted;
/* After tls_device_down disables the offload, the next SKB will
* likely have initial fragments decrypted, and final ones not
* decrypted. We need to reencrypt that single SKB.
*/
- return tls_device_reencrypt(sk, skb);
+ return tls_device_reencrypt(sk, tls_ctx);
}
/* Return immediately if the record is either entirely plaintext or
@@ -981,7 +1042,7 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
*/
if (is_decrypted) {
ctx->resync_nh_reset = 1;
- return 0;
+ return is_decrypted;
}
if (is_encrypted) {
tls_device_core_ctrl_rx_resync(tls_ctx, ctx, sk, skb);
@@ -989,7 +1050,7 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
}
ctx->resync_nh_reset = 1;
- return tls_device_reencrypt(sk, skb);
+ return tls_device_reencrypt(sk, tls_ctx);
}
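/* A userspace sketch of the bounded walk above: only fragments that
 * belong to the current record (rxm->full_len bytes, minus what the
 * head skb already covers) vote on is_decrypted/is_encrypted; data
 * queued behind the record no longer influences the verdict. Types
 * here are stand-ins.
 */
#include <stdio.h>
#include <stddef.h>

struct frag { int len; int decrypted; struct frag *next; };

static void classify(const struct frag *frag_list, int left,
		     int *is_decrypted, int *is_encrypted)
{
	const struct frag *it = frag_list;

	while (it && left > 0) {
		*is_decrypted &= it->decrypted;
		*is_encrypted &= !it->decrypted;
		left -= it->len;
		it = it->next;
	}
}

int main(void)
{
	struct frag b = { 100, 0, NULL };	/* encrypted tail */
	struct frag a = { 200, 1, &b };		/* decrypted head */
	int dec = 1, enc = 1;

	classify(&a, 300, &dec, &enc);
	printf("decrypted=%d encrypted=%d\n", dec, enc);  /* 0 0: mixed */
	return 0;
}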
static void tls_device_attach(struct tls_context *ctx, struct sock *sk,
@@ -998,7 +1059,7 @@ static void tls_device_attach(struct tls_context *ctx, struct sock *sk,
if (sk->sk_destruct != tls_device_sk_destruct) {
refcount_set(&ctx->refcount, 1);
dev_hold(netdev);
- ctx->netdev = netdev;
+ RCU_INIT_POINTER(ctx->netdev, netdev);
spin_lock_irq(&tls_device_lock);
list_add_tail(&ctx->list, &tls_device_list);
spin_unlock_irq(&tls_device_lock);
@@ -1010,9 +1071,9 @@ static void tls_device_attach(struct tls_context *ctx, struct sock *sk,
int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
{
- u16 nonce_size, tag_size, iv_size, rec_seq_size, salt_size;
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_prot_info *prot = &tls_ctx->prot_info;
+ const struct tls_cipher_size_desc *cipher_sz;
struct tls_record_info *start_marker_record;
struct tls_offload_context_tx *offload_ctx;
struct tls_crypto_info *crypto_info;
@@ -1028,70 +1089,83 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
if (ctx->priv_ctx_tx)
return -EEXIST;
- start_marker_record = kmalloc(sizeof(*start_marker_record), GFP_KERNEL);
- if (!start_marker_record)
- return -ENOMEM;
+ netdev = get_netdev_for_sock(sk);
+ if (!netdev) {
+ pr_err_ratelimited("%s: netdev not found\n", __func__);
+ return -EINVAL;
+ }
- offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL);
- if (!offload_ctx) {
- rc = -ENOMEM;
- goto free_marker_record;
+ if (!(netdev->features & NETIF_F_HW_TLS_TX)) {
+ rc = -EOPNOTSUPP;
+ goto release_netdev;
}
crypto_info = &ctx->crypto_send.info;
if (crypto_info->version != TLS_1_2_VERSION) {
rc = -EOPNOTSUPP;
- goto free_offload_ctx;
+ goto release_netdev;
}
switch (crypto_info->cipher_type) {
case TLS_CIPHER_AES_GCM_128:
- nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
- tag_size = TLS_CIPHER_AES_GCM_128_TAG_SIZE;
- iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
iv = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->iv;
- rec_seq_size = TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE;
- salt_size = TLS_CIPHER_AES_GCM_128_SALT_SIZE;
rec_seq =
((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->rec_seq;
break;
+ case TLS_CIPHER_AES_GCM_256:
+ iv = ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->iv;
+ rec_seq =
+ ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->rec_seq;
+ break;
default:
rc = -EINVAL;
- goto free_offload_ctx;
+ goto release_netdev;
}
+ cipher_sz = &tls_cipher_size_desc[crypto_info->cipher_type];
/* Sanity-check the rec_seq_size for stack allocations */
- if (rec_seq_size > TLS_MAX_REC_SEQ_SIZE) {
+ if (cipher_sz->rec_seq > TLS_MAX_REC_SEQ_SIZE) {
rc = -EINVAL;
- goto free_offload_ctx;
+ goto release_netdev;
}
prot->version = crypto_info->version;
prot->cipher_type = crypto_info->cipher_type;
- prot->prepend_size = TLS_HEADER_SIZE + nonce_size;
- prot->tag_size = tag_size;
+ prot->prepend_size = TLS_HEADER_SIZE + cipher_sz->iv;
+ prot->tag_size = cipher_sz->tag;
prot->overhead_size = prot->prepend_size + prot->tag_size;
- prot->iv_size = iv_size;
- prot->salt_size = salt_size;
- ctx->tx.iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
- GFP_KERNEL);
+ prot->iv_size = cipher_sz->iv;
+ prot->salt_size = cipher_sz->salt;
+ ctx->tx.iv = kmalloc(cipher_sz->iv + cipher_sz->salt, GFP_KERNEL);
if (!ctx->tx.iv) {
rc = -ENOMEM;
- goto free_offload_ctx;
+ goto release_netdev;
}
- memcpy(ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
+ memcpy(ctx->tx.iv + cipher_sz->salt, iv, cipher_sz->iv);
- prot->rec_seq_size = rec_seq_size;
- ctx->tx.rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL);
+ prot->rec_seq_size = cipher_sz->rec_seq;
+ ctx->tx.rec_seq = kmemdup(rec_seq, cipher_sz->rec_seq, GFP_KERNEL);
if (!ctx->tx.rec_seq) {
rc = -ENOMEM;
goto free_iv;
}
+ start_marker_record = kmalloc(sizeof(*start_marker_record), GFP_KERNEL);
+ if (!start_marker_record) {
+ rc = -ENOMEM;
+ goto free_rec_seq;
+ }
+
+ offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL);
+ if (!offload_ctx) {
+ rc = -ENOMEM;
+ goto free_marker_record;
+ }
+
rc = tls_sw_fallback_init(sk, offload_ctx, crypto_info);
if (rc)
- goto free_rec_seq;
+ goto free_offload_ctx;
/* start at rec_seq - 1 to account for the start marker record */
memcpy(&rcd_sn, ctx->tx.rec_seq, sizeof(rcd_sn));
@@ -1101,6 +1175,9 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
start_marker_record->len = 0;
start_marker_record->num_frags = 0;
+ INIT_WORK(&offload_ctx->destruct_work, tls_device_tx_del_task);
+ offload_ctx->ctx = ctx;
+
INIT_LIST_HEAD(&offload_ctx->records_list);
list_add_tail(&start_marker_record->list, &offload_ctx->records_list);
spin_lock_init(&offload_ctx->lock);
@@ -1118,18 +1195,6 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
if (skb)
TCP_SKB_CB(skb)->eor = 1;
- netdev = get_netdev_for_sock(sk);
- if (!netdev) {
- pr_err_ratelimited("%s: netdev not found\n", __func__);
- rc = -EINVAL;
- goto disable_cad;
- }
-
- if (!(netdev->features & NETIF_F_HW_TLS_TX)) {
- rc = -EOPNOTSUPP;
- goto release_netdev;
- }
-
/* Avoid offloading if the device is down.
* We don't want to offload new flows after
* the NETDEV_DOWN event.
@@ -1167,20 +1232,19 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
release_lock:
up_read(&device_offload_lock);
-release_netdev:
- dev_put(netdev);
-disable_cad:
clean_acked_data_disable(inet_csk(sk));
crypto_free_aead(offload_ctx->aead_send);
-free_rec_seq:
- kfree(ctx->tx.rec_seq);
-free_iv:
- kfree(ctx->tx.iv);
free_offload_ctx:
kfree(offload_ctx);
ctx->priv_ctx_tx = NULL;
free_marker_record:
kfree(start_marker_record);
+free_rec_seq:
+ kfree(ctx->tx.rec_seq);
+free_iv:
+ kfree(ctx->tx.iv);
+release_netdev:
+ dev_put(netdev);
return rc;
}
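/* A compilable sketch of why the unwind labels above were reshuffled:
 * with goto-based error handling, cleanup labels must release
 * resources in reverse order of acquisition, so moving
 * get_netdev_for_sock() to the top of the function forces
 * release_netdev to become the last (outermost) label. Names here are
 * illustrative.
 */
#include <stdlib.h>

static int setup(void)
{
	void *netdev, *iv, *rec_seq;
	int rc = -1;

	netdev = malloc(1);	/* acquired first, released last */
	if (!netdev)
		return -1;
	iv = malloc(1);
	if (!iv)
		goto release_netdev;
	rec_seq = malloc(1);	/* acquired last, released first */
	if (!rec_seq)
		goto free_iv;

	free(rec_seq);
	free(iv);
	free(netdev);
	return 0;

free_iv:
	free(iv);
release_netdev:
	free(netdev);
	return rc;
}

int main(void) { return setup(); }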
@@ -1266,7 +1330,8 @@ void tls_device_offload_cleanup_rx(struct sock *sk)
struct net_device *netdev;
down_read(&device_offload_lock);
- netdev = tls_ctx->netdev;
+ netdev = rcu_dereference_protected(tls_ctx->netdev,
+ lockdep_is_held(&device_offload_lock));
if (!netdev)
goto out;
@@ -1275,7 +1340,7 @@ void tls_device_offload_cleanup_rx(struct sock *sk)
if (tls_ctx->tx_conf != TLS_HW) {
dev_put(netdev);
- tls_ctx->netdev = NULL;
+ rcu_assign_pointer(tls_ctx->netdev, NULL);
} else {
set_bit(TLS_RX_DEV_CLOSED, &tls_ctx->flags);
}
@@ -1295,7 +1360,11 @@ static int tls_device_down(struct net_device *netdev)
spin_lock_irqsave(&tls_device_lock, flags);
list_for_each_entry_safe(ctx, tmp, &tls_device_list, list) {
- if (ctx->netdev != netdev ||
+ struct net_device *ctx_netdev =
+ rcu_dereference_protected(ctx->netdev,
+ lockdep_is_held(&device_offload_lock));
+
+ if (ctx_netdev != netdev ||
!refcount_inc_not_zero(&ctx->refcount))
continue;
@@ -1312,7 +1381,7 @@ static int tls_device_down(struct net_device *netdev)
/* Stop the RX and TX resync.
* tls_dev_resync must not be called after tls_dev_del.
*/
- WRITE_ONCE(ctx->netdev, NULL);
+ rcu_assign_pointer(ctx->netdev, NULL);
/* Start skipping the RX resync logic completely. */
set_bit(TLS_RX_DEV_DEGRADED, &ctx->flags);
@@ -1345,12 +1414,20 @@ static int tls_device_down(struct net_device *netdev)
/* Device contexts for RX and TX will be freed on sk_destruct
* by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW.
+ * Now release the ref taken above.
*/
+ if (refcount_dec_and_test(&ctx->refcount)) {
+ /* sk_destruct ran after tls_device_down took a ref, and
+ * it returned early. Complete the destruction here.
+ */
+ list_del(&ctx->list);
+ tls_device_free_ctx(ctx);
+ }
}
up_write(&device_offload_lock);
- flush_work(&tls_device_gc_work);
+ flush_workqueue(destruct_wq);
return NOTIFY_DONE;
}
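/* A userspace sketch of the shared teardown above: the context has two
 * potential last owners (the socket's destructor and tls_device_down),
 * and whichever drops the refcount to zero completes the destruction,
 * so the free runs exactly once. Names are illustrative.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int refs = 2;

static void put_ctx(const char *who)
{
	if (atomic_fetch_sub(&refs, 1) == 1)
		printf("%s frees the context\n", who);
}

int main(void)
{
	put_ctx("sk_destruct");		/* ran early, ref still held */
	put_ctx("tls_device_down");	/* last owner completes destruction */
	return 0;
}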
@@ -1389,14 +1466,24 @@ static struct notifier_block tls_dev_notifier = {
.notifier_call = tls_dev_event,
};
-void __init tls_device_init(void)
+int __init tls_device_init(void)
{
- register_netdevice_notifier(&tls_dev_notifier);
+ int err;
+
+ destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0);
+ if (!destruct_wq)
+ return -ENOMEM;
+
+ err = register_netdevice_notifier(&tls_dev_notifier);
+ if (err)
+ destroy_workqueue(destruct_wq);
+
+ return err;
}
void __exit tls_device_cleanup(void)
{
unregister_netdevice_notifier(&tls_dev_notifier);
- flush_work(&tls_device_gc_work);
+ destroy_workqueue(destruct_wq);
clean_acked_data_flush();
}
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index e40bedd112b6..cdb391a8754b 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -34,6 +34,8 @@
#include <crypto/scatterwalk.h>
#include <net/ip6_checksum.h>
+#include "tls.h"
+
static void chain_to_walk(struct scatterlist *sg, struct scatter_walk *walk)
{
struct scatterlist *src = walk->sg;
@@ -52,13 +54,25 @@ static int tls_enc_record(struct aead_request *aead_req,
struct scatter_walk *out, int *in_len,
struct tls_prot_info *prot)
{
- unsigned char buf[TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE];
+ unsigned char buf[TLS_HEADER_SIZE + MAX_IV_SIZE];
+ const struct tls_cipher_size_desc *cipher_sz;
struct scatterlist sg_in[3];
struct scatterlist sg_out[3];
+ unsigned int buf_size;
u16 len;
int rc;
- len = min_t(int, *in_len, ARRAY_SIZE(buf));
+ switch (prot->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ case TLS_CIPHER_AES_GCM_256:
+ break;
+ default:
+ return -EINVAL;
+ }
+ cipher_sz = &tls_cipher_size_desc[prot->cipher_type];
+
+ buf_size = TLS_HEADER_SIZE + cipher_sz->iv;
+ len = min_t(int, *in_len, buf_size);
scatterwalk_copychunks(buf, in, len, 0);
scatterwalk_copychunks(buf, out, len, 1);
@@ -71,13 +85,11 @@ static int tls_enc_record(struct aead_request *aead_req,
scatterwalk_pagedone(out, 1, 1);
len = buf[4] | (buf[3] << 8);
- len -= TLS_CIPHER_AES_GCM_128_IV_SIZE;
+ len -= cipher_sz->iv;
- tls_make_aad(aad, len - TLS_CIPHER_AES_GCM_128_TAG_SIZE,
- (char *)&rcd_sn, buf[0], prot);
+ tls_make_aad(aad, len - cipher_sz->tag, (char *)&rcd_sn, buf[0], prot);
- memcpy(iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, buf + TLS_HEADER_SIZE,
- TLS_CIPHER_AES_GCM_128_IV_SIZE);
+ memcpy(iv + cipher_sz->salt, buf + TLS_HEADER_SIZE, cipher_sz->iv);
sg_init_table(sg_in, ARRAY_SIZE(sg_in));
sg_init_table(sg_out, ARRAY_SIZE(sg_out));
@@ -88,7 +100,7 @@ static int tls_enc_record(struct aead_request *aead_req,
*in_len -= len;
if (*in_len < 0) {
- *in_len += TLS_CIPHER_AES_GCM_128_TAG_SIZE;
+ *in_len += cipher_sz->tag;
/* the input buffer doesn't contain the entire record.
* trim len accordingly. The resulting authentication tag
* will contain garbage, but we don't care, so we won't
@@ -109,7 +121,7 @@ static int tls_enc_record(struct aead_request *aead_req,
scatterwalk_pagedone(out, 1, 1);
}
- len -= TLS_CIPHER_AES_GCM_128_TAG_SIZE;
+ len -= cipher_sz->tag;
aead_request_set_crypt(aead_req, sg_in, sg_out, len, iv);
rc = crypto_aead_encrypt(aead_req);
@@ -232,7 +244,7 @@ static int fill_sg_in(struct scatterlist *sg_in,
s32 *sync_size,
int *resync_sgs)
{
- int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ int tcp_payload_offset = skb_tcp_all_headers(skb);
int payload_len = skb->len - tcp_payload_offset;
u32 tcp_seq = ntohl(tcp_hdr(skb)->seq);
struct tls_record_info *record;
@@ -297,11 +309,14 @@ static void fill_sg_out(struct scatterlist sg_out[3], void *buf,
int sync_size,
void *dummy_buf)
{
+ const struct tls_cipher_size_desc *cipher_sz =
+ &tls_cipher_size_desc[tls_ctx->crypto_send.info.cipher_type];
+
sg_set_buf(&sg_out[0], dummy_buf, sync_size);
sg_set_buf(&sg_out[1], nskb->data + tcp_payload_offset, payload_len);
/* Add room for authentication tag produced by crypto */
dummy_buf += sync_size;
- sg_set_buf(&sg_out[2], dummy_buf, TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+ sg_set_buf(&sg_out[2], dummy_buf, cipher_sz->tag);
}
static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
@@ -310,10 +325,11 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
struct sk_buff *skb,
s32 sync_size, u64 rcd_sn)
{
- int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
+ int tcp_payload_offset = skb_tcp_all_headers(skb);
int payload_len = skb->len - tcp_payload_offset;
- void *buf, *iv, *aad, *dummy_buf;
+ const struct tls_cipher_size_desc *cipher_sz;
+ void *buf, *iv, *aad, *dummy_buf, *salt;
struct aead_request *aead_req;
struct sk_buff *nskb = NULL;
int buf_len;
@@ -322,20 +338,26 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
if (!aead_req)
return NULL;
- buf_len = TLS_CIPHER_AES_GCM_128_SALT_SIZE +
- TLS_CIPHER_AES_GCM_128_IV_SIZE +
- TLS_AAD_SPACE_SIZE +
- sync_size +
- TLS_CIPHER_AES_GCM_128_TAG_SIZE;
+ switch (tls_ctx->crypto_send.info.cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ salt = tls_ctx->crypto_send.aes_gcm_128.salt;
+ break;
+ case TLS_CIPHER_AES_GCM_256:
+ salt = tls_ctx->crypto_send.aes_gcm_256.salt;
+ break;
+ default:
+ return NULL;
+ }
+ cipher_sz = &tls_cipher_size_desc[tls_ctx->crypto_send.info.cipher_type];
+ buf_len = cipher_sz->salt + cipher_sz->iv + TLS_AAD_SPACE_SIZE +
+ sync_size + cipher_sz->tag;
buf = kmalloc(buf_len, GFP_ATOMIC);
if (!buf)
goto free_req;
iv = buf;
- memcpy(iv, tls_ctx->crypto_send.aes_gcm_128.salt,
- TLS_CIPHER_AES_GCM_128_SALT_SIZE);
- aad = buf + TLS_CIPHER_AES_GCM_128_SALT_SIZE +
- TLS_CIPHER_AES_GCM_128_IV_SIZE;
+ memcpy(iv, salt, cipher_sz->salt);
+ aad = buf + cipher_sz->salt + cipher_sz->iv;
dummy_buf = aad + TLS_AAD_SPACE_SIZE;
nskb = alloc_skb(skb_headroom(skb) + skb->len, GFP_ATOMIC);
@@ -372,7 +394,7 @@ free_nskb:
static struct sk_buff *tls_sw_fallback(struct sock *sk, struct sk_buff *skb)
{
- int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ int tcp_payload_offset = skb_tcp_all_headers(skb);
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
int payload_len = skb->len - tcp_payload_offset;
@@ -424,7 +446,8 @@ struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
struct net_device *dev,
struct sk_buff *skb)
{
- if (dev == tls_get_ctx(sk)->netdev || netif_is_bond_master(dev))
+ if (dev == rcu_dereference_bh(tls_get_ctx(sk)->netdev) ||
+ netif_is_bond_master(dev))
return skb;
return tls_sw_fallback(sk, skb);
@@ -448,6 +471,7 @@ int tls_sw_fallback_init(struct sock *sk,
struct tls_offload_context_tx *offload_ctx,
struct tls_crypto_info *crypto_info)
{
+ const struct tls_cipher_size_desc *cipher_sz;
const u8 *key;
int rc;
@@ -460,15 +484,23 @@ int tls_sw_fallback_init(struct sock *sk,
goto err_out;
}
- key = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->key;
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ key = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->key;
+ break;
+ case TLS_CIPHER_AES_GCM_256:
+ key = ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->key;
+ break;
+ default:
+ return -EINVAL;
+ }
+ cipher_sz = &tls_cipher_size_desc[crypto_info->cipher_type];
- rc = crypto_aead_setkey(offload_ctx->aead_send, key,
- TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+ rc = crypto_aead_setkey(offload_ctx->aead_send, key, cipher_sz->key);
if (rc)
goto free_aead;
- rc = crypto_aead_setauthsize(offload_ctx->aead_send,
- TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+ rc = crypto_aead_setauthsize(offload_ctx->aead_send, cipher_sz->tag);
if (rc)
goto free_aead;
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 6bc2879ba637..3735cb00905d 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -45,6 +45,8 @@
#include <net/tls.h>
#include <net/tls_toe.h>
+#include "tls.h"
+
MODULE_AUTHOR("Mellanox Technologies");
MODULE_DESCRIPTION("Transport Layer Security Support");
MODULE_LICENSE("Dual BSD/GPL");
@@ -56,6 +58,23 @@ enum {
TLS_NUM_PROTS,
};
+#define CIPHER_SIZE_DESC(cipher) [cipher] = { \
+ .iv = cipher ## _IV_SIZE, \
+ .key = cipher ## _KEY_SIZE, \
+ .salt = cipher ## _SALT_SIZE, \
+ .tag = cipher ## _TAG_SIZE, \
+ .rec_seq = cipher ## _REC_SEQ_SIZE, \
+}
+
+const struct tls_cipher_size_desc tls_cipher_size_desc[] = {
+ CIPHER_SIZE_DESC(TLS_CIPHER_AES_GCM_128),
+ CIPHER_SIZE_DESC(TLS_CIPHER_AES_GCM_256),
+ CIPHER_SIZE_DESC(TLS_CIPHER_AES_CCM_128),
+ CIPHER_SIZE_DESC(TLS_CIPHER_CHACHA20_POLY1305),
+ CIPHER_SIZE_DESC(TLS_CIPHER_SM4_GCM),
+ CIPHER_SIZE_DESC(TLS_CIPHER_SM4_CCM),
+};
+
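/* Written out by hand for clarity, the first designated initializer
 * above expands to the following (## pastes the cipher name onto each
 * _SIZE suffix):
 *
 *	[TLS_CIPHER_AES_GCM_128] = {
 *		.iv      = TLS_CIPHER_AES_GCM_128_IV_SIZE,
 *		.key     = TLS_CIPHER_AES_GCM_128_KEY_SIZE,
 *		.salt    = TLS_CIPHER_AES_GCM_128_SALT_SIZE,
 *		.tag     = TLS_CIPHER_AES_GCM_128_TAG_SIZE,
 *		.rec_seq = TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE,
 *	},
 */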
static const struct proto *saved_tcpv6_prot;
static DEFINE_MUTEX(tcpv6_prot_mutex);
static const struct proto *saved_tcpv4_prot;
@@ -164,8 +183,8 @@ static int tls_handle_open_record(struct sock *sk, int flags)
return 0;
}
-int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
- unsigned char *record_type)
+int tls_process_cmsg(struct sock *sk, struct msghdr *msg,
+ unsigned char *record_type)
{
struct cmsghdr *cmsg;
int rc = -EINVAL;
@@ -505,6 +524,54 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
rc = -EFAULT;
break;
}
+ case TLS_CIPHER_ARIA_GCM_128: {
+ struct tls12_crypto_info_aria_gcm_128 *
+ crypto_info_aria_gcm_128 =
+ container_of(crypto_info,
+ struct tls12_crypto_info_aria_gcm_128,
+ info);
+
+ if (len != sizeof(*crypto_info_aria_gcm_128)) {
+ rc = -EINVAL;
+ goto out;
+ }
+ lock_sock(sk);
+ memcpy(crypto_info_aria_gcm_128->iv,
+ cctx->iv + TLS_CIPHER_ARIA_GCM_128_SALT_SIZE,
+ TLS_CIPHER_ARIA_GCM_128_IV_SIZE);
+ memcpy(crypto_info_aria_gcm_128->rec_seq, cctx->rec_seq,
+ TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE);
+ release_sock(sk);
+ if (copy_to_user(optval,
+ crypto_info_aria_gcm_128,
+ sizeof(*crypto_info_aria_gcm_128)))
+ rc = -EFAULT;
+ break;
+ }
+ case TLS_CIPHER_ARIA_GCM_256: {
+ struct tls12_crypto_info_aria_gcm_256 *
+ crypto_info_aria_gcm_256 =
+ container_of(crypto_info,
+ struct tls12_crypto_info_aria_gcm_256,
+ info);
+
+ if (len != sizeof(*crypto_info_aria_gcm_256)) {
+ rc = -EINVAL;
+ goto out;
+ }
+ lock_sock(sk);
+ memcpy(crypto_info_aria_gcm_256->iv,
+ cctx->iv + TLS_CIPHER_ARIA_GCM_256_SALT_SIZE,
+ TLS_CIPHER_ARIA_GCM_256_IV_SIZE);
+ memcpy(crypto_info_aria_gcm_256->rec_seq, cctx->rec_seq,
+ TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE);
+ release_sock(sk);
+ if (copy_to_user(optval,
+ crypto_info_aria_gcm_256,
+ sizeof(*crypto_info_aria_gcm_256)))
+ rc = -EFAULT;
+ break;
+ }
default:
rc = -EINVAL;
}
@@ -513,6 +580,56 @@ out:
return rc;
}
+static int do_tls_getsockopt_tx_zc(struct sock *sk, char __user *optval,
+ int __user *optlen)
+{
+ struct tls_context *ctx = tls_get_ctx(sk);
+ unsigned int value;
+ int len;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ if (len != sizeof(value))
+ return -EINVAL;
+
+ value = ctx->zerocopy_sendfile;
+ if (copy_to_user(optval, &value, sizeof(value)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int do_tls_getsockopt_no_pad(struct sock *sk, char __user *optval,
+ int __user *optlen)
+{
+ struct tls_context *ctx = tls_get_ctx(sk);
+ int value, len;
+
+ if (ctx->prot_info.version != TLS_1_3_VERSION)
+ return -EINVAL;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+ if (len < sizeof(value))
+ return -EINVAL;
+
+ lock_sock(sk);
+ value = -EINVAL;
+ if (ctx->rx_conf == TLS_SW || ctx->rx_conf == TLS_HW)
+ value = ctx->rx_no_pad;
+ release_sock(sk);
+ if (value < 0)
+ return value;
+
+ if (put_user(sizeof(value), optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &value, sizeof(value)))
+ return -EFAULT;
+
+ return 0;
+}
+
static int do_tls_getsockopt(struct sock *sk, int optname,
char __user *optval, int __user *optlen)
{
@@ -524,6 +641,12 @@ static int do_tls_getsockopt(struct sock *sk, int optname,
rc = do_tls_getsockopt_conf(sk, optval, optlen,
optname == TLS_TX);
break;
+ case TLS_TX_ZEROCOPY_RO:
+ rc = do_tls_getsockopt_tx_zc(sk, optval, optlen);
+ break;
+ case TLS_RX_EXPECT_NO_PAD:
+ rc = do_tls_getsockopt_no_pad(sk, optval, optlen);
+ break;
default:
rc = -ENOPROTOOPT;
break;
@@ -553,10 +676,8 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
int rc = 0;
int conf;
- if (sockptr_is_null(optval) || (optlen < sizeof(*crypto_info))) {
- rc = -EINVAL;
- goto out;
- }
+ if (sockptr_is_null(optval) || (optlen < sizeof(*crypto_info)))
+ return -EINVAL;
if (tx) {
crypto_info = &ctx->crypto_send.info;
@@ -567,10 +688,8 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
}
/* Currently we don't support set crypto info more than one time */
- if (TLS_CRYPTO_INFO_READY(crypto_info)) {
- rc = -EBUSY;
- goto out;
- }
+ if (TLS_CRYPTO_INFO_READY(crypto_info))
+ return -EBUSY;
rc = copy_from_sockptr(crypto_info, optval, sizeof(*crypto_info));
if (rc) {
@@ -614,6 +733,20 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
case TLS_CIPHER_SM4_CCM:
optsize = sizeof(struct tls12_crypto_info_sm4_ccm);
break;
+ case TLS_CIPHER_ARIA_GCM_128:
+ if (crypto_info->version != TLS_1_2_VERSION) {
+ rc = -EINVAL;
+ goto err_crypto_info;
+ }
+ optsize = sizeof(struct tls12_crypto_info_aria_gcm_128);
+ break;
+ case TLS_CIPHER_ARIA_GCM_256:
+ if (crypto_info->version != TLS_1_2_VERSION) {
+ rc = -EINVAL;
+ goto err_crypto_info;
+ }
+ optsize = sizeof(struct tls12_crypto_info_aria_gcm_256);
+ break;
default:
rc = -EINVAL;
goto err_crypto_info;
@@ -671,12 +804,67 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
if (tx) {
ctx->sk_write_space = sk->sk_write_space;
sk->sk_write_space = tls_write_space;
+ } else {
+ struct tls_sw_context_rx *rx_ctx = tls_sw_ctx_rx(ctx);
+
+ tls_strp_check_rcv(&rx_ctx->strp);
}
- goto out;
+ return 0;
err_crypto_info:
memzero_explicit(crypto_info, sizeof(union tls_crypto_context));
-out:
+ return rc;
+}
+
+static int do_tls_setsockopt_tx_zc(struct sock *sk, sockptr_t optval,
+ unsigned int optlen)
+{
+ struct tls_context *ctx = tls_get_ctx(sk);
+ unsigned int value;
+
+ if (sockptr_is_null(optval) || optlen != sizeof(value))
+ return -EINVAL;
+
+ if (copy_from_sockptr(&value, optval, sizeof(value)))
+ return -EFAULT;
+
+ if (value > 1)
+ return -EINVAL;
+
+ ctx->zerocopy_sendfile = value;
+
+ return 0;
+}
+
+static int do_tls_setsockopt_no_pad(struct sock *sk, sockptr_t optval,
+ unsigned int optlen)
+{
+ struct tls_context *ctx = tls_get_ctx(sk);
+ u32 val;
+ int rc;
+
+ if (ctx->prot_info.version != TLS_1_3_VERSION ||
+ sockptr_is_null(optval) || optlen < sizeof(val))
+ return -EINVAL;
+
+ rc = copy_from_sockptr(&val, optval, sizeof(val));
+ if (rc)
+ return -EFAULT;
+ if (val > 1)
+ return -EINVAL;
+ rc = check_zeroed_sockptr(optval, sizeof(val), optlen - sizeof(val));
+ if (rc < 1)
+ return rc == 0 ? -EINVAL : rc;
+
+ lock_sock(sk);
+ rc = -EINVAL;
+ if (ctx->rx_conf == TLS_SW || ctx->rx_conf == TLS_HW) {
+ ctx->rx_no_pad = val;
+ tls_update_rx_zc_capable(ctx);
+ rc = 0;
+ }
+ release_sock(sk);
+
return rc;
}
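/* A hedged userspace sketch of driving the two knobs added above;
 * it assumes fd already has the "tls" ULP attached and, for the
 * no-pad option, a TLS 1.3 RX key installed. Older libc headers may
 * lack SOL_TLS, hence the fallback define.
 */
#include <sys/socket.h>
#include <linux/tls.h>

#ifndef SOL_TLS
#define SOL_TLS 282
#endif

static int enable_tls_extras(int fd)
{
	unsigned int one = 1;

	/* TLS 1.3 RX only; fails with EINVAL otherwise, per the checks above */
	if (setsockopt(fd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, &one, sizeof(one)))
		return -1;
	/* stored unconditionally, but only acted on with TX offload */
	if (setsockopt(fd, SOL_TLS, TLS_TX_ZEROCOPY_RO, &one, sizeof(one)))
		return -1;
	return 0;
}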
@@ -693,6 +881,14 @@ static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval,
optname == TLS_TX);
release_sock(sk);
break;
+ case TLS_TX_ZEROCOPY_RO:
+ lock_sock(sk);
+ rc = do_tls_setsockopt_tx_zc(sk, optval, optlen);
+ release_sock(sk);
+ break;
+ case TLS_RX_EXPECT_NO_PAD:
+ rc = do_tls_setsockopt_no_pad(sk, optval, optlen);
+ break;
default:
rc = -ENOPROTOOPT;
break;
@@ -878,6 +1074,8 @@ static void tls_update(struct sock *sk, struct proto *p,
{
struct tls_context *ctx;
+ WARN_ON_ONCE(sk->sk_prot == p);
+
ctx = tls_get_ctx(sk);
if (likely(ctx)) {
ctx->sk_write_space = write_space;
@@ -889,6 +1087,23 @@ static void tls_update(struct sock *sk, struct proto *p,
}
}
+static u16 tls_user_config(struct tls_context *ctx, bool tx)
+{
+ u16 config = tx ? ctx->tx_conf : ctx->rx_conf;
+
+ switch (config) {
+ case TLS_BASE:
+ return TLS_CONF_BASE;
+ case TLS_SW:
+ return TLS_CONF_SW;
+ case TLS_HW:
+ return TLS_CONF_HW;
+ case TLS_HW_RECORD:
+ return TLS_CONF_HW_RECORD;
+ }
+ return 0;
+}
+
static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
{
u16 version, cipher_type;
@@ -926,6 +1141,17 @@ static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
if (err)
goto nla_failure;
+ if (ctx->tx_conf == TLS_HW && ctx->zerocopy_sendfile) {
+ err = nla_put_flag(skb, TLS_INFO_ZC_RO_TX);
+ if (err)
+ goto nla_failure;
+ }
+ if (ctx->rx_no_pad) {
+ err = nla_put_flag(skb, TLS_INFO_RX_NO_PAD);
+ if (err)
+ goto nla_failure;
+ }
+
rcu_read_unlock();
nla_nest_end(skb, start);
return 0;
@@ -945,6 +1171,8 @@ static size_t tls_get_info_size(const struct sock *sk)
nla_total_size(sizeof(u16)) + /* TLS_INFO_CIPHER */
nla_total_size(sizeof(u16)) + /* TLS_INFO_RXCONF */
nla_total_size(sizeof(u16)) + /* TLS_INFO_TXCONF */
+ nla_total_size(0) + /* TLS_INFO_ZC_RO_TX */
+ nla_total_size(0) + /* TLS_INFO_RX_NO_PAD */
0;
return size;
@@ -996,15 +1224,28 @@ static int __init tls_register(void)
if (err)
return err;
- tls_device_init();
+ err = tls_strp_dev_init();
+ if (err)
+ goto err_pernet;
+
+ err = tls_device_init();
+ if (err)
+ goto err_strp;
+
tcp_register_ulp(&tcp_tls_ulp_ops);
return 0;
+err_strp:
+ tls_strp_dev_exit();
+err_pernet:
+ unregister_pernet_subsys(&tls_proc_ops);
+ return err;
}
static void __exit tls_unregister(void)
{
tcp_unregister_ulp(&tcp_tls_ulp_ops);
+ tls_strp_dev_exit();
tls_device_cleanup();
unregister_pernet_subsys(&tls_proc_ops);
}
diff --git a/net/tls/tls_proc.c b/net/tls/tls_proc.c
index feeceb0e4cb4..68982728f620 100644
--- a/net/tls/tls_proc.c
+++ b/net/tls/tls_proc.c
@@ -6,6 +6,8 @@
#include <net/snmp.h>
#include <net/tls.h>
+#include "tls.h"
+
#ifdef CONFIG_PROC_FS
static const struct snmp_mib tls_mib_list[] = {
SNMP_MIB_ITEM("TlsCurrTxSw", LINUX_MIB_TLSCURRTXSW),
@@ -18,6 +20,8 @@ static const struct snmp_mib tls_mib_list[] = {
SNMP_MIB_ITEM("TlsRxDevice", LINUX_MIB_TLSRXDEVICE),
SNMP_MIB_ITEM("TlsDecryptError", LINUX_MIB_TLSDECRYPTERROR),
SNMP_MIB_ITEM("TlsRxDeviceResync", LINUX_MIB_TLSRXDEVICERESYNC),
+ SNMP_MIB_ITEM("TlsDecryptRetry", LINUX_MIB_TLSDECRYPTRETRY),
+ SNMP_MIB_ITEM("TlsRxNoPadViolation", LINUX_MIB_TLSRXNOPADVIOL),
SNMP_MIB_SENTINEL
};
diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
new file mode 100644
index 000000000000..955ac3e0bf4d
--- /dev/null
+++ b/net/tls/tls_strp.c
@@ -0,0 +1,518 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2016 Tom Herbert <tom@herbertland.com> */
+
+#include <linux/skbuff.h>
+#include <linux/workqueue.h>
+#include <net/strparser.h>
+#include <net/tcp.h>
+#include <net/sock.h>
+#include <net/tls.h>
+
+#include "tls.h"
+
+static struct workqueue_struct *tls_strp_wq;
+
+static void tls_strp_abort_strp(struct tls_strparser *strp, int err)
+{
+ if (strp->stopped)
+ return;
+
+ strp->stopped = 1;
+
+ /* Report an error on the lower socket */
+ strp->sk->sk_err = -err;
+ sk_error_report(strp->sk);
+}
+
+static void tls_strp_anchor_free(struct tls_strparser *strp)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(strp->anchor);
+
+ DEBUG_NET_WARN_ON_ONCE(atomic_read(&shinfo->dataref) != 1);
+ shinfo->frag_list = NULL;
+ consume_skb(strp->anchor);
+ strp->anchor = NULL;
+}
+
+/* Create a new skb with the contents of input copied to its page frags */
+static struct sk_buff *tls_strp_msg_make_copy(struct tls_strparser *strp)
+{
+ struct strp_msg *rxm;
+ struct sk_buff *skb;
+ int i, err, offset;
+
+ skb = alloc_skb_with_frags(0, strp->stm.full_len, TLS_PAGE_ORDER,
+ &err, strp->sk->sk_allocation);
+ if (!skb)
+ return NULL;
+
+ offset = strp->stm.offset;
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ WARN_ON_ONCE(skb_copy_bits(strp->anchor, offset,
+ skb_frag_address(frag),
+ skb_frag_size(frag)));
+ offset += skb_frag_size(frag);
+ }
+
+ skb_copy_header(skb, strp->anchor);
+ rxm = strp_msg(skb);
+ rxm->offset = 0;
+ return skb;
+}
+
+/* Steal the input skb; the input msg is invalid after calling this function */
+struct sk_buff *tls_strp_msg_detach(struct tls_sw_context_rx *ctx)
+{
+ struct tls_strparser *strp = &ctx->strp;
+
+#ifdef CONFIG_TLS_DEVICE
+ DEBUG_NET_WARN_ON_ONCE(!strp->anchor->decrypted);
+#else
+ /* This function turns an input into an output,
+ * that can only happen if we have offload.
+ */
+ WARN_ON(1);
+#endif
+
+ if (strp->copy_mode) {
+ struct sk_buff *skb;
+
+ /* Replace the anchor with an empty skb; this is a little
+ * dangerous, but __tls_cur_msg() warns on empty skbs,
+ * so hopefully we'll catch abuses.
+ */
+ skb = alloc_skb(0, strp->sk->sk_allocation);
+ if (!skb)
+ return NULL;
+
+ swap(strp->anchor, skb);
+ return skb;
+ }
+
+ return tls_strp_msg_make_copy(strp);
+}
+
+/* Force the input skb to be in copy mode. The data ownership remains
+ * with the input skb itself (meaning unpause will wipe it) but it can
+ * be modified.
+ */
+int tls_strp_msg_cow(struct tls_sw_context_rx *ctx)
+{
+ struct tls_strparser *strp = &ctx->strp;
+ struct sk_buff *skb;
+
+ if (strp->copy_mode)
+ return 0;
+
+ skb = tls_strp_msg_make_copy(strp);
+ if (!skb)
+ return -ENOMEM;
+
+ tls_strp_anchor_free(strp);
+ strp->anchor = skb;
+
+ tcp_read_done(strp->sk, strp->stm.full_len);
+ strp->copy_mode = 1;
+
+ return 0;
+}
+
+/* Make a clone (in the skb sense) of the input msg to keep a reference
+ * to the underlying data. The reference-holding skbs get placed on
+ * @dst.
+ */
+int tls_strp_msg_hold(struct tls_strparser *strp, struct sk_buff_head *dst)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(strp->anchor);
+
+ if (strp->copy_mode) {
+ struct sk_buff *skb;
+
+ WARN_ON_ONCE(!shinfo->nr_frags);
+
+ /* We can't skb_clone() the anchor, it gets wiped by unpause */
+ skb = alloc_skb(0, strp->sk->sk_allocation);
+ if (!skb)
+ return -ENOMEM;
+
+ __skb_queue_tail(dst, strp->anchor);
+ strp->anchor = skb;
+ } else {
+ struct sk_buff *iter, *clone;
+ int chunk, len, offset;
+
+ offset = strp->stm.offset;
+ len = strp->stm.full_len;
+ iter = shinfo->frag_list;
+
+ while (len > 0) {
+ if (iter->len <= offset) {
+ offset -= iter->len;
+ goto next;
+ }
+
+ chunk = iter->len - offset;
+ offset = 0;
+
+ clone = skb_clone(iter, strp->sk->sk_allocation);
+ if (!clone)
+ return -ENOMEM;
+ __skb_queue_tail(dst, clone);
+
+ len -= chunk;
+next:
+ iter = iter->next;
+ }
+ }
+
+ return 0;
+}
+
+static void tls_strp_flush_anchor_copy(struct tls_strparser *strp)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(strp->anchor);
+ int i;
+
+ DEBUG_NET_WARN_ON_ONCE(atomic_read(&shinfo->dataref) != 1);
+
+ for (i = 0; i < shinfo->nr_frags; i++)
+ __skb_frag_unref(&shinfo->frags[i], false);
+ shinfo->nr_frags = 0;
+ strp->copy_mode = 0;
+}
+
+static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
+ unsigned int offset, size_t in_len)
+{
+ struct tls_strparser *strp = (struct tls_strparser *)desc->arg.data;
+ struct sk_buff *skb;
+ skb_frag_t *frag;
+ size_t len, chunk;
+ int sz;
+
+ if (strp->msg_ready)
+ return 0;
+
+ skb = strp->anchor;
+ frag = &skb_shinfo(skb)->frags[skb->len / PAGE_SIZE];
+
+ len = in_len;
+ /* First make sure we got the header */
+ if (!strp->stm.full_len) {
+ /* Assume one page is more than enough for headers */
+ chunk = min_t(size_t, len, PAGE_SIZE - skb_frag_size(frag));
+ WARN_ON_ONCE(skb_copy_bits(in_skb, offset,
+ skb_frag_address(frag) +
+ skb_frag_size(frag),
+ chunk));
+
+ sz = tls_rx_msg_size(strp, strp->anchor);
+ if (sz < 0) {
+ desc->error = sz;
+ return 0;
+ }
+
+ /* We may have over-read; sz == 0 guarantees an under-read */
+ if (sz > 0)
+ chunk = min_t(size_t, chunk, sz - skb->len);
+
+ skb->len += chunk;
+ skb->data_len += chunk;
+ skb_frag_size_add(frag, chunk);
+ frag++;
+ len -= chunk;
+ offset += chunk;
+
+ strp->stm.full_len = sz;
+ if (!strp->stm.full_len)
+ goto read_done;
+ }
+
+ /* Load up more data */
+ while (len && strp->stm.full_len > skb->len) {
+ chunk = min_t(size_t, len, strp->stm.full_len - skb->len);
+ chunk = min_t(size_t, chunk, PAGE_SIZE - skb_frag_size(frag));
+ WARN_ON_ONCE(skb_copy_bits(in_skb, offset,
+ skb_frag_address(frag) +
+ skb_frag_size(frag),
+ chunk));
+
+ skb->len += chunk;
+ skb->data_len += chunk;
+ skb_frag_size_add(frag, chunk);
+ frag++;
+ len -= chunk;
+ offset += chunk;
+ }
+
+ if (strp->stm.full_len == skb->len) {
+ desc->count = 0;
+
+ strp->msg_ready = 1;
+ tls_rx_msg_ready(strp);
+ }
+
+read_done:
+ return in_len - len;
+}
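/* A userspace sketch of the two-phase parse above: accumulate bytes
 * until a 5-byte TLS-style record header is complete, derive the full
 * record length from its bytes 3..4 (the same field tls_enc_record()
 * reads earlier), then accumulate until exactly that many bytes are
 * buffered. The fixed-size buffer is for illustration only.
 */
#include <string.h>

struct parser {
	unsigned char buf[128];
	size_t have, full_len;
	int ready;
};

static size_t feed(struct parser *p, const unsigned char *in, size_t len)
{
	size_t used = 0, chunk;

	if (!p->full_len) {			/* phase 1: the header */
		chunk = len < 5 - p->have ? len : 5 - p->have;
		memcpy(p->buf + p->have, in, chunk);
		p->have += chunk;
		used += chunk;
		if (p->have < 5)
			return used;		/* record size still unknown */
		p->full_len = 5 + ((p->buf[3] << 8) | p->buf[4]);
	}
	chunk = len - used;			/* phase 2: the body */
	if (chunk > p->full_len - p->have)
		chunk = p->full_len - p->have;
	memcpy(p->buf + p->have, in + used, chunk);
	p->have += chunk;
	used += chunk;
	p->ready = (p->have == p->full_len);	/* msg_ready analogue */
	return used;
}

int main(void)
{
	/* 2-byte record: header 16 03 03 00 02, then "hi" */
	const unsigned char wire[] = { 0x16, 0x03, 0x03, 0x00, 0x02, 'h', 'i' };
	struct parser p = { { 0 }, 0, 0, 0 };
	size_t off = 0;

	while (off < sizeof(wire))
		off += feed(&p, wire + off, sizeof(wire) - off);
	return p.ready ? 0 : 1;
}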
+
+static int tls_strp_read_copyin(struct tls_strparser *strp)
+{
+ struct socket *sock = strp->sk->sk_socket;
+ read_descriptor_t desc;
+
+ desc.arg.data = strp;
+ desc.error = 0;
+ desc.count = 1; /* give more than one skb per call */
+
+ /* sk should be locked here, so okay to do read_sock */
+ sock->ops->read_sock(strp->sk, &desc, tls_strp_copyin);
+
+ return desc.error;
+}
+
+static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort)
+{
+ struct skb_shared_info *shinfo;
+ struct page *page;
+ int need_spc, len;
+
+ /* If the rbuf is small or rcv window has collapsed to 0 we need
+ * to read the data out. Otherwise the connection will stall.
+ * Without pressure, a threshold of INT_MAX will never be ready.
+ */
+ if (likely(qshort && !tcp_epollin_ready(strp->sk, INT_MAX)))
+ return 0;
+
+ shinfo = skb_shinfo(strp->anchor);
+ shinfo->frag_list = NULL;
+
+ /* If we don't know the length, go max plus a page for cipher overhead */
+ need_spc = strp->stm.full_len ?: TLS_MAX_PAYLOAD_SIZE + PAGE_SIZE;
+
+ for (len = need_spc; len > 0; len -= PAGE_SIZE) {
+ page = alloc_page(strp->sk->sk_allocation);
+ if (!page) {
+ tls_strp_flush_anchor_copy(strp);
+ return -ENOMEM;
+ }
+
+ skb_fill_page_desc(strp->anchor, shinfo->nr_frags++,
+ page, 0, 0);
+ }
+
+ strp->copy_mode = 1;
+ strp->stm.offset = 0;
+
+ strp->anchor->len = 0;
+ strp->anchor->data_len = 0;
+ strp->anchor->truesize = round_up(need_spc, PAGE_SIZE);
+
+ tls_strp_read_copyin(strp);
+
+ return 0;
+}
+
+static bool tls_strp_check_no_dup(struct tls_strparser *strp)
+{
+ unsigned int len = strp->stm.offset + strp->stm.full_len;
+ struct sk_buff *skb;
+ u32 seq;
+
+ skb = skb_shinfo(strp->anchor)->frag_list;
+ seq = TCP_SKB_CB(skb)->seq;
+
+ while (skb->len < len) {
+ seq += skb->len;
+ len -= skb->len;
+ skb = skb->next;
+
+ if (TCP_SKB_CB(skb)->seq != seq)
+ return false;
+ }
+
+ return true;
+}
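/* A userspace sketch of the duplicate check above: each fragment must
 * start at exactly the TCP sequence number where the previous one
 * ended; an overlap or gap (e.g. a retransmitted segment sitting in
 * the queue twice) fails the check and forces the copy path instead
 * of zero-copy anchoring. Types are stand-ins.
 */
#include <stdbool.h>
#include <stddef.h>

struct seg { unsigned int seq, len; struct seg *next; };

static bool check_no_dup(const struct seg *s, unsigned int need)
{
	unsigned int seq = s->seq;

	while (s->len < need) {		/* mirrors the skb->len < len loop */
		seq += s->len;
		need -= s->len;
		s = s->next;
		if (!s || s->seq != seq)
			return false;
	}
	return true;
}

int main(void)
{
	struct seg b = { 1300, 500, NULL };
	struct seg a = { 1000, 300, &b };	/* ends at 1300: contiguous */

	return check_no_dup(&a, 600) ? 0 : 1;
}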
+
+static void tls_strp_load_anchor_with_queue(struct tls_strparser *strp, int len)
+{
+ struct tcp_sock *tp = tcp_sk(strp->sk);
+ struct sk_buff *first;
+ u32 offset;
+
+ first = tcp_recv_skb(strp->sk, tp->copied_seq, &offset);
+ if (WARN_ON_ONCE(!first))
+ return;
+
+ /* Bestow the state onto the anchor */
+ strp->anchor->len = offset + len;
+ strp->anchor->data_len = offset + len;
+ strp->anchor->truesize = offset + len;
+
+ skb_shinfo(strp->anchor)->frag_list = first;
+
+ skb_copy_header(strp->anchor, first);
+ strp->anchor->destructor = NULL;
+
+ strp->stm.offset = offset;
+}
+
+void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh)
+{
+ struct strp_msg *rxm;
+ struct tls_msg *tlm;
+
+ DEBUG_NET_WARN_ON_ONCE(!strp->msg_ready);
+ DEBUG_NET_WARN_ON_ONCE(!strp->stm.full_len);
+
+ if (!strp->copy_mode && force_refresh) {
+ if (WARN_ON(tcp_inq(strp->sk) < strp->stm.full_len))
+ return;
+
+ tls_strp_load_anchor_with_queue(strp, strp->stm.full_len);
+ }
+
+ rxm = strp_msg(strp->anchor);
+ rxm->full_len = strp->stm.full_len;
+ rxm->offset = strp->stm.offset;
+ tlm = tls_msg(strp->anchor);
+ tlm->control = strp->mark;
+}
+
+/* Called with lock held on lower socket */
+static int tls_strp_read_sock(struct tls_strparser *strp)
+{
+ int sz, inq;
+
+ inq = tcp_inq(strp->sk);
+ if (inq < 1)
+ return 0;
+
+ if (unlikely(strp->copy_mode))
+ return tls_strp_read_copyin(strp);
+
+ if (inq < strp->stm.full_len)
+ return tls_strp_read_copy(strp, true);
+
+ if (!strp->stm.full_len) {
+ tls_strp_load_anchor_with_queue(strp, inq);
+
+ sz = tls_rx_msg_size(strp, strp->anchor);
+ if (sz < 0) {
+ tls_strp_abort_strp(strp, sz);
+ return sz;
+ }
+
+ strp->stm.full_len = sz;
+
+ if (!strp->stm.full_len || inq < strp->stm.full_len)
+ return tls_strp_read_copy(strp, true);
+ }
+
+ if (!tls_strp_check_no_dup(strp))
+ return tls_strp_read_copy(strp, false);
+
+ strp->msg_ready = 1;
+ tls_rx_msg_ready(strp);
+
+ return 0;
+}
+
+void tls_strp_check_rcv(struct tls_strparser *strp)
+{
+ if (unlikely(strp->stopped) || strp->msg_ready)
+ return;
+
+ if (tls_strp_read_sock(strp) == -ENOMEM)
+ queue_work(tls_strp_wq, &strp->work);
+}
+
+/* Lower sock lock held */
+void tls_strp_data_ready(struct tls_strparser *strp)
+{
+ /* This check is needed to synchronize with do_tls_strp_work.
+ * do_tls_strp_work acquires a process lock (lock_sock) whereas
+ * the lock held here is bh_lock_sock. The two locks can be
+ * held by different threads at the same time, but bh_lock_sock
+ * allows a thread in BH context to safely check if the process
+ * lock is held. In this case, if the lock is held, queue work.
+ */
+ if (sock_owned_by_user_nocheck(strp->sk)) {
+ queue_work(tls_strp_wq, &strp->work);
+ return;
+ }
+
+ tls_strp_check_rcv(strp);
+}
+
+static void tls_strp_work(struct work_struct *w)
+{
+ struct tls_strparser *strp =
+ container_of(w, struct tls_strparser, work);
+
+ lock_sock(strp->sk);
+ tls_strp_check_rcv(strp);
+ release_sock(strp->sk);
+}
+
+void tls_strp_msg_done(struct tls_strparser *strp)
+{
+ WARN_ON(!strp->stm.full_len);
+
+ if (likely(!strp->copy_mode))
+ tcp_read_done(strp->sk, strp->stm.full_len);
+ else
+ tls_strp_flush_anchor_copy(strp);
+
+ strp->msg_ready = 0;
+ memset(&strp->stm, 0, sizeof(strp->stm));
+
+ tls_strp_check_rcv(strp);
+}
+
+void tls_strp_stop(struct tls_strparser *strp)
+{
+ strp->stopped = 1;
+}
+
+int tls_strp_init(struct tls_strparser *strp, struct sock *sk)
+{
+ memset(strp, 0, sizeof(*strp));
+
+ strp->sk = sk;
+
+ strp->anchor = alloc_skb(0, GFP_KERNEL);
+ if (!strp->anchor)
+ return -ENOMEM;
+
+ INIT_WORK(&strp->work, tls_strp_work);
+
+ return 0;
+}
+
+/* strp must already be stopped so that tls_strp_recv will no longer be called.
+ * Note that tls_strp_done is not called with the lower socket held.
+ */
+void tls_strp_done(struct tls_strparser *strp)
+{
+ WARN_ON(!strp->stopped);
+
+ cancel_work_sync(&strp->work);
+ tls_strp_anchor_free(strp);
+}
+
+int __init tls_strp_dev_init(void)
+{
+ tls_strp_wq = create_workqueue("tls-strp");
+ if (unlikely(!tls_strp_wq))
+ return -ENOMEM;
+
+ return 0;
+}
+
+void tls_strp_dev_exit(void)
+{
+ destroy_workqueue(tls_strp_wq);
+}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 95e774f1b91f..264cf367e265 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -44,6 +44,25 @@
#include <net/strparser.h>
#include <net/tls.h>
+#include "tls.h"
+
+struct tls_decrypt_arg {
+ struct_group(inargs,
+ bool zc;
+ bool async;
+ u8 tail;
+ );
+
+ struct sk_buff *skb;
+};
+
+struct tls_decrypt_ctx {
+ u8 iv[MAX_IV_SIZE];
+ u8 aad[TLS_MAX_AAD_SIZE];
+ u8 tail;
+ struct scatterlist sg[];
+};
+
noinline void tls_err_abort(struct sock *sk, int err)
{
WARN_ON_ONCE(err >= 0);
@@ -128,32 +147,32 @@ static int skb_nsg(struct sk_buff *skb, int offset, int len)
return __skb_nsg(skb, offset, len, 0);
}
-static int padding_length(struct tls_sw_context_rx *ctx,
- struct tls_prot_info *prot, struct sk_buff *skb)
+static int tls_padding_length(struct tls_prot_info *prot, struct sk_buff *skb,
+ struct tls_decrypt_arg *darg)
{
struct strp_msg *rxm = strp_msg(skb);
+ struct tls_msg *tlm = tls_msg(skb);
int sub = 0;
/* Determine zero-padding length */
if (prot->version == TLS_1_3_VERSION) {
- char content_type = 0;
+ int offset = rxm->full_len - TLS_TAG_SIZE - 1;
+ char content_type = darg->zc ? darg->tail : 0;
int err;
- int back = 17;
while (content_type == 0) {
- if (back > rxm->full_len - prot->prepend_size)
+ if (offset < prot->prepend_size)
return -EBADMSG;
- err = skb_copy_bits(skb,
- rxm->offset + rxm->full_len - back,
+ err = skb_copy_bits(skb, rxm->offset + offset,
&content_type, 1);
if (err)
return err;
if (content_type)
break;
sub++;
- back++;
+ offset--;
}
- ctx->control = content_type;
+ tlm->control = content_type;
}
return sub;
}
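/* A userspace sketch of the backward scan above over a TLS 1.3 inner
 * plaintext, which per RFC 8446 is laid out as: content bytes, then
 * the real content-type byte, then optional zero padding. Returns the
 * padding length, or -1 when no content type is found (EBADMSG in the
 * kernel version).
 */
#include <stdio.h>

static int padding_len(const unsigned char *inner, int len,
		       unsigned char *content_type)
{
	int off = len - 1;

	while (off >= 0 && inner[off] == 0)
		off--;
	if (off < 0)
		return -1;		/* all zeros: malformed record */
	*content_type = inner[off];
	return len - 1 - off;
}

int main(void)
{
	/* "hi" + content type 23 (application_data) + 3 padding bytes */
	unsigned char rec[] = { 'h', 'i', 23, 0, 0, 0 };
	unsigned char ct;
	int pad = padding_len(rec, sizeof(rec), &ct);

	printf("pad=%d content_type=%u\n", pad, ct);	/* pad=3 ct=23 */
	return 0;
}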
@@ -165,45 +184,22 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
struct scatterlist *sgin = aead_req->src;
struct tls_sw_context_rx *ctx;
struct tls_context *tls_ctx;
- struct tls_prot_info *prot;
struct scatterlist *sg;
- struct sk_buff *skb;
unsigned int pages;
- int pending;
+ struct sock *sk;
- skb = (struct sk_buff *)req->data;
- tls_ctx = tls_get_ctx(skb->sk);
+ sk = (struct sock *)req->data;
+ tls_ctx = tls_get_ctx(sk);
ctx = tls_sw_ctx_rx(tls_ctx);
- prot = &tls_ctx->prot_info;
/* Propagate if there was an err */
if (err) {
if (err == -EBADMSG)
- TLS_INC_STATS(sock_net(skb->sk),
- LINUX_MIB_TLSDECRYPTERROR);
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
ctx->async_wait.err = err;
- tls_err_abort(skb->sk, err);
- } else {
- struct strp_msg *rxm = strp_msg(skb);
- int pad;
-
- pad = padding_length(ctx, prot, skb);
- if (pad < 0) {
- ctx->async_wait.err = pad;
- tls_err_abort(skb->sk, pad);
- } else {
- rxm->full_len -= pad;
- rxm->offset += prot->prepend_size;
- rxm->full_len -= prot->overhead_size;
- }
+ tls_err_abort(sk, err);
}
- /* After using skb->sk to propagate sk through crypto async callback
- * we need to NULL it again.
- */
- skb->sk = NULL;
-
-
/* Free the destination pages if skb was not decrypted inplace */
if (sgout != sgin) {
/* Skip the first S/G entry as it points to AAD */
@@ -217,21 +213,18 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
kfree(aead_req);
spin_lock_bh(&ctx->decrypt_compl_lock);
- pending = atomic_dec_return(&ctx->decrypt_pending);
-
- if (!pending && ctx->async_notify)
+ if (!atomic_dec_return(&ctx->decrypt_pending))
complete(&ctx->async_wait.completion);
spin_unlock_bh(&ctx->decrypt_compl_lock);
}
static int tls_do_decryption(struct sock *sk,
- struct sk_buff *skb,
struct scatterlist *sgin,
struct scatterlist *sgout,
char *iv_recv,
size_t data_len,
struct aead_request *aead_req,
- bool async)
+ struct tls_decrypt_arg *darg)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_prot_info *prot = &tls_ctx->prot_info;
@@ -244,17 +237,10 @@ static int tls_do_decryption(struct sock *sk,
data_len + prot->tag_size,
(u8 *)iv_recv);
- if (async) {
- /* Using skb->sk to push sk through to crypto async callback
- * handler. This allows propagating errors up to the socket
- * if needed. It _must_ be cleared in the async handler
- * before consume_skb is called. We _know_ skb->sk is NULL
- * because it is a clone from strparser.
- */
- skb->sk = sk;
+ if (darg->async) {
aead_request_set_callback(aead_req,
CRYPTO_TFM_REQ_MAY_BACKLOG,
- tls_decrypt_done, skb);
+ tls_decrypt_done, sk);
atomic_inc(&ctx->decrypt_pending);
} else {
aead_request_set_callback(aead_req,
@@ -264,14 +250,12 @@ static int tls_do_decryption(struct sock *sk,
ret = crypto_aead_decrypt(aead_req);
if (ret == -EINPROGRESS) {
- if (async)
- return ret;
+ if (darg->async)
+ return 0;
ret = crypto_wait_req(ret, &ctx->async_wait);
}
-
- if (async)
- atomic_dec(&ctx->decrypt_pending);
+ darg->async = false;
return ret;
}
@@ -521,7 +505,8 @@ static int tls_do_encryption(struct sock *sk,
memcpy(&rec->iv_data[iv_offset], tls_ctx->tx.iv,
prot->iv_size + prot->salt_size);
- xor_iv_with_seq(prot, rec->iv_data + iv_offset, tls_ctx->tx.rec_seq);
+ tls_xor_iv_with_seq(prot, rec->iv_data + iv_offset,
+ tls_ctx->tx.rec_seq);
sge->offset += prot->prepend_size;
sge->length -= prot->prepend_size;
@@ -958,7 +943,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
lock_sock(sk);
if (unlikely(msg->msg_controllen)) {
- ret = tls_proccess_cmsg(sk, msg, &record_type);
+ ret = tls_process_cmsg(sk, msg, &record_type);
if (ret) {
if (ret == -EINPROGRESS)
num_async++;
@@ -1296,65 +1281,67 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
return ret;
}
-static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock,
- bool nonblock, long timeo, int *err)
+static int
+tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
+ bool released)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
- struct sk_buff *skb;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ long timeo;
- while (!(skb = ctx->recv_pkt) && sk_psock_queue_empty(psock)) {
- if (sk->sk_err) {
- *err = sock_error(sk);
- return NULL;
- }
+ timeo = sock_rcvtimeo(sk, nonblock);
+
+ while (!tls_strp_msg_ready(ctx)) {
+ if (!sk_psock_queue_empty(psock))
+ return 0;
+
+ if (sk->sk_err)
+ return sock_error(sk);
if (!skb_queue_empty(&sk->sk_receive_queue)) {
- __strp_unpause(&ctx->strp);
- if (ctx->recv_pkt)
- return ctx->recv_pkt;
+ tls_strp_check_rcv(&ctx->strp);
+ if (tls_strp_msg_ready(ctx))
+ break;
}
if (sk->sk_shutdown & RCV_SHUTDOWN)
- return NULL;
+ return 0;
if (sock_flag(sk, SOCK_DONE))
- return NULL;
+ return 0;
- if (nonblock || !timeo) {
- *err = -EAGAIN;
- return NULL;
- }
+ if (!timeo)
+ return -EAGAIN;
+ released = true;
add_wait_queue(sk_sleep(sk), &wait);
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
sk_wait_event(sk, &timeo,
- ctx->recv_pkt != skb ||
+ tls_strp_msg_ready(ctx) ||
!sk_psock_queue_empty(psock),
&wait);
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
remove_wait_queue(sk_sleep(sk), &wait);
/* Handle signals */
- if (signal_pending(current)) {
- *err = sock_intr_errno(timeo);
- return NULL;
- }
+ if (signal_pending(current))
+ return sock_intr_errno(timeo);
}
- return skb;
+ tls_strp_msg_load(&ctx->strp, released);
+
+ return 1;
}
-static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from,
+static int tls_setup_from_iter(struct iov_iter *from,
int length, int *pages_used,
- unsigned int *size_used,
struct scatterlist *to,
int to_max_pages)
{
int rc = 0, i = 0, num_elem = *pages_used, maxpages;
struct page *pages[MAX_SKB_FRAGS];
- unsigned int size = *size_used;
+ unsigned int size = 0;
ssize_t copied, use;
size_t offset;
@@ -1365,7 +1352,7 @@ static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from,
rc = -EFAULT;
goto out;
}
- copied = iov_iter_get_pages(from, pages,
+ copied = iov_iter_get_pages2(from, pages,
length,
maxpages, &offset);
if (copied <= 0) {
@@ -1373,8 +1360,6 @@ static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from,
goto out;
}
- iov_iter_advance(from, copied);
-
length -= copied;
size += copied;
while (copied) {
@@ -1397,246 +1382,363 @@ static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from,
sg_mark_end(&to[num_elem - 1]);
out:
if (rc)
- iov_iter_revert(from, size - *size_used);
- *size_used = size;
+ iov_iter_revert(from, size);
*pages_used = num_elem;
return rc;
}
+static struct sk_buff *
+tls_alloc_clrtxt_skb(struct sock *sk, struct sk_buff *skb,
+ unsigned int full_len)
+{
+ struct strp_msg *clr_rxm;
+ struct sk_buff *clr_skb;
+ int err;
+
+ clr_skb = alloc_skb_with_frags(0, full_len, TLS_PAGE_ORDER,
+ &err, sk->sk_allocation);
+ if (!clr_skb)
+ return NULL;
+
+ skb_copy_header(clr_skb, skb);
+ clr_skb->len = full_len;
+ clr_skb->data_len = full_len;
+
+ clr_rxm = strp_msg(clr_skb);
+ clr_rxm->offset = 0;
+
+ return clr_skb;
+}
+
+/* Decrypt handlers
+ *
+ * tls_decrypt_sw() and tls_decrypt_device() are decrypt handlers.
+ * They must transform the darg in/out argument as follows:
+ *       | Input                     | Output
+ * -------------------------------------------------------------------
+ * zc    | Zero-copy decrypt allowed | Zero-copy performed
+ * async | Async decrypt allowed     | Async crypto used / in progress
+ * skb   | *                         | Output skb
+ *
+ * If ZC decryption was performed darg.skb will point to the input skb.
+ */
+
/* This function decrypts the input skb into either out_iov or out_sg,
- * or into the skb's own buffers. The input parameter 'zc' indicates if
+ * or into the skb's own buffers. The input parameter 'darg->zc' indicates if
* zero-copy mode needs to be tried or not. With zero-copy mode, either
* out_iov or out_sg must be non-NULL. If both out_iov and out_sg are
* NULL, the decryption happens inside the skb's own buffers, i.e.
- * zero-copy gets disabled and 'zc' is updated.
+ * zero-copy gets disabled and 'darg->zc' is updated.
*/
-
-static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
- struct iov_iter *out_iov,
- struct scatterlist *out_sg,
- int *chunk, bool *zc, bool async)
+static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
+ struct scatterlist *out_sg,
+ struct tls_decrypt_arg *darg)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct tls_prot_info *prot = &tls_ctx->prot_info;
- struct strp_msg *rxm = strp_msg(skb);
- int n_sgin, n_sgout, nsg, mem_size, aead_size, err, pages = 0;
+ int n_sgin, n_sgout, aead_size, err, pages = 0;
+ struct sk_buff *skb = tls_strp_msg(ctx);
+ const struct strp_msg *rxm = strp_msg(skb);
+ const struct tls_msg *tlm = tls_msg(skb);
struct aead_request *aead_req;
- struct sk_buff *unused;
- u8 *aad, *iv, *mem = NULL;
struct scatterlist *sgin = NULL;
struct scatterlist *sgout = NULL;
- const int data_len = rxm->full_len - prot->overhead_size +
- prot->tail_size;
+ const int data_len = rxm->full_len - prot->overhead_size;
+ int tail_pages = !!prot->tail_size;
+ struct tls_decrypt_ctx *dctx;
+ struct sk_buff *clear_skb;
int iv_offset = 0;
+ u8 *mem;
+
+ n_sgin = skb_nsg(skb, rxm->offset + prot->prepend_size,
+ rxm->full_len - prot->prepend_size);
+ if (n_sgin < 1)
+ return n_sgin ?: -EBADMSG;
+
+ if (darg->zc && (out_iov || out_sg)) {
+ clear_skb = NULL;
- if (*zc && (out_iov || out_sg)) {
if (out_iov)
- n_sgout = iov_iter_npages(out_iov, INT_MAX) + 1;
+ n_sgout = 1 + tail_pages +
+ iov_iter_npages_cap(out_iov, INT_MAX, data_len);
else
n_sgout = sg_nents(out_sg);
- n_sgin = skb_nsg(skb, rxm->offset + prot->prepend_size,
- rxm->full_len - prot->prepend_size);
} else {
- n_sgout = 0;
- *zc = false;
- n_sgin = skb_cow_data(skb, 0, &unused);
- }
+ darg->zc = false;
- if (n_sgin < 1)
- return -EBADMSG;
+ clear_skb = tls_alloc_clrtxt_skb(sk, skb, rxm->full_len);
+ if (!clear_skb)
+ return -ENOMEM;
+
+ n_sgout = 1 + skb_shinfo(clear_skb)->nr_frags;
+ }
/* Increment to accommodate AAD */
n_sgin = n_sgin + 1;
- nsg = n_sgin + n_sgout;
-
- aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv);
- mem_size = aead_size + (nsg * sizeof(struct scatterlist));
- mem_size = mem_size + prot->aad_size;
- mem_size = mem_size + crypto_aead_ivsize(ctx->aead_recv);
-
/* Allocate a single block of memory which contains
- * aead_req || sgin[] || sgout[] || aad || iv.
- * This order achieves correct alignment for aead_req, sgin, sgout.
+ * aead_req || tls_decrypt_ctx.
+ * Both structs are variable length.
*/
- mem = kmalloc(mem_size, sk->sk_allocation);
- if (!mem)
- return -ENOMEM;
+ aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv);
+ mem = kmalloc(aead_size + struct_size(dctx, sg, n_sgin + n_sgout),
+ sk->sk_allocation);
+ if (!mem) {
+ err = -ENOMEM;
+ goto exit_free_skb;
+ }
/* Segment the allocated memory */
aead_req = (struct aead_request *)mem;
- sgin = (struct scatterlist *)(mem + aead_size);
- sgout = sgin + n_sgin;
- aad = (u8 *)(sgout + n_sgout);
- iv = aad + prot->aad_size;
+ dctx = (struct tls_decrypt_ctx *)(mem + aead_size);
+ sgin = &dctx->sg[0];
+ sgout = &dctx->sg[n_sgin];
/* For CCM based ciphers, first byte of nonce+iv is a constant */
switch (prot->cipher_type) {
case TLS_CIPHER_AES_CCM_128:
- iv[0] = TLS_AES_CCM_IV_B0_BYTE;
+ dctx->iv[0] = TLS_AES_CCM_IV_B0_BYTE;
iv_offset = 1;
break;
case TLS_CIPHER_SM4_CCM:
- iv[0] = TLS_SM4_CCM_IV_B0_BYTE;
+ dctx->iv[0] = TLS_SM4_CCM_IV_B0_BYTE;
iv_offset = 1;
break;
}
/* Prepare IV */
- err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
- iv + iv_offset + prot->salt_size,
- prot->iv_size);
- if (err < 0) {
- kfree(mem);
- return err;
- }
if (prot->version == TLS_1_3_VERSION ||
- prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305)
- memcpy(iv + iv_offset, tls_ctx->rx.iv,
- crypto_aead_ivsize(ctx->aead_recv));
- else
- memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size);
-
- xor_iv_with_seq(prot, iv + iv_offset, tls_ctx->rx.rec_seq);
+ prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305) {
+ memcpy(&dctx->iv[iv_offset], tls_ctx->rx.iv,
+ prot->iv_size + prot->salt_size);
+ } else {
+ err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
+ &dctx->iv[iv_offset] + prot->salt_size,
+ prot->iv_size);
+ if (err < 0)
+ goto exit_free;
+ memcpy(&dctx->iv[iv_offset], tls_ctx->rx.iv, prot->salt_size);
+ }
+ tls_xor_iv_with_seq(prot, &dctx->iv[iv_offset], tls_ctx->rx.rec_seq);
/* Prepare AAD */
- tls_make_aad(aad, rxm->full_len - prot->overhead_size +
+ tls_make_aad(dctx->aad, rxm->full_len - prot->overhead_size +
prot->tail_size,
- tls_ctx->rx.rec_seq, ctx->control, prot);
+ tls_ctx->rx.rec_seq, tlm->control, prot);
/* Prepare sgin */
sg_init_table(sgin, n_sgin);
- sg_set_buf(&sgin[0], aad, prot->aad_size);
+ sg_set_buf(&sgin[0], dctx->aad, prot->aad_size);
err = skb_to_sgvec(skb, &sgin[1],
rxm->offset + prot->prepend_size,
rxm->full_len - prot->prepend_size);
- if (err < 0) {
- kfree(mem);
- return err;
- }
-
- if (n_sgout) {
- if (out_iov) {
- sg_init_table(sgout, n_sgout);
- sg_set_buf(&sgout[0], aad, prot->aad_size);
-
- *chunk = 0;
- err = tls_setup_from_iter(sk, out_iov, data_len,
- &pages, chunk, &sgout[1],
- (n_sgout - 1));
- if (err < 0)
- goto fallback_to_reg_recv;
- } else if (out_sg) {
- memcpy(sgout, out_sg, n_sgout * sizeof(*sgout));
- } else {
- goto fallback_to_reg_recv;
+ if (err < 0)
+ goto exit_free;
+
+ if (clear_skb) {
+ sg_init_table(sgout, n_sgout);
+ sg_set_buf(&sgout[0], dctx->aad, prot->aad_size);
+
+ err = skb_to_sgvec(clear_skb, &sgout[1], prot->prepend_size,
+ data_len + prot->tail_size);
+ if (err < 0)
+ goto exit_free;
+ } else if (out_iov) {
+ sg_init_table(sgout, n_sgout);
+ sg_set_buf(&sgout[0], dctx->aad, prot->aad_size);
+
+ err = tls_setup_from_iter(out_iov, data_len, &pages, &sgout[1],
+ (n_sgout - 1 - tail_pages));
+ if (err < 0)
+ goto exit_free_pages;
+
+ if (prot->tail_size) {
+ sg_unmark_end(&sgout[pages]);
+ sg_set_buf(&sgout[pages + 1], &dctx->tail,
+ prot->tail_size);
+ sg_mark_end(&sgout[pages + 1]);
}
- } else {
-fallback_to_reg_recv:
- sgout = sgin;
- pages = 0;
- *chunk = data_len;
- *zc = false;
+ } else if (out_sg) {
+ memcpy(sgout, out_sg, n_sgout * sizeof(*sgout));
}
/* Prepare and submit AEAD request */
- err = tls_do_decryption(sk, skb, sgin, sgout, iv,
- data_len, aead_req, async);
- if (err == -EINPROGRESS)
+ err = tls_do_decryption(sk, sgin, sgout, dctx->iv,
+ data_len + prot->tail_size, aead_req, darg);
+ if (err)
+ goto exit_free_pages;
+
+ darg->skb = clear_skb ?: tls_strp_msg(ctx);
+ clear_skb = NULL;
+
+ if (unlikely(darg->async)) {
+ err = tls_strp_msg_hold(&ctx->strp, &ctx->async_hold);
+ if (err)
+ __skb_queue_tail(&ctx->async_hold, darg->skb);
return err;
+ }
+
+ if (prot->tail_size)
+ darg->tail = dctx->tail;
+exit_free_pages:
/* Release the pages in case iov was mapped to pages */
for (; pages > 0; pages--)
put_page(sg_page(&sgout[pages]));
-
+exit_free:
kfree(mem);
+exit_free_skb:
+ consume_skb(clear_skb);
return err;
}
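/* A userspace sketch of the single-block allocation above: one
 * allocation carries the fixed-size AEAD request followed by a
 * variable-length decrypt context whose flexible sg[] array holds
 * n_sgin + n_sgout slots. struct_size() is the kernel's overflow-safe
 * sizing helper; plain sizeof arithmetic stands in here, and the
 * array sizes mirror MAX_IV_SIZE / TLS_MAX_AAD_SIZE.
 */
#include <stdlib.h>

struct sg_stub { void *page; unsigned int len; };

struct dctx_stub {
	unsigned char iv[16];
	unsigned char aad[13];
	unsigned char tail;
	struct sg_stub sg[];		/* sgin entries first, then sgout */
};

static void *alloc_block(size_t aead_size, size_t n_sg,
			 struct dctx_stub **dctx)
{
	void *mem = malloc(aead_size + sizeof(struct dctx_stub) +
			   n_sg * sizeof(struct sg_stub));

	if (mem)	/* the context starts right after the request */
		*dctx = (struct dctx_stub *)((char *)mem + aead_size);
	return mem;
}

int main(void)
{
	struct dctx_stub *dctx;
	void *mem = alloc_block(64, 8, &dctx);
	int ok = mem != NULL;

	free(mem);	/* one free() releases request and context alike */
	return ok ? 0 : 1;
}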
-static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
- struct iov_iter *dest, int *chunk, bool *zc,
- bool async)
+static int
+tls_decrypt_sw(struct sock *sk, struct tls_context *tls_ctx,
+ struct msghdr *msg, struct tls_decrypt_arg *darg)
{
- struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct tls_prot_info *prot = &tls_ctx->prot_info;
- struct strp_msg *rxm = strp_msg(skb);
- int pad, err = 0;
-
- if (!ctx->decrypted) {
- if (tls_ctx->rx_conf == TLS_HW) {
- err = tls_device_decrypted(sk, tls_ctx, skb, rxm);
- if (err < 0)
- return err;
- }
+ struct strp_msg *rxm;
+ int pad, err;
- /* Still not decrypted after tls_device */
- if (!ctx->decrypted) {
- err = decrypt_internal(sk, skb, dest, NULL, chunk, zc,
- async);
- if (err < 0) {
- if (err == -EINPROGRESS)
- tls_advance_record_sn(sk, prot,
- &tls_ctx->rx);
- else if (err == -EBADMSG)
- TLS_INC_STATS(sock_net(sk),
- LINUX_MIB_TLSDECRYPTERROR);
- return err;
- }
- } else {
- *zc = false;
- }
+ err = tls_decrypt_sg(sk, &msg->msg_iter, NULL, darg);
+ if (err < 0) {
+ if (err == -EBADMSG)
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
+ return err;
+ }
+ /* keep going even for ->async, the code below is TLS 1.3 */
- pad = padding_length(ctx, prot, skb);
- if (pad < 0)
- return pad;
+ /* If opportunistic TLS 1.3 ZC failed retry without ZC */
+ if (unlikely(darg->zc && prot->version == TLS_1_3_VERSION &&
+ darg->tail != TLS_RECORD_TYPE_DATA)) {
+ darg->zc = false;
+ if (!darg->tail)
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXNOPADVIOL);
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTRETRY);
+ return tls_decrypt_sw(sk, tls_ctx, msg, darg);
+ }
- rxm->full_len -= pad;
- rxm->offset += prot->prepend_size;
- rxm->full_len -= prot->overhead_size;
- tls_advance_record_sn(sk, prot, &tls_ctx->rx);
- ctx->decrypted = 1;
- ctx->saved_data_ready(sk);
- } else {
- *zc = false;
+ pad = tls_padding_length(prot, darg->skb, darg);
+ if (pad < 0) {
+ if (darg->skb != tls_strp_msg(ctx))
+ consume_skb(darg->skb);
+ return pad;
}
- return err;
+ rxm = strp_msg(darg->skb);
+ rxm->full_len -= pad;
+
+ return 0;
}
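
[Editorial note, not part of the patch: a brief rationale for the retry above. In TLS 1.3 the true record type travels encrypted as the last non-padding byte of the plaintext, so zero-copy decryption into the user buffer must assume application data up front. A hedged sketch of the flow, assuming TLS_RECORD_TYPE_DATA == 23:]

	/*
	 * darg.zc = true;            decrypt straight into msg_iter
	 * if (darg.tail != 23)       the record was handshake/alert, or
	 *                            tail == 0: the peer padded despite the
	 *                            rx_no_pad promise (TLSRXNOPADVIOL)
	 *     -> darg.zc = false, bump TLSDECRYPTRETRY, and decrypt the
	 *        record again into a kernel skb so the payload can be
	 *        routed by its real content type.
	 */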
-int decrypt_skb(struct sock *sk, struct sk_buff *skb,
- struct scatterlist *sgout)
+static int
+tls_decrypt_device(struct sock *sk, struct msghdr *msg,
+ struct tls_context *tls_ctx, struct tls_decrypt_arg *darg)
{
- bool zc = true;
- int chunk;
+ struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
+ struct strp_msg *rxm;
+ int pad, err;
+
+ if (tls_ctx->rx_conf != TLS_HW)
+ return 0;
+
+ err = tls_device_decrypted(sk, tls_ctx);
+ if (err <= 0)
+ return err;
+
+ pad = tls_padding_length(prot, tls_strp_msg(ctx), darg);
+ if (pad < 0)
+ return pad;
+
+ darg->async = false;
+ darg->skb = tls_strp_msg(ctx);
+ /* ->zc downgrade check, in case TLS 1.3 gets here */
+ darg->zc &= !(prot->version == TLS_1_3_VERSION &&
+ tls_msg(darg->skb)->control != TLS_RECORD_TYPE_DATA);
+
+ rxm = strp_msg(darg->skb);
+ rxm->full_len -= pad;
+
+ if (!darg->zc) {
+ /* Non-ZC case needs a real skb */
+ darg->skb = tls_strp_msg_detach(ctx);
+ if (!darg->skb)
+ return -ENOMEM;
+ } else {
+ unsigned int off, len;
+
+ /* In the ZC case nobody cares about the output skb.
+ * Just copy the data here. Note the skb is not fully trimmed.
+ */
+ off = rxm->offset + prot->prepend_size;
+ len = rxm->full_len - prot->overhead_size;
- return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc, false);
+ err = skb_copy_datagram_msg(darg->skb, off, msg, len);
+ if (err)
+ return err;
+ }
+ return 1;
}
-static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
- unsigned int len)
+static int tls_rx_one_record(struct sock *sk, struct msghdr *msg,
+ struct tls_decrypt_arg *darg)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
+ struct strp_msg *rxm;
+ int err;
- if (skb) {
- struct strp_msg *rxm = strp_msg(skb);
+ err = tls_decrypt_device(sk, msg, tls_ctx, darg);
+ if (!err)
+ err = tls_decrypt_sw(sk, tls_ctx, msg, darg);
+ if (err < 0)
+ return err;
+
+ rxm = strp_msg(darg->skb);
+ rxm->offset += prot->prepend_size;
+ rxm->full_len -= prot->overhead_size;
+ tls_advance_record_sn(sk, prot, &tls_ctx->rx);
+
+ return 0;
+}
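
[Reading aid, not part of the patch: the dispatch in tls_rx_one_record() relies on the return-value convention the lines above implement:]

	/*
	 * tls_decrypt_device() return values, as consumed above:
	 *   < 0   hard error                 -> propagate to the caller
	 *   == 0  record not handled by HW   -> fall through to tls_decrypt_sw()
	 *   == 1  record decrypted by device -> skip the software path
	 * Only after either path succeeds are offset/full_len trimmed and
	 * the record sequence number advanced, exactly once per record.
	 */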
- if (len < rxm->full_len) {
- rxm->offset += len;
- rxm->full_len -= len;
- return false;
+int decrypt_skb(struct sock *sk, struct scatterlist *sgout)
+{
+ struct tls_decrypt_arg darg = { .zc = true, };
+
+ return tls_decrypt_sg(sk, NULL, sgout, &darg);
+}
+
+static int tls_record_content_type(struct msghdr *msg, struct tls_msg *tlm,
+ u8 *control)
+{
+ int err;
+
+ if (!*control) {
+ *control = tlm->control;
+ if (!*control)
+ return -EBADMSG;
+
+ err = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
+ sizeof(*control), control);
+ if (*control != TLS_RECORD_TYPE_DATA) {
+ if (err || msg->msg_flags & MSG_CTRUNC)
+ return -EIO;
}
- consume_skb(skb);
+ } else if (*control != tlm->control) {
+ return 0;
}
- /* Finished with message */
- ctx->recv_pkt = NULL;
- __strp_unpause(&ctx->strp);
+ return 1;
+}
- return true;
+static void tls_rx_rec_done(struct tls_sw_context_rx *ctx)
+{
+ tls_strp_msg_done(&ctx->strp);
}
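
[For context: the record type that tls_record_content_type() emits is the standard kTLS TLS_GET_RECORD_TYPE control message. A minimal userspace consumer might look like the hedged sketch below; fd is assumed to be an established kTLS socket, and SOL_TLS is defined in linux/socket.h.]

#include <linux/tls.h>
#include <sys/socket.h>

#ifndef SOL_TLS
#define SOL_TLS 282	/* from linux/socket.h */
#endif

/* hedged sketch: receive one record and inspect its content type */
static int recv_one_record(int fd, void *data, size_t len)
{
	union {
		char buf[CMSG_SPACE(sizeof(unsigned char))];
		struct cmsghdr align;
	} u;
	struct iovec iov = { .iov_base = data, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = u.buf, .msg_controllen = sizeof(u.buf),
	};
	ssize_t n = recvmsg(fd, &msg, 0);
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	if (n > 0 && cmsg && cmsg->cmsg_level == SOL_TLS &&
	    cmsg->cmsg_type == TLS_GET_RECORD_TYPE) {
		unsigned char record_type = *CMSG_DATA(cmsg);

		/* 23 == application_data; anything else is alert/handshake */
		if (record_type != 23)
			return -1;
	}
	return (int)n;
}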
/* This function traverses the rx_list in tls receive context to copy the
@@ -1647,31 +1749,22 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
static int process_rx_list(struct tls_sw_context_rx *ctx,
struct msghdr *msg,
u8 *control,
- bool *cmsg,
size_t skip,
size_t len,
- bool zc,
bool is_peek)
{
struct sk_buff *skb = skb_peek(&ctx->rx_list);
- u8 ctrl = *control;
- u8 msgc = *cmsg;
struct tls_msg *tlm;
ssize_t copied = 0;
-
- /* Set the record type in 'control' if caller didn't pass it */
- if (!ctrl && skb) {
- tlm = tls_msg(skb);
- ctrl = tlm->control;
- }
+ int err;
while (skip && skb) {
struct strp_msg *rxm = strp_msg(skb);
tlm = tls_msg(skb);
- /* Cannot process a record of different type */
- if (ctrl != tlm->control)
- return 0;
+ err = tls_record_content_type(msg, tlm, control);
+ if (err <= 0)
+ goto out;
if (skip < rxm->full_len)
break;
@@ -1687,31 +1780,14 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
tlm = tls_msg(skb);
- /* Cannot process a record of different type */
- if (ctrl != tlm->control)
- return 0;
-
- /* Set record type if not already done. For a non-data record,
- * do not proceed if record type could not be copied.
- */
- if (!msgc) {
- int cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
- sizeof(ctrl), &ctrl);
- msgc = true;
- if (ctrl != TLS_RECORD_TYPE_DATA) {
- if (cerr || msg->msg_flags & MSG_CTRUNC)
- return -EIO;
-
- *cmsg = msgc;
- }
- }
+ err = tls_record_content_type(msg, tlm, control);
+ if (err <= 0)
+ goto out;
- if (!zc || (rxm->full_len - skip) > len) {
- int err = skb_copy_datagram_msg(skb, rxm->offset + skip,
- msg, chunk);
- if (err < 0)
- return err;
- }
+ err = skb_copy_datagram_msg(skb, rxm->offset + skip,
+ msg, chunk);
+ if (err < 0)
+ goto out;
len = len - chunk;
copied = copied + chunk;
@@ -1737,127 +1813,186 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
next_skb = skb_peek_next(skb, &ctx->rx_list);
if (!is_peek) {
- skb_unlink(skb, &ctx->rx_list);
+ __skb_unlink(skb, &ctx->rx_list);
consume_skb(skb);
}
skb = next_skb;
}
+ err = 0;
+
+out:
+ return copied ? : err;
+}
+
+static bool
+tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot,
+ size_t len_left, size_t decrypted, ssize_t done,
+ size_t *flushed_at)
+{
+ size_t max_rec;
+
+ if (len_left <= decrypted)
+ return false;
+
+ max_rec = prot->overhead_size - prot->tail_size + TLS_MAX_PAYLOAD_SIZE;
+ if (done - *flushed_at < SZ_128K && tcp_inq(sk) > max_rec)
+ return false;
+
+ *flushed_at = done;
+ return sk_flush_backlog(sk);
+}
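
[To make the thresholds above concrete, an editorial back-of-envelope using TLS 1.2 AES-GCM-128 sizes; the exact figures depend on the negotiated cipher:]

	/*
	 * prepend = TLS_HEADER_SIZE(5) + nonce(8) = 13, tag = 16, tail = 0,
	 * so overhead_size = 29 and
	 * max_rec = 29 - 0 + TLS_MAX_PAYLOAD_SIZE (1 << 14) = 16413 bytes.
	 *
	 * Reading the conditions above: no flush while the request is about
	 * to be satisfied (len_left <= decrypted); otherwise flush once
	 * SZ_128K (131072) bytes have been copied since the last flush, or
	 * earlier if TCP holds at most one full record and the reader is
	 * about to wait for more data anyway.
	 */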
+
+static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx,
+ bool nonblock)
+{
+ long timeo;
+ int err;
+
+ lock_sock(sk);
+
+ timeo = sock_rcvtimeo(sk, nonblock);
+
+ while (unlikely(ctx->reader_present)) {
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+ ctx->reader_contended = 1;
+
+ add_wait_queue(&ctx->wq, &wait);
+ sk_wait_event(sk, &timeo,
+ !READ_ONCE(ctx->reader_present), &wait);
+ remove_wait_queue(&ctx->wq, &wait);
+
+ if (timeo <= 0) {
+ err = -EAGAIN;
+ goto err_unlock;
+ }
+ if (signal_pending(current)) {
+ err = sock_intr_errno(timeo);
+ goto err_unlock;
+ }
+ }
+
+ WRITE_ONCE(ctx->reader_present, 1);
- *control = ctrl;
- return copied;
+ return 0;
+
+err_unlock:
+ release_sock(sk);
+ return err;
+}
+
+static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx)
+{
+ if (unlikely(ctx->reader_contended)) {
+ if (wq_has_sleeper(&ctx->wq))
+ wake_up(&ctx->wq);
+ else
+ ctx->reader_contended = 0;
+
+ WARN_ON_ONCE(!ctx->reader_present);
+ }
+
+ WRITE_ONCE(ctx->reader_present, 0);
+ release_sock(sk);
}
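
[Editorial note on the pair above: the receive path may now drop the socket lock mid-call (see tls_read_flush_backlog() and the released flag below), so the socket lock alone no longer serializes readers and a dedicated single-reader gate spans the whole call. A hedged usage sketch:]

	/*
	 * canonical pairing, as tls_sw_recvmsg()/tls_sw_splice_read() use it:
	 *
	 *   err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT);
	 *   if (err < 0)
	 *           return err;   // -EAGAIN on timeout, -EINTR family on signal
	 *
	 *   ... exclusive use of ctx->strp and ctx->rx_list ...
	 *
	 *   tls_rx_reader_unlock(sk, ctx);  // wakes contended waiters, if any
	 */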
int tls_sw_recvmsg(struct sock *sk,
struct msghdr *msg,
size_t len,
- int nonblock,
int flags,
int *addr_len)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct tls_prot_info *prot = &tls_ctx->prot_info;
+ ssize_t decrypted = 0, async_copy_bytes = 0;
struct sk_psock *psock;
unsigned char control = 0;
- ssize_t decrypted = 0;
+ size_t flushed_at = 0;
struct strp_msg *rxm;
struct tls_msg *tlm;
- struct sk_buff *skb;
ssize_t copied = 0;
- bool cmsg = false;
- int target, err = 0;
- long timeo;
+ bool async = false;
+ int target, err;
bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
bool is_peek = flags & MSG_PEEK;
+ bool released = true;
bool bpf_strp_enabled;
- int num_async = 0;
- int pending;
-
- flags |= nonblock;
+ bool zc_capable;
if (unlikely(flags & MSG_ERRQUEUE))
return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);
psock = sk_psock_get(sk);
- lock_sock(sk);
+ err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT);
+ if (err < 0)
+ return err;
bpf_strp_enabled = sk_psock_strp_enabled(psock);
+ /* If crypto failed the connection is broken */
+ err = ctx->async_wait.err;
+ if (err)
+ goto end;
+
/* Process pending decrypted records. It must be non-zero-copy */
- err = process_rx_list(ctx, msg, &control, &cmsg, 0, len, false,
- is_peek);
- if (err < 0) {
- tls_err_abort(sk, err);
+ err = process_rx_list(ctx, msg, &control, 0, len, is_peek);
+ if (err < 0)
goto end;
- } else {
- copied = err;
- }
+ copied = err;
if (len <= copied)
- goto recv_end;
+ goto end;
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
len = len - copied;
- timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
-
- while (len && (decrypted + copied < target || ctx->recv_pkt)) {
- bool retain_skb = false;
- bool zc = false;
- int to_decrypt;
- int chunk = 0;
- bool async_capable;
- bool async = false;
-
- skb = tls_wait_data(sk, psock, flags & MSG_DONTWAIT, timeo, &err);
- if (!skb) {
- if (psock) {
- int ret = sk_msg_recvmsg(sk, psock, msg, len,
- flags);
- if (ret > 0) {
- decrypted += ret;
- len -= ret;
+ zc_capable = !bpf_strp_enabled && !is_kvec && !is_peek &&
+ ctx->zc_capable;
+ decrypted = 0;
+ while (len && (decrypted + copied < target || tls_strp_msg_ready(ctx))) {
+ struct tls_decrypt_arg darg;
+ int to_decrypt, chunk;
+
+ err = tls_rx_rec_wait(sk, psock, flags & MSG_DONTWAIT,
+ released);
+ if (err <= 0) {
+ if (psock) {
+ chunk = sk_msg_recvmsg(sk, psock, msg, len,
+ flags);
+ if (chunk > 0) {
+ decrypted += chunk;
+ len -= chunk;
continue;
}
}
goto recv_end;
- } else {
- tlm = tls_msg(skb);
- if (prot->version == TLS_1_3_VERSION)
- tlm->control = 0;
- else
- tlm->control = ctx->control;
}
- rxm = strp_msg(skb);
+ memset(&darg.inargs, 0, sizeof(darg.inargs));
+
+ rxm = strp_msg(tls_strp_msg(ctx));
+ tlm = tls_msg(tls_strp_msg(ctx));
to_decrypt = rxm->full_len - prot->overhead_size;
- if (to_decrypt <= len && !is_kvec && !is_peek &&
- ctx->control == TLS_RECORD_TYPE_DATA &&
- prot->version != TLS_1_3_VERSION &&
- !bpf_strp_enabled)
- zc = true;
+ if (zc_capable && to_decrypt <= len &&
+ tlm->control == TLS_RECORD_TYPE_DATA)
+ darg.zc = true;
/* Do not use async mode if record is non-data */
- if (ctx->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled)
- async_capable = ctx->async_capable;
+ if (tlm->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled)
+ darg.async = ctx->async_capable;
else
- async_capable = false;
+ darg.async = false;
- err = decrypt_skb_update(sk, skb, &msg->msg_iter,
- &chunk, &zc, async_capable);
- if (err < 0 && err != -EINPROGRESS) {
+ err = tls_rx_one_record(sk, msg, &darg);
+ if (err < 0) {
tls_err_abort(sk, -EBADMSG);
goto recv_end;
}
- if (err == -EINPROGRESS) {
- async = true;
- num_async++;
- } else if (prot->version == TLS_1_3_VERSION) {
- tlm->control = ctx->control;
- }
+ async |= darg.async;
/* If the type of records being processed is not known yet,
* set it to record type just dequeued. If it is already known,
@@ -1866,131 +2001,120 @@ int tls_sw_recvmsg(struct sock *sk,
* is known just after record is dequeued from stream parser.
* For tls1.3, we disable async.
*/
-
- if (!control)
- control = tlm->control;
- else if (control != tlm->control)
+ err = tls_record_content_type(msg, tls_msg(darg.skb), &control);
+ if (err <= 0) {
+ DEBUG_NET_WARN_ON_ONCE(darg.zc);
+ tls_rx_rec_done(ctx);
+put_on_rx_list_err:
+ __skb_queue_tail(&ctx->rx_list, darg.skb);
goto recv_end;
-
- if (!cmsg) {
- int cerr;
-
- cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
- sizeof(control), &control);
- cmsg = true;
- if (control != TLS_RECORD_TYPE_DATA) {
- if (cerr || msg->msg_flags & MSG_CTRUNC) {
- err = -EIO;
- goto recv_end;
- }
- }
}
- if (async)
- goto pick_next_record;
+ /* periodically flush backlog, and feed strparser */
+ released = tls_read_flush_backlog(sk, prot, len, to_decrypt,
+ decrypted + copied,
+ &flushed_at);
+
+ /* TLS 1.3 may have updated the length by more than overhead */
+ rxm = strp_msg(darg.skb);
+ chunk = rxm->full_len;
+ tls_rx_rec_done(ctx);
+
+ if (!darg.zc) {
+ bool partially_consumed = chunk > len;
+ struct sk_buff *skb = darg.skb;
+
+ DEBUG_NET_WARN_ON_ONCE(darg.skb == ctx->strp.anchor);
+
+ if (async) {
+ /* TLS 1.2-only, to_decrypt must be text len */
+ chunk = min_t(int, to_decrypt, len);
+ async_copy_bytes += chunk;
+put_on_rx_list:
+ decrypted += chunk;
+ len -= chunk;
+ __skb_queue_tail(&ctx->rx_list, skb);
+ continue;
+ }
- if (!zc) {
if (bpf_strp_enabled) {
+ released = true;
err = sk_psock_tls_strp_read(psock, skb);
if (err != __SK_PASS) {
rxm->offset = rxm->offset + rxm->full_len;
rxm->full_len = 0;
if (err == __SK_DROP)
consume_skb(skb);
- ctx->recv_pkt = NULL;
- __strp_unpause(&ctx->strp);
continue;
}
}
- if (rxm->full_len > len) {
- retain_skb = true;
+ if (partially_consumed)
chunk = len;
- } else {
- chunk = rxm->full_len;
- }
err = skb_copy_datagram_msg(skb, rxm->offset,
msg, chunk);
if (err < 0)
- goto recv_end;
+ goto put_on_rx_list_err;
- if (!is_peek) {
- rxm->offset = rxm->offset + chunk;
- rxm->full_len = rxm->full_len - chunk;
+ if (is_peek)
+ goto put_on_rx_list;
+
+ if (partially_consumed) {
+ rxm->offset += chunk;
+ rxm->full_len -= chunk;
+ goto put_on_rx_list;
}
- }
-pick_next_record:
- if (chunk > len)
- chunk = len;
+ consume_skb(skb);
+ }
decrypted += chunk;
len -= chunk;
- /* For async or peek case, queue the current skb */
- if (async || is_peek || retain_skb) {
- skb_queue_tail(&ctx->rx_list, skb);
- skb = NULL;
- }
-
- if (tls_sw_advance_skb(sk, skb, chunk)) {
- /* Return full control message to
- * userspace before trying to parse
- * another message type
- */
- msg->msg_flags |= MSG_EOR;
- if (control != TLS_RECORD_TYPE_DATA)
- goto recv_end;
- } else {
+ /* Return full control message to userspace before trying
+ * to parse another message type
+ */
+ msg->msg_flags |= MSG_EOR;
+ if (control != TLS_RECORD_TYPE_DATA)
break;
- }
}
recv_end:
- if (num_async) {
+ if (async) {
+ int ret, pending;
+
/* Wait for all previously submitted records to be decrypted */
spin_lock_bh(&ctx->decrypt_compl_lock);
- ctx->async_notify = true;
+ reinit_completion(&ctx->async_wait.completion);
pending = atomic_read(&ctx->decrypt_pending);
spin_unlock_bh(&ctx->decrypt_compl_lock);
- if (pending) {
- err = crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
- if (err) {
- /* one of async decrypt failed */
- tls_err_abort(sk, err);
- copied = 0;
- decrypted = 0;
- goto end;
- }
- } else {
- reinit_completion(&ctx->async_wait.completion);
- }
+ ret = 0;
+ if (pending)
+ ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ __skb_queue_purge(&ctx->async_hold);
- /* There can be no concurrent accesses, since we have no
- * pending decrypt operations
- */
- WRITE_ONCE(ctx->async_notify, false);
+ if (ret) {
+ if (err >= 0 || err == -EINPROGRESS)
+ err = ret;
+ decrypted = 0;
+ goto end;
+ }
/* Drain records from the rx_list & copy if required */
if (is_peek || is_kvec)
- err = process_rx_list(ctx, msg, &control, &cmsg, copied,
- decrypted, false, is_peek);
+ err = process_rx_list(ctx, msg, &control, copied,
+ decrypted, is_peek);
else
- err = process_rx_list(ctx, msg, &control, &cmsg, 0,
- decrypted, true, is_peek);
- if (err < 0) {
- tls_err_abort(sk, err);
- copied = 0;
- goto end;
- }
+ err = process_rx_list(ctx, msg, &control, 0,
+ async_copy_bytes, is_peek);
+ decrypted = max(err, 0);
}
copied += decrypted;
end:
- release_sock(sk);
- sk_defer_free_flush(sk);
+ tls_rx_reader_unlock(sk, ctx);
if (psock)
sk_psock_put(sk, psock);
return copied ? : err;
@@ -2004,62 +2128,67 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct strp_msg *rxm = NULL;
struct sock *sk = sock->sk;
+ struct tls_msg *tlm;
struct sk_buff *skb;
ssize_t copied = 0;
- bool from_queue;
- int err = 0;
- long timeo;
int chunk;
- bool zc = false;
+ int err;
- lock_sock(sk);
-
- timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK);
+ err = tls_rx_reader_lock(sk, ctx, flags & SPLICE_F_NONBLOCK);
+ if (err < 0)
+ return err;
- from_queue = !skb_queue_empty(&ctx->rx_list);
- if (from_queue) {
+ if (!skb_queue_empty(&ctx->rx_list)) {
skb = __skb_dequeue(&ctx->rx_list);
} else {
- skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo,
- &err);
- if (!skb)
+ struct tls_decrypt_arg darg;
+
+ err = tls_rx_rec_wait(sk, NULL, flags & SPLICE_F_NONBLOCK,
+ true);
+ if (err <= 0)
goto splice_read_end;
- err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false);
+ memset(&darg.inargs, 0, sizeof(darg.inargs));
+
+ err = tls_rx_one_record(sk, NULL, &darg);
if (err < 0) {
tls_err_abort(sk, -EBADMSG);
goto splice_read_end;
}
+
+ tls_rx_rec_done(ctx);
+ skb = darg.skb;
}
+ rxm = strp_msg(skb);
+ tlm = tls_msg(skb);
+
/* splice does not support reading control messages */
- if (ctx->control != TLS_RECORD_TYPE_DATA) {
+ if (tlm->control != TLS_RECORD_TYPE_DATA) {
err = -EINVAL;
- goto splice_read_end;
+ goto splice_requeue;
}
- rxm = strp_msg(skb);
-
chunk = min_t(unsigned int, rxm->full_len, len);
copied = skb_splice_bits(skb, sk, rxm->offset, pipe, chunk, flags);
if (copied < 0)
- goto splice_read_end;
+ goto splice_requeue;
- if (!from_queue) {
- ctx->recv_pkt = NULL;
- __strp_unpause(&ctx->strp);
- }
if (chunk < rxm->full_len) {
- __skb_queue_head(&ctx->rx_list, skb);
rxm->offset += len;
rxm->full_len -= len;
- } else {
- consume_skb(skb);
+ goto splice_requeue;
}
+ consume_skb(skb);
+
splice_read_end:
- release_sock(sk);
+ tls_rx_reader_unlock(sk, ctx);
return copied ? : err;
+
+splice_requeue:
+ __skb_queue_head(&ctx->rx_list, skb);
+ goto splice_read_end;
}
bool tls_sw_sock_is_readable(struct sock *sk)
@@ -2075,23 +2204,21 @@ bool tls_sw_sock_is_readable(struct sock *sk)
ingress_empty = list_empty(&psock->ingress_msg);
rcu_read_unlock();
- return !ingress_empty || ctx->recv_pkt ||
+ return !ingress_empty || tls_strp_msg_ready(ctx) ||
!skb_queue_empty(&ctx->rx_list);
}
-static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
+int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb)
{
struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
- struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct tls_prot_info *prot = &tls_ctx->prot_info;
char header[TLS_HEADER_SIZE + MAX_IV_SIZE];
- struct strp_msg *rxm = strp_msg(skb);
size_t cipher_overhead;
size_t data_len = 0;
int ret;
/* Verify that we have a full TLS header, or wait for more data */
- if (rxm->offset + prot->prepend_size > skb->len)
+ if (strp->stm.offset + prot->prepend_size > skb->len)
return 0;
/* Sanity-check size of on-stack buffer. */
@@ -2101,12 +2228,11 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
}
/* Linearize header to local buffer */
- ret = skb_copy_bits(skb, rxm->offset, header, prot->prepend_size);
-
+ ret = skb_copy_bits(skb, strp->stm.offset, header, prot->prepend_size);
if (ret < 0)
goto read_failure;
- ctx->control = header[0];
+ strp->mark = header[0];
data_len = ((header[4] & 0xFF) | (header[3] << 8));
@@ -2133,7 +2259,7 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
}
tls_device_rx_resync_new_rec(strp->sk, data_len + TLS_HEADER_SIZE,
- TCP_SKB_CB(skb)->seq + rxm->offset);
+ TCP_SKB_CB(skb)->seq + strp->stm.offset);
return data_len + TLS_HEADER_SIZE;
read_failure:
@@ -2142,16 +2268,11 @@ read_failure:
return ret;
}
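
[A worked example for the header parse above, editorial and not part of the patch: the TLS 1.2 application-data record header 17 03 03 00 2a yields:]

	/*
	 * header[0]   = 0x17 -> strp->mark (application_data)
	 * header[1-2] = 0x03 0x03, the version bytes, validated elsewhere
	 *               in this function (hunk elided here)
	 * data_len    = ((header[4] & 0xFF) | (header[3] << 8))
	 *             = (0x2a | (0x00 << 8)) = 42
	 * return      = data_len + TLS_HEADER_SIZE = 42 + 5 = 47
	 * A return of 0 (header incomplete) makes the strparser wait for
	 * more data instead of failing.
	 */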
-static void tls_queue(struct strparser *strp, struct sk_buff *skb)
+void tls_rx_msg_ready(struct tls_strparser *strp)
{
- struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
- struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
-
- ctx->decrypted = 0;
-
- ctx->recv_pkt = skb;
- strp_pause(strp);
+ struct tls_sw_context_rx *ctx;
+ ctx = container_of(strp, struct tls_sw_context_rx, strp);
ctx->saved_data_ready(strp->sk);
}
@@ -2161,7 +2282,7 @@ static void tls_data_ready(struct sock *sk)
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct sk_psock *psock;
- strp_data_ready(&ctx->strp);
+ tls_strp_data_ready(&ctx->strp);
psock = sk_psock_get(sk);
if (psock) {
@@ -2237,13 +2358,11 @@ void tls_sw_release_resources_rx(struct sock *sk)
kfree(tls_ctx->rx.iv);
if (ctx->aead_recv) {
- kfree_skb(ctx->recv_pkt);
- ctx->recv_pkt = NULL;
- skb_queue_purge(&ctx->rx_list);
+ __skb_queue_purge(&ctx->rx_list);
crypto_free_aead(ctx->aead_recv);
- strp_stop(&ctx->strp);
+ tls_strp_stop(&ctx->strp);
/* If tls_sw_strparser_arm() was not called (cleanup paths)
- * we still want to strp_stop(), but sk->sk_data_ready was
+ * we still want to tls_strp_stop(), but sk->sk_data_ready was
* never swapped.
*/
if (ctx->saved_data_ready) {
@@ -2258,7 +2377,7 @@ void tls_sw_strparser_done(struct tls_context *tls_ctx)
{
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
- strp_done(&ctx->strp);
+ tls_strp_done(&ctx->strp);
}
void tls_sw_free_ctx_rx(struct tls_context *tls_ctx)
@@ -2302,12 +2421,23 @@ static void tx_work_handler(struct work_struct *work)
mutex_unlock(&tls_ctx->tx_lock);
}
+static bool tls_is_tx_ready(struct tls_sw_context_tx *ctx)
+{
+ struct tls_rec *rec;
+
+ rec = list_first_entry_or_null(&ctx->tx_list, struct tls_rec, list);
+ if (!rec)
+ return false;
+
+ return READ_ONCE(rec->tx_ready);
+}
+
void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
{
struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
/* Schedule the transmission if tx list is ready */
- if (is_tx_ready(tx_ctx) &&
+ if (tls_is_tx_ready(tx_ctx) &&
!test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask))
schedule_delayed_work(&tx_ctx->tx_work.work, 0);
}
@@ -2320,8 +2450,14 @@ void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx)
rx_ctx->saved_data_ready = sk->sk_data_ready;
sk->sk_data_ready = tls_data_ready;
write_unlock_bh(&sk->sk_callback_lock);
+}
+
+void tls_update_rx_zc_capable(struct tls_context *tls_ctx)
+{
+ struct tls_sw_context_rx *rx_ctx = tls_sw_ctx_rx(tls_ctx);
- strp_check_rcv(&rx_ctx->strp);
+ rx_ctx->zc_capable = tls_ctx->rx_no_pad ||
+ tls_ctx->prot_info.version != TLS_1_3_VERSION;
}
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
@@ -2333,7 +2469,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
struct tls_sw_context_rx *sw_ctx_rx = NULL;
struct cipher_context *cctx;
struct crypto_aead **aead;
- struct strp_callbacks cb;
u16 nonce_size, tag_size, iv_size, rec_seq_size, salt_size;
struct crypto_tfm *tfm;
char *iv, *rec_seq, *key, *salt, *cipher_name;
@@ -2383,9 +2518,11 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
} else {
crypto_init_wait(&sw_ctx_rx->async_wait);
spin_lock_init(&sw_ctx_rx->decrypt_compl_lock);
+ init_waitqueue_head(&sw_ctx_rx->wq);
crypto_info = &ctx->crypto_recv.info;
cctx = &ctx->rx;
skb_queue_head_init(&sw_ctx_rx->rx_list);
+ skb_queue_head_init(&sw_ctx_rx->async_hold);
aead = &sw_ctx_rx->aead_recv;
}
@@ -2492,14 +2629,41 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
cipher_name = "ccm(sm4)";
break;
}
- default:
- rc = -EINVAL;
- goto free_priv;
+ case TLS_CIPHER_ARIA_GCM_128: {
+ struct tls12_crypto_info_aria_gcm_128 *aria_gcm_128_info;
+
+ aria_gcm_128_info = (void *)crypto_info;
+ nonce_size = TLS_CIPHER_ARIA_GCM_128_IV_SIZE;
+ tag_size = TLS_CIPHER_ARIA_GCM_128_TAG_SIZE;
+ iv_size = TLS_CIPHER_ARIA_GCM_128_IV_SIZE;
+ iv = aria_gcm_128_info->iv;
+ rec_seq_size = TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE;
+ rec_seq = aria_gcm_128_info->rec_seq;
+ keysize = TLS_CIPHER_ARIA_GCM_128_KEY_SIZE;
+ key = aria_gcm_128_info->key;
+ salt = aria_gcm_128_info->salt;
+ salt_size = TLS_CIPHER_ARIA_GCM_128_SALT_SIZE;
+ cipher_name = "gcm(aria)";
+ break;
}
+ case TLS_CIPHER_ARIA_GCM_256: {
+ struct tls12_crypto_info_aria_gcm_256 *gcm_256_info;
- /* Sanity-check the sizes for stack allocations. */
- if (iv_size > MAX_IV_SIZE || nonce_size > MAX_IV_SIZE ||
- rec_seq_size > TLS_MAX_REC_SEQ_SIZE) {
+ gcm_256_info = (void *)crypto_info;
+ nonce_size = TLS_CIPHER_ARIA_GCM_256_IV_SIZE;
+ tag_size = TLS_CIPHER_ARIA_GCM_256_TAG_SIZE;
+ iv_size = TLS_CIPHER_ARIA_GCM_256_IV_SIZE;
+ iv = gcm_256_info->iv;
+ rec_seq_size = TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE;
+ rec_seq = gcm_256_info->rec_seq;
+ keysize = TLS_CIPHER_ARIA_GCM_256_KEY_SIZE;
+ key = gcm_256_info->key;
+ salt = gcm_256_info->salt;
+ salt_size = TLS_CIPHER_ARIA_GCM_256_SALT_SIZE;
+ cipher_name = "gcm(aria)";
+ break;
+ }
+ default:
rc = -EINVAL;
goto free_priv;
}
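
[For completeness, the new ciphers are selected from userspace like any other kTLS cipher. A hedged sketch, assuming UAPI headers that define tls12_crypto_info_aria_gcm_128 and key material already derived by a userspace handshake:]

#include <linux/tls.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <string.h>

#ifndef SOL_TLS
#define SOL_TLS 282	/* from linux/socket.h */
#endif

/* hedged sketch: sock is a connected TCP socket */
static int enable_aria_rx(int sock, const unsigned char *key,
			  const unsigned char *salt, const unsigned char *iv,
			  const unsigned char *rec_seq)
{
	struct tls12_crypto_info_aria_gcm_128 ci = {
		.info.version = TLS_1_2_VERSION,
		.info.cipher_type = TLS_CIPHER_ARIA_GCM_128,
	};

	memcpy(ci.key, key, TLS_CIPHER_ARIA_GCM_128_KEY_SIZE);
	memcpy(ci.salt, salt, TLS_CIPHER_ARIA_GCM_128_SALT_SIZE);
	memcpy(ci.iv, iv, TLS_CIPHER_ARIA_GCM_128_IV_SIZE);
	memcpy(ci.rec_seq, rec_seq, TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE);

	if (setsockopt(sock, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")))
		return -1;
	return setsockopt(sock, SOL_TLS, TLS_RX, &ci, sizeof(ci));
}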
@@ -2513,6 +2677,14 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
prot->tail_size = 0;
}
+ /* Sanity-check the sizes for stack allocations. */
+ if (iv_size > MAX_IV_SIZE || nonce_size > MAX_IV_SIZE ||
+ rec_seq_size > TLS_MAX_REC_SEQ_SIZE || tag_size != TLS_TAG_SIZE ||
+ prot->aad_size > TLS_MAX_AAD_SIZE) {
+ rc = -EINVAL;
+ goto free_priv;
+ }
+
prot->version = crypto_info->version;
prot->cipher_type = crypto_info->cipher_type;
prot->prepend_size = TLS_HEADER_SIZE + nonce_size;
@@ -2559,19 +2731,14 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
if (sw_ctx_rx) {
tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv);
- if (crypto_info->version == TLS_1_3_VERSION)
- sw_ctx_rx->async_capable = 0;
- else
- sw_ctx_rx->async_capable =
- !!(tfm->__crt_alg->cra_flags &
- CRYPTO_ALG_ASYNC);
-
- /* Set up strparser */
- memset(&cb, 0, sizeof(cb));
- cb.rcv_msg = tls_queue;
- cb.parse_msg = tls_read_size;
+ tls_update_rx_zc_capable(ctx);
+ sw_ctx_rx->async_capable =
+ crypto_info->version != TLS_1_3_VERSION &&
+ !!(tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC);
- strp_init(&sw_ctx_rx->strp, sk, &cb);
+ rc = tls_strp_init(&sw_ctx_rx->strp, sk);
+ if (rc)
+ goto free_aead;
}
goto out;
diff --git a/net/tls/tls_toe.c b/net/tls/tls_toe.c
index 7e1330f19165..825669e1ab47 100644
--- a/net/tls/tls_toe.c
+++ b/net/tls/tls_toe.c
@@ -38,6 +38,8 @@
#include <net/tls.h>
#include <net/tls_toe.h>
+#include "tls.h"
+
static LIST_HEAD(device_list);
static DEFINE_SPINLOCK(device_spinlock);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c19569819866..b3545fc68097 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -118,15 +118,13 @@
#include "scm.h"
-spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE];
-EXPORT_SYMBOL_GPL(unix_table_locks);
-struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
-EXPORT_SYMBOL_GPL(unix_socket_table);
static atomic_long_t unix_nr_socks;
+static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
+static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
/* SMP locking strategy:
- * hash table is protected with spinlock unix_table_locks
- * each socket state is protected by separate spin lock.
+ * hash table is protected with spinlock.
+ * each socket state is protected by separate spinlock.
*/
static unsigned int unix_unbound_hash(struct sock *sk)
@@ -137,12 +135,12 @@ static unsigned int unix_unbound_hash(struct sock *sk)
hash ^= hash >> 8;
hash ^= sk->sk_type;
- return UNIX_HASH_SIZE + (hash & (UNIX_HASH_SIZE - 1));
+ return hash & UNIX_HASH_MOD;
}
static unsigned int unix_bsd_hash(struct inode *i)
{
- return i->i_ino & (UNIX_HASH_SIZE - 1);
+ return i->i_ino & UNIX_HASH_MOD;
}
static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
@@ -155,26 +153,34 @@ static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
hash ^= hash >> 8;
hash ^= type;
- return hash & (UNIX_HASH_SIZE - 1);
+ return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
}
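
[An editorial picture of the resulting layout, assuming UNIX_HASH_MOD == 255 and per-netns tables of UNIX_HASH_SIZE == 512 buckets, as in this series:]

	/*
	 * unbound sockets  -> net->unx.table buckets [0 .. 255]
	 *                     hash & UNIX_HASH_MOD
	 * abstract sockets -> net->unx.table buckets [256 .. 511]
	 *                     UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD)
	 * pathname (BSD)   -> global bsd_socket_buckets[0 .. 255],
	 *                     keyed by inode number and used only for
	 *                     unix_find_socket_byinode() lookups.
	 */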
-static void unix_table_double_lock(unsigned int hash1, unsigned int hash2)
+static void unix_table_double_lock(struct net *net,
+ unsigned int hash1, unsigned int hash2)
{
- /* hash1 and hash2 is never the same because
- * one is between 0 and UNIX_HASH_SIZE - 1, and
- * another is between UNIX_HASH_SIZE and UNIX_HASH_SIZE * 2.
- */
+ if (hash1 == hash2) {
+ spin_lock(&net->unx.table.locks[hash1]);
+ return;
+ }
+
if (hash1 > hash2)
swap(hash1, hash2);
- spin_lock(&unix_table_locks[hash1]);
- spin_lock_nested(&unix_table_locks[hash2], SINGLE_DEPTH_NESTING);
+ spin_lock(&net->unx.table.locks[hash1]);
+ spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
}
-static void unix_table_double_unlock(unsigned int hash1, unsigned int hash2)
+static void unix_table_double_unlock(struct net *net,
+ unsigned int hash1, unsigned int hash2)
{
- spin_unlock(&unix_table_locks[hash1]);
- spin_unlock(&unix_table_locks[hash2]);
+ if (hash1 == hash2) {
+ spin_unlock(&net->unx.table.locks[hash1]);
+ return;
+ }
+
+ spin_unlock(&net->unx.table.locks[hash1]);
+ spin_unlock(&net->unx.table.locks[hash2]);
}
#ifdef CONFIG_SECURITY_NETWORK
@@ -300,34 +306,52 @@ static void __unix_remove_socket(struct sock *sk)
sk_del_node_init(sk);
}
-static void __unix_insert_socket(struct sock *sk)
+static void __unix_insert_socket(struct net *net, struct sock *sk)
{
- WARN_ON(!sk_unhashed(sk));
- sk_add_node(sk, &unix_socket_table[sk->sk_hash]);
+ DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
+ sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
}
-static void __unix_set_addr_hash(struct sock *sk, struct unix_address *addr,
- unsigned int hash)
+static void __unix_set_addr_hash(struct net *net, struct sock *sk,
+ struct unix_address *addr, unsigned int hash)
{
__unix_remove_socket(sk);
smp_store_release(&unix_sk(sk)->addr, addr);
sk->sk_hash = hash;
- __unix_insert_socket(sk);
+ __unix_insert_socket(net, sk);
}
-static void unix_remove_socket(struct sock *sk)
+static void unix_remove_socket(struct net *net, struct sock *sk)
{
- spin_lock(&unix_table_locks[sk->sk_hash]);
+ spin_lock(&net->unx.table.locks[sk->sk_hash]);
__unix_remove_socket(sk);
- spin_unlock(&unix_table_locks[sk->sk_hash]);
+ spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}
-static void unix_insert_unbound_socket(struct sock *sk)
+static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
{
- spin_lock(&unix_table_locks[sk->sk_hash]);
- __unix_insert_socket(sk);
- spin_unlock(&unix_table_locks[sk->sk_hash]);
+ spin_lock(&net->unx.table.locks[sk->sk_hash]);
+ __unix_insert_socket(net, sk);
+ spin_unlock(&net->unx.table.locks[sk->sk_hash]);
+}
+
+static void unix_insert_bsd_socket(struct sock *sk)
+{
+ spin_lock(&bsd_socket_locks[sk->sk_hash]);
+ sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
+ spin_unlock(&bsd_socket_locks[sk->sk_hash]);
+}
+
+static void unix_remove_bsd_socket(struct sock *sk)
+{
+ if (!hlist_unhashed(&sk->sk_bind_node)) {
+ spin_lock(&bsd_socket_locks[sk->sk_hash]);
+ __sk_del_bind_node(sk);
+ spin_unlock(&bsd_socket_locks[sk->sk_hash]);
+
+ sk_node_init(&sk->sk_bind_node);
+ }
}
static struct sock *__unix_find_socket_byname(struct net *net,
@@ -336,12 +360,9 @@ static struct sock *__unix_find_socket_byname(struct net *net,
{
struct sock *s;
- sk_for_each(s, &unix_socket_table[hash]) {
+ sk_for_each(s, &net->unx.table.buckets[hash]) {
struct unix_sock *u = unix_sk(s);
- if (!net_eq(sock_net(s), net))
- continue;
-
if (u->addr->len == len &&
!memcmp(u->addr->name, sunname, len))
return s;
@@ -355,11 +376,11 @@ static inline struct sock *unix_find_socket_byname(struct net *net,
{
struct sock *s;
- spin_lock(&unix_table_locks[hash]);
+ spin_lock(&net->unx.table.locks[hash]);
s = __unix_find_socket_byname(net, sunname, len, hash);
if (s)
sock_hold(s);
- spin_unlock(&unix_table_locks[hash]);
+ spin_unlock(&net->unx.table.locks[hash]);
return s;
}
@@ -368,17 +389,17 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
unsigned int hash = unix_bsd_hash(i);
struct sock *s;
- spin_lock(&unix_table_locks[hash]);
- sk_for_each(s, &unix_socket_table[hash]) {
+ spin_lock(&bsd_socket_locks[hash]);
+ sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
struct dentry *dentry = unix_sk(s)->path.dentry;
if (dentry && d_backing_inode(dentry) == i) {
sock_hold(s);
- spin_unlock(&unix_table_locks[hash]);
+ spin_unlock(&bsd_socket_locks[hash]);
return s;
}
}
- spin_unlock(&unix_table_locks[hash]);
+ spin_unlock(&bsd_socket_locks[hash]);
return NULL;
}
@@ -490,7 +511,7 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
* -ECONNREFUSED. Otherwise, if we haven't queued any skbs
* to other and it's full, we will hang waiting for POLLOUT.
*/
- if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
+ if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
return 1;
if (connected)
@@ -548,15 +569,9 @@ static void unix_sock_destructor(struct sock *sk)
skb_queue_purge(&sk->sk_receive_queue);
-#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
- if (u->oob_skb) {
- kfree_skb(u->oob_skb);
- u->oob_skb = NULL;
- }
-#endif
- WARN_ON(refcount_read(&sk->sk_wmem_alloc));
- WARN_ON(!sk_unhashed(sk));
- WARN_ON(sk->sk_socket);
+ DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
+ DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
+ DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
if (!sock_flag(sk, SOCK_DEAD)) {
pr_info("Attempt to release alive unix socket: %p\n", sk);
return;
@@ -576,12 +591,13 @@ static void unix_sock_destructor(struct sock *sk)
static void unix_release_sock(struct sock *sk, int embrion)
{
struct unix_sock *u = unix_sk(sk);
- struct path path;
struct sock *skpair;
struct sk_buff *skb;
+ struct path path;
int state;
- unix_remove_socket(sk);
+ unix_remove_socket(sock_net(sk), sk);
+ unix_remove_bsd_socket(sk);
/* Clear state */
unix_state_lock(sk);
@@ -598,6 +614,13 @@ static void unix_release_sock(struct sock *sk, int embrion)
unix_state_unlock(sk);
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (u->oob_skb) {
+ kfree_skb(u->oob_skb);
+ u->oob_skb = NULL;
+ }
+#endif
+
wake_up_interruptible_all(&u->peer_wait);
if (skpair != NULL) {
@@ -741,10 +764,8 @@ static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
-static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
- sk_read_actor_t recv_actor);
-static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
- sk_read_actor_t recv_actor);
+static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
+static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
@@ -765,15 +786,45 @@ static int unix_set_peek_off(struct sock *sk, int val)
}
#ifdef CONFIG_PROC_FS
+static int unix_count_nr_fds(struct sock *sk)
+{
+ struct sk_buff *skb;
+ struct unix_sock *u;
+ int nr_fds = 0;
+
+ spin_lock(&sk->sk_receive_queue.lock);
+ skb = skb_peek(&sk->sk_receive_queue);
+ while (skb) {
+ u = unix_sk(skb->sk);
+ nr_fds += atomic_read(&u->scm_stat.nr_fds);
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
+ }
+ spin_unlock(&sk->sk_receive_queue.lock);
+
+ return nr_fds;
+}
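
[Brief editorial reading note: for a listener, SCM_RIGHTS descriptors live on the not-yet-accepted children, which is why the walk above is needed:]

	/*
	 * Each skb queued on a listening socket represents an embryo
	 * connection whose sock is skb->sk; that unix_sock carries the
	 * scm_stat.nr_fds for fds already sent to the child.  Summing
	 * them under sk_receive_queue.lock keeps the fdinfo "scm_fds:"
	 * count stable against concurrent connect()/accept().
	 */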
+
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
struct sock *sk = sock->sk;
struct unix_sock *u;
+ int nr_fds;
if (sk) {
- u = unix_sk(sock->sk);
- seq_printf(m, "scm_fds: %u\n",
- atomic_read(&u->scm_stat.nr_fds));
+ u = unix_sk(sk);
+ if (sock->type == SOCK_DGRAM) {
+ nr_fds = atomic_read(&u->scm_stat.nr_fds);
+ goto out_print;
+ }
+
+ unix_state_lock(sk);
+ if (sk->sk_state != TCP_LISTEN)
+ nr_fds = atomic_read(&u->scm_stat.nr_fds);
+ else
+ nr_fds = unix_count_nr_fds(sk);
+ unix_state_unlock(sk);
+out_print:
+ seq_printf(m, "scm_fds: %u\n", nr_fds);
}
}
#else
@@ -798,7 +849,7 @@ static const struct proto_ops unix_stream_ops = {
.shutdown = unix_shutdown,
.sendmsg = unix_stream_sendmsg,
.recvmsg = unix_stream_recvmsg,
- .read_sock = unix_stream_read_sock,
+ .read_skb = unix_stream_read_skb,
.mmap = sock_no_mmap,
.sendpage = unix_stream_sendpage,
.splice_read = unix_stream_splice_read,
@@ -823,7 +874,7 @@ static const struct proto_ops unix_dgram_ops = {
.listen = sock_no_listen,
.shutdown = unix_shutdown,
.sendmsg = unix_dgram_sendmsg,
- .read_sock = unix_read_sock,
+ .read_skb = unix_read_skb,
.recvmsg = unix_dgram_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
@@ -930,9 +981,9 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
init_waitqueue_head(&u->peer_wait);
init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
memset(&u->scm_stat, 0, sizeof(struct scm_stat));
- unix_insert_unbound_socket(sk);
+ unix_insert_unbound_socket(net, sk);
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ sock_prot_inuse_add(net, sk->sk_prot, 1);
return sk;
@@ -993,8 +1044,8 @@ static int unix_release(struct socket *sock)
return 0;
}
-static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr,
- int addr_len, int type)
+static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
+ int type)
{
struct inode *inode;
struct path path;
@@ -1063,7 +1114,7 @@ static struct sock *unix_find_other(struct net *net,
struct sock *sk;
if (sunaddr->sun_path[0])
- sk = unix_find_bsd(net, sunaddr, addr_len, type);
+ sk = unix_find_bsd(sunaddr, addr_len, type);
else
sk = unix_find_abstract(net, sunaddr, addr_len, type);
@@ -1074,6 +1125,7 @@ static int unix_autobind(struct sock *sk)
{
unsigned int new_hash, old_hash = sk->sk_hash;
struct unix_sock *u = unix_sk(sk);
+ struct net *net = sock_net(sk);
struct unix_address *addr;
u32 lastnum, ordernum;
int err;
@@ -1095,18 +1147,17 @@ static int unix_autobind(struct sock *sk)
addr->name->sun_family = AF_UNIX;
refcount_set(&addr->refcnt, 1);
- ordernum = prandom_u32();
+ ordernum = get_random_u32();
lastnum = ordernum & 0xFFFFF;
retry:
ordernum = (ordernum + 1) & 0xFFFFF;
sprintf(addr->name->sun_path + 1, "%05x", ordernum);
new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
- unix_table_double_lock(old_hash, new_hash);
+ unix_table_double_lock(net, old_hash, new_hash);
- if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
- new_hash)) {
- unix_table_double_unlock(old_hash, new_hash);
+ if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
+ unix_table_double_unlock(net, old_hash, new_hash);
/* __unix_find_socket_byname() may take a long time if many names
* are already in use.
@@ -1123,8 +1174,8 @@ retry:
goto retry;
}
- __unix_set_addr_hash(sk, addr, new_hash);
- unix_table_double_unlock(old_hash, new_hash);
+ __unix_set_addr_hash(net, sk, addr, new_hash);
+ unix_table_double_unlock(net, old_hash, new_hash);
err = 0;
out: mutex_unlock(&u->bindlock);
@@ -1138,6 +1189,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
(SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
unsigned int new_hash, old_hash = sk->sk_hash;
struct unix_sock *u = unix_sk(sk);
+ struct net *net = sock_net(sk);
struct user_namespace *ns; // barf...
struct unix_address *addr;
struct dentry *dentry;
@@ -1178,11 +1230,12 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
goto out_unlock;
new_hash = unix_bsd_hash(d_backing_inode(dentry));
- unix_table_double_lock(old_hash, new_hash);
+ unix_table_double_lock(net, old_hash, new_hash);
u->path.mnt = mntget(parent.mnt);
u->path.dentry = dget(dentry);
- __unix_set_addr_hash(sk, addr, new_hash);
- unix_table_double_unlock(old_hash, new_hash);
+ __unix_set_addr_hash(net, sk, addr, new_hash);
+ unix_table_double_unlock(net, old_hash, new_hash);
+ unix_insert_bsd_socket(sk);
mutex_unlock(&u->bindlock);
done_path_create(&parent, dentry);
return 0;
@@ -1205,6 +1258,7 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
{
unsigned int new_hash, old_hash = sk->sk_hash;
struct unix_sock *u = unix_sk(sk);
+ struct net *net = sock_net(sk);
struct unix_address *addr;
int err;
@@ -1222,19 +1276,18 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
}
new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
- unix_table_double_lock(old_hash, new_hash);
+ unix_table_double_lock(net, old_hash, new_hash);
- if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
- new_hash))
+ if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
goto out_spin;
- __unix_set_addr_hash(sk, addr, new_hash);
- unix_table_double_unlock(old_hash, new_hash);
+ __unix_set_addr_hash(net, sk, addr, new_hash);
+ unix_table_double_unlock(net, old_hash, new_hash);
mutex_unlock(&u->bindlock);
return 0;
out_spin:
- unix_table_double_unlock(old_hash, new_hash);
+ unix_table_double_unlock(net, old_hash, new_hash);
err = -EADDRINUSE;
out_mutex:
mutex_unlock(&u->bindlock);
@@ -1293,9 +1346,8 @@ static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
int alen, int flags)
{
- struct sock *sk = sock->sk;
- struct net *net = sock_net(sk);
struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
+ struct sock *sk = sock->sk;
struct sock *other;
int err;
@@ -1316,7 +1368,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
}
restart:
- other = unix_find_other(net, sunaddr, alen, sock->type);
+ other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
if (IS_ERR(other)) {
err = PTR_ERR(other);
goto out;
@@ -1404,15 +1456,13 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
- struct sock *sk = sock->sk;
- struct net *net = sock_net(sk);
+ struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
struct unix_sock *u = unix_sk(sk), *newu, *otheru;
- struct sock *newsk = NULL;
- struct sock *other = NULL;
+ struct net *net = sock_net(sk);
struct sk_buff *skb = NULL;
- int st;
- int err;
long timeo;
+ int err;
+ int st;
err = unix_validate_addr(sunaddr, addr_len);
if (err)
@@ -1432,7 +1482,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
*/
/* create new sock for complete connection */
- newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
+ newsk = unix_create1(net, NULL, 0, sock->type);
if (IS_ERR(newsk)) {
err = PTR_ERR(newsk);
newsk = NULL;
@@ -1541,9 +1591,9 @@ restart:
*
* The contents of *(otheru->addr) and otheru->path
* are seen fully set up here, since we have found
- * otheru in hash under unix_table_locks. Insertion
- * into the hash chain we'd found it in had been done
- * in an earlier critical area protected by unix_table_locks,
+ * otheru in hash under its lock. Insertion into the
+ * hash chain we'd found it in had been done in an
+ * earlier critical area protected by the chain's lock,
* the same one where we'd set *(otheru->addr) contents,
* as well as otheru->path and otheru->addr itself.
*
@@ -1643,7 +1693,8 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
* so that no locks are necessary.
*/
- skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
+ skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
+ &err);
if (!skb) {
/* This means receive shutdown. */
if (err == 0)
@@ -1808,11 +1859,9 @@ static int maybe_init_creds(struct scm_cookie *scm,
static bool unix_skb_scm_eq(struct sk_buff *skb,
struct scm_cookie *scm)
{
- const struct unix_skb_parms *u = &UNIXCB(skb);
-
- return u->pid == scm->pid &&
- uid_eq(u->uid, scm->creds.uid) &&
- gid_eq(u->gid, scm->creds.gid) &&
+ return UNIXCB(skb).pid == scm->pid &&
+ uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
+ gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
unix_secdata_eq(scm, skb);
}
@@ -1841,17 +1890,15 @@ static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
- struct sock *sk = sock->sk;
- struct net *net = sock_net(sk);
- struct unix_sock *u = unix_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
- struct sock *other = NULL;
- int err;
- struct sk_buff *skb;
- long timeo;
+ struct sock *sk = sock->sk, *other = NULL;
+ struct unix_sock *u = unix_sk(sk);
struct scm_cookie scm;
+ struct sk_buff *skb;
int data_len = 0;
int sk_locked;
+ long timeo;
+ int err;
wait_for_unix_gc();
err = scm_send(sock, msg, &scm, false);
@@ -1918,7 +1965,7 @@ restart:
if (sunaddr == NULL)
goto out_free;
- other = unix_find_other(net, sunaddr, msg->msg_namelen,
+ other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
sk->sk_type);
if (IS_ERR(other)) {
err = PTR_ERR(other);
@@ -2049,7 +2096,7 @@ out:
*/
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
-#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other)
{
struct unix_sock *ousk = unix_sk(other);
@@ -2084,7 +2131,7 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other
if (ousk->oob_skb)
consume_skb(ousk->oob_skb);
- ousk->oob_skb = skb;
+ WRITE_ONCE(ousk->oob_skb, skb);
scm_stat_add(other, skb);
skb_queue_tail(&other->sk_receive_queue, skb);
@@ -2115,7 +2162,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
err = -EOPNOTSUPP;
if (msg->msg_flags & MSG_OOB) {
-#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
if (len)
len--;
else
@@ -2186,7 +2233,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
sent += size;
}
-#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
if (msg->msg_flags & MSG_OOB) {
err = queue_oob(sock, msg, other);
if (err)
@@ -2483,42 +2530,25 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si
const struct proto *prot = READ_ONCE(sk->sk_prot);
if (prot != &unix_dgram_proto)
- return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
- flags & ~MSG_DONTWAIT, NULL);
+ return prot->recvmsg(sk, msg, size, flags, NULL);
#endif
return __unix_dgram_recvmsg(sk, msg, size, flags);
}
-static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
- sk_read_actor_t recv_actor)
+static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
- int copied = 0;
-
- while (1) {
- struct unix_sock *u = unix_sk(sk);
- struct sk_buff *skb;
- int used, err;
-
- mutex_lock(&u->iolock);
- skb = skb_recv_datagram(sk, 0, 1, &err);
- mutex_unlock(&u->iolock);
- if (!skb)
- return err;
+ struct unix_sock *u = unix_sk(sk);
+ struct sk_buff *skb;
+ int err, copied;
- used = recv_actor(desc, skb, 0, skb->len);
- if (used <= 0) {
- if (!copied)
- copied = used;
- kfree_skb(skb);
- break;
- } else if (used <= skb->len) {
- copied += used;
- }
+ mutex_lock(&u->iolock);
+ skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
+ mutex_unlock(&u->iolock);
+ if (!skb)
+ return err;
- kfree_skb(skb);
- if (!desc->count)
- break;
- }
+ copied = recv_actor(sk, skb);
+ kfree_skb(skb);
return copied;
}
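
[For orientation, a hedged note on the hook shape this conversion appears to target; the descriptor-driven ->read_sock() loop is replaced by a per-skb callback, so looping moves to the caller:]

	/*
	 * typedef int (*skb_read_actor_t)(struct sock *sk, struct sk_buff *skb);
	 * int (*read_skb)(struct sock *sk, skb_read_actor_t recv_actor);
	 *
	 * unix_read_skb() dequeues exactly one datagram, hands it to the
	 * actor, frees it, and returns the actor's byte count; callers
	 * such as sk_psock decide whether to call again.
	 */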
@@ -2530,13 +2560,14 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
struct sk_buff *last, unsigned int last_len,
bool freezable)
{
+ unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
struct sk_buff *tail;
DEFINE_WAIT(wait);
unix_state_lock(sk);
for (;;) {
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ prepare_to_wait(sk_sleep(sk), &wait, state);
tail = skb_peek_tail(&sk->sk_receive_queue);
if (tail != last ||
@@ -2549,10 +2580,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
unix_state_unlock(sk);
- if (freezable)
- timeo = freezable_schedule_timeout(timeo);
- else
- timeo = schedule_timeout(timeo);
+ timeo = schedule_timeout(timeo);
unix_state_lock(sk);
if (sock_flag(sk, SOCK_DEAD))
@@ -2602,9 +2630,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
oob_skb = u->oob_skb;
- if (!(state->flags & MSG_PEEK)) {
- u->oob_skb = NULL;
- }
+ if (!(state->flags & MSG_PEEK))
+ WRITE_ONCE(u->oob_skb, NULL);
unix_state_unlock(sk);
@@ -2639,7 +2666,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
skb = NULL;
} else if (sock_flag(sk, SOCK_URGINLINE)) {
if (!(flags & MSG_PEEK)) {
- u->oob_skb = NULL;
+ WRITE_ONCE(u->oob_skb, NULL);
consume_skb(skb);
}
} else if (!(flags & MSG_PEEK)) {
@@ -2653,13 +2680,12 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
}
#endif
-static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
- sk_read_actor_t recv_actor)
+static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
if (unlikely(sk->sk_state != TCP_ESTABLISHED))
return -ENOTCONN;
- return unix_read_sock(sk, desc, recv_actor);
+ return unix_read_skb(sk, recv_actor);
}
static int unix_stream_read_generic(struct unix_stream_read_state *state,
@@ -2917,8 +2943,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
const struct proto *prot = READ_ONCE(sk->sk_prot);
if (prot != &unix_stream_proto)
- return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
- flags & ~MSG_DONTWAIT, NULL);
+ return prot->recvmsg(sk, msg, size, flags, NULL);
#endif
return unix_stream_read_generic(&state, true);
}
@@ -3094,11 +3119,10 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCATMARK:
{
struct sk_buff *skb;
- struct unix_sock *u = unix_sk(sk);
int answ = 0;
skb = skb_peek(&sk->sk_receive_queue);
- if (skb && skb == u->oob_skb)
+ if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
answ = 1;
err = put_user(answ, (int __user *)arg);
}
@@ -3139,6 +3163,10 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
mask |= EPOLLIN | EPOLLRDNORM;
if (sk_is_readable(sk))
mask |= EPOLLIN | EPOLLRDNORM;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (READ_ONCE(unix_sk(sk)->oob_skb))
+ mask |= EPOLLPRI;
+#endif
/* Connection-based need to check for termination and startup */
if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
@@ -3227,12 +3255,11 @@ static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
unsigned long offset = get_offset(*pos);
unsigned long bucket = get_bucket(*pos);
- struct sock *sk;
unsigned long count = 0;
+ struct sock *sk;
- for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
- if (sock_net(sk) != seq_file_net(seq))
- continue;
+ for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
+ sk; sk = sk_next(sk)) {
if (++count == offset)
break;
}
@@ -3240,49 +3267,60 @@ static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
return sk;
}
-static struct sock *unix_next_socket(struct seq_file *seq,
- struct sock *sk,
- loff_t *pos)
+static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
{
unsigned long bucket = get_bucket(*pos);
+ struct net *net = seq_file_net(seq);
+ struct sock *sk;
- while (sk > (struct sock *)SEQ_START_TOKEN) {
- sk = sk_next(sk);
- if (!sk)
- goto next_bucket;
- if (sock_net(sk) == seq_file_net(seq))
- return sk;
- }
+ while (bucket < UNIX_HASH_SIZE) {
+ spin_lock(&net->unx.table.locks[bucket]);
- do {
- spin_lock(&unix_table_locks[bucket]);
sk = unix_from_bucket(seq, pos);
if (sk)
return sk;
-next_bucket:
- spin_unlock(&unix_table_locks[bucket++]);
- *pos = set_bucket_offset(bucket, 1);
- } while (bucket < ARRAY_SIZE(unix_socket_table));
+ spin_unlock(&net->unx.table.locks[bucket]);
+
+ *pos = set_bucket_offset(++bucket, 1);
+ }
return NULL;
}
+static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
+ loff_t *pos)
+{
+ unsigned long bucket = get_bucket(*pos);
+
+ sk = sk_next(sk);
+ if (sk)
+ return sk;
+
+ spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
+
+ *pos = set_bucket_offset(++bucket, 1);
+
+ return unix_get_first(seq, pos);
+}
+
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
{
if (!*pos)
return SEQ_START_TOKEN;
- if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
- return NULL;
-
- return unix_next_socket(seq, NULL, pos);
+ return unix_get_first(seq, pos);
}
static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
++*pos;
- return unix_next_socket(seq, v, pos);
+
+ if (v == SEQ_START_TOKEN)
+ return unix_get_first(seq, pos);
+
+ return unix_get_next(seq, v, pos);
}
static void unix_seq_stop(struct seq_file *seq, void *v)
@@ -3290,7 +3328,7 @@ static void unix_seq_stop(struct seq_file *seq, void *v)
struct sock *sk = v;
if (sk)
- spin_unlock(&unix_table_locks[sk->sk_hash]);
+ spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
}
static int unix_seq_show(struct seq_file *seq, void *v)
@@ -3315,7 +3353,7 @@ static int unix_seq_show(struct seq_file *seq, void *v)
(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
sock_i_ino(s));
- if (u->addr) { // under unix_table_locks here
+ if (u->addr) { // under a hash table lock here
int i, len;
seq_putc(seq, ' ');
@@ -3347,6 +3385,15 @@ static const struct seq_operations unix_seq_ops = {
};
#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_unix_iter_state {
+ struct seq_net_private p;
+ unsigned int cur_sk;
+ unsigned int end_sk;
+ unsigned int max_sk;
+ struct sock **batch;
+ bool st_bucket_done;
+};
+
struct bpf_iter__unix {
__bpf_md_ptr(struct bpf_iter_meta *, meta);
__bpf_md_ptr(struct unix_sock *, unix_sk);
@@ -3365,24 +3412,153 @@ static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
return bpf_iter_run_prog(prog, &ctx);
}
+static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
+{
+ struct bpf_unix_iter_state *iter = seq->private;
+ unsigned int expected = 1;
+ struct sock *sk;
+
+ sock_hold(start_sk);
+ iter->batch[iter->end_sk++] = start_sk;
+
+ for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
+ if (iter->end_sk < iter->max_sk) {
+ sock_hold(sk);
+ iter->batch[iter->end_sk++] = sk;
+ }
+
+ expected++;
+ }
+
+ spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
+
+ return expected;
+}
+
+static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
+{
+ while (iter->cur_sk < iter->end_sk)
+ sock_put(iter->batch[iter->cur_sk++]);
+}
+
+static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
+ unsigned int new_batch_sz)
+{
+ struct sock **new_batch;
+
+ new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
+ GFP_USER | __GFP_NOWARN);
+ if (!new_batch)
+ return -ENOMEM;
+
+ bpf_iter_unix_put_batch(iter);
+ kvfree(iter->batch);
+ iter->batch = new_batch;
+ iter->max_sk = new_batch_sz;
+
+ return 0;
+}
+
+static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
+ loff_t *pos)
+{
+ struct bpf_unix_iter_state *iter = seq->private;
+ unsigned int expected;
+ bool resized = false;
+ struct sock *sk;
+
+ if (iter->st_bucket_done)
+ *pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
+
+again:
+ /* Get a new batch */
+ iter->cur_sk = 0;
+ iter->end_sk = 0;
+
+ sk = unix_get_first(seq, pos);
+ if (!sk)
+ return NULL; /* Done */
+
+ expected = bpf_iter_unix_hold_batch(seq, sk);
+
+ if (iter->end_sk == expected) {
+ iter->st_bucket_done = true;
+ return sk;
+ }
+
+ if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
+ resized = true;
+ goto again;
+ }
+
+ return sk;
+}
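
[A worked pass through the resize-and-retry above, editorial with made-up bucket sizes:]

	/*
	 * INIT_BATCH_SZ == 16.  Suppose bucket B holds 40 sockets:
	 *   pass 1: end_sk == 16, expected == 40 -> batch too small,
	 *           realloc to 40 * 3 / 2 == 60 entries, resized = true
	 *   pass 2: end_sk == 40 == expected     -> st_bucket_done = true
	 * If sockets were added between the passes, the second shortfall
	 * is accepted as a partial batch rather than looping again; each
	 * held socket is sock_put() as seq_next()/stop() drain the batch.
	 */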
+
+static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ if (!*pos)
+ return SEQ_START_TOKEN;
+
+ /* bpf iter does not support lseek, so it always
+ * continues from where it was stop()-ped.
+ */
+ return bpf_iter_unix_batch(seq, pos);
+}
+
+static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bpf_unix_iter_state *iter = seq->private;
+ struct sock *sk;
+
+ /* Whenever seq_next() is called, the sk at iter->cur_sk is
+ * done with seq_show(), so advance to the next sk in
+ * the batch.
+ */
+ if (iter->cur_sk < iter->end_sk)
+ sock_put(iter->batch[iter->cur_sk++]);
+
+ ++*pos;
+
+ if (iter->cur_sk < iter->end_sk)
+ sk = iter->batch[iter->cur_sk];
+ else
+ sk = bpf_iter_unix_batch(seq, pos);
+
+ return sk;
+}
+
static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
{
struct bpf_iter_meta meta;
struct bpf_prog *prog;
struct sock *sk = v;
uid_t uid;
+ bool slow;
+ int ret;
if (v == SEQ_START_TOKEN)
return 0;
+ slow = lock_sock_fast(sk);
+
+ if (unlikely(sk_unhashed(sk))) {
+ ret = SEQ_SKIP;
+ goto unlock;
+ }
+
uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
meta.seq = seq;
prog = bpf_iter_get_info(&meta, false);
- return unix_prog_seq_show(prog, &meta, v, uid);
+ ret = unix_prog_seq_show(prog, &meta, v, uid);
+unlock:
+ unlock_sock_fast(sk, slow);
+ return ret;
}
static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
{
+ struct bpf_unix_iter_state *iter = seq->private;
struct bpf_iter_meta meta;
struct bpf_prog *prog;
@@ -3393,12 +3569,13 @@ static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
(void)unix_prog_seq_show(prog, &meta, v, 0);
}
- unix_seq_stop(seq, v);
+ if (iter->cur_sk < iter->end_sk)
+ bpf_iter_unix_put_batch(iter);
}
static const struct seq_operations bpf_iter_unix_seq_ops = {
- .start = unix_seq_start,
- .next = unix_seq_next,
+ .start = bpf_iter_unix_seq_start,
+ .next = bpf_iter_unix_seq_next,
.stop = bpf_iter_unix_seq_stop,
.show = bpf_iter_unix_seq_show,
};
@@ -3414,7 +3591,7 @@ static const struct net_proto_family unix_family_ops = {
static int __net_init unix_net_init(struct net *net)
{
- int error = -ENOMEM;
+ int i;
net->unx.sysctl_max_dgram_qlen = 10;
if (unix_sysctl_register(net))
@@ -3422,18 +3599,44 @@ static int __net_init unix_net_init(struct net *net)
#ifdef CONFIG_PROC_FS
if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
- sizeof(struct seq_net_private))) {
- unix_sysctl_unregister(net);
- goto out;
+ sizeof(struct seq_net_private)))
+ goto err_sysctl;
+#endif
+
+ net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
+ sizeof(spinlock_t), GFP_KERNEL);
+ if (!net->unx.table.locks)
+ goto err_proc;
+
+ net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!net->unx.table.buckets)
+ goto free_locks;
+
+ for (i = 0; i < UNIX_HASH_SIZE; i++) {
+ spin_lock_init(&net->unx.table.locks[i]);
+ INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
}
+
+ return 0;
+
+free_locks:
+ kvfree(net->unx.table.locks);
+err_proc:
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("unix", net->proc_net);
+err_sysctl:
#endif
- error = 0;
+ unix_sysctl_unregister(net);
out:
- return error;
+ return -ENOMEM;
}
static void __net_exit unix_net_exit(struct net *net)
{
+ kvfree(net->unx.table.buckets);
+ kvfree(net->unx.table.locks);
unix_sysctl_unregister(net);
remove_proc_entry("unix", net->proc_net);
}
@@ -3447,13 +3650,55 @@ static struct pernet_operations unix_net_ops = {
DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
struct unix_sock *unix_sk, uid_t uid)
+#define INIT_BATCH_SZ 16
+
+static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
+{
+ struct bpf_unix_iter_state *iter = priv_data;
+ int err;
+
+ err = bpf_iter_init_seq_net(priv_data, aux);
+ if (err)
+ return err;
+
+ err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
+ if (err) {
+ bpf_iter_fini_seq_net(priv_data);
+ return err;
+ }
+
+ return 0;
+}
+
+static void bpf_iter_fini_unix(void *priv_data)
+{
+ struct bpf_unix_iter_state *iter = priv_data;
+
+ bpf_iter_fini_seq_net(priv_data);
+ kvfree(iter->batch);
+}
+
static const struct bpf_iter_seq_info unix_seq_info = {
.seq_ops = &bpf_iter_unix_seq_ops,
- .init_seq_private = bpf_iter_init_seq_net,
- .fini_seq_private = bpf_iter_fini_seq_net,
- .seq_priv_size = sizeof(struct seq_net_private),
+ .init_seq_private = bpf_iter_init_unix,
+ .fini_seq_private = bpf_iter_fini_unix,
+ .seq_priv_size = sizeof(struct bpf_unix_iter_state),
};
+static const struct bpf_func_proto *
+bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
+ const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_setsockopt:
+ return &bpf_sk_setsockopt_proto;
+ case BPF_FUNC_getsockopt:
+ return &bpf_sk_getsockopt_proto;
+ default:
+ return NULL;
+ }
+}
+
static struct bpf_iter_reg unix_reg_info = {
.target = "unix",
.ctx_arg_info_size = 1,
@@ -3461,6 +3706,7 @@ static struct bpf_iter_reg unix_reg_info = {
{ offsetof(struct bpf_iter__unix, unix_sk),
PTR_TO_BTF_ID_OR_NULL },
},
+ .get_func_proto = bpf_iter_unix_get_func_proto,
.seq_info = &unix_seq_info,
};
@@ -3478,8 +3724,10 @@ static int __init af_unix_init(void)
BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
- for (i = 0; i < 2 * UNIX_HASH_SIZE; i++)
- spin_lock_init(&unix_table_locks[i]);
+ for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
+ spin_lock_init(&bsd_socket_locks[i]);
+ INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
+ }
rc = proto_register(&unix_dgram_proto, 1);
if (rc != 0) {
diff --git a/net/unix/diag.c b/net/unix/diag.c
index bb0b5ea1655f..105f522a89fe 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -13,7 +13,7 @@
static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb)
{
- /* might or might not have unix_table_locks */
+ /* might or might not have a hash table lock */
struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
if (!addr)
@@ -195,25 +195,21 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct unix_diag_req *req;
- int num, s_num, slot, s_slot;
struct net *net = sock_net(skb->sk);
+ int num, s_num, slot, s_slot;
+ struct unix_diag_req *req;
req = nlmsg_data(cb->nlh);
s_slot = cb->args[0];
num = s_num = cb->args[1];
- for (slot = s_slot;
- slot < ARRAY_SIZE(unix_socket_table);
- s_num = 0, slot++) {
+ for (slot = s_slot; slot < UNIX_HASH_SIZE; s_num = 0, slot++) {
struct sock *sk;
num = 0;
- spin_lock(&unix_table_locks[slot]);
- sk_for_each(sk, &unix_socket_table[slot]) {
- if (!net_eq(sock_net(sk), net))
- continue;
+ spin_lock(&net->unx.table.locks[slot]);
+ sk_for_each(sk, &net->unx.table.buckets[slot]) {
if (num < s_num)
goto next;
if (!(req->udiag_states & (1 << sk->sk_state)))
@@ -222,13 +218,13 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI) < 0) {
- spin_unlock(&unix_table_locks[slot]);
+ spin_unlock(&net->unx.table.locks[slot]);
goto done;
}
next:
num++;
}
- spin_unlock(&unix_table_locks[slot]);
+ spin_unlock(&net->unx.table.locks[slot]);
}
done:
cb->args[0] = slot;
@@ -237,20 +233,21 @@ done:
return skb->len;
}
-static struct sock *unix_lookup_by_ino(unsigned int ino)
+static struct sock *unix_lookup_by_ino(struct net *net, unsigned int ino)
{
struct sock *sk;
int i;
- for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) {
- spin_lock(&unix_table_locks[i]);
- sk_for_each(sk, &unix_socket_table[i])
+ for (i = 0; i < UNIX_HASH_SIZE; i++) {
+ spin_lock(&net->unx.table.locks[i]);
+ sk_for_each(sk, &net->unx.table.buckets[i]) {
if (ino == sock_i_ino(sk)) {
sock_hold(sk);
- spin_unlock(&unix_table_locks[i]);
+ spin_unlock(&net->unx.table.locks[i]);
return sk;
}
- spin_unlock(&unix_table_locks[i]);
+ }
+ spin_unlock(&net->unx.table.locks[i]);
}
return NULL;
}
@@ -259,21 +256,20 @@ static int unix_diag_get_exact(struct sk_buff *in_skb,
const struct nlmsghdr *nlh,
struct unix_diag_req *req)
{
- int err = -EINVAL;
- struct sock *sk;
- struct sk_buff *rep;
- unsigned int extra_len;
struct net *net = sock_net(in_skb->sk);
+ unsigned int extra_len;
+ struct sk_buff *rep;
+ struct sock *sk;
+ int err;
+ err = -EINVAL;
if (req->udiag_ino == 0)
goto out_nosk;
- sk = unix_lookup_by_ino(req->udiag_ino);
+ sk = unix_lookup_by_ino(net, req->udiag_ino);
err = -ENOENT;
if (sk == NULL)
goto out_nosk;
- if (!net_eq(sock_net(sk), net))
- goto out;
err = sock_diag_check_cookie(sk, req->udiag_cookie);
if (err)
@@ -308,7 +304,6 @@ out_nosk:
static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
{
int hdrlen = sizeof(struct unix_diag_req);
- struct net *net = sock_net(skb->sk);
if (nlmsg_len(h) < hdrlen)
return -EINVAL;
@@ -317,7 +312,7 @@ static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
struct netlink_dump_control c = {
.dump = unix_diag_dump,
};
- return netlink_dump_start(net->diag_nlsk, skb, h, &c);
+ return netlink_dump_start(sock_net(skb->sk)->diag_nlsk, skb, h, &c);
} else
return unix_diag_get_exact(skb, h, nlmsg_data(h));
}
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 12e2ddaf887f..dc2763540393 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -192,8 +192,11 @@ void wait_for_unix_gc(void)
{
/* If number of inflight sockets is insane,
* force a garbage collect right now.
+	 * Paired with the WRITE_ONCE() in unix_inflight(),
+	 * unix_notinflight() and unix_gc().
*/
- if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
+ if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
+ !READ_ONCE(gc_in_progress))
unix_gc();
wait_event(unix_gc_wait, gc_in_progress == false);
}
@@ -201,6 +204,7 @@ void wait_for_unix_gc(void)
/* The external entry point: unix_gc() */
void unix_gc(void)
{
+ struct sk_buff *next_skb, *skb;
struct unix_sock *u;
struct unix_sock *next;
struct sk_buff_head hitlist;
@@ -213,7 +217,9 @@ void unix_gc(void)
if (gc_in_progress)
goto out;
- gc_in_progress = true;
+ /* Paired with READ_ONCE() in wait_for_unix_gc(). */
+ WRITE_ONCE(gc_in_progress, true);
+
/* First, select candidates for garbage collection. Only
* in-flight sockets are considered, and from those only ones
* which don't have any external reference.
@@ -292,14 +298,36 @@ void unix_gc(void)
spin_unlock(&unix_gc_lock);
+	/* We need io_uring to clean up its registered files, so ignore all
+	 * io_uring-originated skbs. This is fine because io_uring doesn't keep
+	 * references to other io_uring instances, so killing all other files
+	 * in the cycle puts all io_uring references, forcing it through the
+	 * normal release path and eventually putting the registered files.
+	 */
+ skb_queue_walk_safe(&hitlist, skb, next_skb) {
+ if (skb->scm_io_uring) {
+ __skb_unlink(skb, &hitlist);
+ skb_queue_tail(&skb->sk->sk_receive_queue, skb);
+ }
+ }
+
/* Here we are. Hitlist is filled. Die. */
__skb_queue_purge(&hitlist);
spin_lock(&unix_gc_lock);
+	/* There could be io_uring-registered files left; just push them back
+	 * to the inflight list.
+	 */
+ list_for_each_entry_safe(u, next, &gc_candidates, link)
+ list_move_tail(&u->link, &gc_inflight_list);
+
/* All candidates should have been detached by now. */
BUG_ON(!list_empty(&gc_candidates));
- gc_in_progress = false;
+
+ /* Paired with READ_ONCE() in wait_for_unix_gc(). */
+ WRITE_ONCE(gc_in_progress, false);
+
wake_up(&unix_gc_wait);
out:
diff --git a/net/unix/scm.c b/net/unix/scm.c
index 052ae709ce28..aa27a02478dc 100644
--- a/net/unix/scm.c
+++ b/net/unix/scm.c
@@ -60,7 +60,8 @@ void unix_inflight(struct user_struct *user, struct file *fp)
} else {
BUG_ON(list_empty(&u->link));
}
- unix_tot_inflight++;
+ /* Paired with READ_ONCE() in wait_for_unix_gc() */
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
}
user->unix_inflight++;
spin_unlock(&unix_gc_lock);
@@ -80,7 +81,8 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
if (atomic_long_dec_and_test(&u->inflight))
list_del_init(&u->link);
- unix_tot_inflight--;
+ /* Paired with READ_ONCE() in wait_for_unix_gc() */
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
}
user->unix_inflight--;
spin_unlock(&unix_gc_lock);
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
index 01d44e2598e2..500129aa710c 100644
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -26,11 +26,16 @@ int __net_init unix_sysctl_register(struct net *net)
{
struct ctl_table *table;
- table = kmemdup(unix_table, sizeof(unix_table), GFP_KERNEL);
- if (table == NULL)
- goto err_alloc;
+ if (net_eq(net, &init_net)) {
+ table = unix_table;
+ } else {
+ table = kmemdup(unix_table, sizeof(unix_table), GFP_KERNEL);
+ if (!table)
+ goto err_alloc;
+
+ table[0].data = &net->unx.sysctl_max_dgram_qlen;
+ }
- table[0].data = &net->unx.sysctl_max_dgram_qlen;
net->unx.ctl = register_net_sysctl(net, "net/unix", table);
if (net->unx.ctl == NULL)
goto err_reg;
@@ -38,7 +43,8 @@ int __net_init unix_sysctl_register(struct net *net)
return 0;
err_reg:
- kfree(table);
+ if (!net_eq(net, &init_net))
+ kfree(table);
err_alloc:
return -ENOMEM;
}
@@ -49,5 +55,6 @@ void unix_sysctl_unregister(struct net *net)
table = net->unx.ctl->ctl_table_arg;
unregister_net_sysctl_table(net->unx.ctl);
- kfree(table);
+ if (!net_eq(net, &init_net))
+ kfree(table);
}
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
index 452376c6f419..e9bf15513961 100644
--- a/net/unix/unix_bpf.c
+++ b/net/unix/unix_bpf.c
@@ -48,8 +48,7 @@ static int __unix_recvmsg(struct sock *sk, struct msghdr *msg,
}
static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
- size_t len, int nonblock, int flags,
- int *addr_len)
+ size_t len, int flags, int *addr_len)
{
struct unix_sock *u = unix_sk(sk);
struct sk_psock *psock;
@@ -73,7 +72,7 @@ msg_bytes_ready:
long timeo;
int data;
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
data = unix_msg_wait_data(sk, psock, timeo);
if (data) {
if (!sk_psock_queue_empty(psock))
@@ -146,12 +145,12 @@ int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool re
if (restore) {
sk->sk_write_space = psock->saved_write_space;
- WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+ sock_replace_proto(sk, psock->sk_proto);
return 0;
}
unix_dgram_bpf_check_needs_rebuild(psock->sk_proto);
- WRITE_ONCE(sk->sk_prot, &unix_dgram_bpf_prot);
+ sock_replace_proto(sk, &unix_dgram_bpf_prot);
return 0;
}
@@ -159,12 +158,12 @@ int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool r
{
if (restore) {
sk->sk_write_space = psock->saved_write_space;
- WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+ sock_replace_proto(sk, psock->sk_proto);
return 0;
}
unix_stream_bpf_check_needs_rebuild(psock->sk_proto);
- WRITE_ONCE(sk->sk_prot, &unix_stream_bpf_prot);
+ sock_replace_proto(sk, &unix_stream_bpf_prot);
return 0;
}
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 3235261f138d..884eca7f6743 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -334,7 +334,8 @@ void vsock_remove_sock(struct vsock_sock *vsk)
}
EXPORT_SYMBOL_GPL(vsock_remove_sock);
-void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
+void vsock_for_each_connected_socket(struct vsock_transport *transport,
+ void (*fn)(struct sock *sk))
{
int i;
@@ -343,8 +344,12 @@ void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) {
struct vsock_sock *vsk;
list_for_each_entry(vsk, &vsock_connected_table[i],
- connected_table)
+ connected_table) {
+ if (vsk->transport != transport)
+ continue;
+
fn(sk_vsock(vsk));
+ }
}
spin_unlock_bh(&vsock_table_lock);
@@ -877,6 +882,16 @@ s64 vsock_stream_has_space(struct vsock_sock *vsk)
}
EXPORT_SYMBOL_GPL(vsock_stream_has_space);
+void vsock_data_ready(struct sock *sk)
+{
+ struct vsock_sock *vsk = vsock_sk(sk);
+
+ if (vsock_stream_has_data(vsk) >= sk->sk_rcvlowat ||
+ sock_flag(sk, SOCK_DONE))
+ sk->sk_data_ready(sk);
+}
+EXPORT_SYMBOL_GPL(vsock_data_ready);
+
static int vsock_release(struct socket *sock)
{
__vsock_release(sock->sk, 0);
@@ -1061,8 +1076,9 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
if (transport && transport->stream_is_active(vsk) &&
!(sk->sk_shutdown & RCV_SHUTDOWN)) {
bool data_ready_now = false;
+ int target = sock_rcvlowat(sk, 0, INT_MAX);
int ret = transport->notify_poll_in(
- vsk, 1, &data_ready_now);
+ vsk, target, &data_ready_now);
if (ret < 0) {
mask |= EPOLLERR;
} else {
@@ -1281,6 +1297,7 @@ static void vsock_connect_timeout(struct work_struct *work)
if (sk->sk_state == TCP_SYN_SENT &&
(sk->sk_shutdown != SHUTDOWN_MASK)) {
sk->sk_state = TCP_CLOSE;
+ sk->sk_socket->state = SS_UNCONNECTED;
sk->sk_err = ETIMEDOUT;
sk_error_report(sk);
vsock_transport_cancel_pkt(vsk);
@@ -1386,7 +1403,14 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
* timeout fires.
*/
sock_hold(sk);
- schedule_delayed_work(&vsk->connect_work, timeout);
+
+			/* If the timeout function is already scheduled,
+			 * reschedule it, then drop the extra socket reference
+			 * to keep the refcount balanced.
+			 */
+ if (mod_delayed_work(system_wq, &vsk->connect_work,
+ timeout))
+ sock_put(sk);
/* Skip ahead to preserve error code set above. */
goto out_wait;
@@ -1401,6 +1425,7 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? TCP_CLOSING : TCP_CLOSE;
sock->state = SS_UNCONNECTED;
vsock_transport_cancel_pkt(vsk);
+ vsock_remove_connected(vsk);
goto out_wait;
} else if (timeout == 0) {
err = -ETIMEDOUT;
@@ -1880,8 +1905,11 @@ static int vsock_connectible_wait_data(struct sock *sk,
err = 0;
transport = vsk->transport;
- while ((data = vsock_connectible_has_data(vsk)) == 0) {
+ while (1) {
prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE);
+ data = vsock_connectible_has_data(vsk);
+ if (data != 0)
+ break;
if (sk->sk_err != 0 ||
(sk->sk_shutdown & RCV_SHUTDOWN) ||
@@ -2067,8 +2095,6 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
const struct vsock_transport *transport;
int err;
- DEFINE_WAIT(wait);
-
sk = sock->sk;
vsk = vsock_sk(sk);
err = 0;
@@ -2123,6 +2149,25 @@ out:
return err;
}
+static int vsock_set_rcvlowat(struct sock *sk, int val)
+{
+ const struct vsock_transport *transport;
+ struct vsock_sock *vsk;
+
+ vsk = vsock_sk(sk);
+
+ if (val > vsk->buffer_size)
+ return -EINVAL;
+
+ transport = vsk->transport;
+
+ if (transport && transport->set_rcvlowat)
+ return transport->set_rcvlowat(vsk, val);
+
+ WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
+ return 0;
+}
+
static const struct proto_ops vsock_stream_ops = {
.family = PF_VSOCK,
.owner = THIS_MODULE,
@@ -2142,6 +2187,7 @@ static const struct proto_ops vsock_stream_ops = {
.recvmsg = vsock_connectible_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
+ .set_rcvlowat = vsock_set_rcvlowat,
};
static const struct proto_ops vsock_seqpacket_ops = {
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index e111e13b6660..59c3e2697069 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -78,6 +78,9 @@ struct hvs_send_buf {
ALIGN((payload_len), 8) + \
VMBUS_PKT_TRAILER_SIZE)
+/* Upper bound on the size of a VMbus packet for hv_sock */
+#define HVS_MAX_PKT_SIZE HVS_PKT_LEN(HVS_MTU_SIZE)
+
union hvs_service_id {
guid_t srv_id;
@@ -378,6 +381,8 @@ static void hvs_open_connection(struct vmbus_channel *chan)
rcvbuf = ALIGN(rcvbuf, HV_HYP_PAGE_SIZE);
}
+ chan->max_pkt_size = HVS_MAX_PKT_SIZE;
+
ret = vmbus_open(chan, sndbuf, rcvbuf, NULL, 0, hvs_channel_cb,
conn_from_host ? new : sk);
if (ret != 0) {
@@ -572,12 +577,18 @@ static bool hvs_dgram_allow(u32 cid, u32 port)
static int hvs_update_recv_data(struct hvsock *hvs)
{
struct hvs_recv_buf *recv_buf;
- u32 payload_len;
+ u32 pkt_len, payload_len;
+
+ pkt_len = hv_pkt_len(hvs->recv_desc);
+
+ if (pkt_len < HVS_HEADER_LEN)
+ return -EIO;
recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
payload_len = recv_buf->hdr.data_size;
- if (payload_len > HVS_MTU_SIZE)
+ if (payload_len > pkt_len - HVS_HEADER_LEN ||
+ payload_len > HVS_MTU_SIZE)
return -EIO;
if (payload_len == 0)
@@ -602,7 +613,9 @@ static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
return -EOPNOTSUPP;
if (need_refill) {
- hvs->recv_desc = hv_pkt_iter_first_raw(hvs->chan);
+ hvs->recv_desc = hv_pkt_iter_first(hvs->chan);
+ if (!hvs->recv_desc)
+ return -ENOBUFS;
ret = hvs_update_recv_data(hvs);
if (ret)
return ret;
@@ -616,7 +629,7 @@ static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
hvs->recv_data_len -= to_read;
if (hvs->recv_data_len == 0) {
- hvs->recv_desc = hv_pkt_iter_next_raw(hvs->chan, hvs->recv_desc);
+ hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc);
if (hvs->recv_desc) {
ret = hvs_update_recv_data(hvs);
if (ret)
@@ -802,6 +815,12 @@ int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written,
return 0;
}
+static int hvs_set_rcvlowat(struct vsock_sock *vsk, int val)
+{
+ return -EOPNOTSUPP;
+}
+
static struct vsock_transport hvs_transport = {
.module = THIS_MODULE,
@@ -837,6 +856,7 @@ static struct vsock_transport hvs_transport = {
.notify_send_pre_enqueue = hvs_notify_send_pre_enqueue,
.notify_send_post_enqueue = hvs_notify_send_post_enqueue,
+ .set_rcvlowat = hvs_set_rcvlowat
};
static bool hvs_check_transport(struct vsock_sock *vsk)
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 4f7c99dfd16c..ad64f403536a 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -24,6 +24,7 @@
static struct workqueue_struct *virtio_vsock_workqueue;
static struct virtio_vsock __rcu *the_virtio_vsock;
static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */
+static struct virtio_transport virtio_transport; /* forward declaration */
struct virtio_vsock {
struct virtio_device *vdev;
@@ -384,7 +385,8 @@ static void virtio_vsock_event_handle(struct virtio_vsock *vsock,
switch (le32_to_cpu(event->id)) {
case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET:
virtio_vsock_update_guest_cid(vsock);
- vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+ vsock_for_each_connected_socket(&virtio_transport.transport,
+ virtio_vsock_reset_sock);
break;
}
}
@@ -564,61 +566,29 @@ out:
mutex_unlock(&vsock->rx_lock);
}
-static int virtio_vsock_probe(struct virtio_device *vdev)
+static int virtio_vsock_vqs_init(struct virtio_vsock *vsock)
{
- vq_callback_t *callbacks[] = {
- virtio_vsock_rx_done,
- virtio_vsock_tx_done,
- virtio_vsock_event_done,
- };
+ struct virtio_device *vdev = vsock->vdev;
static const char * const names[] = {
"rx",
"tx",
"event",
};
- struct virtio_vsock *vsock = NULL;
+ vq_callback_t *callbacks[] = {
+ virtio_vsock_rx_done,
+ virtio_vsock_tx_done,
+ virtio_vsock_event_done,
+ };
int ret;
- ret = mutex_lock_interruptible(&the_virtio_vsock_mutex);
- if (ret)
- return ret;
-
- /* Only one virtio-vsock device per guest is supported */
- if (rcu_dereference_protected(the_virtio_vsock,
- lockdep_is_held(&the_virtio_vsock_mutex))) {
- ret = -EBUSY;
- goto out;
- }
-
- vsock = kzalloc(sizeof(*vsock), GFP_KERNEL);
- if (!vsock) {
- ret = -ENOMEM;
- goto out;
- }
-
- vsock->vdev = vdev;
-
- ret = virtio_find_vqs(vsock->vdev, VSOCK_VQ_MAX,
- vsock->vqs, callbacks, names,
+ ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, callbacks, names,
NULL);
if (ret < 0)
- goto out;
+ return ret;
virtio_vsock_update_guest_cid(vsock);
- vsock->rx_buf_nr = 0;
- vsock->rx_buf_max_nr = 0;
- atomic_set(&vsock->queued_replies, 0);
-
- mutex_init(&vsock->tx_lock);
- mutex_init(&vsock->rx_lock);
- mutex_init(&vsock->event_lock);
- spin_lock_init(&vsock->send_pkt_list_lock);
- INIT_LIST_HEAD(&vsock->send_pkt_list);
- INIT_WORK(&vsock->rx_work, virtio_transport_rx_work);
- INIT_WORK(&vsock->tx_work, virtio_transport_tx_work);
- INIT_WORK(&vsock->event_work, virtio_transport_event_work);
- INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work);
+ virtio_device_ready(vdev);
mutex_lock(&vsock->tx_lock);
vsock->tx_run = true;
@@ -634,38 +604,20 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
vsock->event_run = true;
mutex_unlock(&vsock->event_lock);
- if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
- vsock->seqpacket_allow = true;
-
- vdev->priv = vsock;
- rcu_assign_pointer(the_virtio_vsock, vsock);
-
- mutex_unlock(&the_virtio_vsock_mutex);
-
return 0;
-
-out:
- kfree(vsock);
- mutex_unlock(&the_virtio_vsock_mutex);
- return ret;
}
-static void virtio_vsock_remove(struct virtio_device *vdev)
+static void virtio_vsock_vqs_del(struct virtio_vsock *vsock)
{
- struct virtio_vsock *vsock = vdev->priv;
+ struct virtio_device *vdev = vsock->vdev;
struct virtio_vsock_pkt *pkt;
- mutex_lock(&the_virtio_vsock_mutex);
-
- vdev->priv = NULL;
- rcu_assign_pointer(the_virtio_vsock, NULL);
- synchronize_rcu();
-
- /* Reset all connected sockets when the device disappear */
- vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+ /* Reset all connected sockets when the VQs disappear */
+ vsock_for_each_connected_socket(&virtio_transport.transport,
+ virtio_vsock_reset_sock);
/* Stop all work handlers to make sure no one is accessing the device,
- * so we can safely call vdev->config->reset().
+ * so we can safely call virtio_reset_device().
*/
mutex_lock(&vsock->rx_lock);
vsock->rx_run = false;
@@ -682,7 +634,7 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
/* Flush all device writes and interrupts, device will not use any
* more buffers.
*/
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
mutex_lock(&vsock->rx_lock);
while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX])))
@@ -705,6 +657,78 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
/* Delete virtqueues and flush outstanding callbacks if any */
vdev->config->del_vqs(vdev);
+}
+
+static int virtio_vsock_probe(struct virtio_device *vdev)
+{
+ struct virtio_vsock *vsock = NULL;
+ int ret;
+
+ ret = mutex_lock_interruptible(&the_virtio_vsock_mutex);
+ if (ret)
+ return ret;
+
+ /* Only one virtio-vsock device per guest is supported */
+ if (rcu_dereference_protected(the_virtio_vsock,
+ lockdep_is_held(&the_virtio_vsock_mutex))) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ vsock = kzalloc(sizeof(*vsock), GFP_KERNEL);
+ if (!vsock) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ vsock->vdev = vdev;
+
+ vsock->rx_buf_nr = 0;
+ vsock->rx_buf_max_nr = 0;
+ atomic_set(&vsock->queued_replies, 0);
+
+ mutex_init(&vsock->tx_lock);
+ mutex_init(&vsock->rx_lock);
+ mutex_init(&vsock->event_lock);
+ spin_lock_init(&vsock->send_pkt_list_lock);
+ INIT_LIST_HEAD(&vsock->send_pkt_list);
+ INIT_WORK(&vsock->rx_work, virtio_transport_rx_work);
+ INIT_WORK(&vsock->tx_work, virtio_transport_tx_work);
+ INIT_WORK(&vsock->event_work, virtio_transport_event_work);
+ INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work);
+
+ if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
+ vsock->seqpacket_allow = true;
+
+ vdev->priv = vsock;
+
+ ret = virtio_vsock_vqs_init(vsock);
+ if (ret < 0)
+ goto out;
+
+ rcu_assign_pointer(the_virtio_vsock, vsock);
+
+ mutex_unlock(&the_virtio_vsock_mutex);
+
+ return 0;
+
+out:
+ kfree(vsock);
+ mutex_unlock(&the_virtio_vsock_mutex);
+ return ret;
+}
+
+static void virtio_vsock_remove(struct virtio_device *vdev)
+{
+ struct virtio_vsock *vsock = vdev->priv;
+
+ mutex_lock(&the_virtio_vsock_mutex);
+
+ vdev->priv = NULL;
+ rcu_assign_pointer(the_virtio_vsock, NULL);
+ synchronize_rcu();
+
+ virtio_vsock_vqs_del(vsock);
 	/* Other works can be queued before 'config->del_vqs()', so we flush
 	 * all works before freeing the vsock object to avoid use-after-free.
@@ -719,6 +743,49 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
kfree(vsock);
}
+#ifdef CONFIG_PM_SLEEP
+static int virtio_vsock_freeze(struct virtio_device *vdev)
+{
+ struct virtio_vsock *vsock = vdev->priv;
+
+ mutex_lock(&the_virtio_vsock_mutex);
+
+ rcu_assign_pointer(the_virtio_vsock, NULL);
+ synchronize_rcu();
+
+ virtio_vsock_vqs_del(vsock);
+
+ mutex_unlock(&the_virtio_vsock_mutex);
+
+ return 0;
+}
+
+static int virtio_vsock_restore(struct virtio_device *vdev)
+{
+ struct virtio_vsock *vsock = vdev->priv;
+ int ret;
+
+ mutex_lock(&the_virtio_vsock_mutex);
+
+ /* Only one virtio-vsock device per guest is supported */
+ if (rcu_dereference_protected(the_virtio_vsock,
+ lockdep_is_held(&the_virtio_vsock_mutex))) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ ret = virtio_vsock_vqs_init(vsock);
+ if (ret < 0)
+ goto out;
+
+ rcu_assign_pointer(the_virtio_vsock, vsock);
+
+out:
+ mutex_unlock(&the_virtio_vsock_mutex);
+ return ret;
+}
+#endif /* CONFIG_PM_SLEEP */
+
static struct virtio_device_id id_table[] = {
{ VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID },
{ 0 },
@@ -736,6 +803,10 @@ static struct virtio_driver virtio_vsock_driver = {
.id_table = id_table,
.probe = virtio_vsock_probe,
.remove = virtio_vsock_remove,
+#ifdef CONFIG_PM_SLEEP
+ .freeze = virtio_vsock_freeze,
+ .restore = virtio_vsock_restore,
+#endif
};
static int __init virtio_vsock_init(void)
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index ec2c2afbf0d0..a9980e9b9304 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -634,10 +634,7 @@ virtio_transport_notify_poll_in(struct vsock_sock *vsk,
size_t target,
bool *data_ready_now)
{
- if (vsock_stream_has_data(vsk))
- *data_ready_now = true;
- else
- *data_ready_now = false;
+ *data_ready_now = vsock_stream_has_data(vsk) >= target;
return 0;
}
@@ -1084,7 +1081,7 @@ virtio_transport_recv_connected(struct sock *sk,
switch (le16_to_cpu(pkt->hdr.op)) {
case VIRTIO_VSOCK_OP_RW:
virtio_transport_recv_enqueue(vsk, pkt);
- sk->sk_data_ready(sk);
+ vsock_data_ready(sk);
return err;
case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
virtio_transport_send_credit_update(vsk);
@@ -1342,7 +1339,7 @@ EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);
void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
{
- kfree(pkt->buf);
+ kvfree(pkt->buf);
kfree(pkt);
}
EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 7aef34e32bdf..842c94286d31 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -75,6 +75,8 @@ static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
static int PROTOCOL_OVERRIDE = -1;
+static struct vsock_transport vmci_transport; /* forward declaration */
+
/* Helper function to convert from a VMCI error code to a VSock error code. */
static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
@@ -882,7 +884,8 @@ static void vmci_transport_qp_resumed_cb(u32 sub_id,
const struct vmci_event_data *e_data,
void *client_data)
{
- vsock_for_each_connected_socket(vmci_transport_handle_detach);
+ vsock_for_each_connected_socket(&vmci_transport,
+ vmci_transport_handle_detach);
}
static void vmci_transport_recv_pkt_work(struct work_struct *work)
@@ -948,7 +951,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
* for ourself or any previous connection requests that we received.
* If it's the latter, we try to find a socket in our list of pending
* connections and, if we do, call the appropriate handler for the
- * state that that socket is in. Otherwise we try to service the
+ * state that socket is in. Otherwise we try to service the
* connection request.
*/
pending = vmci_transport_get_pending(sk, pkt);
@@ -1729,19 +1732,16 @@ static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk,
int flags)
{
int err;
- int noblock;
struct vmci_datagram *dg;
size_t payload_len;
struct sk_buff *skb;
- noblock = flags & MSG_DONTWAIT;
-
if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
return -EOPNOTSUPP;
/* Retrieve the head sk_buff from the socket's receive queue. */
err = 0;
- skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err);
+ skb = skb_recv_datagram(&vsk->sk, flags, &err);
if (!skb)
return err;
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
index d69fc4b595ad..7c3a7db134b2 100644
--- a/net/vmw_vsock/vmci_transport_notify.c
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -307,7 +307,7 @@ vmci_transport_handle_wrote(struct sock *sk,
struct vsock_sock *vsk = vsock_sk(sk);
PKT_FIELD(vsk, sent_waiting_read) = false;
#endif
- sk->sk_data_ready(sk);
+ vsock_data_ready(sk);
}
static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
@@ -340,12 +340,12 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
{
struct vsock_sock *vsk = vsock_sk(sk);
- if (vsock_stream_has_data(vsk)) {
+ if (vsock_stream_has_data(vsk) >= target) {
*data_ready_now = true;
} else {
- /* We can't read right now because there is nothing in the
- * queue. Ask for notifications when there is something to
- * read.
+ /* We can't read right now because there is not enough data
+ * in the queue. Ask for notifications when there is something
+ * to read.
*/
if (sk->sk_state == TCP_ESTABLISHED) {
if (!send_waiting_read(sk, 1))
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
index 0f36d7c45db3..e96a88d850a8 100644
--- a/net/vmw_vsock/vmci_transport_notify_qstate.c
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -84,7 +84,7 @@ vmci_transport_handle_wrote(struct sock *sk,
bool bottom_half,
struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
- sk->sk_data_ready(sk);
+ vsock_data_ready(sk);
}
static void vsock_block_update_write_window(struct sock *sk)
@@ -161,12 +161,12 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
{
struct vsock_sock *vsk = vsock_sk(sk);
- if (vsock_stream_has_data(vsk)) {
+ if (vsock_stream_has_data(vsk) >= target) {
*data_ready_now = true;
} else {
- /* We can't read right now because there is nothing in the
- * queue. Ask for notifications when there is something to
- * read.
+ /* We can't read right now because there is not enough data
+ * in the queue. Ask for notifications when there is something
+ * to read.
*/
if (sk->sk_state == TCP_ESTABLISHED)
vsock_block_update_write_window(sk);
@@ -282,7 +282,7 @@ vmci_transport_notify_pkt_recv_post_dequeue(
/* See the comment in
* vmci_transport_notify_pkt_send_post_enqueue().
*/
- sk->sk_data_ready(sk);
+ vsock_data_ready(sk);
}
return err;
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 756e7de7e33f..527ae669f6f7 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -33,8 +33,8 @@ $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
) > $@
-$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
- $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
+$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR) \
+ $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR)/*.x509)
@$(kecho) " GEN $@"
$(Q)(set -e; \
allf=""; \
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index 550ac9d827fe..e68923200018 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -1,4 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
+/*
+ * Parts of this file are
+ * Copyright (C) 2022 Intel Corporation
+ */
#include <linux/ieee80211.h>
#include <linux/export.h>
#include <net/cfg80211.h>
@@ -7,8 +11,9 @@
#include "rdev-ops.h"
-int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
- struct net_device *dev, bool notify)
+static int ___cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
+ struct net_device *dev, unsigned int link_id,
+ bool notify)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
@@ -22,15 +27,16 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
return -EOPNOTSUPP;
- if (!wdev->beacon_interval)
+ if (!wdev->links[link_id].ap.beacon_interval)
return -ENOENT;
- err = rdev_stop_ap(rdev, dev);
+ err = rdev_stop_ap(rdev, dev, link_id);
if (!err) {
wdev->conn_owner_nlportid = 0;
- wdev->beacon_interval = 0;
- memset(&wdev->chandef, 0, sizeof(wdev->chandef));
- wdev->ssid_len = 0;
+ wdev->links[link_id].ap.beacon_interval = 0;
+ memset(&wdev->links[link_id].ap.chandef, 0,
+ sizeof(wdev->links[link_id].ap.chandef));
+ wdev->u.ap.ssid_len = 0;
rdev_set_qos_map(rdev, dev, NULL);
if (notify)
nl80211_send_ap_stopped(wdev);
@@ -46,14 +52,36 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
return err;
}
+int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
+ struct net_device *dev, int link_id,
+ bool notify)
+{
+ unsigned int link;
+ int ret = 0;
+
+ if (link_id >= 0)
+ return ___cfg80211_stop_ap(rdev, dev, link_id, notify);
+
+ for_each_valid_link(dev->ieee80211_ptr, link) {
+ int ret1 = ___cfg80211_stop_ap(rdev, dev, link, notify);
+
+ if (ret1)
+ ret = ret1;
+		/* keep trying the remaining links even if one fails */
+ }
+
+ return ret;
+}
+
int cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
- struct net_device *dev, bool notify)
+ struct net_device *dev, int link_id,
+ bool notify)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
wdev_lock(wdev);
- err = __cfg80211_stop_ap(rdev, dev, notify);
+ err = __cfg80211_stop_ap(rdev, dev, link_id, notify);
wdev_unlock(wdev);
return err;
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index eb822052d344..0e5835cd8c61 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -6,7 +6,7 @@
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright 2018-2021 Intel Corporation
+ * Copyright 2018-2022 Intel Corporation
*/
#include <linux/export.h>
@@ -181,6 +181,9 @@ static int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width)
case NL80211_CHAN_WIDTH_160:
mhz = 160;
break;
+ case NL80211_CHAN_WIDTH_320:
+ mhz = 320;
+ break;
default:
WARN_ON_ONCE(1);
return -1;
@@ -271,6 +274,17 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef)
case NL80211_CHAN_WIDTH_16:
/* all checked above */
break;
+ case NL80211_CHAN_WIDTH_320:
+ if (chandef->center_freq1 == control_freq + 150 ||
+ chandef->center_freq1 == control_freq + 130 ||
+ chandef->center_freq1 == control_freq + 110 ||
+ chandef->center_freq1 == control_freq + 90 ||
+ chandef->center_freq1 == control_freq - 90 ||
+ chandef->center_freq1 == control_freq - 110 ||
+ chandef->center_freq1 == control_freq - 130 ||
+ chandef->center_freq1 == control_freq - 150)
+ break;
+ fallthrough;
case NL80211_CHAN_WIDTH_160:
if (chandef->center_freq1 == control_freq + 70 ||
chandef->center_freq1 == control_freq + 50 ||
@@ -307,7 +321,7 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef)
EXPORT_SYMBOL(cfg80211_chandef_valid);
static void chandef_primary_freqs(const struct cfg80211_chan_def *c,
- u32 *pri40, u32 *pri80)
+ u32 *pri40, u32 *pri80, u32 *pri160)
{
int tmp;
@@ -315,9 +329,11 @@ static void chandef_primary_freqs(const struct cfg80211_chan_def *c,
case NL80211_CHAN_WIDTH_40:
*pri40 = c->center_freq1;
*pri80 = 0;
+ *pri160 = 0;
break;
case NL80211_CHAN_WIDTH_80:
case NL80211_CHAN_WIDTH_80P80:
+ *pri160 = 0;
*pri80 = c->center_freq1;
/* n_P20 */
tmp = (30 + c->chan->center_freq - c->center_freq1)/20;
@@ -327,6 +343,7 @@ static void chandef_primary_freqs(const struct cfg80211_chan_def *c,
*pri40 = c->center_freq1 - 20 + 40 * tmp;
break;
case NL80211_CHAN_WIDTH_160:
+ *pri160 = c->center_freq1;
/* n_P20 */
tmp = (70 + c->chan->center_freq - c->center_freq1)/20;
/* n_P40 */
@@ -337,6 +354,20 @@ static void chandef_primary_freqs(const struct cfg80211_chan_def *c,
tmp /= 2;
*pri80 = c->center_freq1 - 40 + 80 * tmp;
break;
+ case NL80211_CHAN_WIDTH_320:
+ /* n_P20 */
+ tmp = (150 + c->chan->center_freq - c->center_freq1) / 20;
+ /* n_P40 */
+ tmp /= 2;
+ /* freq_P40 */
+ *pri40 = c->center_freq1 - 140 + 40 * tmp;
+ /* n_P80 */
+ tmp /= 2;
+ *pri80 = c->center_freq1 - 120 + 80 * tmp;
+ /* n_P160 */
+ tmp /= 2;
+ *pri160 = c->center_freq1 - 80 + 160 * tmp;
+ break;
default:
WARN_ON_ONCE(1);
}
@@ -346,7 +377,7 @@ const struct cfg80211_chan_def *
cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1,
const struct cfg80211_chan_def *c2)
{
- u32 c1_pri40, c1_pri80, c2_pri40, c2_pri80;
+ u32 c1_pri40, c1_pri80, c2_pri40, c2_pri80, c1_pri160, c2_pri160;
/* If they are identical, return */
if (cfg80211_chandef_identical(c1, c2))
@@ -381,14 +412,31 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1,
c2->width == NL80211_CHAN_WIDTH_20)
return c1;
- chandef_primary_freqs(c1, &c1_pri40, &c1_pri80);
- chandef_primary_freqs(c2, &c2_pri40, &c2_pri80);
+ chandef_primary_freqs(c1, &c1_pri40, &c1_pri80, &c1_pri160);
+ chandef_primary_freqs(c2, &c2_pri40, &c2_pri80, &c2_pri160);
if (c1_pri40 != c2_pri40)
return NULL;
- WARN_ON(!c1_pri80 && !c2_pri80);
- if (c1_pri80 && c2_pri80 && c1_pri80 != c2_pri80)
+ if (c1->width == NL80211_CHAN_WIDTH_40)
+ return c2;
+
+ if (c2->width == NL80211_CHAN_WIDTH_40)
+ return c1;
+
+ if (c1_pri80 != c2_pri80)
+ return NULL;
+
+ if (c1->width == NL80211_CHAN_WIDTH_80 &&
+ c2->width > NL80211_CHAN_WIDTH_80)
+ return c2;
+
+ if (c2->width == NL80211_CHAN_WIDTH_80 &&
+ c1->width > NL80211_CHAN_WIDTH_80)
+ return c1;
+
+ WARN_ON(!c1_pri160 && !c2_pri160);
+ if (c1_pri160 && c2_pri160 && c1_pri160 != c2_pri160)
return NULL;
if (c1->width > c2->width)
@@ -624,14 +672,21 @@ bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy,
* range of chandef.
*/
bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
- struct ieee80211_channel *chan)
+ struct ieee80211_channel *chan,
+ bool primary_only)
{
int width;
u32 freq;
+ if (!chandef->chan)
+ return false;
+
if (chandef->chan->center_freq == chan->center_freq)
return true;
+ if (primary_only)
+ return false;
+
width = cfg80211_chandef_get_width(chandef);
if (width <= 20)
return false;
@@ -656,23 +711,25 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev)
{
- bool active = false;
+ unsigned int link;
ASSERT_WDEV_LOCK(wdev);
- if (!wdev->chandef.chan)
- return false;
-
switch (wdev->iftype) {
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
- active = wdev->beacon_interval != 0;
+ for_each_valid_link(wdev, link) {
+ if (wdev->links[link].ap.beacon_interval)
+ return true;
+ }
break;
case NL80211_IFTYPE_ADHOC:
- active = wdev->ssid_len != 0;
+ if (wdev->u.ibss.ssid_len)
+ return true;
break;
case NL80211_IFTYPE_MESH_POINT:
- active = wdev->mesh_id_len != 0;
+ if (wdev->u.mesh.id_len)
+ return true;
break;
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_OCB:
@@ -689,7 +746,35 @@ bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev)
WARN_ON(1);
}
- return active;
+ return false;
+}
+
+bool cfg80211_wdev_on_sub_chan(struct wireless_dev *wdev,
+ struct ieee80211_channel *chan,
+ bool primary_only)
+{
+ unsigned int link;
+
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
+ for_each_valid_link(wdev, link) {
+ if (cfg80211_is_sub_chan(&wdev->links[link].ap.chandef,
+ chan, primary_only))
+ return true;
+ }
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ return cfg80211_is_sub_chan(&wdev->u.ibss.chandef, chan,
+ primary_only);
+ case NL80211_IFTYPE_MESH_POINT:
+ return cfg80211_is_sub_chan(&wdev->u.mesh.chandef, chan,
+ primary_only);
+ default:
+ break;
+ }
+
+ return false;
}
static bool cfg80211_is_wiphy_oper_chan(struct wiphy *wiphy,
@@ -704,7 +789,7 @@ static bool cfg80211_is_wiphy_oper_chan(struct wiphy *wiphy,
continue;
}
- if (cfg80211_is_sub_chan(&wdev->chandef, chan)) {
+ if (cfg80211_wdev_on_sub_chan(wdev, chan, false)) {
wdev_unlock(wdev);
return true;
}
@@ -724,7 +809,8 @@ cfg80211_offchan_chain_is_active(struct cfg80211_registered_device *rdev,
if (!cfg80211_chandef_valid(&rdev->background_radar_chandef))
return false;
- return cfg80211_is_sub_chan(&rdev->background_radar_chandef, channel);
+ return cfg80211_is_sub_chan(&rdev->background_radar_chandef, channel,
+ false);
}
bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy,
@@ -960,7 +1046,10 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
struct ieee80211_sta_vht_cap *vht_cap;
struct ieee80211_edmg *edmg_cap;
u32 width, control_freq, cap;
- bool ext_nss_cap, support_80_80 = false;
+ bool ext_nss_cap, support_80_80 = false, support_320 = false;
+ const struct ieee80211_sband_iftype_data *iftd;
+ struct ieee80211_supported_band *sband;
+ int i;
if (WARN_ON(!cfg80211_chandef_valid(chandef)))
return false;
@@ -1062,6 +1151,32 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
(vht_cap->cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK)))
return false;
break;
+ case NL80211_CHAN_WIDTH_320:
+ prohibited_flags |= IEEE80211_CHAN_NO_320MHZ;
+ width = 320;
+
+ if (chandef->chan->band != NL80211_BAND_6GHZ)
+ return false;
+
+ sband = wiphy->bands[NL80211_BAND_6GHZ];
+ if (!sband)
+ return false;
+
+ for (i = 0; i < sband->n_iftype_data; i++) {
+ iftd = &sband->iftype_data[i];
+ if (!iftd->eht_cap.has_eht)
+ continue;
+
+ if (iftd->eht_cap.eht_cap_elem.phy_cap_info[0] &
+ IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ) {
+ support_320 = true;
+ break;
+ }
+ }
+
+ if (!support_320)
+ return false;
+ break;
default:
WARN_ON_ONCE(1);
return false;
@@ -1099,6 +1214,68 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
}
EXPORT_SYMBOL(cfg80211_chandef_usable);
+static bool cfg80211_ir_permissive_check_wdev(enum nl80211_iftype iftype,
+ struct wireless_dev *wdev,
+ struct ieee80211_channel *chan)
+{
+ struct ieee80211_channel *other_chan = NULL;
+ unsigned int link_id;
+ int r1, r2;
+
+ for_each_valid_link(wdev, link_id) {
+ if (wdev->iftype == NL80211_IFTYPE_STATION &&
+ wdev->links[link_id].client.current_bss)
+ other_chan = wdev->links[link_id].client.current_bss->pub.channel;
+
+ /*
+ * If a GO already operates on the same GO_CONCURRENT channel,
+ * this one (maybe the same one) can beacon as well. We allow
+ * the operation even if the station we relied on with
+ * GO_CONCURRENT is disconnected now. But then we must make sure
+ * we're not outdoor on an indoor-only channel.
+ */
+ if (iftype == NL80211_IFTYPE_P2P_GO &&
+ wdev->iftype == NL80211_IFTYPE_P2P_GO &&
+ wdev->links[link_id].ap.beacon_interval &&
+ !(chan->flags & IEEE80211_CHAN_INDOOR_ONLY))
+ other_chan = wdev->links[link_id].ap.chandef.chan;
+
+ if (!other_chan)
+ continue;
+
+ if (chan == other_chan)
+ return true;
+
+ if (chan->band != NL80211_BAND_5GHZ &&
+ chan->band != NL80211_BAND_6GHZ)
+ continue;
+
+ r1 = cfg80211_get_unii(chan->center_freq);
+ r2 = cfg80211_get_unii(other_chan->center_freq);
+
+ if (r1 != -EINVAL && r1 == r2) {
+ /*
+ * At some locations channels 149-165 are considered a
+ * bundle, but at other locations, e.g., Indonesia,
+ * channels 149-161 are considered a bundle while
+ * channel 165 is left out and considered to be in a
+ * different bundle. Thus, in case that there is a
+ * station interface connected to an AP on channel 165,
+ * it is assumed that channels 149-161 are allowed for
+ * GO operations. However, having a station interface
+ * connected to an AP on channels 149-161, does not
+ * allow GO operation on channel 165.
+ */
+ if (chan->center_freq == 5825 &&
+ other_chan->center_freq != 5825)
+ continue;
+ return true;
+ }
+ }
+
+ return false;
+}
+
/*
* Check if the channel can be used under permissive conditions mandated by
* some regulatory bodies, i.e., the channel is marked with
@@ -1142,59 +1319,14 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy,
* the current registered device.
*/
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
- struct ieee80211_channel *other_chan = NULL;
- int r1, r2;
+ bool ret;
wdev_lock(wdev);
- if (wdev->iftype == NL80211_IFTYPE_STATION &&
- wdev->current_bss)
- other_chan = wdev->current_bss->pub.channel;
-
- /*
- * If a GO already operates on the same GO_CONCURRENT channel,
- * this one (maybe the same one) can beacon as well. We allow
- * the operation even if the station we relied on with
- * GO_CONCURRENT is disconnected now. But then we must make sure
- * we're not outdoor on an indoor-only channel.
- */
- if (iftype == NL80211_IFTYPE_P2P_GO &&
- wdev->iftype == NL80211_IFTYPE_P2P_GO &&
- wdev->beacon_interval &&
- !(chan->flags & IEEE80211_CHAN_INDOOR_ONLY))
- other_chan = wdev->chandef.chan;
+ ret = cfg80211_ir_permissive_check_wdev(iftype, wdev, chan);
wdev_unlock(wdev);
- if (!other_chan)
- continue;
-
- if (chan == other_chan)
- return true;
-
- if (chan->band != NL80211_BAND_5GHZ &&
- chan->band != NL80211_BAND_6GHZ)
- continue;
-
- r1 = cfg80211_get_unii(chan->center_freq);
- r2 = cfg80211_get_unii(other_chan->center_freq);
-
- if (r1 != -EINVAL && r1 == r2) {
- /*
- * At some locations channels 149-165 are considered a
- * bundle, but at other locations, e.g., Indonesia,
- * channels 149-161 are considered a bundle while
- * channel 165 is left out and considered to be in a
- * different bundle. Thus, in case that there is a
- * station interface connected to an AP on channel 165,
- * it is assumed that channels 149-161 are allowed for
- * GO operations. However, having a station interface
- * connected to an AP on channels 149-161, does not
- * allow GO operation on channel 165.
- */
- if (chan->center_freq == 5825 &&
- other_chan->center_freq != 5825)
- continue;
- return true;
- }
+ if (ret)
+ return ret;
}
return false;
@@ -1267,97 +1399,6 @@ int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev,
return rdev_set_monitor_channel(rdev, chandef);
}
-void
-cfg80211_get_chan_state(struct wireless_dev *wdev,
- struct ieee80211_channel **chan,
- enum cfg80211_chan_mode *chanmode,
- u8 *radar_detect)
-{
- int ret;
-
- *chan = NULL;
- *chanmode = CHAN_MODE_UNDEFINED;
-
- ASSERT_WDEV_LOCK(wdev);
-
- if (wdev->netdev && !netif_running(wdev->netdev))
- return;
-
- switch (wdev->iftype) {
- case NL80211_IFTYPE_ADHOC:
- if (wdev->current_bss) {
- *chan = wdev->current_bss->pub.channel;
- *chanmode = (wdev->ibss_fixed &&
- !wdev->ibss_dfs_possible)
- ? CHAN_MODE_SHARED
- : CHAN_MODE_EXCLUSIVE;
-
- /* consider worst-case - IBSS can try to return to the
- * original user-specified channel as creator */
- if (wdev->ibss_dfs_possible)
- *radar_detect |= BIT(wdev->chandef.width);
- return;
- }
- break;
- case NL80211_IFTYPE_STATION:
- case NL80211_IFTYPE_P2P_CLIENT:
- if (wdev->current_bss) {
- *chan = wdev->current_bss->pub.channel;
- *chanmode = CHAN_MODE_SHARED;
- return;
- }
- break;
- case NL80211_IFTYPE_AP:
- case NL80211_IFTYPE_P2P_GO:
- if (wdev->cac_started) {
- *chan = wdev->chandef.chan;
- *chanmode = CHAN_MODE_SHARED;
- *radar_detect |= BIT(wdev->chandef.width);
- } else if (wdev->beacon_interval) {
- *chan = wdev->chandef.chan;
- *chanmode = CHAN_MODE_SHARED;
-
- ret = cfg80211_chandef_dfs_required(wdev->wiphy,
- &wdev->chandef,
- wdev->iftype);
- WARN_ON(ret < 0);
- if (ret > 0)
- *radar_detect |= BIT(wdev->chandef.width);
- }
- return;
- case NL80211_IFTYPE_MESH_POINT:
- if (wdev->mesh_id_len) {
- *chan = wdev->chandef.chan;
- *chanmode = CHAN_MODE_SHARED;
-
- ret = cfg80211_chandef_dfs_required(wdev->wiphy,
- &wdev->chandef,
- wdev->iftype);
- WARN_ON(ret < 0);
- if (ret > 0)
- *radar_detect |= BIT(wdev->chandef.width);
- }
- return;
- case NL80211_IFTYPE_OCB:
- if (wdev->chandef.chan) {
- *chan = wdev->chandef.chan;
- *chanmode = CHAN_MODE_SHARED;
- return;
- }
- break;
- case NL80211_IFTYPE_MONITOR:
- case NL80211_IFTYPE_AP_VLAN:
- case NL80211_IFTYPE_P2P_DEVICE:
- case NL80211_IFTYPE_NAN:
- /* these interface types don't really have a channel */
- return;
- case NL80211_IFTYPE_UNSPECIFIED:
- case NL80211_IFTYPE_WDS:
- case NUM_NL80211_IFTYPES:
- WARN_ON(1);
- }
-}
-
bool cfg80211_any_usable_channels(struct wiphy *wiphy,
unsigned long sband_mask,
u32 prohibited_flags)
@@ -1388,3 +1429,34 @@ bool cfg80211_any_usable_channels(struct wiphy *wiphy,
return false;
}
EXPORT_SYMBOL(cfg80211_any_usable_channels);
+
+struct cfg80211_chan_def *wdev_chandef(struct wireless_dev *wdev,
+ unsigned int link_id)
+{
+	/*
+	 * The locking here still needs to be sorted out: some
+	 * callers don't (yet) care about the valid links, while
+	 * others do. Since we get here from various driver paths,
+	 * we cannot easily require the wdev mutex.
+	 */
+ if (link_id || wdev->valid_links & BIT(0)) {
+ ASSERT_WDEV_LOCK(wdev);
+ WARN_ON(!(wdev->valid_links & BIT(link_id)));
+ }
+
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_MESH_POINT:
+ return &wdev->u.mesh.chandef;
+ case NL80211_IFTYPE_ADHOC:
+ return &wdev->u.ibss.chandef;
+ case NL80211_IFTYPE_OCB:
+ return &wdev->u.ocb.chandef;
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
+ return &wdev->links[link_id].ap.chandef;
+ default:
+ return NULL;
+ }
+}
+EXPORT_SYMBOL(wdev_chandef);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 3a54c8e6b6c6..5b0c4d5b80cf 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -332,29 +332,20 @@ static void cfg80211_event_work(struct work_struct *work)
void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev)
{
struct wireless_dev *wdev, *tmp;
- bool found = false;
ASSERT_RTNL();
- list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
+ list_for_each_entry_safe(wdev, tmp, &rdev->wiphy.wdev_list, list) {
if (wdev->nl_owner_dead) {
if (wdev->netdev)
dev_close(wdev->netdev);
- found = true;
- }
- }
-
- if (!found)
- return;
- wiphy_lock(&rdev->wiphy);
- list_for_each_entry_safe(wdev, tmp, &rdev->wiphy.wdev_list, list) {
- if (wdev->nl_owner_dead) {
+ wiphy_lock(&rdev->wiphy);
cfg80211_leave(rdev, wdev);
- rdev_del_virtual_intf(rdev, wdev);
+ cfg80211_remove_virtual_intf(rdev, wdev);
+ wiphy_unlock(&rdev->wiphy);
}
}
- wiphy_unlock(&rdev->wiphy);
}
static void cfg80211_destroy_iface_wk(struct work_struct *work)
@@ -869,6 +860,9 @@ int wiphy_register(struct wiphy *wiphy)
for (i = 0; i < sband->n_iftype_data; i++) {
const struct ieee80211_sband_iftype_data *iftd;
+ bool has_ap, has_non_ap;
+ u32 ap_bits = BIT(NL80211_IFTYPE_AP) |
+ BIT(NL80211_IFTYPE_P2P_GO);
iftd = &sband->iftype_data[i];
@@ -888,6 +882,19 @@ int wiphy_register(struct wiphy *wiphy)
else
have_he = have_he &&
iftd->he_cap.has_he;
+
+ has_ap = iftd->types_mask & ap_bits;
+ has_non_ap = iftd->types_mask & ~ap_bits;
+
+			/*
+			 * For a 20 MHz EHT STA the capabilities format
+			 * differs; to keep this simple, don't check 20 MHz
+			 * specifically but reject any iftype data that mixes
+			 * AP and non-AP types.
+			 */
+ if (WARN_ON(iftd->eht_cap.has_eht &&
+ has_ap && has_non_ap))
+ return -EINVAL;
}
if (WARN_ON(!have_he && band == NL80211_BAND_6GHZ))
@@ -922,6 +929,12 @@ int wiphy_register(struct wiphy *wiphy)
return -EINVAL;
#endif
+ if (!wiphy->max_num_akm_suites)
+ wiphy->max_num_akm_suites = NL80211_MAX_NR_AKM_SUITES;
+ else if (wiphy->max_num_akm_suites < NL80211_MAX_NR_AKM_SUITES ||
+ wiphy->max_num_akm_suites > CFG80211_MAX_NUM_AKM_SUITES)
+ return -EINVAL;
+
/* check and set up bitrates */
ieee80211_set_bitrate_flags(wiphy);
@@ -1127,6 +1140,7 @@ static void _cfg80211_unregister_wdev(struct wireless_dev *wdev,
bool unregister_netdev)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ unsigned int link_id;
ASSERT_RTNL();
lockdep_assert_held(&rdev->wiphy.mtx);
@@ -1176,11 +1190,22 @@ static void _cfg80211_unregister_wdev(struct wireless_dev *wdev,
*/
cfg80211_process_wdev_events(wdev);
- if (WARN_ON(wdev->current_bss)) {
- cfg80211_unhold_bss(wdev->current_bss);
- cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub);
- wdev->current_bss = NULL;
+ if (wdev->iftype == NL80211_IFTYPE_STATION ||
+ wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) {
+ for (link_id = 0; link_id < ARRAY_SIZE(wdev->links); link_id++) {
+ struct cfg80211_internal_bss *curbss;
+
+ curbss = wdev->links[link_id].client.current_bss;
+
+ if (WARN_ON(curbss)) {
+ cfg80211_unhold_bss(curbss);
+ cfg80211_put_bss(wdev->wiphy, &curbss->pub);
+ wdev->links[link_id].client.current_bss = NULL;
+ }
+ }
}
+
+ wdev->connected = false;
}
void cfg80211_unregister_wdev(struct wireless_dev *wdev)
@@ -1242,7 +1267,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev,
break;
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
- __cfg80211_stop_ap(rdev, dev, true);
+ __cfg80211_stop_ap(rdev, dev, -1, true);
break;
case NL80211_IFTYPE_OCB:
__cfg80211_leave_ocb(rdev, dev);
@@ -1428,6 +1453,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
case NETDEV_GOING_DOWN:
wiphy_lock(&rdev->wiphy);
cfg80211_leave(rdev, wdev);
+ cfg80211_remove_links(wdev);
wiphy_unlock(&rdev->wiphy);
break;
case NETDEV_DOWN:
@@ -1472,9 +1498,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
memcpy(&setup, &default_mesh_setup,
sizeof(setup));
/* back compat only needed for mesh_id */
- setup.mesh_id = wdev->ssid;
- setup.mesh_id_len = wdev->mesh_id_up_len;
- if (wdev->mesh_id_up_len)
+ setup.mesh_id = wdev->u.mesh.id;
+ setup.mesh_id_len = wdev->u.mesh.id_up_len;
+ if (wdev->u.mesh.id_up_len)
__cfg80211_join_mesh(rdev, dev,
&setup,
&default_mesh_config);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 3a7dbd63d8c6..775e16cb99ed 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -3,7 +3,7 @@
* Wireless configuration interface internals.
*
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*/
#ifndef __NET_WIRELESS_CORE_H
#define __NET_WIRELESS_CORE_H
@@ -281,12 +281,6 @@ struct cfg80211_cached_keys {
int def;
};
-enum cfg80211_chan_mode {
- CHAN_MODE_UNDEFINED,
- CHAN_MODE_SHARED,
- CHAN_MODE_EXCLUSIVE,
-};
-
struct cfg80211_beacon_registration {
struct list_head list;
u32 nlportid;
@@ -313,6 +307,7 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *rdev);
void cfg80211_bss_age(struct cfg80211_registered_device *rdev,
unsigned long age_secs);
void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev,
+ unsigned int link,
struct ieee80211_channel *channel);
/* IBSS */
@@ -359,32 +354,25 @@ int cfg80211_leave_ocb(struct cfg80211_registered_device *rdev,
/* AP */
int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
- struct net_device *dev, bool notify);
+ struct net_device *dev, int link,
+ bool notify);
int cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
- struct net_device *dev, bool notify);
+ struct net_device *dev, int link,
+ bool notify);
/* MLME */
int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
struct net_device *dev,
- struct ieee80211_channel *chan,
- enum nl80211_auth_type auth_type,
- const u8 *bssid,
- const u8 *ssid, int ssid_len,
- const u8 *ie, int ie_len,
- const u8 *key, int key_len, int key_idx,
- const u8 *auth_data, int auth_data_len);
+ struct cfg80211_auth_request *req);
int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
struct net_device *dev,
- struct ieee80211_channel *chan,
- const u8 *bssid,
- const u8 *ssid, int ssid_len,
struct cfg80211_assoc_request *req);
int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
struct net_device *dev, const u8 *bssid,
const u8 *ie, int ie_len, u16 reason,
bool local_state_change);
int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
- struct net_device *dev, const u8 *bssid,
+ struct net_device *dev, const u8 *ap_addr,
const u8 *ie, int ie_len, u16 reason,
bool local_state_change);
void cfg80211_mlme_down(struct cfg80211_registered_device *rdev,
@@ -513,7 +501,11 @@ bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy,
bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev);
bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
- struct ieee80211_channel *chan);
+ struct ieee80211_channel *chan,
+ bool primary_only);
+bool cfg80211_wdev_on_sub_chan(struct wireless_dev *wdev,
+ struct ieee80211_channel *chan,
+ bool primary_only);
static inline unsigned int elapsed_jiffies_msecs(unsigned long start)
{
@@ -525,12 +517,6 @@ static inline unsigned int elapsed_jiffies_msecs(unsigned long start)
return jiffies_to_msecs(end + (ULONG_MAX - start) + 1);
}
-void
-cfg80211_get_chan_state(struct wireless_dev *wdev,
- struct ieee80211_channel **chan,
- enum cfg80211_chan_mode *chanmode,
- u8 *radar_detect);
-
int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev,
struct cfg80211_chan_def *chandef);
@@ -576,4 +562,9 @@ void cfg80211_release_pmsr(struct wireless_dev *wdev, u32 portid);
void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev);
void cfg80211_pmsr_free_wk(struct work_struct *work);
+void cfg80211_remove_link(struct wireless_dev *wdev, unsigned int link_id);
+void cfg80211_remove_links(struct wireless_dev *wdev);
+int cfg80211_remove_virtual_intf(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev);
+
#endif /* __NET_WIRELESS_CORE_H */
diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c
index aab43469a2f0..0878b162890a 100644
--- a/net/wireless/debugfs.c
+++ b/net/wireless/debugfs.c
@@ -65,9 +65,10 @@ static ssize_t ht40allow_map_read(struct file *file,
{
struct wiphy *wiphy = file->private_data;
char *buf;
- unsigned int offset = 0, buf_size = PAGE_SIZE, i, r;
+ unsigned int offset = 0, buf_size = PAGE_SIZE, i;
enum nl80211_band band;
struct ieee80211_supported_band *sband;
+ ssize_t r;
buf = kzalloc(buf_size, GFP_KERNEL);
if (!buf)
diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c
index 24e18405cdb4..2613d6ac0fda 100644
--- a/net/wireless/ethtool.c
+++ b/net/wireless/ethtool.c
@@ -10,20 +10,20 @@ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
struct device *pdev = wiphy_dev(wdev->wiphy);
if (pdev->driver)
- strlcpy(info->driver, pdev->driver->name,
+ strscpy(info->driver, pdev->driver->name,
sizeof(info->driver));
else
- strlcpy(info->driver, "N/A", sizeof(info->driver));
+ strscpy(info->driver, "N/A", sizeof(info->driver));
- strlcpy(info->version, init_utsname()->release, sizeof(info->version));
+ strscpy(info->version, init_utsname()->release, sizeof(info->version));
if (wdev->wiphy->fw_version[0])
- strlcpy(info->fw_version, wdev->wiphy->fw_version,
+ strscpy(info->fw_version, wdev->wiphy->fw_version,
sizeof(info->fw_version));
else
- strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
+ strscpy(info->fw_version, "N/A", sizeof(info->fw_version));
- strlcpy(info->bus_info, dev_name(wiphy_dev(wdev->wiphy)),
+ strscpy(info->bus_info, dev_name(wiphy_dev(wdev->wiphy)),
sizeof(info->bus_info));
}
EXPORT_SYMBOL(cfg80211_get_drvinfo);
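/*
 * Why the strlcpy() -> strscpy() conversion above is an improvement,
 * shown as a hedged sketch: strlcpy() returns strlen(src) and so always
 * walks the whole source string, while strscpy() reads at most the
 * buffer size, always NUL-terminates, and returns -E2BIG on truncation:
 */
#include <linux/string.h>
#include <linux/printk.h>
#include <linux/errno.h>

static void sketch_copy_name(char *dst, size_t dst_size, const char *src)
{
	/* dst is NUL-terminated even when src had to be truncated */
	if (strscpy(dst, src, dst_size) == -E2BIG)
		pr_debug("name truncated to %zu bytes\n", dst_size - 1);
}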
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 8f98e546becf..edd062f104f4 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -3,7 +3,7 @@
* Some IBSS support code for cfg80211.
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2020-2021 Intel Corporation
+ * Copyright (C) 2020-2022 Intel Corporation
*/
#include <linux/etherdevice.h>
@@ -28,7 +28,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid,
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC))
return;
- if (!wdev->ssid_len)
+ if (!wdev->u.ibss.ssid_len)
return;
bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, NULL, 0,
@@ -37,13 +37,13 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid,
if (WARN_ON(!bss))
return;
- if (wdev->current_bss) {
- cfg80211_unhold_bss(wdev->current_bss);
- cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub);
+ if (wdev->u.ibss.current_bss) {
+ cfg80211_unhold_bss(wdev->u.ibss.current_bss);
+ cfg80211_put_bss(wdev->wiphy, &wdev->u.ibss.current_bss->pub);
}
cfg80211_hold_bss(bss_from_pub(bss));
- wdev->current_bss = bss_from_pub(bss);
+ wdev->u.ibss.current_bss = bss_from_pub(bss);
if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP))
cfg80211_upload_connect_keys(wdev);
@@ -96,7 +96,7 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
lockdep_assert_held(&rdev->wiphy.mtx);
ASSERT_WDEV_LOCK(wdev);
- if (wdev->ssid_len)
+ if (wdev->u.ibss.ssid_len)
return -EALREADY;
if (!params->basic_rates) {
@@ -131,9 +131,7 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
kfree_sensitive(wdev->connect_keys);
wdev->connect_keys = connkeys;
- wdev->ibss_fixed = params->channel_fixed;
- wdev->ibss_dfs_possible = params->userspace_handles_dfs;
- wdev->chandef = params->chandef;
+ wdev->u.ibss.chandef = params->chandef;
if (connkeys) {
params->wep_keys = connkeys->params;
params->wep_tx_key = connkeys->def;
@@ -148,8 +146,8 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
return err;
}
- memcpy(wdev->ssid, params->ssid, params->ssid_len);
- wdev->ssid_len = params->ssid_len;
+ memcpy(wdev->u.ibss.ssid, params->ssid, params->ssid_len);
+ wdev->u.ibss.ssid_len = params->ssid_len;
return 0;
}
@@ -173,16 +171,16 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext)
*/
if (rdev->ops->del_key)
for (i = 0; i < 6; i++)
- rdev_del_key(rdev, dev, i, false, NULL);
+ rdev_del_key(rdev, dev, -1, i, false, NULL);
- if (wdev->current_bss) {
- cfg80211_unhold_bss(wdev->current_bss);
- cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub);
+ if (wdev->u.ibss.current_bss) {
+ cfg80211_unhold_bss(wdev->u.ibss.current_bss);
+ cfg80211_put_bss(wdev->wiphy, &wdev->u.ibss.current_bss->pub);
}
- wdev->current_bss = NULL;
- wdev->ssid_len = 0;
- memset(&wdev->chandef, 0, sizeof(wdev->chandef));
+ wdev->u.ibss.current_bss = NULL;
+ wdev->u.ibss.ssid_len = 0;
+ memset(&wdev->u.ibss.chandef, 0, sizeof(wdev->u.ibss.chandef));
#ifdef CONFIG_CFG80211_WEXT
if (!nowext)
wdev->wext.ibss.ssid_len = 0;
@@ -207,7 +205,7 @@ int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev,
ASSERT_WDEV_LOCK(wdev);
- if (!wdev->ssid_len)
+ if (!wdev->u.ibss.ssid_len)
return -ENOLINK;
err = rdev_leave_ibss(rdev, dev);
@@ -341,7 +339,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev,
wdev_lock(wdev);
err = 0;
- if (wdev->ssid_len)
+ if (wdev->u.ibss.ssid_len)
err = __cfg80211_leave_ibss(rdev, dev, true);
wdev_unlock(wdev);
@@ -376,8 +374,8 @@ int cfg80211_ibss_wext_giwfreq(struct net_device *dev,
return -EINVAL;
wdev_lock(wdev);
- if (wdev->current_bss)
- chan = wdev->current_bss->pub.channel;
+ if (wdev->u.ibss.current_bss)
+ chan = wdev->u.ibss.current_bss->pub.channel;
else if (wdev->wext.ibss.chandef.chan)
chan = wdev->wext.ibss.chandef.chan;
wdev_unlock(wdev);
@@ -410,7 +408,7 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev,
wdev_lock(wdev);
err = 0;
- if (wdev->ssid_len)
+ if (wdev->u.ibss.ssid_len)
err = __cfg80211_leave_ibss(rdev, dev, true);
wdev_unlock(wdev);
@@ -421,8 +419,8 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev,
if (len > 0 && ssid[len - 1] == '\0')
len--;
- memcpy(wdev->ssid, ssid, len);
- wdev->wext.ibss.ssid = wdev->ssid;
+ memcpy(wdev->u.ibss.ssid, ssid, len);
+ wdev->wext.ibss.ssid = wdev->u.ibss.ssid;
wdev->wext.ibss.ssid_len = len;
wdev_lock(wdev);
@@ -445,10 +443,10 @@ int cfg80211_ibss_wext_giwessid(struct net_device *dev,
data->flags = 0;
wdev_lock(wdev);
- if (wdev->ssid_len) {
+ if (wdev->u.ibss.ssid_len) {
data->flags = 1;
- data->length = wdev->ssid_len;
- memcpy(ssid, wdev->ssid, data->length);
+ data->length = wdev->u.ibss.ssid_len;
+ memcpy(ssid, wdev->u.ibss.ssid, data->length);
} else if (wdev->wext.ibss.ssid && wdev->wext.ibss.ssid_len) {
data->flags = 1;
data->length = wdev->wext.ibss.ssid_len;
@@ -496,7 +494,7 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev,
wdev_lock(wdev);
err = 0;
- if (wdev->ssid_len)
+ if (wdev->u.ibss.ssid_len)
err = __cfg80211_leave_ibss(rdev, dev, true);
wdev_unlock(wdev);
@@ -529,8 +527,9 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev,
ap_addr->sa_family = ARPHRD_ETHER;
wdev_lock(wdev);
- if (wdev->current_bss)
- memcpy(ap_addr->sa_data, wdev->current_bss->pub.bssid, ETH_ALEN);
+ if (wdev->u.ibss.current_bss)
+ memcpy(ap_addr->sa_data, wdev->u.ibss.current_bss->pub.bssid,
+ ETH_ALEN);
else if (wdev->wext.ibss.bssid)
memcpy(ap_addr->sa_data, wdev->wext.ibss.bssid, ETH_ALEN);
else
diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c
index 6a5f08f7491e..cca5e1cf089e 100644
--- a/net/wireless/lib80211_crypt_ccmp.c
+++ b/net/wireless/lib80211_crypt_ccmp.c
@@ -136,7 +136,7 @@ static int ccmp_init_iv_and_aad(const struct ieee80211_hdr *hdr,
pos = (u8 *) hdr;
aad[0] = pos[0] & 0x8f;
aad[1] = pos[1] & 0xc7;
- memcpy(aad + 2, hdr->addr1, 3 * ETH_ALEN);
+ memcpy(aad + 2, &hdr->addrs, 3 * ETH_ALEN);
pos = (u8 *) & hdr->seq_ctrl;
aad[20] = pos[0] & 0x0f;
aad[21] = 0; /* all bits masked */
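/*
 * Background for the &hdr->addrs change above, as a sketch: copying
 * 3 * ETH_ALEN bytes starting at hdr->addr1 spans three struct fields,
 * which FORTIFY_SOURCE bounds checking now flags. Grouping the fields
 * gives memcpy() a source object of the right size (layout assumed
 * here, simplified from struct ieee80211_hdr):
 */
#include <linux/stddef.h>	/* struct_group() */
#include <linux/types.h>

struct sketch_hdr {
	__le16 frame_control;
	__le16 duration_id;
	struct_group(addrs,
		u8 addr1[6];
		u8 addr2[6];
		u8 addr3[6];
	);
	__le16 seq_ctrl;
} __packed;

/* memcpy(aad + 2, &hdr->addrs, sizeof(hdr->addrs)) is now in-bounds */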
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index e4e363138279..59a3c5c092b1 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -1,4 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
+/*
+ * Portions
+ * Copyright (C) 2022 Intel Corporation
+ */
#include <linux/ieee80211.h>
#include <linux/export.h>
#include <net/cfg80211.h>
@@ -114,7 +118,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
setup->is_secure)
return -EOPNOTSUPP;
- if (wdev->mesh_id_len)
+ if (wdev->u.mesh.id_len)
return -EALREADY;
if (!setup->mesh_id_len)
@@ -125,7 +129,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
if (!setup->chandef.chan) {
/* if no channel explicitly given, use preset channel */
- setup->chandef = wdev->preset_chandef;
+ setup->chandef = wdev->u.mesh.preset_chandef;
}
if (!setup->chandef.chan) {
@@ -209,10 +213,10 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
err = rdev_join_mesh(rdev, dev, conf, setup);
if (!err) {
- memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len);
- wdev->mesh_id_len = setup->mesh_id_len;
- wdev->chandef = setup->chandef;
- wdev->beacon_interval = setup->beacon_interval;
+ memcpy(wdev->u.mesh.id, setup->mesh_id, setup->mesh_id_len);
+ wdev->u.mesh.id_len = setup->mesh_id_len;
+ wdev->u.mesh.chandef = setup->chandef;
+ wdev->u.mesh.beacon_interval = setup->beacon_interval;
}
return err;
@@ -241,15 +245,15 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
err = rdev_libertas_set_mesh_channel(rdev, wdev->netdev,
chandef->chan);
if (!err)
- wdev->chandef = *chandef;
+ wdev->u.mesh.chandef = *chandef;
return err;
}
- if (wdev->mesh_id_len)
+ if (wdev->u.mesh.id_len)
return -EBUSY;
- wdev->preset_chandef = *chandef;
+ wdev->u.mesh.preset_chandef = *chandef;
return 0;
}
@@ -267,15 +271,16 @@ int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
if (!rdev->ops->leave_mesh)
return -EOPNOTSUPP;
- if (!wdev->mesh_id_len)
+ if (!wdev->u.mesh.id_len)
return -ENOTCONN;
err = rdev_leave_mesh(rdev, dev);
if (!err) {
wdev->conn_owner_nlportid = 0;
- wdev->mesh_id_len = 0;
- wdev->beacon_interval = 0;
- memset(&wdev->chandef, 0, sizeof(wdev->chandef));
+ wdev->u.mesh.id_len = 0;
+ wdev->u.mesh.beacon_interval = 0;
+ memset(&wdev->u.mesh.chandef, 0,
+ sizeof(wdev->u.mesh.chandef));
rdev_set_qos_map(rdev, dev, NULL);
cfg80211_sched_dfs_chan_update(rdev);
}
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index c8155a483ec2..581df7f4c524 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -4,7 +4,7 @@
*
* Copyright (c) 2009, Jouni Malinen <j@w1.fi>
* Copyright (c) 2015 Intel Deutschland GmbH
- * Copyright (C) 2019-2020 Intel Corporation
+ * Copyright (C) 2019-2020, 2022 Intel Corporation
*/
#include <linux/kernel.h>
@@ -21,36 +21,50 @@
#include "rdev-ops.h"
-void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss,
- const u8 *buf, size_t len, int uapsd_queues,
- const u8 *req_ies, size_t req_ies_len)
+void cfg80211_rx_assoc_resp(struct net_device *dev,
+ struct cfg80211_rx_assoc_resp *data)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
- struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
- struct cfg80211_connect_resp_params cr;
- const u8 *resp_ie = mgmt->u.assoc_resp.variable;
- size_t resp_ie_len = len - offsetof(struct ieee80211_mgmt,
- u.assoc_resp.variable);
-
- if (bss->channel->band == NL80211_BAND_S1GHZ) {
- resp_ie = (u8 *)&mgmt->u.s1g_assoc_resp.variable;
- resp_ie_len = len - offsetof(struct ieee80211_mgmt,
- u.s1g_assoc_resp.variable);
- }
+ struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)data->buf;
+ struct cfg80211_connect_resp_params cr = {
+ .timeout_reason = NL80211_TIMEOUT_UNSPECIFIED,
+ .req_ie = data->req_ies,
+ .req_ie_len = data->req_ies_len,
+ .resp_ie = mgmt->u.assoc_resp.variable,
+ .resp_ie_len = data->len -
+ offsetof(struct ieee80211_mgmt,
+ u.assoc_resp.variable),
+ .status = le16_to_cpu(mgmt->u.assoc_resp.status_code),
+ .ap_mld_addr = data->ap_mld_addr,
+ };
+ unsigned int link_id;
- memset(&cr, 0, sizeof(cr));
- cr.status = (int)le16_to_cpu(mgmt->u.assoc_resp.status_code);
- cr.bssid = mgmt->bssid;
- cr.bss = bss;
- cr.req_ie = req_ies;
- cr.req_ie_len = req_ies_len;
- cr.resp_ie = resp_ie;
- cr.resp_ie_len = resp_ie_len;
- cr.timeout_reason = NL80211_TIMEOUT_UNSPECIFIED;
+ for (link_id = 0; link_id < ARRAY_SIZE(data->links); link_id++) {
+ cr.links[link_id].bss = data->links[link_id].bss;
+ if (!cr.links[link_id].bss)
+ continue;
+ cr.links[link_id].bssid = data->links[link_id].bss->bssid;
+ cr.links[link_id].addr = data->links[link_id].addr;
+ /* need to have local link addresses for MLO connections */
+ WARN_ON(cr.ap_mld_addr && !cr.links[link_id].addr);
+
+ BUG_ON(!cr.links[link_id].bss->channel);
+
+ if (cr.links[link_id].bss->channel->band == NL80211_BAND_S1GHZ) {
+ WARN_ON(link_id);
+ cr.resp_ie = (u8 *)&mgmt->u.s1g_assoc_resp.variable;
+ cr.resp_ie_len = data->len -
+ offsetof(struct ieee80211_mgmt,
+ u.s1g_assoc_resp.variable);
+ }
- trace_cfg80211_send_rx_assoc(dev, bss);
+ if (cr.ap_mld_addr)
+ cr.valid_links |= BIT(link_id);
+ }
+
+ trace_cfg80211_send_rx_assoc(dev, data);
/*
* This is a bit of a hack, we don't notify userspace of
@@ -59,13 +73,19 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss,
* frame instead of reassoc.
*/
if (cfg80211_sme_rx_assoc_resp(wdev, cr.status)) {
- cfg80211_unhold_bss(bss_from_pub(bss));
- cfg80211_put_bss(wiphy, bss);
+ for (link_id = 0; link_id < ARRAY_SIZE(data->links); link_id++) {
+ struct cfg80211_bss *bss = data->links[link_id].bss;
+
+ if (!bss)
+ continue;
+
+ cfg80211_unhold_bss(bss_from_pub(bss));
+ cfg80211_put_bss(wiphy, bss);
+ }
return;
}
- nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL, uapsd_queues,
- req_ies, req_ies_len);
+ nl80211_send_rx_assoc(rdev, dev, data);
/* update current_bss etc., consumes the bss reference */
__cfg80211_connect_result(dev, &cr, cr.status == WLAN_STATUS_SUCCESS);
}
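/*
 * A condensed model (assumption, simplified types) of how the loop in
 * cfg80211_rx_assoc_resp() above builds cr.valid_links: a bit is set
 * for every link that carried a BSS, but only when the response is for
 * an MLD (i.e. an AP MLD address was given):
 */
#include <stdbool.h>
#include <stddef.h>

static unsigned short model_valid_links(void *const *link_bss, size_t n,
					bool have_mld_addr)
{
	unsigned short valid_links = 0;
	size_t i;

	for (i = 0; i < n; i++)
		if (have_mld_addr && link_bss[i])
			valid_links |= 1U << i;
	return valid_links;
}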
@@ -92,8 +112,7 @@ static void cfg80211_process_deauth(struct wireless_dev *wdev,
nl80211_send_deauth(rdev, wdev->netdev, buf, len, reconnect, GFP_KERNEL);
- if (!wdev->current_bss ||
- !ether_addr_equal(wdev->current_bss->pub.bssid, bssid))
+ if (!wdev->connected || !ether_addr_equal(wdev->u.client.connected_addr, bssid))
return;
__cfg80211_disconnected(wdev->netdev, NULL, 0, reason_code, from_ap);
@@ -113,8 +132,8 @@ static void cfg80211_process_disassoc(struct wireless_dev *wdev,
nl80211_send_disassoc(rdev, wdev->netdev, buf, len, reconnect,
GFP_KERNEL);
- if (WARN_ON(!wdev->current_bss ||
- !ether_addr_equal(wdev->current_bss->pub.bssid, bssid)))
+ if (WARN_ON(!wdev->connected ||
+ !ether_addr_equal(wdev->u.client.connected_addr, bssid)))
return;
__cfg80211_disconnected(wdev->netdev, NULL, 0, reason_code, from_ap);
@@ -155,33 +174,35 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr)
}
EXPORT_SYMBOL(cfg80211_auth_timeout);
-void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss)
+void cfg80211_assoc_failure(struct net_device *dev,
+ struct cfg80211_assoc_failure *data)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+ const u8 *addr = data->ap_mld_addr ?: data->bss[0]->bssid;
+ int i;
- trace_cfg80211_send_assoc_timeout(dev, bss->bssid);
-
- nl80211_send_assoc_timeout(rdev, dev, bss->bssid, GFP_KERNEL);
- cfg80211_sme_assoc_timeout(wdev);
+ trace_cfg80211_send_assoc_failure(dev, data);
- cfg80211_unhold_bss(bss_from_pub(bss));
- cfg80211_put_bss(wiphy, bss);
-}
-EXPORT_SYMBOL(cfg80211_assoc_timeout);
+ if (data->timeout) {
+ nl80211_send_assoc_timeout(rdev, dev, addr, GFP_KERNEL);
+ cfg80211_sme_assoc_timeout(wdev);
+ } else {
+ cfg80211_sme_abandon_assoc(wdev);
+ }
-void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss)
-{
- struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct wiphy *wiphy = wdev->wiphy;
+ for (i = 0; i < ARRAY_SIZE(data->bss); i++) {
+ struct cfg80211_bss *bss = data->bss[i];
- cfg80211_sme_abandon_assoc(wdev);
+ if (!bss)
+ continue;
- cfg80211_unhold_bss(bss_from_pub(bss));
- cfg80211_put_bss(wiphy, bss);
+ cfg80211_unhold_bss(bss_from_pub(bss));
+ cfg80211_put_bss(wiphy, bss);
+ }
}
-EXPORT_SYMBOL(cfg80211_abandon_assoc);
+EXPORT_SYMBOL(cfg80211_assoc_failure);
void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len,
bool reconnect)
@@ -233,47 +254,30 @@ EXPORT_SYMBOL(cfg80211_michael_mic_failure);
/* some MLME handling for userspace SME */
int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
struct net_device *dev,
- struct ieee80211_channel *chan,
- enum nl80211_auth_type auth_type,
- const u8 *bssid,
- const u8 *ssid, int ssid_len,
- const u8 *ie, int ie_len,
- const u8 *key, int key_len, int key_idx,
- const u8 *auth_data, int auth_data_len)
+ struct cfg80211_auth_request *req)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_auth_request req = {
- .ie = ie,
- .ie_len = ie_len,
- .auth_data = auth_data,
- .auth_data_len = auth_data_len,
- .auth_type = auth_type,
- .key = key,
- .key_len = key_len,
- .key_idx = key_idx,
- };
- int err;
ASSERT_WDEV_LOCK(wdev);
- if (auth_type == NL80211_AUTHTYPE_SHARED_KEY)
- if (!key || !key_len || key_idx < 0 || key_idx > 3)
- return -EINVAL;
+ if (!req->bss)
+ return -ENOENT;
- if (wdev->current_bss &&
- ether_addr_equal(bssid, wdev->current_bss->pub.bssid))
- return -EALREADY;
+ if (req->link_id >= 0 &&
+ !(wdev->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO))
+ return -EINVAL;
- req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
- IEEE80211_BSS_TYPE_ESS,
- IEEE80211_PRIVACY_ANY);
- if (!req.bss)
- return -ENOENT;
+ if (req->auth_type == NL80211_AUTHTYPE_SHARED_KEY) {
+ if (!req->key || !req->key_len ||
+ req->key_idx < 0 || req->key_idx > 3)
+ return -EINVAL;
+ }
- err = rdev_auth(rdev, dev, &req);
+ if (wdev->connected &&
+ ether_addr_equal(req->bss->bssid, wdev->u.client.connected_addr))
+ return -EALREADY;
- cfg80211_put_bss(&rdev->wiphy, req.bss);
- return err;
+ return rdev_auth(rdev, dev, req);
}
/* Do a logical ht_capa &= ht_capa_mask. */
@@ -310,21 +314,28 @@ void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa,
p1[i] &= p2[i];
}
+/* Note: the caller must call cfg80211_put_bss() regardless of the result */
int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
struct net_device *dev,
- struct ieee80211_channel *chan,
- const u8 *bssid,
- const u8 *ssid, int ssid_len,
struct cfg80211_assoc_request *req)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- int err;
+ int err, i, j;
ASSERT_WDEV_LOCK(wdev);
- if (wdev->current_bss &&
- (!req->prev_bssid || !ether_addr_equal(wdev->current_bss->pub.bssid,
- req->prev_bssid)))
+ for (i = 1; i < ARRAY_SIZE(req->links); i++) {
+ if (!req->links[i].bss)
+ continue;
+ for (j = 0; j < i; j++) {
+ if (req->links[i].bss == req->links[j].bss)
+ return -EINVAL;
+ }
+ }
+
+ if (wdev->connected &&
+ (!req->prev_bssid ||
+ !ether_addr_equal(wdev->u.client.connected_addr, req->prev_bssid)))
return -EALREADY;
cfg80211_oper_and_ht_capa(&req->ht_capa_mask,
@@ -332,18 +343,22 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
cfg80211_oper_and_vht_capa(&req->vht_capa_mask,
rdev->wiphy.vht_capa_mod_mask);
- req->bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
- IEEE80211_BSS_TYPE_ESS,
- IEEE80211_PRIVACY_ANY);
- if (!req->bss)
- return -ENOENT;
-
err = rdev_assoc(rdev, dev, req);
- if (!err)
- cfg80211_hold_bss(bss_from_pub(req->bss));
- else
- cfg80211_put_bss(&rdev->wiphy, req->bss);
+ if (!err) {
+ int link_id;
+
+ if (req->bss) {
+ cfg80211_ref_bss(&rdev->wiphy, req->bss);
+ cfg80211_hold_bss(bss_from_pub(req->bss));
+ }
+ for (link_id = 0; link_id < ARRAY_SIZE(req->links); link_id++) {
+ if (!req->links[link_id].bss)
+ continue;
+ cfg80211_ref_bss(&rdev->wiphy, req->links[link_id].bss);
+ cfg80211_hold_bss(bss_from_pub(req->links[link_id].bss));
+ }
+ }
return err;
}
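/*
 * A standalone model (assumption, void pointers instead of the real
 * struct cfg80211_bss) of the duplicate check added to
 * cfg80211_mlme_assoc() above: an association request must not list
 * the same BSS on two different links:
 */
#include <stdbool.h>
#include <stddef.h>

static bool model_links_distinct(void *const *bss, size_t n)
{
	size_t i, j;

	for (i = 1; i < n; i++) {
		if (!bss[i])
			continue;
		for (j = 0; j < i; j++)
			if (bss[i] == bss[j])
				return false;
	}
	return true;
}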
@@ -364,20 +379,20 @@ int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
ASSERT_WDEV_LOCK(wdev);
if (local_state_change &&
- (!wdev->current_bss ||
- !ether_addr_equal(wdev->current_bss->pub.bssid, bssid)))
+ (!wdev->connected ||
+ !ether_addr_equal(wdev->u.client.connected_addr, bssid)))
return 0;
if (ether_addr_equal(wdev->disconnect_bssid, bssid) ||
- (wdev->current_bss &&
- ether_addr_equal(wdev->current_bss->pub.bssid, bssid)))
+ (wdev->connected &&
+ ether_addr_equal(wdev->u.client.connected_addr, bssid)))
wdev->conn_owner_nlportid = 0;
return rdev_deauth(rdev, dev, &req);
}
int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
- struct net_device *dev, const u8 *bssid,
+ struct net_device *dev, const u8 *ap_addr,
const u8 *ie, int ie_len, u16 reason,
bool local_state_change)
{
@@ -387,17 +402,16 @@ int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
.local_state_change = local_state_change,
.ie = ie,
.ie_len = ie_len,
+ .ap_addr = ap_addr,
};
int err;
ASSERT_WDEV_LOCK(wdev);
- if (!wdev->current_bss)
+ if (!wdev->connected)
return -ENOTCONN;
- if (ether_addr_equal(wdev->current_bss->pub.bssid, bssid))
- req.bss = &wdev->current_bss->pub;
- else
+ if (memcmp(wdev->u.client.connected_addr, ap_addr, ETH_ALEN))
return -ENOTCONN;
err = rdev_disassoc(rdev, dev, &req);
@@ -405,7 +419,7 @@ int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
return err;
/* driver should have reported the disassoc */
- WARN_ON(wdev->current_bss);
+ WARN_ON(wdev->connected);
return 0;
}
@@ -420,10 +434,10 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev,
if (!rdev->ops->deauth)
return;
- if (!wdev->current_bss)
+ if (!wdev->connected)
return;
- memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN);
+ memcpy(bssid, wdev->u.client.connected_addr, ETH_ALEN);
cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0,
WLAN_REASON_DEAUTH_LEAVING, false);
}
@@ -643,6 +657,18 @@ void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
cfg80211_mgmt_registrations_update(wdev);
}
+static bool cfg80211_allowed_address(struct wireless_dev *wdev, const u8 *addr)
+{
+ int i;
+
+ for_each_valid_link(wdev, i) {
+ if (ether_addr_equal(addr, wdev->links[i].addr))
+ return true;
+ }
+
+ return ether_addr_equal(addr, wdev_address(wdev));
+}
+
int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev,
struct cfg80211_mgmt_tx_params *params, u64 *cookie)
@@ -676,28 +702,34 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
switch (wdev->iftype) {
case NL80211_IFTYPE_ADHOC:
+ /*
+ * the check for the IBSS DA must be done by the
+ * driver, as cfg80211 doesn't track the stations
+ */
+ if (!wdev->u.ibss.current_bss ||
+ !ether_addr_equal(wdev->u.ibss.current_bss->pub.bssid,
+ mgmt->bssid)) {
+ err = -ENOTCONN;
+ break;
+ }
+ break;
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_P2P_CLIENT:
- if (!wdev->current_bss) {
+ if (!wdev->connected) {
err = -ENOTCONN;
break;
}
- if (!ether_addr_equal(wdev->current_bss->pub.bssid,
+ /* FIXME: MLD may address this differently */
+
+ if (!ether_addr_equal(wdev->u.client.connected_addr,
mgmt->bssid)) {
err = -ENOTCONN;
break;
}
- /*
- * check for IBSS DA must be done by driver as
- * cfg80211 doesn't track the stations
- */
- if (wdev->iftype == NL80211_IFTYPE_ADHOC)
- break;
-
/* for station, check that DA is the AP */
- if (!ether_addr_equal(wdev->current_bss->pub.bssid,
+ if (!ether_addr_equal(wdev->u.client.connected_addr,
mgmt->da)) {
err = -ENOTCONN;
break;
@@ -735,7 +767,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
return err;
}
- if (!ether_addr_equal(mgmt->sa, wdev_address(wdev))) {
+ if (!cfg80211_allowed_address(wdev, mgmt->sa)) {
/* Allow random TA to be used with Public Action frames if the
* driver has indicated support for this. Otherwise, only allow
* the local address to be used.
@@ -743,31 +775,31 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
if (!ieee80211_is_action(mgmt->frame_control) ||
mgmt->u.action.category != WLAN_CATEGORY_PUBLIC)
return -EINVAL;
- if (!wdev->current_bss &&
+ if (!wdev->connected &&
!wiphy_ext_feature_isset(
&rdev->wiphy,
NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA))
return -EINVAL;
- if (wdev->current_bss &&
+ if (wdev->connected &&
!wiphy_ext_feature_isset(
&rdev->wiphy,
NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED))
return -EINVAL;
}
- /* Transmit the Action frame as requested by user space */
+ /* Transmit the management frame as requested by user space */
return rdev_mgmt_tx(rdev, wdev, params, cookie);
}
-bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm,
- const u8 *buf, size_t len, u32 flags)
+bool cfg80211_rx_mgmt_ext(struct wireless_dev *wdev,
+ struct cfg80211_rx_info *info)
{
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct cfg80211_mgmt_registration *reg;
const struct ieee80211_txrx_stypes *stypes =
&wiphy->mgmt_stypes[wdev->iftype];
- struct ieee80211_mgmt *mgmt = (void *)buf;
+ struct ieee80211_mgmt *mgmt = (void *)info->buf;
const u8 *data;
int data_len;
bool result = false;
@@ -775,7 +807,7 @@ bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm,
cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE);
u16 stype;
- trace_cfg80211_rx_mgmt(wdev, freq, sig_dbm);
+ trace_cfg80211_rx_mgmt(wdev, info);
stype = (le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE) >> 4;
if (!(stypes->rx & BIT(stype))) {
@@ -783,8 +815,8 @@ bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm,
return false;
}
- data = buf + ieee80211_hdrlen(mgmt->frame_control);
- data_len = len - ieee80211_hdrlen(mgmt->frame_control);
+ data = info->buf + ieee80211_hdrlen(mgmt->frame_control);
+ data_len = info->len - ieee80211_hdrlen(mgmt->frame_control);
spin_lock_bh(&rdev->mgmt_registrations_lock);
@@ -801,9 +833,8 @@ bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm,
/* found match! */
/* Indicate the received Action frame to user space */
- if (nl80211_send_mgmt(rdev, wdev, reg->nlportid,
- freq, sig_dbm,
- buf, len, flags, GFP_ATOMIC))
+ if (nl80211_send_mgmt(rdev, wdev, reg->nlportid, info,
+ GFP_ATOMIC))
continue;
result = true;
@@ -815,7 +846,7 @@ bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm,
trace_cfg80211_return_bool(result);
return result;
}
-EXPORT_SYMBOL(cfg80211_rx_mgmt_khz);
+EXPORT_SYMBOL(cfg80211_rx_mgmt_ext);
void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev)
{
@@ -940,14 +971,15 @@ void cfg80211_cac_event(struct net_device *netdev,
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
unsigned long timeout;
+ /* not yet supported */
+ if (wdev->valid_links)
+ return;
+
trace_cfg80211_cac_event(netdev, event);
if (WARN_ON(!wdev->cac_started && event != NL80211_RADAR_CAC_STARTED))
return;
- if (WARN_ON(!wdev->chandef.chan))
- return;
-
switch (event) {
case NL80211_RADAR_CAC_FINISHED:
timeout = wdev->cac_start_time +
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 578bff9c378b..597c52236514 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*/
#include <linux/if.h>
@@ -285,6 +285,15 @@ static int validate_ie_attr(const struct nlattr *attr,
return -EINVAL;
}
+static int validate_he_capa(const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ if (!ieee80211_he_capa_size_ok(nla_data(attr), nla_len(attr)))
+ return -EINVAL;
+
+ return 0;
+}
+
/* policy for the attributes */
static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR];
@@ -447,6 +456,12 @@ nl80211_mbssid_config_policy[NL80211_MBSSID_CONFIG_ATTR_MAX + 1] = {
[NL80211_MBSSID_CONFIG_ATTR_EMA] = { .type = NLA_FLAG },
};
+static const struct nla_policy
+nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] = {
+ [NL80211_STA_WME_UAPSD_QUEUES] = { .type = NLA_U8 },
+ [NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 },
+};
+
static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD },
[NL80211_ATTR_WIPHY] = { .type = NLA_U32 },
@@ -519,7 +534,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
.len = IEEE80211_MAX_MESH_ID_LEN },
[NL80211_ATTR_MPATH_NEXT_HOP] = NLA_POLICY_ETH_ADDR_COMPAT,
- [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 },
+ /* allow 3 for NUL-termination; we used to declare this NLA_STRING */
+ [NL80211_ATTR_REG_ALPHA2] = NLA_POLICY_RANGE(NLA_BINARY, 2, 3),
[NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED },
[NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 },
@@ -550,9 +566,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_USE_MFP] = NLA_POLICY_RANGE(NLA_U32,
NL80211_MFP_NO,
NL80211_MFP_OPTIONAL),
- [NL80211_ATTR_STA_FLAGS2] = {
- .len = sizeof(struct nl80211_sta_flag_update),
- },
+ [NL80211_ATTR_STA_FLAGS2] =
+ NLA_POLICY_EXACT_LEN_WARN(sizeof(struct nl80211_sta_flag_update)),
[NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG },
[NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 },
[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG },
@@ -605,6 +620,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr,
IEEE80211_MAX_DATA_LEN),
[NL80211_ATTR_ROAM_SUPPORT] = { .type = NLA_FLAG },
+ [NL80211_ATTR_STA_WME] = NLA_POLICY_NESTED(nl80211_sta_wme_policy),
[NL80211_ATTR_SCHED_SCAN_MATCH] = { .type = NLA_NESTED },
[NL80211_ATTR_TX_NO_CCK_RATE] = { .type = NLA_FLAG },
[NL80211_ATTR_TDLS_ACTION] = { .type = NLA_U8 },
@@ -730,9 +746,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_TXQ_MEMORY_LIMIT] = { .type = NLA_U32 },
[NL80211_ATTR_TXQ_QUANTUM] = { .type = NLA_U32 },
[NL80211_ATTR_HE_CAPABILITY] =
- NLA_POLICY_RANGE(NLA_BINARY,
- NL80211_HE_MIN_CAPABILITY_LEN,
- NL80211_HE_MAX_CAPABILITY_LEN),
+ NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_he_capa,
+ NL80211_HE_MAX_CAPABILITY_LEN),
[NL80211_ATTR_FTM_RESPONDER] =
NLA_POLICY_NESTED(nl80211_ftm_responder_policy),
[NL80211_ATTR_TIMEOUT] = NLA_POLICY_MIN(NLA_U32, 1),
@@ -778,6 +793,18 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_MBSSID_ELEMS] = { .type = NLA_NESTED },
[NL80211_ATTR_RADAR_BACKGROUND] = { .type = NLA_FLAG },
[NL80211_ATTR_AP_SETTINGS_FLAGS] = { .type = NLA_U32 },
+ [NL80211_ATTR_EHT_CAPABILITY] =
+ NLA_POLICY_RANGE(NLA_BINARY,
+ NL80211_EHT_MIN_CAPABILITY_LEN,
+ NL80211_EHT_MAX_CAPABILITY_LEN),
+ [NL80211_ATTR_DISABLE_EHT] = { .type = NLA_FLAG },
+ [NL80211_ATTR_MLO_LINKS] =
+ NLA_POLICY_NESTED_ARRAY(nl80211_policy),
+ [NL80211_ATTR_MLO_LINK_ID] =
+ NLA_POLICY_RANGE(NLA_U8, 0, IEEE80211_MLD_MAX_NUM_LINKS),
+ [NL80211_ATTR_MLD_ADDR] = NLA_POLICY_EXACT_LEN(ETH_ALEN),
+ [NL80211_ATTR_MLO_SUPPORT] = { .type = NLA_FLAG },
+ [NL80211_ATTR_MAX_NUM_AKM_SUITES] = { .type = NLA_REJECT },
};
/* policy for the key attributes */
@@ -1148,6 +1175,12 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy,
if ((chan->flags & IEEE80211_CHAN_16MHZ) &&
nla_put_flag(msg, NL80211_FREQUENCY_ATTR_16MHZ))
goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_NO_320MHZ) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_320MHZ))
+ goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_NO_EHT) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_EHT))
+ goto nla_put_failure;
}
if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
@@ -1205,6 +1238,37 @@ static bool nl80211_put_txq_stats(struct sk_buff *msg,
/* netlink command implementations */
+/**
+ * nl80211_link_id - return link ID
+ * @attrs: attributes to look at
+ *
+ * Returns: the link ID or 0 if not given
+ *
+ * Note that this function does not validate the link ID
+ * against the links that were actually added, so it must
+ * only be called from ops with %NL80211_FLAG_MLO_VALID_LINK_ID
+ * or where additional validation is done.
+ */
+static unsigned int nl80211_link_id(struct nlattr **attrs)
+{
+ struct nlattr *linkid = attrs[NL80211_ATTR_MLO_LINK_ID];
+
+ if (!linkid)
+ return 0;
+
+ return nla_get_u8(linkid);
+}
+
+static int nl80211_link_id_or_invalid(struct nlattr **attrs)
+{
+ struct nlattr *linkid = attrs[NL80211_ATTR_MLO_LINK_ID];
+
+ if (!linkid)
+ return -1;
+
+ return nla_get_u8(linkid);
+}
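/*
 * A hedged usage sketch for the two helpers above; example_op() and the
 * do_*_path() helpers are hypothetical and assume the surrounding
 * nl80211.c context. Ops flagged with %NL80211_FLAG_MLO_VALID_LINK_ID
 * may use nl80211_link_id() directly, while other code must handle the
 * "no link ID given" case via the -1-returning variant:
 */
static int example_op(struct genl_info *info)
{
	int link_id = nl80211_link_id_or_invalid(info->attrs);

	if (link_id < 0)
		return do_non_mlo_path(info);	/* hypothetical */

	return do_mlo_path(info, link_id);	/* hypothetical */
}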
+
struct key_parse {
struct key_params p;
int idx;
@@ -1476,11 +1540,14 @@ static int nl80211_key_allowed(struct wireless_dev *wdev)
case NL80211_IFTYPE_MESH_POINT:
break;
case NL80211_IFTYPE_ADHOC:
+ if (wdev->u.ibss.current_bss)
+ return 0;
+ return -ENOLINK;
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_P2P_CLIENT:
- if (!wdev->current_bss)
- return -ENOLINK;
- break;
+ if (wdev->connected)
+ return 0;
+ return -ENOLINK;
case NL80211_IFTYPE_UNSPECIFIED:
case NL80211_IFTYPE_OCB:
case NL80211_IFTYPE_MONITOR:
@@ -1729,6 +1796,7 @@ nl80211_send_iftype_data(struct sk_buff *msg,
const struct ieee80211_sband_iftype_data *iftdata)
{
const struct ieee80211_sta_he_cap *he_cap = &iftdata->he_cap;
+ const struct ieee80211_sta_eht_cap *eht_cap = &iftdata->eht_cap;
if (nl80211_put_iftypes(msg, NL80211_BAND_IFTYPE_ATTR_IFTYPES,
iftdata->types_mask))
@@ -1749,6 +1817,37 @@ nl80211_send_iftype_data(struct sk_buff *msg,
return -ENOBUFS;
}
+ if (eht_cap->has_eht && he_cap->has_he) {
+ u8 mcs_nss_size, ppe_thresh_size;
+ u16 ppe_thres_hdr;
+ bool is_ap;
+
+ is_ap = iftdata->types_mask & BIT(NL80211_IFTYPE_AP) ||
+ iftdata->types_mask & BIT(NL80211_IFTYPE_P2P_GO);
+
+ mcs_nss_size =
+ ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem,
+ &eht_cap->eht_cap_elem,
+ is_ap);
+
+ ppe_thres_hdr = get_unaligned_le16(&eht_cap->eht_ppe_thres[0]);
+ ppe_thresh_size =
+ ieee80211_eht_ppe_size(ppe_thres_hdr,
+ eht_cap->eht_cap_elem.phy_cap_info);
+
+ if (nla_put(msg, NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MAC,
+ sizeof(eht_cap->eht_cap_elem.mac_cap_info),
+ eht_cap->eht_cap_elem.mac_cap_info) ||
+ nla_put(msg, NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PHY,
+ sizeof(eht_cap->eht_cap_elem.phy_cap_info),
+ eht_cap->eht_cap_elem.phy_cap_info) ||
+ nla_put(msg, NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MCS_SET,
+ mcs_nss_size, &eht_cap->eht_mcs_nss_supp) ||
+ nla_put(msg, NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PPE,
+ ppe_thresh_size, eht_cap->eht_ppe_thres))
+ return -ENOBUFS;
+ }
+
if (sband->band == NL80211_BAND_6GHZ &&
nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA,
sizeof(iftdata->he_6ghz_capa),
@@ -2778,6 +2877,15 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
capab->extended_capabilities_mask))
goto nla_put_failure;
+ if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_MLO &&
+ (nla_put_u16(msg,
+ NL80211_ATTR_EML_CAPABILITY,
+ capab->eml_capabilities) ||
+ nla_put_u16(msg,
+ NL80211_ATTR_MLD_CAPA_AND_OPS,
+ capab->mld_capa_and_ops)))
+ goto nla_put_failure;
+
nla_nest_end(msg, nested_ext_capab);
if (state->split)
break;
@@ -2844,6 +2952,13 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
if (nl80211_put_mbssid_support(&rdev->wiphy, msg))
goto nla_put_failure;
+ if (nla_put_u16(msg, NL80211_ATTR_MAX_NUM_AKM_SUITES,
+ rdev->wiphy.max_num_akm_suites))
+ goto nla_put_failure;
+
+ if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_MLO)
+ nla_put_flag(msg, NL80211_ATTR_MLO_SUPPORT);
+
/* done */
state->split_start = 0;
break;
@@ -3127,6 +3242,15 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
} else if (attrs[NL80211_ATTR_CHANNEL_WIDTH]) {
chandef->width =
nla_get_u32(attrs[NL80211_ATTR_CHANNEL_WIDTH]);
+ if (chandef->chan->band == NL80211_BAND_S1GHZ) {
+ /* user input error: the given channel width doesn't match the channel */
+ if (chandef->width != ieee80211_s1g_channel_width(chandef->chan)) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ attrs[NL80211_ATTR_CHANNEL_WIDTH],
+ "bad channel width");
+ return -EINVAL;
+ }
+ }
if (attrs[NL80211_ATTR_CENTER_FREQ1]) {
chandef->center_freq1 =
nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]);
@@ -3176,12 +3300,14 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
struct net_device *dev,
- struct genl_info *info)
+ struct genl_info *info,
+ int _link_id)
{
struct cfg80211_chan_def chandef;
int result;
enum nl80211_iftype iftype = NL80211_IFTYPE_MONITOR;
struct wireless_dev *wdev = NULL;
+ int link_id = _link_id;
if (dev)
wdev = dev->ieee80211_ptr;
@@ -3190,6 +3316,12 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
if (wdev)
iftype = wdev->iftype;
+ if (link_id < 0) {
+ if (wdev && wdev->valid_links)
+ return -EINVAL;
+ link_id = 0;
+ }
+
result = nl80211_parse_chandef(rdev, info, &chandef);
if (result)
return result;
@@ -3198,49 +3330,53 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &chandef,
- iftype)) {
- result = -EINVAL;
- break;
- }
- if (wdev->beacon_interval) {
+ iftype))
+ return -EINVAL;
+ if (wdev->links[link_id].ap.beacon_interval) {
+ struct ieee80211_channel *cur_chan;
+
if (!dev || !rdev->ops->set_ap_chanwidth ||
!(rdev->wiphy.features &
- NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE)) {
- result = -EBUSY;
- break;
- }
+ NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE))
+ return -EBUSY;
/* Only allow dynamic channel width changes */
- if (chandef.chan != wdev->preset_chandef.chan) {
- result = -EBUSY;
- break;
- }
- result = rdev_set_ap_chanwidth(rdev, dev, &chandef);
+ cur_chan = wdev->links[link_id].ap.chandef.chan;
+ if (chandef.chan != cur_chan)
+ return -EBUSY;
+
+ result = rdev_set_ap_chanwidth(rdev, dev, link_id,
+ &chandef);
if (result)
- break;
+ return result;
+ wdev->links[link_id].ap.chandef = chandef;
+ } else {
+ wdev->u.ap.preset_chandef = chandef;
}
- wdev->preset_chandef = chandef;
- result = 0;
- break;
+ return 0;
case NL80211_IFTYPE_MESH_POINT:
- result = cfg80211_set_mesh_channel(rdev, wdev, &chandef);
- break;
+ return cfg80211_set_mesh_channel(rdev, wdev, &chandef);
case NL80211_IFTYPE_MONITOR:
- result = cfg80211_set_monitor_channel(rdev, &chandef);
- break;
+ return cfg80211_set_monitor_channel(rdev, &chandef);
default:
- result = -EINVAL;
+ break;
}
- return result;
+ return -EINVAL;
}
static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ int link_id = nl80211_link_id_or_invalid(info->attrs);
struct net_device *netdev = info->user_ptr[1];
+ int ret;
- return __nl80211_set_channel(rdev, netdev, info);
+ wdev_lock(netdev->ieee80211_ptr);
+ ret = __nl80211_set_channel(rdev, netdev, info, link_id);
+ wdev_unlock(netdev->ieee80211_ptr);
+
+ return ret;
}
static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
@@ -3344,18 +3480,40 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
if (result)
goto out;
- result = rdev_set_txq_params(rdev, netdev,
- &txq_params);
+ txq_params.link_id =
+ nl80211_link_id_or_invalid(info->attrs);
+
+ wdev_lock(netdev->ieee80211_ptr);
+ if (txq_params.link_id >= 0 &&
+ !(netdev->ieee80211_ptr->valid_links &
+ BIT(txq_params.link_id)))
+ result = -ENOLINK;
+ else if (txq_params.link_id >= 0 &&
+ !netdev->ieee80211_ptr->valid_links)
+ result = -EINVAL;
+ else
+ result = rdev_set_txq_params(rdev, netdev,
+ &txq_params);
+ wdev_unlock(netdev->ieee80211_ptr);
if (result)
goto out;
}
}
if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
- result = __nl80211_set_channel(
- rdev,
- nl80211_can_set_dev_channel(wdev) ? netdev : NULL,
- info);
+ int link_id = nl80211_link_id_or_invalid(info->attrs);
+
+ if (wdev) {
+ wdev_lock(wdev);
+ result = __nl80211_set_channel(
+ rdev,
+ nl80211_can_set_dev_channel(wdev) ? netdev : NULL,
+ info, link_id);
+ wdev_unlock(wdev);
+ } else {
+ result = __nl80211_set_channel(rdev, netdev, info, link_id);
+ }
+
if (result)
goto out;
}
@@ -3640,15 +3798,13 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
nla_put_u8(msg, NL80211_ATTR_4ADDR, wdev->use_4addr))
goto nla_put_failure;
- if (rdev->ops->get_channel) {
- int ret;
+ if (rdev->ops->get_channel && !wdev->valid_links) {
struct cfg80211_chan_def chandef = {};
+ int ret;
- ret = rdev_get_channel(rdev, wdev, &chandef);
- if (ret == 0) {
- if (nl80211_send_chandef(msg, &chandef))
- goto nla_put_failure;
- }
+ ret = rdev_get_channel(rdev, wdev, 0, &chandef);
+ if (ret == 0 && nl80211_send_chandef(msg, &chandef))
+ goto nla_put_failure;
}
if (rdev->ops->get_tx_power) {
@@ -3664,27 +3820,25 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
wdev_lock(wdev);
switch (wdev->iftype) {
case NL80211_IFTYPE_AP:
- if (wdev->ssid_len &&
- nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
+ case NL80211_IFTYPE_P2P_GO:
+ if (wdev->u.ap.ssid_len &&
+ nla_put(msg, NL80211_ATTR_SSID, wdev->u.ap.ssid_len,
+ wdev->u.ap.ssid))
goto nla_put_failure_locked;
break;
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_P2P_CLIENT:
- case NL80211_IFTYPE_ADHOC: {
- const struct element *ssid_elem;
-
- if (!wdev->current_bss)
- break;
- rcu_read_lock();
- ssid_elem = ieee80211_bss_get_elem(&wdev->current_bss->pub,
- WLAN_EID_SSID);
- if (ssid_elem &&
- nla_put(msg, NL80211_ATTR_SSID, ssid_elem->datalen,
- ssid_elem->data))
- goto nla_put_failure_rcu_locked;
- rcu_read_unlock();
+ if (wdev->u.client.ssid_len &&
+ nla_put(msg, NL80211_ATTR_SSID, wdev->u.client.ssid_len,
+ wdev->u.client.ssid))
+ goto nla_put_failure_locked;
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ if (wdev->u.ibss.ssid_len &&
+ nla_put(msg, NL80211_ATTR_SSID, wdev->u.ibss.ssid_len,
+ wdev->u.ibss.ssid))
+ goto nla_put_failure_locked;
break;
- }
default:
/* nothing */
break;
@@ -3701,11 +3855,38 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
goto nla_put_failure;
}
+ if (wdev->valid_links) {
+ unsigned int link_id;
+ struct nlattr *links = nla_nest_start(msg,
+ NL80211_ATTR_MLO_LINKS);
+
+ if (!links)
+ goto nla_put_failure;
+
+ for_each_valid_link(wdev, link_id) {
+ struct nlattr *link = nla_nest_start(msg, link_id + 1);
+ struct cfg80211_chan_def chandef = {};
+ int ret;
+
+ if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id))
+ goto nla_put_failure;
+ if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN,
+ wdev->links[link_id].addr))
+ goto nla_put_failure;
+
+ ret = rdev_get_channel(rdev, wdev, link_id, &chandef);
+ if (ret == 0 && nl80211_send_chandef(msg, &chandef))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, link);
+ }
+
+ nla_nest_end(msg, links);
+ }
+
genlmsg_end(msg, hdr);
return 0;
- nla_put_failure_rcu_locked:
- rcu_read_unlock();
nla_put_failure_locked:
wdev_unlock(wdev);
nla_put_failure:
@@ -3957,10 +4138,11 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
wdev_lock(wdev);
BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN !=
IEEE80211_MAX_MESH_ID_LEN);
- wdev->mesh_id_up_len =
+ wdev->u.mesh.id_up_len =
nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
- memcpy(wdev->ssid, nla_data(info->attrs[NL80211_ATTR_MESH_ID]),
- wdev->mesh_id_up_len);
+ memcpy(wdev->u.mesh.id,
+ nla_data(info->attrs[NL80211_ATTR_MESH_ID]),
+ wdev->u.mesh.id_up_len);
wdev_unlock(wdev);
}
@@ -4065,10 +4247,11 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
wdev_lock(wdev);
BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN !=
IEEE80211_MAX_MESH_ID_LEN);
- wdev->mesh_id_up_len =
+ wdev->u.mesh.id_up_len =
nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
- memcpy(wdev->ssid, nla_data(info->attrs[NL80211_ATTR_MESH_ID]),
- wdev->mesh_id_up_len);
+ memcpy(wdev->u.mesh.id,
+ nla_data(info->attrs[NL80211_ATTR_MESH_ID]),
+ wdev->u.mesh.id_up_len);
wdev_unlock(wdev);
break;
case NL80211_IFTYPE_NAN:
@@ -4141,7 +4324,7 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&rdev->wiphy.mtx);
- return rdev_del_virtual_intf(rdev, wdev);
+ return cfg80211_remove_virtual_intf(rdev, wdev);
}
static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info)
@@ -4161,6 +4344,38 @@ static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info)
return rdev_set_noack_map(rdev, dev, noack_map);
}
+static int nl80211_validate_key_link_id(struct genl_info *info,
+ struct wireless_dev *wdev,
+ int link_id, bool pairwise)
+{
+ if (pairwise) {
+ if (link_id != -1) {
+ GENL_SET_ERR_MSG(info,
+ "link ID not allowed for pairwise key");
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+
+ if (wdev->valid_links) {
+ if (link_id == -1) {
+ GENL_SET_ERR_MSG(info,
+ "link ID must for MLO group key");
+ return -EINVAL;
+ }
+ if (!(wdev->valid_links & BIT(link_id))) {
+ GENL_SET_ERR_MSG(info, "invalid link ID for MLO group key");
+ return -EINVAL;
+ }
+ } else if (link_id != -1) {
+ GENL_SET_ERR_MSG(info, "link ID not allowed for non-MLO group key");
+ return -EINVAL;
+ }
+
+ return 0;
+}
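/*
 * The rules encoded by nl80211_validate_key_link_id() above, condensed
 * into a sketch (simplified, assumed types): pairwise keys never take
 * a link ID; on MLO connections a group key must name a currently
 * valid link; on non-MLO connections a group key must not carry one:
 */
#include <stdbool.h>

static bool model_key_link_id_ok(bool pairwise, unsigned int valid_links,
				 int link_id)
{
	if (pairwise)
		return link_id == -1;
	if (valid_links)	/* MLO: group key needs a valid link */
		return link_id >= 0 && (valid_links & (1U << link_id));
	return link_id == -1;	/* non-MLO: no link ID allowed */
}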
+
struct get_key_cookie {
struct sk_buff *msg;
int error;
@@ -4222,13 +4437,15 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
void *hdr;
struct sk_buff *msg;
bool bigtk_support = false;
+ int link_id = nl80211_link_id_or_invalid(info->attrs);
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
if (wiphy_ext_feature_isset(&rdev->wiphy,
NL80211_EXT_FEATURE_BEACON_PROTECTION))
bigtk_support = true;
- if ((dev->ieee80211_ptr->iftype == NL80211_IFTYPE_STATION ||
- dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_CLIENT) &&
+ if ((wdev->iftype == NL80211_IFTYPE_STATION ||
+ wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) &&
wiphy_ext_feature_isset(&rdev->wiphy,
NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT))
bigtk_support = true;
@@ -4280,8 +4497,12 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr))
goto nla_put_failure;
- err = rdev_get_key(rdev, dev, key_idx, pairwise, mac_addr, &cookie,
- get_key_callback);
+ err = nl80211_validate_key_link_id(info, wdev, link_id, pairwise);
+ if (err)
+ goto free_msg;
+
+ err = rdev_get_key(rdev, dev, link_id, key_idx, pairwise, mac_addr,
+ &cookie, get_key_callback);
if (err)
goto free_msg;
@@ -4305,6 +4526,8 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
struct key_parse key;
int err;
struct net_device *dev = info->user_ptr[1];
+ int link_id = nl80211_link_id_or_invalid(info->attrs);
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
err = nl80211_parse_key(info, &key);
if (err)
@@ -4320,7 +4543,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
!(key.p.mode == NL80211_KEY_SET_TX))
return -EINVAL;
- wdev_lock(dev->ieee80211_ptr);
+ wdev_lock(wdev);
if (key.def) {
if (!rdev->ops->set_default_key) {
@@ -4328,18 +4551,22 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
goto out;
}
- err = nl80211_key_allowed(dev->ieee80211_ptr);
+ err = nl80211_key_allowed(wdev);
+ if (err)
+ goto out;
+
+ err = nl80211_validate_key_link_id(info, wdev, link_id, false);
if (err)
goto out;
- err = rdev_set_default_key(rdev, dev, key.idx,
- key.def_uni, key.def_multi);
+ err = rdev_set_default_key(rdev, dev, link_id, key.idx,
+ key.def_uni, key.def_multi);
if (err)
goto out;
#ifdef CONFIG_CFG80211_WEXT
- dev->ieee80211_ptr->wext.default_key = key.idx;
+ wdev->wext.default_key = key.idx;
#endif
} else if (key.defmgmt) {
if (key.def_uni || !key.def_multi) {
@@ -4352,16 +4579,20 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
goto out;
}
- err = nl80211_key_allowed(dev->ieee80211_ptr);
+ err = nl80211_key_allowed(wdev);
if (err)
goto out;
- err = rdev_set_default_mgmt_key(rdev, dev, key.idx);
+ err = nl80211_validate_key_link_id(info, wdev, link_id, false);
+ if (err)
+ goto out;
+
+ err = rdev_set_default_mgmt_key(rdev, dev, link_id, key.idx);
if (err)
goto out;
#ifdef CONFIG_CFG80211_WEXT
- dev->ieee80211_ptr->wext.default_mgmt_key = key.idx;
+ wdev->wext.default_mgmt_key = key.idx;
#endif
} else if (key.defbeacon) {
if (key.def_uni || !key.def_multi) {
@@ -4374,11 +4605,15 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
goto out;
}
- err = nl80211_key_allowed(dev->ieee80211_ptr);
+ err = nl80211_key_allowed(wdev);
+ if (err)
+ goto out;
+
+ err = nl80211_validate_key_link_id(info, wdev, link_id, false);
if (err)
goto out;
- err = rdev_set_default_beacon_key(rdev, dev, key.idx);
+ err = rdev_set_default_beacon_key(rdev, dev, link_id, key.idx);
if (err)
goto out;
} else if (key.p.mode == NL80211_KEY_SET_TX &&
@@ -4394,14 +4629,18 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
goto out;
}
- err = rdev_add_key(rdev, dev, key.idx,
+ err = nl80211_validate_key_link_id(info, wdev, link_id, true);
+ if (err)
+ goto out;
+
+ err = rdev_add_key(rdev, dev, link_id, key.idx,
NL80211_KEYTYPE_PAIRWISE,
mac_addr, &key.p);
} else {
err = -EINVAL;
}
out:
- wdev_unlock(dev->ieee80211_ptr);
+ wdev_unlock(wdev);
return err;
}
@@ -4413,6 +4652,8 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
struct net_device *dev = info->user_ptr[1];
struct key_parse key;
const u8 *mac_addr = NULL;
+ int link_id = nl80211_link_id_or_invalid(info->attrs);
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
err = nl80211_parse_key(info, &key);
if (err)
@@ -4454,18 +4695,23 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
- wdev_lock(dev->ieee80211_ptr);
- err = nl80211_key_allowed(dev->ieee80211_ptr);
+ wdev_lock(wdev);
+ err = nl80211_key_allowed(wdev);
if (err)
GENL_SET_ERR_MSG(info, "key not allowed");
+
+ if (!err)
+ err = nl80211_validate_key_link_id(info, wdev, link_id,
+ key.type == NL80211_KEYTYPE_PAIRWISE);
+
if (!err) {
- err = rdev_add_key(rdev, dev, key.idx,
+ err = rdev_add_key(rdev, dev, link_id, key.idx,
key.type == NL80211_KEYTYPE_PAIRWISE,
mac_addr, &key.p);
if (err)
GENL_SET_ERR_MSG(info, "key addition failed");
}
- wdev_unlock(dev->ieee80211_ptr);
+ wdev_unlock(wdev);
return err;
}
@@ -4477,6 +4723,8 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
struct net_device *dev = info->user_ptr[1];
u8 *mac_addr = NULL;
struct key_parse key;
+ int link_id = nl80211_link_id_or_invalid(info->attrs);
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
err = nl80211_parse_key(info, &key);
if (err)
@@ -4504,27 +4752,31 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
if (!rdev->ops->del_key)
return -EOPNOTSUPP;
- wdev_lock(dev->ieee80211_ptr);
- err = nl80211_key_allowed(dev->ieee80211_ptr);
+ wdev_lock(wdev);
+ err = nl80211_key_allowed(wdev);
if (key.type == NL80211_KEYTYPE_GROUP && mac_addr &&
!(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN))
err = -ENOENT;
if (!err)
- err = rdev_del_key(rdev, dev, key.idx,
+ err = nl80211_validate_key_link_id(info, wdev, link_id,
+ key.type == NL80211_KEYTYPE_PAIRWISE);
+
+ if (!err)
+ err = rdev_del_key(rdev, dev, link_id, key.idx,
key.type == NL80211_KEYTYPE_PAIRWISE,
mac_addr);
#ifdef CONFIG_CFG80211_WEXT
if (!err) {
- if (key.idx == dev->ieee80211_ptr->wext.default_key)
- dev->ieee80211_ptr->wext.default_key = -1;
- else if (key.idx == dev->ieee80211_ptr->wext.default_mgmt_key)
- dev->ieee80211_ptr->wext.default_mgmt_key = -1;
+ if (key.idx == wdev->wext.default_key)
+ wdev->wext.default_key = -1;
+ else if (key.idx == wdev->wext.default_mgmt_key)
+ wdev->wext.default_mgmt_key = -1;
}
#endif
- wdev_unlock(dev->ieee80211_ptr);
+ wdev_unlock(wdev);
return err;
}
@@ -4605,7 +4857,7 @@ static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info)
dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
return -EOPNOTSUPP;
- if (!dev->ieee80211_ptr->beacon_interval)
+ if (!dev->ieee80211_ptr->links[0].ap.beacon_interval)
return -EINVAL;
acl = parse_acl_data(&rdev->wiphy, info);
@@ -4761,14 +5013,24 @@ static void he_build_mcs_mask(u16 he_mcs_map,
}
}
-static u16 he_get_txmcsmap(struct genl_info *info,
+static u16 he_get_txmcsmap(struct genl_info *info, unsigned int link_id,
const struct ieee80211_sta_he_cap *he_cap)
{
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
- __le16 tx_mcs;
+ struct cfg80211_chan_def *chandef;
+ __le16 tx_mcs;
+
+ chandef = wdev_chandef(wdev, link_id);
+ if (!chandef) {
+ /*
+ * This is probably broken, but we never maintained
+ * a chandef in these cases, so it always was broken.
+ */
+ return le16_to_cpu(he_cap->he_mcs_nss_supp.tx_mcs_80);
+ }
- switch (wdev->chandef.width) {
+ switch (chandef->width) {
case NL80211_CHAN_WIDTH_80P80:
tx_mcs = he_cap->he_mcs_nss_supp.tx_mcs_80p80;
break;
@@ -4779,6 +5041,7 @@ static u16 he_get_txmcsmap(struct genl_info *info,
tx_mcs = he_cap->he_mcs_nss_supp.tx_mcs_80;
break;
}
+
return le16_to_cpu(tx_mcs);
}
@@ -4786,7 +5049,8 @@ static bool he_set_mcs_mask(struct genl_info *info,
struct wireless_dev *wdev,
struct ieee80211_supported_band *sband,
struct nl80211_txrate_he *txrate,
- u16 mcs[NL80211_HE_NSS_MAX])
+ u16 mcs[NL80211_HE_NSS_MAX],
+ unsigned int link_id)
{
const struct ieee80211_sta_he_cap *he_cap;
u16 tx_mcs_mask[NL80211_HE_NSS_MAX] = {};
@@ -4799,7 +5063,7 @@ static bool he_set_mcs_mask(struct genl_info *info,
memset(mcs, 0, sizeof(u16) * NL80211_HE_NSS_MAX);
- tx_mcs_map = he_get_txmcsmap(info, he_cap);
+ tx_mcs_map = he_get_txmcsmap(info, link_id, he_cap);
/* Build he_mcs_mask from HE capabilities */
he_build_mcs_mask(tx_mcs_map, tx_mcs_mask);
@@ -4819,7 +5083,8 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
enum nl80211_attrs attr,
struct cfg80211_bitrate_mask *mask,
struct net_device *dev,
- bool default_all_enabled)
+ bool default_all_enabled,
+ unsigned int link_id)
{
struct nlattr *tb[NL80211_TXRATE_MAX + 1];
struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -4856,7 +5121,7 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
if (!he_cap)
continue;
- he_tx_mcs_map = he_get_txmcsmap(info, he_cap);
+ he_tx_mcs_map = he_get_txmcsmap(info, link_id, he_cap);
he_build_mcs_mask(he_tx_mcs_map, mask->control[i].he_mcs);
mask->control[i].he_gi = 0xFF;
@@ -4921,7 +5186,8 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
if (tb[NL80211_TXRATE_HE] &&
!he_set_mcs_mask(info, wdev, sband,
nla_data(tb[NL80211_TXRATE_HE]),
- mask->control[band].he_mcs))
+ mask->control[band].he_mcs,
+ link_id))
return -EINVAL;
if (tb[NL80211_TXRATE_HE_GI])
@@ -5125,6 +5391,30 @@ nl80211_parse_mbssid_elems(struct wiphy *wiphy, struct nlattr *attrs)
return elems;
}
+static int nl80211_parse_he_bss_color(struct nlattr *attrs,
+ struct cfg80211_he_bss_color *he_bss_color)
+{
+ struct nlattr *tb[NL80211_HE_BSS_COLOR_ATTR_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(tb, NL80211_HE_BSS_COLOR_ATTR_MAX, attrs,
+ he_bss_color_policy, NULL);
+ if (err)
+ return err;
+
+ if (!tb[NL80211_HE_BSS_COLOR_ATTR_COLOR])
+ return -EINVAL;
+
+ he_bss_color->color =
+ nla_get_u8(tb[NL80211_HE_BSS_COLOR_ATTR_COLOR]);
+ he_bss_color->enabled =
+ !nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_DISABLED]);
+ he_bss_color->partial =
+ nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_PARTIAL]);
+
+ return 0;
+}
+
static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev,
struct nlattr *attrs[],
struct cfg80211_beacon_data *bcn)
@@ -5134,6 +5424,8 @@ static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev,
memset(bcn, 0, sizeof(*bcn));
+ bcn->link_id = nl80211_link_id(attrs);
+
if (attrs[NL80211_ATTR_BEACON_HEAD]) {
bcn->head = nla_data(attrs[NL80211_ATTR_BEACON_HEAD]);
bcn->head_len = nla_len(attrs[NL80211_ATTR_BEACON_HEAD]);
@@ -5205,6 +5497,14 @@ static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev,
bcn->ftm_responder = -1;
}
+ if (attrs[NL80211_ATTR_HE_BSS_COLOR]) {
+ err = nl80211_parse_he_bss_color(attrs[NL80211_ATTR_HE_BSS_COLOR],
+ &bcn->he_bss_color);
+ if (err)
+ return err;
+ bcn->he_bss_color_valid = true;
+ }
+
if (attrs[NL80211_ATTR_MBSSID_ELEMS]) {
struct cfg80211_mbssid_elems *mbssid =
nl80211_parse_mbssid_elems(&rdev->wiphy,
@@ -5263,30 +5563,6 @@ static int nl80211_parse_he_obss_pd(struct nlattr *attrs,
return 0;
}
-static int nl80211_parse_he_bss_color(struct nlattr *attrs,
- struct cfg80211_he_bss_color *he_bss_color)
-{
- struct nlattr *tb[NL80211_HE_BSS_COLOR_ATTR_MAX + 1];
- int err;
-
- err = nla_parse_nested(tb, NL80211_HE_BSS_COLOR_ATTR_MAX, attrs,
- he_bss_color_policy, NULL);
- if (err)
- return err;
-
- if (!tb[NL80211_HE_BSS_COLOR_ATTR_COLOR])
- return -EINVAL;
-
- he_bss_color->color =
- nla_get_u8(tb[NL80211_HE_BSS_COLOR_ATTR_COLOR]);
- he_bss_color->enabled =
- !nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_DISABLED]);
- he_bss_color->partial =
- nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_PARTIAL]);
-
- return 0;
-}
-
static int nl80211_parse_fils_discovery(struct cfg80211_registered_device *rdev,
struct nlattr *attrs,
struct cfg80211_ap_settings *params)
@@ -5371,7 +5647,7 @@ static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params,
* HT/VHT requirements/capabilities, we parse them out of the IEs for the
* benefit of drivers that rebuild IEs in the firmware.
*/
-static void nl80211_calculate_ap_params(struct cfg80211_ap_settings *params)
+static int nl80211_calculate_ap_params(struct cfg80211_ap_settings *params)
{
const struct cfg80211_beacon_data *bcn = &params->beacon;
size_t ies_len = bcn->tail_len;
@@ -5397,28 +5673,46 @@ static void nl80211_calculate_ap_params(struct cfg80211_ap_settings *params)
cap = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ies, ies_len);
if (cap && cap->datalen >= sizeof(*params->he_oper) + 1)
params->he_oper = (void *)(cap->data + 1);
+ cap = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_CAPABILITY, ies, ies_len);
+ if (cap) {
+ if (!cap->datalen)
+ return -EINVAL;
+ params->eht_cap = (void *)(cap->data + 1);
+ if (!ieee80211_eht_capa_size_ok((const u8 *)params->he_cap,
+ (const u8 *)params->eht_cap,
+ cap->datalen - 1, true))
+ return -EINVAL;
+ }
+ cap = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_OPERATION, ies, ies_len);
+ if (cap) {
+ if (!cap->datalen)
+ return -EINVAL;
+ params->eht_oper = (void *)(cap->data + 1);
+ if (!ieee80211_eht_oper_size_ok((const u8 *)params->eht_oper,
+ cap->datalen - 1))
+ return -EINVAL;
+ }
+ return 0;
}
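The EHT additions reuse the element-scanning pattern already used for HT/VHT/HE: locate the extension element, require a non-empty payload, and step past the one-byte extension ID before size-checking the body. The generic shape, as a hypothetical helper:

static const u8 *find_ext_payload(u8 ext_id, const u8 *ies, size_t len,
				  u8 *payload_len)
{
	const struct element *elem;

	elem = cfg80211_find_ext_elem(ext_id, ies, len);
	if (!elem || !elem->datalen)
		return NULL;

	*payload_len = elem->datalen - 1;	/* drop the extension ID byte */
	return elem->data + 1;
}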
static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev,
struct cfg80211_ap_settings *params)
{
struct wireless_dev *wdev;
- bool ret = false;
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
if (wdev->iftype != NL80211_IFTYPE_AP &&
wdev->iftype != NL80211_IFTYPE_P2P_GO)
continue;
- if (!wdev->preset_chandef.chan)
+ if (!wdev->u.ap.preset_chandef.chan)
continue;
- params->chandef = wdev->preset_chandef;
- ret = true;
- break;
+ params->chandef = wdev->u.ap.preset_chandef;
+ return true;
}
- return ret;
+ return false;
}
static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev,
@@ -5476,6 +5770,7 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev,
static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ unsigned int link_id = nl80211_link_id(info->attrs);
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_ap_settings *params;
@@ -5488,7 +5783,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
if (!rdev->ops->start_ap)
return -EOPNOTSUPP;
- if (wdev->beacon_interval)
+ if (wdev->links[link_id].ap.beacon_interval)
return -EALREADY;
/* these are required for START_AP */
@@ -5530,6 +5825,18 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
err = -EINVAL;
goto out;
}
+
+ if (wdev->u.ap.ssid_len &&
+ (wdev->u.ap.ssid_len != params->ssid_len ||
+ memcmp(wdev->u.ap.ssid, params->ssid, params->ssid_len))) {
+ /* require identical SSID for MLO */
+ err = -EINVAL;
+ goto out;
+ }
+ } else if (wdev->valid_links) {
+ /* require SSID for MLO */
+ err = -EINVAL;
+ goto out;
}
if (info->attrs[NL80211_ATTR_HIDDEN_SSID])
@@ -5597,8 +5904,12 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
err = nl80211_parse_chandef(rdev, info, &params->chandef);
if (err)
goto out;
- } else if (wdev->preset_chandef.chan) {
- params->chandef = wdev->preset_chandef;
+ } else if (wdev->valid_links) {
+ /* with an MLD, the channel configuration must be specified */
+ err = -EINVAL;
+ goto out;
+ } else if (wdev->u.ap.preset_chandef.chan) {
+ params->chandef = wdev->u.ap.preset_chandef;
} else if (!nl80211_get_ap_channel(rdev, params)) {
err = -EINVAL;
goto out;
@@ -5610,18 +5921,20 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
goto out;
}
+ wdev_lock(wdev);
+
if (info->attrs[NL80211_ATTR_TX_RATES]) {
err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
NL80211_ATTR_TX_RATES,
&params->beacon_rate,
- dev, false);
+ dev, false, link_id);
if (err)
- goto out;
+ goto out_unlock;
err = validate_beacon_tx_rate(rdev, params->chandef.chan->band,
&params->beacon_rate);
if (err)
- goto out;
+ goto out_unlock;
}
if (info->attrs[NL80211_ATTR_SMPS_MODE]) {
@@ -5634,19 +5947,19 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
if (!(rdev->wiphy.features &
NL80211_FEATURE_STATIC_SMPS)) {
err = -EINVAL;
- goto out;
+ goto out_unlock;
}
break;
case NL80211_SMPS_DYNAMIC:
if (!(rdev->wiphy.features &
NL80211_FEATURE_DYNAMIC_SMPS)) {
err = -EINVAL;
- goto out;
+ goto out_unlock;
}
break;
default:
err = -EINVAL;
- goto out;
+ goto out_unlock;
}
} else {
params->smps_mode = NL80211_SMPS_OFF;
@@ -5655,7 +5968,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
params->pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]);
if (params->pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) {
err = -EOPNOTSUPP;
- goto out;
+ goto out_unlock;
}
if (info->attrs[NL80211_ATTR_ACL_POLICY]) {
@@ -5663,7 +5976,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
if (IS_ERR(params->acl)) {
err = PTR_ERR(params->acl);
params->acl = NULL;
- goto out;
+ goto out_unlock;
}
}
@@ -5675,15 +5988,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
info->attrs[NL80211_ATTR_HE_OBSS_PD],
&params->he_obss_pd);
if (err)
- goto out;
- }
-
- if (info->attrs[NL80211_ATTR_HE_BSS_COLOR]) {
- err = nl80211_parse_he_bss_color(
- info->attrs[NL80211_ATTR_HE_BSS_COLOR],
- &params->he_bss_color);
- if (err)
- goto out;
+ goto out_unlock;
}
if (info->attrs[NL80211_ATTR_FILS_DISCOVERY]) {
@@ -5691,7 +5996,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
info->attrs[NL80211_ATTR_FILS_DISCOVERY],
params);
if (err)
- goto out;
+ goto out_unlock;
}
if (info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP]) {
@@ -5699,7 +6004,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
rdev, info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP],
params);
if (err)
- goto out;
+ goto out_unlock;
}
if (info->attrs[NL80211_ATTR_MBSSID_CONFIG]) {
@@ -5710,10 +6015,12 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
params->beacon.mbssid_ies->cnt :
0);
if (err)
- goto out;
+ goto out_unlock;
}
- nl80211_calculate_ap_params(params);
+ err = nl80211_calculate_ap_params(params);
+ if (err)
+ goto out_unlock;
if (info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS])
params->flags = nla_get_u32(
@@ -5721,20 +6028,28 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
else if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])
params->flags |= NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT;
- wdev_lock(wdev);
+ if (wdev->conn_owner_nlportid &&
+ info->attrs[NL80211_ATTR_SOCKET_OWNER] &&
+ wdev->conn_owner_nlportid != info->snd_portid) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ /* FIXME: validate MLO/link-id against driver capabilities */
+
err = rdev_start_ap(rdev, dev, params);
if (!err) {
- wdev->preset_chandef = params->chandef;
- wdev->beacon_interval = params->beacon_interval;
- wdev->chandef = params->chandef;
- wdev->ssid_len = params->ssid_len;
- memcpy(wdev->ssid, params->ssid, wdev->ssid_len);
+ wdev->links[link_id].ap.beacon_interval = params->beacon_interval;
+ wdev->links[link_id].ap.chandef = params->chandef;
+ wdev->u.ap.ssid_len = params->ssid_len;
+ memcpy(wdev->u.ap.ssid, params->ssid,
+ params->ssid_len);
if (info->attrs[NL80211_ATTR_SOCKET_OWNER])
wdev->conn_owner_nlportid = info->snd_portid;
}
+out_unlock:
wdev_unlock(wdev);
-
out:
kfree(params->acl);
kfree(params->beacon.mbssid_ies);
@@ -5750,6 +6065,7 @@ out:
static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ unsigned int link_id = nl80211_link_id(info->attrs);
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_beacon_data params;
@@ -5762,7 +6078,7 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info)
if (!rdev->ops->change_beacon)
return -EOPNOTSUPP;
- if (!wdev->beacon_interval)
+ if (!wdev->links[link_id].ap.beacon_interval)
return -EINVAL;
err = nl80211_parse_beacon(rdev, info->attrs, &params);
@@ -5781,9 +6097,10 @@ out:
static int nl80211_stop_ap(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ unsigned int link_id = nl80211_link_id(info->attrs);
struct net_device *dev = info->user_ptr[1];
- return cfg80211_stop_ap(rdev, dev, false);
+ return cfg80211_stop_ap(rdev, dev, link_id, false);
}
static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = {
@@ -5919,6 +6236,14 @@ bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, int attr)
case RATE_INFO_BW_HE_RU:
rate_flg = 0;
WARN_ON(!(info->flags & RATE_INFO_FLAGS_HE_MCS));
+ break;
+ case RATE_INFO_BW_320:
+ rate_flg = NL80211_RATE_INFO_320_MHZ_WIDTH;
+ break;
+ case RATE_INFO_BW_EHT_RU:
+ rate_flg = 0;
+ WARN_ON(!(info->flags & RATE_INFO_FLAGS_EHT_MCS));
+ break;
}
if (rate_flg && nla_put_flag(msg, rate_flg))
@@ -5951,6 +6276,17 @@ bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, int attr)
nla_put_u8(msg, NL80211_RATE_INFO_HE_RU_ALLOC,
info->he_ru_alloc))
return false;
+ } else if (info->flags & RATE_INFO_FLAGS_EHT_MCS) {
+ if (nla_put_u8(msg, NL80211_RATE_INFO_EHT_MCS, info->mcs))
+ return false;
+ if (nla_put_u8(msg, NL80211_RATE_INFO_EHT_NSS, info->nss))
+ return false;
+ if (nla_put_u8(msg, NL80211_RATE_INFO_EHT_GI, info->eht_gi))
+ return false;
+ if (info->bw == RATE_INFO_BW_EHT_RU &&
+ nla_put_u8(msg, NL80211_RATE_INFO_EHT_RU_ALLOC,
+ info->eht_ru_alloc))
+ return false;
}
nla_nest_end(msg, rate);
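The EHT rate attributes follow the usual netlink nesting discipline: any nla_put_*() failure propagates out as false so the caller can cancel the half-built message. A minimal standalone sketch of that discipline (helper name hypothetical):

static bool put_eht_rate_example(struct sk_buff *msg, int attr,
				 const struct rate_info *info)
{
	struct nlattr *rate = nla_nest_start_noflag(msg, attr);

	if (!rate)
		return false;

	if (nla_put_u8(msg, NL80211_RATE_INFO_EHT_MCS, info->mcs) ||
	    nla_put_u8(msg, NL80211_RATE_INFO_EHT_NSS, info->nss))
		return false;	/* caller cancels the whole message */

	nla_nest_end(msg, rate);
	return true;
}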
@@ -6362,10 +6698,12 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
return -EINVAL;
if (params->sta_modify_mask & STATION_PARAM_APPLY_CAPABILITY)
return -EINVAL;
- if (params->supported_rates)
+ if (params->link_sta_params.supported_rates)
return -EINVAL;
- if (params->ext_capab || params->ht_capa || params->vht_capa ||
- params->he_capa)
+ if (params->ext_capab || params->link_sta_params.ht_capa ||
+ params->link_sta_params.vht_capa ||
+ params->link_sta_params.he_capa ||
+ params->link_sta_params.eht_capa)
return -EINVAL;
}
@@ -6413,7 +6751,7 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
return -EINVAL;
/* force (at least) rates when authorizing */
if (params->sta_flags_set & BIT(NL80211_STA_FLAG_AUTHORIZED) &&
- !params->supported_rates)
+ !params->link_sta_params.supported_rates)
return -EINVAL;
break;
case CFG80211_STA_TDLS_PEER_ACTIVE:
@@ -6437,7 +6775,7 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
*/
if (statype != CFG80211_STA_AP_CLIENT_UNASSOC &&
statype != CFG80211_STA_TDLS_PEER_SETUP)
- params->opmode_notif_used = false;
+ params->link_sta_params.opmode_notif_used = false;
return 0;
}
@@ -6483,12 +6821,6 @@ static struct net_device *get_vlan(struct genl_info *info,
return ERR_PTR(ret);
}
-static const struct nla_policy
-nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] = {
- [NL80211_STA_WME_UAPSD_QUEUES] = { .type = NLA_U8 },
- [NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 },
-};
-
static int nl80211_parse_sta_wme(struct genl_info *info,
struct station_parameters *params)
{
@@ -6558,16 +6890,29 @@ static int nl80211_set_station_tdls(struct genl_info *info,
if (info->attrs[NL80211_ATTR_PEER_AID])
params->aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]);
if (info->attrs[NL80211_ATTR_HT_CAPABILITY])
- params->ht_capa =
+ params->link_sta_params.ht_capa =
nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
if (info->attrs[NL80211_ATTR_VHT_CAPABILITY])
- params->vht_capa =
+ params->link_sta_params.vht_capa =
nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
if (info->attrs[NL80211_ATTR_HE_CAPABILITY]) {
- params->he_capa =
+ params->link_sta_params.he_capa =
nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
- params->he_capa_len =
+ params->link_sta_params.he_capa_len =
nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+
+ if (info->attrs[NL80211_ATTR_EHT_CAPABILITY]) {
+ params->link_sta_params.eht_capa =
+ nla_data(info->attrs[NL80211_ATTR_EHT_CAPABILITY]);
+ params->link_sta_params.eht_capa_len =
+ nla_len(info->attrs[NL80211_ATTR_EHT_CAPABILITY]);
+
+ if (!ieee80211_eht_capa_size_ok((const u8 *)params->link_sta_params.he_capa,
+ (const u8 *)params->link_sta_params.eht_capa,
+ params->link_sta_params.eht_capa_len,
+ false))
+ return -EINVAL;
+ }
}
err = nl80211_parse_sta_channel_info(info, params);
@@ -6578,7 +6923,8 @@ static int nl80211_set_station_tdls(struct genl_info *info,
}
static int nl80211_parse_sta_txpower_setting(struct genl_info *info,
- struct station_parameters *params)
+ struct sta_txpwr *txpwr,
+ bool *txpwr_set)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
int idx;
@@ -6590,18 +6936,20 @@ static int nl80211_parse_sta_txpower_setting(struct genl_info *info,
return -EOPNOTSUPP;
idx = NL80211_ATTR_STA_TX_POWER_SETTING;
- params->txpwr.type = nla_get_u8(info->attrs[idx]);
+ txpwr->type = nla_get_u8(info->attrs[idx]);
- if (params->txpwr.type == NL80211_TX_POWER_LIMITED) {
+ if (txpwr->type == NL80211_TX_POWER_LIMITED) {
idx = NL80211_ATTR_STA_TX_POWER;
if (info->attrs[idx])
- params->txpwr.power =
- nla_get_s16(info->attrs[idx]);
+ txpwr->power = nla_get_s16(info->attrs[idx]);
else
return -EINVAL;
}
- params->sta_modify_mask |= STATION_PARAM_APPLY_STA_TXPOWER;
+
+ *txpwr_set = true;
+ } else {
+ *txpwr_set = false;
}
return 0;
@@ -6646,12 +6994,33 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
if (!info->attrs[NL80211_ATTR_MAC])
return -EINVAL;
- mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
+ params.link_sta_params.link_id =
+ nl80211_link_id_or_invalid(info->attrs);
+
+ if (info->attrs[NL80211_ATTR_MLD_ADDR]) {
+ /* If the MLD_ADDR attribute is set then this is an MLD station
+ * and the MLD_ADDR attribute holds the MLD address, while the
+ * MAC attribute holds the link address.
+ * In that case, the link_id is also expected to be valid.
+ */
+ if (params.link_sta_params.link_id < 0)
+ return -EINVAL;
+
+ mac_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]);
+ params.link_sta_params.mld_mac = mac_addr;
+ params.link_sta_params.link_mac =
+ nla_data(info->attrs[NL80211_ATTR_MAC]);
+ if (!is_valid_ether_addr(params.link_sta_params.link_mac))
+ return -EINVAL;
+ } else {
+ mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
+ }
+
if (info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) {
- params.supported_rates =
+ params.link_sta_params.supported_rates =
nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
- params.supported_rates_len =
+ params.link_sta_params.supported_rates_len =
nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
}
@@ -6689,13 +7058,13 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]);
if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) {
- params.opmode_notif_used = true;
- params.opmode_notif =
+ params.link_sta_params.opmode_notif_used = true;
+ params.link_sta_params.opmode_notif =
nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]);
}
if (info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY])
- params.he_6ghz_capa =
+ params.link_sta_params.he_6ghz_capa =
nla_data(info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]);
if (info->attrs[NL80211_ATTR_AIRTIME_WEIGHT])
@@ -6707,7 +7076,9 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
return -EOPNOTSUPP;
- err = nl80211_parse_sta_txpower_setting(info, &params);
+ err = nl80211_parse_sta_txpower_setting(info,
+ &params.link_sta_params.txpwr,
+ &params.link_sta_params.txpwr_set);
if (err)
return err;
@@ -6735,7 +7106,9 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
}
/* driver will call cfg80211_check_station_change() */
+ wdev_lock(dev->ieee80211_ptr);
err = rdev_change_station(rdev, dev, mac_addr, &params);
+ wdev_unlock(dev->ieee80211_ptr);
out_put_vlan:
dev_put(params.vlan);
@@ -6748,6 +7121,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
struct cfg80211_registered_device *rdev = info->user_ptr[0];
int err;
struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
struct station_parameters params;
u8 *mac_addr = NULL;
u32 auth_assoc = BIT(NL80211_STA_FLAG_AUTHENTICATED) |
@@ -6771,10 +7145,23 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
!info->attrs[NL80211_ATTR_PEER_AID])
return -EINVAL;
- mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
- params.supported_rates =
+ params.link_sta_params.link_id =
+ nl80211_link_id_or_invalid(info->attrs);
+
+ if (info->attrs[NL80211_ATTR_MLD_ADDR]) {
+ mac_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]);
+ params.link_sta_params.mld_mac = mac_addr;
+ params.link_sta_params.link_mac =
+ nla_data(info->attrs[NL80211_ATTR_MAC]);
+ if (!is_valid_ether_addr(params.link_sta_params.link_mac))
+ return -EINVAL;
+ } else {
+ mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
+ }
+
+ params.link_sta_params.supported_rates =
nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
- params.supported_rates_len =
+ params.link_sta_params.supported_rates_len =
nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
params.listen_interval =
nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]);
@@ -6813,27 +7200,40 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
}
if (info->attrs[NL80211_ATTR_HT_CAPABILITY])
- params.ht_capa =
+ params.link_sta_params.ht_capa =
nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
if (info->attrs[NL80211_ATTR_VHT_CAPABILITY])
- params.vht_capa =
+ params.link_sta_params.vht_capa =
nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
if (info->attrs[NL80211_ATTR_HE_CAPABILITY]) {
- params.he_capa =
+ params.link_sta_params.he_capa =
nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
- params.he_capa_len =
+ params.link_sta_params.he_capa_len =
nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+
+ if (info->attrs[NL80211_ATTR_EHT_CAPABILITY]) {
+ params.link_sta_params.eht_capa =
+ nla_data(info->attrs[NL80211_ATTR_EHT_CAPABILITY]);
+ params.link_sta_params.eht_capa_len =
+ nla_len(info->attrs[NL80211_ATTR_EHT_CAPABILITY]);
+
+ if (!ieee80211_eht_capa_size_ok((const u8 *)params.link_sta_params.he_capa,
+ (const u8 *)params.link_sta_params.eht_capa,
+ params.link_sta_params.eht_capa_len,
+ false))
+ return -EINVAL;
+ }
}
if (info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY])
- params.he_6ghz_capa =
+ params.link_sta_params.he_6ghz_capa =
nla_data(info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]);
if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) {
- params.opmode_notif_used = true;
- params.opmode_notif =
+ params.link_sta_params.opmode_notif_used = true;
+ params.link_sta_params.opmode_notif =
nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]);
}
@@ -6850,7 +7250,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
return -EOPNOTSUPP;
- err = nl80211_parse_sta_txpower_setting(info, &params);
+ err = nl80211_parse_sta_txpower_setting(info,
+ &params.link_sta_params.txpwr,
+ &params.link_sta_params.txpwr_set);
if (err)
return err;
@@ -6871,16 +7273,19 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
* error in this case.
*/
if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME))) {
- params.ht_capa = NULL;
- params.vht_capa = NULL;
+ params.link_sta_params.ht_capa = NULL;
+ params.link_sta_params.vht_capa = NULL;
- /* HE requires WME */
- if (params.he_capa_len || params.he_6ghz_capa)
+ /* HE and EHT require WME */
+ if (params.link_sta_params.he_capa_len ||
+ params.link_sta_params.he_6ghz_capa ||
+ params.link_sta_params.eht_capa_len)
return -EINVAL;
}
/* Ensure that HT/VHT capabilities are not set for 6 GHz HE STA */
- if (params.he_6ghz_capa && (params.ht_capa || params.vht_capa))
+ if (params.link_sta_params.he_6ghz_capa &&
+ (params.link_sta_params.ht_capa || params.link_sta_params.vht_capa))
return -EINVAL;
/* When you run into this, adjust the code below for the new flag */
@@ -6971,8 +7376,25 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
/* be aware of params.vlan when changing code here */
+ wdev_lock(dev->ieee80211_ptr);
+ if (wdev->valid_links) {
+ if (params.link_sta_params.link_id < 0) {
+ err = -EINVAL;
+ goto out;
+ }
+ if (!(wdev->valid_links & BIT(params.link_sta_params.link_id))) {
+ err = -ENOLINK;
+ goto out;
+ }
+ } else {
+ if (params.link_sta_params.link_id >= 0) {
+ err = -EINVAL;
+ goto out;
+ }
+ }
err = rdev_add_station(rdev, dev, mac_addr, &params);
-
+out:
+ wdev_unlock(dev->ieee80211_ptr);
dev_put(params.vlan);
return err;
}
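The link-ID gating added here reduces to one rule: an MLD interface requires a valid, currently-active link ID, while a non-MLD interface must not be given one. Extracted into a hypothetical helper for clarity:

static int check_link_id_for_wdev(struct wireless_dev *wdev, int link_id)
{
	if (wdev->valid_links) {
		if (link_id < 0)
			return -EINVAL;		/* MLD needs a link */
		if (!(wdev->valid_links & BIT(link_id)))
			return -ENOLINK;	/* link not active */
		return 0;
	}
	return link_id >= 0 ? -EINVAL : 0;	/* no links on non-MLD */
}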
@@ -6982,6 +7404,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
struct station_del_parameters params;
+ int ret;
memset(&params, 0, sizeof(params));
@@ -7029,7 +7452,11 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
params.reason_code = WLAN_REASON_PREV_AUTH_NOT_VALID;
}
- return rdev_del_station(rdev, dev, &params);
+ wdev_lock(dev->ieee80211_ptr);
+ ret = rdev_del_station(rdev, dev, &params);
+ wdev_unlock(dev->ieee80211_ptr);
+
+ return ret;
}
static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq,
@@ -7489,7 +7916,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
wdev_lock(wdev);
/* If not connected, get default parameters */
- if (!wdev->mesh_id_len)
+ if (!wdev->u.mesh.id_len)
memcpy(&cur_params, &default_mesh_config, sizeof(cur_params));
else
err = rdev_get_mesh_config(rdev, dev, &cur_params);
@@ -7870,7 +8297,7 @@ static int nl80211_update_mesh_config(struct sk_buff *skb,
return err;
wdev_lock(wdev);
- if (!wdev->mesh_id_len)
+ if (!wdev->u.mesh.id_len)
err = -ENOLINK;
if (!err)
@@ -7948,6 +8375,7 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info)
struct cfg80211_registered_device *rdev;
struct wiphy *wiphy = NULL;
struct sk_buff *msg;
+ int err = -EMSGSIZE;
void *hdr;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -7966,34 +8394,35 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info)
rdev = cfg80211_get_dev_from_info(genl_info_net(info), info);
if (IS_ERR(rdev)) {
- nlmsg_free(msg);
- rtnl_unlock();
- return PTR_ERR(rdev);
+ err = PTR_ERR(rdev);
+ goto nla_put_failure;
}
wiphy = &rdev->wiphy;
self_managed = wiphy->regulatory_flags &
REGULATORY_WIPHY_SELF_MANAGED;
+
+ rcu_read_lock();
+
regdom = get_wiphy_regdom(wiphy);
/* a self-managed-reg device must have a private regdom */
if (WARN_ON(!regdom && self_managed)) {
- nlmsg_free(msg);
- rtnl_unlock();
- return -EINVAL;
+ err = -EINVAL;
+ goto nla_put_failure_rcu;
}
if (regdom &&
nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy)))
- goto nla_put_failure;
+ goto nla_put_failure_rcu;
+ } else {
+ rcu_read_lock();
}
if (!wiphy && reg_last_request_cell_base() &&
nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE,
NL80211_USER_REG_HINT_CELL_BASE))
- goto nla_put_failure;
-
- rcu_read_lock();
+ goto nla_put_failure_rcu;
if (!regdom)
regdom = rcu_dereference(cfg80211_regdomain);
@@ -8013,7 +8442,7 @@ nla_put_failure:
rtnl_unlock();
put_failure:
nlmsg_free(msg);
- return -EMSGSIZE;
+ return err;
}
static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb,
@@ -8059,19 +8488,19 @@ static int nl80211_get_reg_dump(struct sk_buff *skb,
struct cfg80211_registered_device *rdev;
int err, reg_idx, start = cb->args[2];
- rtnl_lock();
+ rcu_read_lock();
if (cfg80211_regdomain && start == 0) {
err = nl80211_send_regdom(skb, cb, cb->nlh->nlmsg_seq,
NLM_F_MULTI, NULL,
- rtnl_dereference(cfg80211_regdomain));
+ rcu_dereference(cfg80211_regdomain));
if (err < 0)
goto out_err;
}
/* the global regdom is idx 0 */
reg_idx = 1;
- list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) {
regdom = get_wiphy_regdom(&rdev->wiphy);
if (!regdom)
continue;
@@ -8090,7 +8519,7 @@ static int nl80211_get_reg_dump(struct sk_buff *skb,
cb->args[2] = reg_idx;
err = skb->len;
out_err:
- rtnl_unlock();
+ rcu_read_unlock();
return err;
}
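The dump now runs under RCU instead of the RTNL: the rdev list is walked with list_for_each_entry_rcu() and the global regdomain read via rcu_dereference(), all inside one read-side critical section. The reader-side shape is simply:

rcu_read_lock();
regdom = rcu_dereference(cfg80211_regdomain);
list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) {
	/* emit one regdomain per wiphy; writers must publish with
	 * rcu_assign_pointer() and defer frees (e.g. kfree_rcu())
	 */
}
rcu_read_unlock();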
@@ -8360,14 +8789,44 @@ int nl80211_parse_random_mac(struct nlattr **attrs,
return 0;
}
-static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev)
+static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev,
+ struct ieee80211_channel *chan)
{
+ unsigned int link_id;
+ bool all_ok = true;
+
ASSERT_WDEV_LOCK(wdev);
if (!cfg80211_beaconing_iface_active(wdev))
return true;
- if (!(wdev->chandef.chan->flags & IEEE80211_CHAN_RADAR))
+ /*
+ * FIXME: check if we have a free HW resource/link for chan
+ *
+ * This, as well as the FIXME below, requires knowing the link
+ * capabilities of the hardware.
+ */
+
+ /* we cannot leave radar channels */
+ for_each_valid_link(wdev, link_id) {
+ struct cfg80211_chan_def *chandef;
+
+ chandef = wdev_chandef(wdev, link_id);
+ if (!chandef)
+ continue;
+
+ /*
+ * FIXME: don't require all_ok, but rather check only the
+ * correct HW resource/link onto which 'chan' falls,
+ * as only that link leaves the channel for doing
+ * the off-channel operation.
+ */
+
+ if (chandef->chan->flags & IEEE80211_CHAN_RADAR)
+ all_ok = false;
+ }
+
+ if (all_ok)
return true;
return regulatory_pre_cac_allowed(wdev->wiphy);
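for_each_valid_link() hides the MLO/non-MLO split: on an MLD it visits each link whose bit is set in wdev->valid_links, on a legacy interface it visits link 0 exactly once. One way such a macro can be written (the upstream definition may differ):

#define for_each_valid_link(wdev, link_id)				\
	for ((link_id) = 0;						\
	     (link_id) < ((wdev)->valid_links ?				\
			  ARRAY_SIZE((wdev)->links) : 1);		\
	     (link_id)++)						\
		if (!(wdev)->valid_links ||				\
		    ((wdev)->valid_links & BIT(link_id)))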
@@ -8450,7 +8909,7 @@ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
int err;
if (!(wiphy->features & randomness_flag) ||
- (wdev && wdev->current_bss))
+ (wdev && wdev->connected))
return -EOPNOTSUPP;
err = nl80211_parse_random_mac(attrs, mac_addr, mac_addr_mask);
@@ -8587,17 +9046,14 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
request->n_channels = i;
wdev_lock(wdev);
- if (!cfg80211_off_channel_oper_allowed(wdev)) {
- struct ieee80211_channel *chan;
+ for (i = 0; i < request->n_channels; i++) {
+ struct ieee80211_channel *chan = request->channels[i];
- if (request->n_channels != 1) {
- wdev_unlock(wdev);
- err = -EBUSY;
- goto out_free;
- }
+ /* if we can go off-channel to the target channel we're good */
+ if (cfg80211_off_channel_oper_allowed(wdev, chan))
+ continue;
- chan = request->channels[0];
- if (chan->center_freq != wdev->chandef.chan->center_freq) {
+ if (!cfg80211_wdev_on_sub_chan(wdev, chan, true)) {
wdev_unlock(wdev);
err = -EBUSY;
goto out_free;
@@ -9342,7 +9798,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
err = rdev_start_radar_detection(rdev, dev, &chandef, cac_time_ms);
if (!err) {
- wdev->chandef = chandef;
+ wdev->links[0].ap.chandef = chandef;
wdev->cac_started = true;
wdev->cac_start_time = jiffies;
wdev->cac_time_ms = cac_time_ms;
@@ -9410,6 +9866,7 @@ static int nl80211_notify_radar_detection(struct sk_buff *skb,
static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ unsigned int link_id = nl80211_link_id(info->attrs);
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_csa_settings params;
@@ -9436,15 +9893,15 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
need_handle_dfs_flag = false;
/* useless if AP is not running */
- if (!wdev->beacon_interval)
+ if (!wdev->links[link_id].ap.beacon_interval)
return -ENOTCONN;
break;
case NL80211_IFTYPE_ADHOC:
- if (!wdev->ssid_len)
+ if (!wdev->u.ibss.ssid_len)
return -ENOTCONN;
break;
case NL80211_IFTYPE_MESH_POINT:
- if (!wdev->mesh_id_len)
+ if (!wdev->u.mesh.id_len)
return -ENOTCONN;
break;
default:
@@ -9615,6 +10072,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
{
struct cfg80211_bss *res = &intbss->pub;
const struct cfg80211_bss_ies *ies;
+ unsigned int link_id;
void *hdr;
struct nlattr *bss;
@@ -9719,13 +10177,20 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
switch (wdev->iftype) {
case NL80211_IFTYPE_P2P_CLIENT:
case NL80211_IFTYPE_STATION:
- if (intbss == wdev->current_bss &&
- nla_put_u32(msg, NL80211_BSS_STATUS,
- NL80211_BSS_STATUS_ASSOCIATED))
- goto nla_put_failure;
+ for_each_valid_link(wdev, link_id) {
+ if (intbss == wdev->links[link_id].client.current_bss &&
+ (nla_put_u32(msg, NL80211_BSS_STATUS,
+ NL80211_BSS_STATUS_ASSOCIATED) ||
+ (wdev->valid_links &&
+ (nla_put_u8(msg, NL80211_BSS_MLO_LINK_ID,
+ link_id) ||
+ nla_put(msg, NL80211_BSS_MLD_ADDR, ETH_ALEN,
+ wdev->u.client.connected_addr)))))
+ goto nla_put_failure;
+ }
break;
case NL80211_IFTYPE_ADHOC:
- if (intbss == wdev->current_bss &&
+ if (intbss == wdev->u.ibss.current_bss &&
nla_put_u32(msg, NL80211_BSS_STATUS,
NL80211_BSS_STATUS_IBSS_JOINED))
goto nla_put_failure;
@@ -9909,7 +10374,9 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
}
while (1) {
+ wdev_lock(wdev);
res = rdev_dump_survey(rdev, wdev->netdev, survey_idx, &survey);
+ wdev_unlock(wdev);
if (res == -ENOENT)
break;
if (res)
@@ -9951,11 +10418,12 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
struct ieee80211_channel *chan;
- const u8 *bssid, *ssid, *ie = NULL, *auth_data = NULL;
- int err, ssid_len, ie_len = 0, auth_data_len = 0;
+ const u8 *bssid, *ssid;
+ int err, ssid_len;
enum nl80211_auth_type auth_type;
struct key_parse key;
bool local_state_change;
+ struct cfg80211_auth_request req = {};
u32 freq;
if (!info->attrs[NL80211_ATTR_MAC])
@@ -10026,8 +10494,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
if (info->attrs[NL80211_ATTR_IE]) {
- ie = nla_data(info->attrs[NL80211_ATTR_IE]);
- ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
+ req.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
+ req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
}
auth_type = nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]);
@@ -10047,8 +10515,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
auth_type != NL80211_AUTHTYPE_FILS_SK_PFS &&
auth_type != NL80211_AUTHTYPE_FILS_PK)
return -EINVAL;
- auth_data = nla_data(info->attrs[NL80211_ATTR_AUTH_DATA]);
- auth_data_len = nla_len(info->attrs[NL80211_ATTR_AUTH_DATA]);
+ req.auth_data = nla_data(info->attrs[NL80211_ATTR_AUTH_DATA]);
+ req.auth_data_len = nla_len(info->attrs[NL80211_ATTR_AUTH_DATA]);
}
local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
@@ -10060,12 +10528,31 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
if (local_state_change)
return 0;
+ req.auth_type = auth_type;
+ req.key = key.p.key;
+ req.key_len = key.p.key_len;
+ req.key_idx = key.idx;
+ req.link_id = nl80211_link_id_or_invalid(info->attrs);
+ if (req.link_id >= 0) {
+ if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_MLO))
+ return -EINVAL;
+ if (!info->attrs[NL80211_ATTR_MLD_ADDR])
+ return -EINVAL;
+ req.ap_mld_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]);
+ }
+
+ req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
+ IEEE80211_BSS_TYPE_ESS,
+ IEEE80211_PRIVACY_ANY);
+ if (!req.bss)
+ return -ENOENT;
+
wdev_lock(dev->ieee80211_ptr);
- err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid,
- ssid, ssid_len, ie, ie_len,
- key.p.key, key.p.key_len, key.idx,
- auth_data, auth_data_len);
+ err = cfg80211_mlme_auth(rdev, dev, &req);
wdev_unlock(dev->ieee80211_ptr);
+
+ cfg80211_put_bss(&rdev->wiphy, req.bss);
+
return err;
}
@@ -10169,7 +10656,7 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
if (len % sizeof(u32))
return -EINVAL;
- if (settings->n_akm_suites > NL80211_MAX_NR_AKM_SUITES)
+ if (settings->n_akm_suites > rdev->wiphy.max_num_akm_suites)
return -EINVAL;
memcpy(settings->akm_suites, data, len);
@@ -10207,23 +10694,55 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
return 0;
}
+static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device *rdev,
+ const u8 *ssid, int ssid_len,
+ struct nlattr **attrs,
+ const u8 **bssid_out)
+{
+ struct ieee80211_channel *chan;
+ struct cfg80211_bss *bss;
+ const u8 *bssid;
+ u32 freq;
+
+ if (!attrs[NL80211_ATTR_MAC] || !attrs[NL80211_ATTR_WIPHY_FREQ])
+ return ERR_PTR(-EINVAL);
+
+ bssid = nla_data(attrs[NL80211_ATTR_MAC]);
+
+ freq = MHZ_TO_KHZ(nla_get_u32(attrs[NL80211_ATTR_WIPHY_FREQ]));
+ if (attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET])
+ freq += nla_get_u32(attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]);
+
+ chan = nl80211_get_valid_chan(&rdev->wiphy, freq);
+ if (!chan)
+ return ERR_PTR(-EINVAL);
+
+ bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid,
+ ssid, ssid_len,
+ IEEE80211_BSS_TYPE_ESS,
+ IEEE80211_PRIVACY_ANY);
+ if (!bss)
+ return ERR_PTR(-ENOENT);
+
+ *bssid_out = bssid;
+ return bss;
+}
+
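Factoring the lookup out lets the legacy path and the per-link MLO path below resolve a BSS identically, with ERR_PTR() preserving the distinction between malformed attributes (-EINVAL) and a lookup miss (-ENOENT). Callers follow the usual pattern:

bss = nl80211_assoc_bss(rdev, ssid, ssid_len, info->attrs, &bssid);
if (IS_ERR(bss))
	return PTR_ERR(bss);
/* ... hand bss to the assoc request ... */
cfg80211_put_bss(&rdev->wiphy, bss);	/* drop the reference when done */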
static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
- struct ieee80211_channel *chan;
struct cfg80211_assoc_request req = {};
+ struct nlattr **attrs = NULL;
const u8 *bssid, *ssid;
- int err, ssid_len = 0;
- u32 freq;
+ unsigned int link_id;
+ int err, ssid_len;
if (dev->ieee80211_ptr->conn_owner_nlportid &&
dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
return -EPERM;
- if (!info->attrs[NL80211_ATTR_MAC] ||
- !info->attrs[NL80211_ATTR_SSID] ||
- !info->attrs[NL80211_ATTR_WIPHY_FREQ])
+ if (!info->attrs[NL80211_ATTR_SSID])
return -EINVAL;
if (!rdev->ops->assoc)
@@ -10233,22 +10752,19 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
return -EOPNOTSUPP;
- bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
-
- freq = MHZ_TO_KHZ(nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
- if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET])
- freq +=
- nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]);
- chan = nl80211_get_valid_chan(&rdev->wiphy, freq);
- if (!chan)
- return -EINVAL;
-
ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
if (info->attrs[NL80211_ATTR_IE]) {
req.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
+
+ if (cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
+ req.ie, req.ie_len)) {
+ GENL_SET_ERR_MSG(info,
+ "non-inheritance makes no sense");
+ return -EINVAL;
+ }
}
if (info->attrs[NL80211_ATTR_USE_MFP]) {
@@ -10285,6 +10801,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HE]))
req.flags |= ASSOC_REQ_DISABLE_HE;
+ if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_EHT]))
+ req.flags |= ASSOC_REQ_DISABLE_EHT;
+
if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
memcpy(&req.vht_capa_mask,
nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]),
@@ -10333,12 +10852,113 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
sizeof(req.s1g_capa));
}
+ req.link_id = nl80211_link_id_or_invalid(info->attrs);
+
+ if (info->attrs[NL80211_ATTR_MLO_LINKS]) {
+ unsigned int attrsize = NUM_NL80211_ATTR * sizeof(*attrs);
+ struct nlattr *link;
+ int rem = 0;
+
+ if (req.link_id < 0)
+ return -EINVAL;
+
+ if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_MLO))
+ return -EINVAL;
+
+ if (info->attrs[NL80211_ATTR_MAC] ||
+ info->attrs[NL80211_ATTR_WIPHY_FREQ] ||
+ !info->attrs[NL80211_ATTR_MLD_ADDR])
+ return -EINVAL;
+
+ req.ap_mld_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]);
+
+ attrs = kzalloc(attrsize, GFP_KERNEL);
+ if (!attrs)
+ return -ENOMEM;
+
+ nla_for_each_nested(link,
+ info->attrs[NL80211_ATTR_MLO_LINKS],
+ rem) {
+ memset(attrs, 0, attrsize);
+
+ nla_parse_nested(attrs, NL80211_ATTR_MAX,
+ link, NULL, NULL);
+
+ if (!attrs[NL80211_ATTR_MLO_LINK_ID]) {
+ err = -EINVAL;
+ goto free;
+ }
+
+ link_id = nla_get_u8(attrs[NL80211_ATTR_MLO_LINK_ID]);
+ /* cannot use the same link ID again */
+ if (req.links[link_id].bss) {
+ err = -EINVAL;
+ goto free;
+ }
+ req.links[link_id].bss =
+ nl80211_assoc_bss(rdev, ssid, ssid_len, attrs,
+ &bssid);
+ if (IS_ERR(req.links[link_id].bss)) {
+ err = PTR_ERR(req.links[link_id].bss);
+ req.links[link_id].bss = NULL;
+ goto free;
+ }
+
+ if (attrs[NL80211_ATTR_IE]) {
+ req.links[link_id].elems =
+ nla_data(attrs[NL80211_ATTR_IE]);
+ req.links[link_id].elems_len =
+ nla_len(attrs[NL80211_ATTR_IE]);
+
+ if (cfg80211_find_elem(WLAN_EID_FRAGMENT,
+ req.links[link_id].elems,
+ req.links[link_id].elems_len)) {
+ GENL_SET_ERR_MSG(info,
+ "cannot deal with fragmentation");
+ err = -EINVAL;
+ goto free;
+ }
+
+ if (cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
+ req.links[link_id].elems,
+ req.links[link_id].elems_len)) {
+ GENL_SET_ERR_MSG(info,
+ "cannot deal with non-inheritance");
+ err = -EINVAL;
+ goto free;
+ }
+ }
+ }
+
+ if (!req.links[req.link_id].bss) {
+ err = -EINVAL;
+ goto free;
+ }
+
+ if (req.links[req.link_id].elems_len) {
+ GENL_SET_ERR_MSG(info,
+ "cannot have per-link elems on assoc link");
+ err = -EINVAL;
+ goto free;
+ }
+
+ kfree(attrs);
+ attrs = NULL;
+ } else {
+ if (req.link_id >= 0)
+ return -EINVAL;
+
+ req.bss = nl80211_assoc_bss(rdev, ssid, ssid_len, info->attrs,
+ &bssid);
+ if (IS_ERR(req.bss))
+ return PTR_ERR(req.bss);
+ }
+
err = nl80211_crypto_settings(rdev, info, &req.crypto, 1);
if (!err) {
wdev_lock(dev->ieee80211_ptr);
- err = cfg80211_mlme_assoc(rdev, dev, chan, bssid,
- ssid, ssid_len, &req);
+ err = cfg80211_mlme_assoc(rdev, dev, &req);
if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
dev->ieee80211_ptr->conn_owner_nlportid =
@@ -10350,6 +10970,12 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
wdev_unlock(dev->ieee80211_ptr);
}
+free:
+ for (link_id = 0; link_id < ARRAY_SIZE(req.links); link_id++)
+ cfg80211_put_bss(&rdev->wiphy, req.links[link_id].bss);
+ cfg80211_put_bss(&rdev->wiphy, req.bss);
+ kfree(attrs);
+
return err;
}
@@ -10552,6 +11178,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
NL80211_EXT_FEATURE_VHT_IBSS))
return -EINVAL;
break;
+ case NL80211_CHAN_WIDTH_320:
+ return -EINVAL;
default:
return -EINVAL;
}
@@ -10653,7 +11281,6 @@ static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info)
struct net_device *dev = info->user_ptr[1];
int mcast_rate[NUM_NL80211_BANDS];
u32 nla_rate;
- int err;
if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC &&
dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT &&
@@ -10672,9 +11299,7 @@ static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info)
if (!nl80211_parse_mcast_rate(rdev, mcast_rate, nla_rate))
return -EINVAL;
- err = rdev_set_mcast_rate(rdev, dev, mcast_rate);
-
- return err;
+ return rdev_set_mcast_rate(rdev, dev, mcast_rate);
}
static struct sk_buff *
@@ -11071,6 +11696,9 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HE]))
connect.flags |= ASSOC_REQ_DISABLE_HE;
+ if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_EHT]))
+ connect.flags |= ASSOC_REQ_DISABLE_EHT;
+
if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
memcpy(&connect.vht_capa_mask,
nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]),
@@ -11158,6 +11786,9 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
connect.flags |= CONNECT_REQ_EXTERNAL_AUTH_SUPPORT;
}
+ if (nla_get_flag(info->attrs[NL80211_ATTR_MLO_SUPPORT]))
+ connect.flags |= CONNECT_REQ_MLO_SUPPORT;
+
wdev_lock(dev->ieee80211_ptr);
err = cfg80211_connect(rdev, dev, &connect, connkeys,
@@ -11251,7 +11882,7 @@ static int nl80211_update_connect_params(struct sk_buff *skb,
}
wdev_lock(dev->ieee80211_ptr);
- if (!wdev->current_bss)
+ if (!wdev->connected)
ret = -ENOLINK;
else
ret = rdev_update_connect_params(rdev, dev, &connect, changed);
@@ -11464,9 +12095,9 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ unsigned int link_id = nl80211_link_id(info->attrs);
struct wireless_dev *wdev = info->user_ptr[1];
struct cfg80211_chan_def chandef;
- const struct cfg80211_chan_def *compat_chandef;
struct sk_buff *msg;
void *hdr;
u64 cookie;
@@ -11496,10 +12127,22 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
return err;
wdev_lock(wdev);
- if (!cfg80211_off_channel_oper_allowed(wdev) &&
- !cfg80211_chandef_identical(&wdev->chandef, &chandef)) {
- compat_chandef = cfg80211_chandef_compatible(&wdev->chandef,
- &chandef);
+ if (!cfg80211_off_channel_oper_allowed(wdev, chandef.chan)) {
+ const struct cfg80211_chan_def *oper_chandef, *compat_chandef;
+
+ oper_chandef = wdev_chandef(wdev, link_id);
+
+ if (WARN_ON(!oper_chandef)) {
+ /* cannot happen since we must beacon to get here */
+ wdev_unlock(wdev);
+ return -EBUSY;
+ }
+
+ /* note: returns first one if identical chandefs */
+ compat_chandef = cfg80211_chandef_compatible(&chandef,
+ oper_chandef);
+
if (compat_chandef != &chandef) {
wdev_unlock(wdev);
return -EBUSY;
@@ -11561,20 +12204,26 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
struct genl_info *info)
{
struct cfg80211_bitrate_mask mask;
+ unsigned int link_id = nl80211_link_id(info->attrs);
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
if (!rdev->ops->set_bitrate_mask)
return -EOPNOTSUPP;
+ wdev_lock(wdev);
err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
NL80211_ATTR_TX_RATES, &mask,
- dev, true);
+ dev, true, link_id);
if (err)
- return err;
+ goto out;
- return rdev_set_bitrate_mask(rdev, dev, NULL, &mask);
+ err = rdev_set_bitrate_mask(rdev, dev, link_id, NULL, &mask);
+out:
+ wdev_unlock(wdev);
+ return err;
}
static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
@@ -11696,10 +12345,23 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
wdev_lock(wdev);
- if (params.offchan && !cfg80211_off_channel_oper_allowed(wdev)) {
+ if (params.offchan &&
+ !cfg80211_off_channel_oper_allowed(wdev, chandef.chan)) {
wdev_unlock(wdev);
return -EBUSY;
}
+
+ params.link_id = nl80211_link_id_or_invalid(info->attrs);
+ /*
+ * This now races due to the unlock, but we cannot check
+ * the valid links for the _station_ anyway, so that's up
+ * to the driver.
+ */
+ if (params.link_id >= 0 &&
+ !(wdev->valid_links & BIT(params.link_id))) {
+ wdev_unlock(wdev);
+ return -EINVAL;
+ }
wdev_unlock(wdev);
params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]);
@@ -11914,12 +12576,13 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
* connection is established and enough beacons received to calculate
* the average.
*/
- if (!wdev->cqm_config->last_rssi_event_value && wdev->current_bss &&
+ if (!wdev->cqm_config->last_rssi_event_value &&
+ wdev->links[0].client.current_bss &&
rdev->ops->get_station) {
struct station_info sinfo = {};
u8 *mac_addr;
- mac_addr = wdev->current_bss->pub.bssid;
+ mac_addr = wdev->links[0].client.current_bss->pub.bssid;
err = rdev_get_station(rdev, dev, mac_addr, &sinfo);
if (err)
@@ -12182,7 +12845,7 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
NL80211_ATTR_TX_RATES,
&setup.beacon_rate,
- dev, false);
+ dev, false, 0);
if (err)
return err;
@@ -12602,7 +13265,9 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev,
wake_mask_size);
if (tok) {
cfg->tokens_size = tokens_size;
- memcpy(&cfg->payload_tok, tok, sizeof(*tok) + tokens_size);
+ cfg->payload_tok = *tok;
+ memcpy(cfg->payload_tok.token_stream, tok->token_stream,
+ tokens_size);
}
trig->tcp = cfg;
@@ -13152,7 +13817,7 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
rekey_data.akm = nla_get_u32(tb[NL80211_REKEY_DATA_AKM]);
wdev_lock(wdev);
- if (!wdev->current_bss) {
+ if (!wdev->connected) {
err = -ENOTCONN;
goto out;
}
@@ -13411,6 +14076,9 @@ static int handle_nan_filter(struct nlattr *attr_filter,
i = 0;
nla_for_each_nested(attr, attr_filter, rem) {
filter[i].filter = nla_memdup(attr, GFP_KERNEL);
+ if (!filter[i].filter)
+ goto err;
+
filter[i].len = nla_len(attr);
i++;
}
@@ -13423,6 +14091,15 @@ static int handle_nan_filter(struct nlattr *attr_filter,
}
return 0;
+
+err:
+ i = 0;
+ nla_for_each_nested(attr, attr_filter, rem) {
+ kfree(filter[i].filter);
+ i++;
+ }
+ kfree(filter);
+ return -ENOMEM;
}
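The new error path re-walks the attribute stream and frees every slot; assuming the filter array was zero-allocated, this is safe because kfree(NULL) is a no-op for the slots past the failure point. An equivalent cleanup driven by the loop counter instead would be:

err:
	while (i--)
		kfree(filter[i].filter);	/* only the first i were allocated */
	kfree(filter);
	return -ENOMEM;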
static int nl80211_nan_add_func(struct sk_buff *skb,
@@ -14409,7 +15086,7 @@ static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info)
switch (wdev->iftype) {
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_P2P_CLIENT:
- if (wdev->current_bss)
+ if (wdev->connected)
break;
err = -ENOTCONN;
goto out;
@@ -14582,13 +15259,13 @@ static int nl80211_set_pmk(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
wdev_lock(wdev);
- if (!wdev->current_bss) {
+ if (!wdev->connected) {
ret = -ENOTCONN;
goto out;
}
pmk_conf.aa = nla_data(info->attrs[NL80211_ATTR_MAC]);
- if (memcmp(pmk_conf.aa, wdev->current_bss->pub.bssid, ETH_ALEN)) {
+ if (memcmp(pmk_conf.aa, wdev->u.client.connected_addr, ETH_ALEN)) {
ret = -EINVAL;
goto out;
}
@@ -14692,6 +15369,7 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info)
u16 proto;
bool noencrypt;
u64 cookie = 0;
+ int link_id;
int err;
if (!wiphy_ext_feature_isset(&rdev->wiphy,
@@ -14716,9 +15394,13 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info)
case NL80211_IFTYPE_MESH_POINT:
break;
case NL80211_IFTYPE_ADHOC:
+ if (wdev->u.ibss.current_bss)
+ break;
+ err = -ENOTCONN;
+ goto out;
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_P2P_CLIENT:
- if (wdev->current_bss)
+ if (wdev->connected)
break;
err = -ENOTCONN;
goto out;
@@ -14736,8 +15418,10 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info)
noencrypt =
nla_get_flag(info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT]);
+ link_id = nl80211_link_id_or_invalid(info->attrs);
+
err = rdev_tx_control_port(rdev, dev, buf, len,
- dest, cpu_to_be16(proto), noencrypt,
+ dest, cpu_to_be16(proto), noencrypt, link_id,
dont_wait_for_ack ? NULL : &cookie);
if (!err && !dont_wait_for_ack)
nl_set_extack_cookie_u64(info->extack, cookie);
@@ -14754,12 +15438,14 @@ static int nl80211_get_ftm_responder_stats(struct sk_buff *skb,
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_ftm_responder_stats ftm_stats = {};
+ unsigned int link_id = nl80211_link_id(info->attrs);
struct sk_buff *msg;
void *hdr;
struct nlattr *ftm_stats_attr;
int err;
- if (wdev->iftype != NL80211_IFTYPE_AP || !wdev->beacon_interval)
+ if (wdev->iftype != NL80211_IFTYPE_AP ||
+ !wdev->links[link_id].ap.beacon_interval)
return -EOPNOTSUPP;
err = rdev_get_ftm_responder_stats(rdev, dev, &ftm_stats);
@@ -14889,7 +15575,8 @@ static int nl80211_probe_mesh_link(struct sk_buff *skb, struct genl_info *info)
static int parse_tid_conf(struct cfg80211_registered_device *rdev,
struct nlattr *attrs[], struct net_device *dev,
struct cfg80211_tid_cfg *tid_conf,
- struct genl_info *info, const u8 *peer)
+ struct genl_info *info, const u8 *peer,
+ unsigned int link_id)
{
struct netlink_ext_ack *extack = info->extack;
u64 mask;
@@ -14964,7 +15651,7 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev,
attr = NL80211_TID_CONFIG_ATTR_TX_RATE;
err = nl80211_parse_tx_bitrate_mask(info, attrs, attr,
&tid_conf->txrate_mask, dev,
- true);
+ true, link_id);
if (err)
return err;
@@ -14991,6 +15678,7 @@ static int nl80211_set_tid_config(struct sk_buff *skb,
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct nlattr *attrs[NL80211_TID_CONFIG_ATTR_MAX + 1];
+ unsigned int link_id = nl80211_link_id(info->attrs);
struct net_device *dev = info->user_ptr[1];
struct cfg80211_tid_config *tid_config;
struct nlattr *tid;
@@ -15018,6 +15706,8 @@ static int nl80211_set_tid_config(struct sk_buff *skb,
if (info->attrs[NL80211_ATTR_MAC])
tid_config->peer = nla_data(info->attrs[NL80211_ATTR_MAC]);
+ wdev_lock(dev->ieee80211_ptr);
+
nla_for_each_nested(tid, info->attrs[NL80211_ATTR_TID_CONFIG],
rem_conf) {
ret = nla_parse_nested(attrs, NL80211_TID_CONFIG_ATTR_MAX,
@@ -15028,7 +15718,7 @@ static int nl80211_set_tid_config(struct sk_buff *skb,
ret = parse_tid_conf(rdev, attrs, dev,
&tid_config->tid_conf[conf_idx],
- info, tid_config->peer);
+ info, tid_config->peer, link_id);
if (ret)
goto bad_tid_conf;
@@ -15039,6 +15729,7 @@ static int nl80211_set_tid_config(struct sk_buff *skb,
bad_tid_conf:
kfree(tid_config);
+ wdev_unlock(dev->ieee80211_ptr);
return ret;
}
@@ -15167,6 +15858,199 @@ static int nl80211_set_fils_aad(struct sk_buff *skb,
return rdev_set_fils_aad(rdev, dev, &fils_aad);
}
+static int nl80211_add_link(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ unsigned int link_id = nl80211_link_id(info->attrs);
+ struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ int ret;
+
+ if (!(wdev->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO))
+ return -EINVAL;
+
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_AP:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (!info->attrs[NL80211_ATTR_MAC] ||
+ !is_valid_ether_addr(nla_data(info->attrs[NL80211_ATTR_MAC])))
+ return -EINVAL;
+
+ wdev_lock(wdev);
+ wdev->valid_links |= BIT(link_id);
+ ether_addr_copy(wdev->links[link_id].addr,
+ nla_data(info->attrs[NL80211_ATTR_MAC]));
+
+ ret = rdev_add_intf_link(rdev, wdev, link_id);
+ if (ret) {
+ wdev->valid_links &= ~BIT(link_id);
+ eth_zero_addr(wdev->links[link_id].addr);
+ }
+ wdev_unlock(wdev);
+
+ return ret;
+}
+
+static int nl80211_remove_link(struct sk_buff *skb, struct genl_info *info)
+{
+ unsigned int link_id = nl80211_link_id(info->attrs);
+ struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+ /* cannot remove if there's no link */
+ if (!info->attrs[NL80211_ATTR_MLO_LINK_ID])
+ return -EINVAL;
+
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_AP:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ wdev_lock(wdev);
+ cfg80211_remove_link(wdev, link_id);
+ wdev_unlock(wdev);
+
+ return 0;
+}
+
+static int
+nl80211_add_mod_link_station(struct sk_buff *skb, struct genl_info *info,
+ bool add)
+{
+ struct link_station_parameters params = {};
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ int err;
+
+ if ((add && !rdev->ops->add_link_station) ||
+ (!add && !rdev->ops->mod_link_station))
+ return -EOPNOTSUPP;
+
+ if (add && !info->attrs[NL80211_ATTR_MAC])
+ return -EINVAL;
+
+ if (!info->attrs[NL80211_ATTR_MLD_ADDR])
+ return -EINVAL;
+
+ if (add && !info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES])
+ return -EINVAL;
+
+ params.mld_mac = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]);
+
+ if (info->attrs[NL80211_ATTR_MAC]) {
+ params.link_mac = nla_data(info->attrs[NL80211_ATTR_MAC]);
+ if (!is_valid_ether_addr(params.link_mac))
+ return -EINVAL;
+ }
+
+ if (!info->attrs[NL80211_ATTR_MLO_LINK_ID])
+ return -EINVAL;
+
+ params.link_id = nla_get_u8(info->attrs[NL80211_ATTR_MLO_LINK_ID]);
+
+ if (info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) {
+ params.supported_rates =
+ nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
+ params.supported_rates_len =
+ nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
+ }
+
+ if (info->attrs[NL80211_ATTR_HT_CAPABILITY])
+ params.ht_capa =
+ nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
+
+ if (info->attrs[NL80211_ATTR_VHT_CAPABILITY])
+ params.vht_capa =
+ nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
+
+ if (info->attrs[NL80211_ATTR_HE_CAPABILITY]) {
+ params.he_capa =
+ nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+ params.he_capa_len =
+ nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+
+ if (info->attrs[NL80211_ATTR_EHT_CAPABILITY]) {
+ params.eht_capa =
+ nla_data(info->attrs[NL80211_ATTR_EHT_CAPABILITY]);
+ params.eht_capa_len =
+ nla_len(info->attrs[NL80211_ATTR_EHT_CAPABILITY]);
+
+ if (!ieee80211_eht_capa_size_ok((const u8 *)params.he_capa,
+ (const u8 *)params.eht_capa,
+ params.eht_capa_len,
+ false))
+ return -EINVAL;
+ }
+ }
+
+ if (info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY])
+ params.he_6ghz_capa =
+ nla_data(info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]);
+
+ if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) {
+ params.opmode_notif_used = true;
+ params.opmode_notif =
+ nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]);
+ }
+
+ err = nl80211_parse_sta_txpower_setting(info, &params.txpwr,
+ &params.txpwr_set);
+ if (err)
+ return err;
+
+ wdev_lock(dev->ieee80211_ptr);
+ if (add)
+ err = rdev_add_link_station(rdev, dev, &params);
+ else
+ err = rdev_mod_link_station(rdev, dev, &params);
+ wdev_unlock(dev->ieee80211_ptr);
+
+ return err;
+}
+
+static int
+nl80211_add_link_station(struct sk_buff *skb, struct genl_info *info)
+{
+ return nl80211_add_mod_link_station(skb, info, true);
+}
+
+static int
+nl80211_modify_link_station(struct sk_buff *skb, struct genl_info *info)
+{
+ return nl80211_add_mod_link_station(skb, info, false);
+}
+
+static int
+nl80211_remove_link_station(struct sk_buff *skb, struct genl_info *info)
+{
+ struct link_station_del_parameters params = {};
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ int ret;
+
+ if (!rdev->ops->del_link_station)
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[NL80211_ATTR_MLD_ADDR] ||
+ !info->attrs[NL80211_ATTR_MLO_LINK_ID])
+ return -EINVAL;
+
+ params.mld_mac = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]);
+ params.link_id = nla_get_u8(info->attrs[NL80211_ATTR_MLO_LINK_ID]);
+
+ wdev_lock(dev->ieee80211_ptr);
+ ret = rdev_del_link_station(rdev, dev, &params);
+ wdev_unlock(dev->ieee80211_ptr);
+
+ return ret;
+}
+
#define NL80211_FLAG_NEED_WIPHY 0x01
#define NL80211_FLAG_NEED_NETDEV 0x02
#define NL80211_FLAG_NEED_RTNL 0x04
@@ -15179,38 +16063,121 @@ static int nl80211_set_fils_aad(struct sk_buff *skb,
NL80211_FLAG_CHECK_NETDEV_UP)
#define NL80211_FLAG_CLEAR_SKB 0x20
#define NL80211_FLAG_NO_WIPHY_MTX 0x40
+#define NL80211_FLAG_MLO_VALID_LINK_ID 0x80
+#define NL80211_FLAG_MLO_UNSUPPORTED 0x100
+
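+/*
+ * Every combination of internal flags that some nl80211 op actually
+ * uses gets a named selector below. The flags no longer fit in the
+ * narrow genl_ops::internal_flags field, so each op stores the small
+ * selector index instead, and nl80211_pre_doit()/nl80211_post_doit()
+ * map it back to the full bitmask via nl80211_internal_flags[].
+ */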
+#define INTERNAL_FLAG_SELECTORS(__sel) \
+ SELECTOR(__sel, NONE, 0) /* must be first */ \
+ SELECTOR(__sel, WIPHY, \
+ NL80211_FLAG_NEED_WIPHY) \
+ SELECTOR(__sel, WDEV, \
+ NL80211_FLAG_NEED_WDEV) \
+ SELECTOR(__sel, NETDEV, \
+ NL80211_FLAG_NEED_NETDEV) \
+ SELECTOR(__sel, NETDEV_LINK, \
+ NL80211_FLAG_NEED_NETDEV | \
+ NL80211_FLAG_MLO_VALID_LINK_ID) \
+ SELECTOR(__sel, NETDEV_NO_MLO, \
+ NL80211_FLAG_NEED_NETDEV | \
+ NL80211_FLAG_MLO_UNSUPPORTED) \
+ SELECTOR(__sel, WIPHY_RTNL, \
+ NL80211_FLAG_NEED_WIPHY | \
+ NL80211_FLAG_NEED_RTNL) \
+ SELECTOR(__sel, WIPHY_RTNL_NOMTX, \
+ NL80211_FLAG_NEED_WIPHY | \
+ NL80211_FLAG_NEED_RTNL | \
+ NL80211_FLAG_NO_WIPHY_MTX) \
+ SELECTOR(__sel, WDEV_RTNL, \
+ NL80211_FLAG_NEED_WDEV | \
+ NL80211_FLAG_NEED_RTNL) \
+ SELECTOR(__sel, NETDEV_RTNL, \
+ NL80211_FLAG_NEED_NETDEV | \
+ NL80211_FLAG_NEED_RTNL) \
+ SELECTOR(__sel, NETDEV_UP, \
+ NL80211_FLAG_NEED_NETDEV_UP) \
+ SELECTOR(__sel, NETDEV_UP_LINK, \
+ NL80211_FLAG_NEED_NETDEV_UP | \
+ NL80211_FLAG_MLO_VALID_LINK_ID) \
+ SELECTOR(__sel, NETDEV_UP_NO_MLO, \
+ NL80211_FLAG_NEED_NETDEV_UP | \
+ NL80211_FLAG_MLO_UNSUPPORTED) \
+ SELECTOR(__sel, NETDEV_UP_NO_MLO_CLEAR, \
+ NL80211_FLAG_NEED_NETDEV_UP | \
+ NL80211_FLAG_CLEAR_SKB | \
+ NL80211_FLAG_MLO_UNSUPPORTED) \
+ SELECTOR(__sel, NETDEV_UP_NOTMX, \
+ NL80211_FLAG_NEED_NETDEV_UP | \
+ NL80211_FLAG_NO_WIPHY_MTX) \
+ SELECTOR(__sel, NETDEV_UP_NOTMX_NOMLO, \
+ NL80211_FLAG_NEED_NETDEV_UP | \
+ NL80211_FLAG_NO_WIPHY_MTX | \
+ NL80211_FLAG_MLO_UNSUPPORTED) \
+ SELECTOR(__sel, NETDEV_UP_CLEAR, \
+ NL80211_FLAG_NEED_NETDEV_UP | \
+ NL80211_FLAG_CLEAR_SKB) \
+ SELECTOR(__sel, WDEV_UP, \
+ NL80211_FLAG_NEED_WDEV_UP) \
+ SELECTOR(__sel, WDEV_UP_LINK, \
+ NL80211_FLAG_NEED_WDEV_UP | \
+ NL80211_FLAG_MLO_VALID_LINK_ID) \
+ SELECTOR(__sel, WDEV_UP_RTNL, \
+ NL80211_FLAG_NEED_WDEV_UP | \
+ NL80211_FLAG_NEED_RTNL) \
+ SELECTOR(__sel, WIPHY_CLEAR, \
+ NL80211_FLAG_NEED_WIPHY | \
+ NL80211_FLAG_CLEAR_SKB)
+
+enum nl80211_internal_flags_selector {
+#define SELECTOR(_, name, value) NL80211_IFL_SEL_##name,
+ INTERNAL_FLAG_SELECTORS(_)
+#undef SELECTOR
+};
+
+static u32 nl80211_internal_flags[] = {
+#define SELECTOR(_, name, value) [NL80211_IFL_SEL_##name] = value,
+ INTERNAL_FLAG_SELECTORS(_)
+#undef SELECTOR
+};
static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
struct genl_info *info)
{
struct cfg80211_registered_device *rdev = NULL;
- struct wireless_dev *wdev;
- struct net_device *dev;
+ struct wireless_dev *wdev = NULL;
+ struct net_device *dev = NULL;
+ u32 internal_flags;
+ int err;
+
+ if (WARN_ON(ops->internal_flags >= ARRAY_SIZE(nl80211_internal_flags)))
+ return -EINVAL;
+
+ internal_flags = nl80211_internal_flags[ops->internal_flags];
rtnl_lock();
- if (ops->internal_flags & NL80211_FLAG_NEED_WIPHY) {
+ if (internal_flags & NL80211_FLAG_NEED_WIPHY) {
rdev = cfg80211_get_dev_from_info(genl_info_net(info), info);
if (IS_ERR(rdev)) {
- rtnl_unlock();
- return PTR_ERR(rdev);
+ err = PTR_ERR(rdev);
+ goto out_unlock;
}
info->user_ptr[0] = rdev;
- } else if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV ||
- ops->internal_flags & NL80211_FLAG_NEED_WDEV) {
+ } else if (internal_flags & NL80211_FLAG_NEED_NETDEV ||
+ internal_flags & NL80211_FLAG_NEED_WDEV) {
wdev = __cfg80211_wdev_from_attrs(NULL, genl_info_net(info),
info->attrs);
if (IS_ERR(wdev)) {
- rtnl_unlock();
- return PTR_ERR(wdev);
+ err = PTR_ERR(wdev);
+ goto out_unlock;
}
dev = wdev->netdev;
+ dev_hold(dev);
rdev = wiphy_to_rdev(wdev->wiphy);
- if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) {
+ if (internal_flags & NL80211_FLAG_NEED_NETDEV) {
if (!dev) {
- rtnl_unlock();
- return -EINVAL;
+ err = -EINVAL;
+ goto out_unlock;
}
info->user_ptr[1] = dev;
@@ -15218,32 +16185,68 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
info->user_ptr[1] = wdev;
}
- if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP &&
+ if (internal_flags & NL80211_FLAG_CHECK_NETDEV_UP &&
!wdev_running(wdev)) {
- rtnl_unlock();
- return -ENETDOWN;
+ err = -ENETDOWN;
+ goto out_unlock;
}
- dev_hold(dev);
info->user_ptr[0] = rdev;
}
- if (rdev && !(ops->internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) {
+ if (internal_flags & NL80211_FLAG_MLO_VALID_LINK_ID) {
+ struct nlattr *link_id = info->attrs[NL80211_ATTR_MLO_LINK_ID];
+
+ if (!wdev) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ /* MLO -> require valid link ID */
+ if (wdev->valid_links &&
+ (!link_id ||
+ !(wdev->valid_links & BIT(nla_get_u8(link_id))))) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ /* non-MLO -> no link ID attribute accepted */
+ if (!wdev->valid_links && link_id) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ }
+
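+ /* reject MLO state or a link ID on commands that don't support MLO */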
+ if (internal_flags & NL80211_FLAG_MLO_UNSUPPORTED) {
+ if (info->attrs[NL80211_ATTR_MLO_LINK_ID] ||
+ (wdev && wdev->valid_links)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ }
+
+ if (rdev && !(internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) {
wiphy_lock(&rdev->wiphy);
/* we keep the mutex locked until post_doit */
__release(&rdev->wiphy.mtx);
}
- if (!(ops->internal_flags & NL80211_FLAG_NEED_RTNL))
+ if (!(internal_flags & NL80211_FLAG_NEED_RTNL))
rtnl_unlock();
return 0;
+out_unlock:
+ rtnl_unlock();
+ dev_put(dev);
+ return err;
}
static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
struct genl_info *info)
{
+ u32 internal_flags = nl80211_internal_flags[ops->internal_flags];
+
if (info->user_ptr[1]) {
- if (ops->internal_flags & NL80211_FLAG_NEED_WDEV) {
+ if (internal_flags & NL80211_FLAG_NEED_WDEV) {
struct wireless_dev *wdev = info->user_ptr[1];
dev_put(wdev->netdev);
@@ -15253,7 +16256,7 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
}
if (info->user_ptr[0] &&
- !(ops->internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) {
+ !(internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) {
struct cfg80211_registered_device *rdev = info->user_ptr[0];
/* we kept the mutex locked since pre_doit */
@@ -15261,7 +16264,7 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
wiphy_unlock(&rdev->wiphy);
}
- if (ops->internal_flags & NL80211_FLAG_NEED_RTNL)
+ if (internal_flags & NL80211_FLAG_NEED_RTNL)
rtnl_unlock();
/* If needed, clear the netlink message payload from the SKB
@@ -15269,7 +16272,7 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
* the heap after the SKB is freed. The netlink message header
* is still needed for further processing, so leave it intact.
*/
- if (ops->internal_flags & NL80211_FLAG_CLEAR_SKB) {
+ if (internal_flags & NL80211_FLAG_CLEAR_SKB) {
struct nlmsghdr *nlh = nlmsg_hdr(skb);
memset(nlmsg_data(nlh), 0, nlmsg_len(nlh));
@@ -15379,6 +16382,11 @@ error:
return err;
}
+#define SELECTOR(__sel, name, value) \
+ ((__sel) == (value)) ? NL80211_IFL_SEL_##name :
+int __missing_selector(void);
+#define IFLAGS(__val) INTERNAL_FLAG_SELECTORS(__val) __missing_selector()
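+/*
+ * IFLAGS() expands INTERNAL_FLAG_SELECTORS() into a chain of nested
+ * conditionals that resolves, at compile time, to the selector index
+ * for the given flag combination. A combination with no selector ends
+ * at __missing_selector(), which has no definition, so the mistake
+ * fails the build instead of misbehaving at runtime.
+ */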
+
static const struct genl_ops nl80211_ops[] = {
{
.cmd = NL80211_CMD_GET_WIPHY,
@@ -15387,7 +16395,7 @@ static const struct genl_ops nl80211_ops[] = {
.dumpit = nl80211_dump_wiphy,
.done = nl80211_dump_wiphy_done,
/* can be retrieved by unprivileged users */
- .internal_flags = NL80211_FLAG_NEED_WIPHY,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
},
};
@@ -15404,112 +16412,117 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_get_interface,
.dumpit = nl80211_dump_interface,
/* can be retrieved by unprivileged users */
- .internal_flags = NL80211_FLAG_NEED_WDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV),
},
{
.cmd = NL80211_CMD_SET_INTERFACE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_interface,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV |
+ NL80211_FLAG_NEED_RTNL),
},
{
.cmd = NL80211_CMD_NEW_INTERFACE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_new_interface,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY |
- NL80211_FLAG_NEED_RTNL |
- /* we take the wiphy mutex later ourselves */
- NL80211_FLAG_NO_WIPHY_MTX,
+ .internal_flags =
+ IFLAGS(NL80211_FLAG_NEED_WIPHY |
+ NL80211_FLAG_NEED_RTNL |
+ /* we take the wiphy mutex later ourselves */
+ NL80211_FLAG_NO_WIPHY_MTX),
},
{
.cmd = NL80211_CMD_DEL_INTERFACE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_interface,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV |
+ NL80211_FLAG_NEED_RTNL),
},
{
.cmd = NL80211_CMD_GET_KEY,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_key,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_SET_KEY,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_key,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_CLEAR_SKB,
+ /* cannot use NL80211_FLAG_MLO_VALID_LINK_ID, depends on key */
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_NEW_KEY,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_new_key,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_CLEAR_SKB,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_DEL_KEY,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_key,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_SET_BEACON,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_UNS_ADMIN_PERM,
.doit = nl80211_set_beacon,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_MLO_VALID_LINK_ID),
},
{
.cmd = NL80211_CMD_START_AP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_UNS_ADMIN_PERM,
.doit = nl80211_start_ap,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_MLO_VALID_LINK_ID),
},
{
.cmd = NL80211_CMD_STOP_AP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_UNS_ADMIN_PERM,
.doit = nl80211_stop_ap,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_MLO_VALID_LINK_ID),
},
{
.cmd = NL80211_CMD_GET_STATION,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_station,
.dumpit = nl80211_dump_station,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_SET_STATION,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_station,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_NEW_STATION,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_new_station,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_DEL_STATION,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_station,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_GET_MPATH,
@@ -15517,7 +16530,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_get_mpath,
.dumpit = nl80211_dump_mpath,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_GET_MPP,
@@ -15525,42 +16538,41 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_get_mpp,
.dumpit = nl80211_dump_mpp,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_SET_MPATH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_mpath,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_NEW_MPATH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_new_mpath,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_DEL_MPATH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_mpath,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_SET_BSS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_bss,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_GET_REG,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_reg_do,
.dumpit = nl80211_get_reg_dump,
- .internal_flags = 0,
/* can be retrieved by unprivileged users */
},
#ifdef CONFIG_CFG80211_CRDA_SUPPORT
@@ -15569,7 +16581,6 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_reg,
.flags = GENL_ADMIN_PERM,
- .internal_flags = 0,
},
#endif
{
@@ -15589,28 +16600,28 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_mesh_config,
/* can be retrieved by unprivileged users */
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_SET_MESH_CONFIG,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_update_mesh_config,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_TRIGGER_SCAN,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_trigger_scan,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_ABORT_SCAN,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_abort_scan,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_GET_SCAN,
@@ -15622,60 +16633,58 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_start_sched_scan,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_STOP_SCHED_SCAN,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_stop_sched_scan,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_AUTHENTICATE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_authenticate,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- 0 |
- NL80211_FLAG_CLEAR_SKB,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_ASSOCIATE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_associate,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- 0 |
- NL80211_FLAG_CLEAR_SKB,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_DEAUTHENTICATE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_deauthenticate,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_DISASSOCIATE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_disassociate,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_JOIN_IBSS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_join_ibss,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_LEAVE_IBSS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_leave_ibss,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
#ifdef CONFIG_NL80211_TESTMODE
{
@@ -15684,7 +16693,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_testmode_do,
.dumpit = nl80211_testmode_dump,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
},
#endif
{
@@ -15692,34 +16701,32 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_connect,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- 0 |
- NL80211_FLAG_CLEAR_SKB,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_update_connect_params,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- 0 |
- NL80211_FLAG_CLEAR_SKB,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_DISCONNECT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_disconnect,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_SET_WIPHY_NETNS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_wiphy_netns,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY |
- NL80211_FLAG_NEED_RTNL |
- NL80211_FLAG_NO_WIPHY_MTX,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY |
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_NO_WIPHY_MTX),
},
{
.cmd = NL80211_CMD_GET_SURVEY,
@@ -15731,121 +16738,124 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_setdel_pmksa,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- 0 |
- NL80211_FLAG_CLEAR_SKB,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_DEL_PMKSA,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_setdel_pmksa,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_FLUSH_PMKSA,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_flush_pmksa,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_REMAIN_ON_CHANNEL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_remain_on_channel,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ /* FIXME: requiring a link ID here is probably not good */
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP |
+ NL80211_FLAG_MLO_VALID_LINK_ID),
},
{
.cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_cancel_remain_on_channel,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_SET_TX_BITRATE_MASK,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_tx_bitrate_mask,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV |
+ NL80211_FLAG_MLO_VALID_LINK_ID),
},
{
.cmd = NL80211_CMD_REGISTER_FRAME,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_register_mgmt,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV),
},
{
.cmd = NL80211_CMD_FRAME,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tx_mgmt,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_FRAME_WAIT_CANCEL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tx_mgmt_cancel_wait,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_SET_POWER_SAVE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_power_save,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_GET_POWER_SAVE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_power_save,
/* can be retrieved by unprivileged users */
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_SET_CQM,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_cqm,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_SET_CHANNEL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_channel,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV |
+ NL80211_FLAG_MLO_VALID_LINK_ID),
},
{
.cmd = NL80211_CMD_JOIN_MESH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_join_mesh,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_LEAVE_MESH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_leave_mesh,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_JOIN_OCB,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_join_ocb,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_LEAVE_OCB,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_leave_ocb,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
#ifdef CONFIG_PM
{
@@ -15853,14 +16863,14 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_wowlan,
/* can be retrieved by unprivileged users */
- .internal_flags = NL80211_FLAG_NEED_WIPHY,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
},
{
.cmd = NL80211_CMD_SET_WOWLAN,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_wowlan,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
},
#endif
{
@@ -15868,126 +16878,127 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_rekey_data,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- 0 |
- NL80211_FLAG_CLEAR_SKB,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_TDLS_MGMT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tdls_mgmt,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_TDLS_OPER,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tdls_oper,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_UNEXPECTED_FRAME,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_register_unexpected_frame,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_PROBE_CLIENT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_probe_client,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_REGISTER_BEACONS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_register_beacons,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
},
{
.cmd = NL80211_CMD_SET_NOACK_MAP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_noack_map,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_START_P2P_DEVICE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_start_p2p_device,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV |
+ NL80211_FLAG_NEED_RTNL),
},
{
.cmd = NL80211_CMD_STOP_P2P_DEVICE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_stop_p2p_device,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP |
+ NL80211_FLAG_NEED_RTNL),
},
{
.cmd = NL80211_CMD_START_NAN,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_start_nan,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV |
+ NL80211_FLAG_NEED_RTNL),
},
{
.cmd = NL80211_CMD_STOP_NAN,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_stop_nan,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP |
+ NL80211_FLAG_NEED_RTNL),
},
{
.cmd = NL80211_CMD_ADD_NAN_FUNCTION,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_nan_add_func,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_DEL_NAN_FUNCTION,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_nan_del_func,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_CHANGE_NAN_CONFIG,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_nan_change_config,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_SET_MCAST_RATE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_mcast_rate,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_SET_MAC_ACL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_mac_acl,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV |
+ NL80211_FLAG_MLO_UNSUPPORTED),
},
{
.cmd = NL80211_CMD_RADAR_DETECT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_start_radar_detection,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NO_WIPHY_MTX,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_NO_WIPHY_MTX |
+ NL80211_FLAG_MLO_UNSUPPORTED),
},
{
.cmd = NL80211_CMD_GET_PROTOCOL_FEATURES,
@@ -15999,41 +17010,42 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_update_ft_ies,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_CRIT_PROTOCOL_START,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_crit_protocol_start,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_CRIT_PROTOCOL_STOP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_crit_protocol_stop,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_GET_COALESCE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_coalesce,
- .internal_flags = NL80211_FLAG_NEED_WIPHY,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
},
{
.cmd = NL80211_CMD_SET_COALESCE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_coalesce,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
},
{
.cmd = NL80211_CMD_CHANNEL_SWITCH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_channel_switch,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_MLO_VALID_LINK_ID),
},
{
.cmd = NL80211_CMD_VENDOR,
@@ -16041,140 +17053,174 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_vendor_cmd,
.dumpit = nl80211_vendor_cmd_dump,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY |
- 0 |
- NL80211_FLAG_CLEAR_SKB,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_SET_QOS_MAP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_qos_map,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_ADD_TX_TS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_add_tx_ts,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_MLO_UNSUPPORTED),
},
{
.cmd = NL80211_CMD_DEL_TX_TS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_tx_ts,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_TDLS_CHANNEL_SWITCH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tdls_channel_switch,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tdls_cancel_channel_switch,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_SET_MULTICAST_TO_UNICAST,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_multicast_to_unicast,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_SET_PMK,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_pmk,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- 0 |
- NL80211_FLAG_CLEAR_SKB,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_DEL_PMK,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_pmk,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_EXTERNAL_AUTH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_external_auth,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_CONTROL_PORT_FRAME,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tx_control_port,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_GET_FTM_RESPONDER_STATS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_ftm_responder_stats,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV |
+ NL80211_FLAG_MLO_VALID_LINK_ID),
},
{
.cmd = NL80211_CMD_PEER_MEASUREMENT_START,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_pmsr_start,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_NOTIFY_RADAR,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_notify_radar_detection,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_UPDATE_OWE_INFO,
.doit = nl80211_update_owe_info,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_PROBE_MESH_LINK,
.doit = nl80211_probe_mesh_link,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_SET_TID_CONFIG,
.doit = nl80211_set_tid_config,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV |
+ NL80211_FLAG_MLO_VALID_LINK_ID),
},
{
.cmd = NL80211_CMD_SET_SAR_SPECS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_sar_specs,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY |
+ NL80211_FLAG_NEED_RTNL),
},
{
.cmd = NL80211_CMD_COLOR_CHANGE_REQUEST,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_color_change,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_SET_FILS_AAD,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_fils_aad,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
+ },
+ {
+ .cmd = NL80211_CMD_ADD_LINK,
+ .doit = nl80211_add_link,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
+ },
+ {
+ .cmd = NL80211_CMD_REMOVE_LINK,
+ .doit = nl80211_remove_link,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_MLO_VALID_LINK_ID),
+ },
+ {
+ .cmd = NL80211_CMD_ADD_LINK_STA,
+ .doit = nl80211_add_link_station,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_MLO_VALID_LINK_ID),
+ },
+ {
+ .cmd = NL80211_CMD_MODIFY_LINK_STA,
+ .doit = nl80211_modify_link_station,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_MLO_VALID_LINK_ID),
+ },
+ {
+ .cmd = NL80211_CMD_REMOVE_LINK_STA,
+ .doit = nl80211_remove_link_station,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_MLO_VALID_LINK_ID),
},
};
@@ -16192,6 +17238,7 @@ static struct genl_family nl80211_fam __ro_after_init = {
.n_ops = ARRAY_SIZE(nl80211_ops),
.small_ops = nl80211_small_ops,
.n_small_ops = ARRAY_SIZE(nl80211_small_ops),
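+ /* first command for which reserved header fields are validated */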
+ .resv_start_op = NL80211_CMD_REMOVE_LINK_STA + 1,
.mcgrps = nl80211_mcgrps,
.n_mcgrps = ARRAY_SIZE(nl80211_mcgrps),
.parallel_ops = true,
@@ -16569,13 +17616,13 @@ void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev,
}
void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, const u8 *buf,
- size_t len, gfp_t gfp, int uapsd_queues,
- const u8 *req_ies, size_t req_ies_len)
+ struct net_device *netdev,
+ struct cfg80211_rx_assoc_resp *data)
{
- nl80211_send_mlme_event(rdev, netdev, buf, len,
- NL80211_CMD_ASSOCIATE, gfp, uapsd_queues,
- req_ies, req_ies_len, false);
+ nl80211_send_mlme_event(rdev, netdev, data->buf, data->len,
+ NL80211_CMD_ASSOCIATE, GFP_KERNEL,
+ data->uapsd_queues,
+ data->req_ies, data->req_ies_len, false);
}
void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
@@ -16684,10 +17731,29 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
{
struct sk_buff *msg;
void *hdr;
+ unsigned int link;
+ size_t link_info_size = 0;
+ const u8 *connected_addr = cr->valid_links ?
+ cr->ap_mld_addr : cr->links[0].bssid;
+
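+ /*
+ * Pre-compute the extra message space the per-link nested attributes
+ * (link ID, link address, BSSID) need, so that the nlmsg_new()
+ * allocation below is large enough.
+ */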
+ if (cr->valid_links) {
+ for_each_valid_link(cr, link) {
+ /* Nested attribute header */
+ link_info_size += NLA_HDRLEN;
+ /* Link ID */
+ link_info_size += nla_total_size(sizeof(u8));
+ link_info_size += cr->links[link].addr ?
+ nla_total_size(ETH_ALEN) : 0;
+ link_info_size += (cr->links[link].bssid ||
+ cr->links[link].bss) ?
+ nla_total_size(ETH_ALEN) : 0;
+ }
+ }
msg = nlmsg_new(100 + cr->req_ie_len + cr->resp_ie_len +
cr->fils.kek_len + cr->fils.pmk_len +
- (cr->fils.pmkid ? WLAN_PMKID_LEN : 0), gfp);
+ (cr->fils.pmkid ? WLAN_PMKID_LEN : 0) + link_info_size,
+ gfp);
if (!msg)
return;
@@ -16699,8 +17765,8 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
- (cr->bssid &&
- nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, cr->bssid)) ||
+ (connected_addr &&
+ nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, connected_addr)) ||
nla_put_u16(msg, NL80211_ATTR_STATUS_CODE,
cr->status < 0 ? WLAN_STATUS_UNSPECIFIED_FAILURE :
cr->status) ||
@@ -16726,6 +17792,38 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
nla_put(msg, NL80211_ATTR_PMKID, WLAN_PMKID_LEN, cr->fils.pmkid)))))
goto nla_put_failure;
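+ /*
+ * For an MLO connection, emit one nested NL80211_ATTR_MLO_LINKS
+ * entry per valid link, carrying the link ID, our own link address
+ * and the link's BSSID.
+ */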
+ if (cr->valid_links) {
+ int i = 1;
+ struct nlattr *nested;
+
+ nested = nla_nest_start(msg, NL80211_ATTR_MLO_LINKS);
+ if (!nested)
+ goto nla_put_failure;
+
+ for_each_valid_link(cr, link) {
+ struct nlattr *nested_mlo_links;
+ const u8 *bssid = cr->links[link].bss ?
+ cr->links[link].bss->bssid :
+ cr->links[link].bssid;
+
+ nested_mlo_links = nla_nest_start(msg, i);
+ if (!nested_mlo_links)
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link) ||
+ (bssid &&
+ nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, bssid)) ||
+ (cr->links[link].addr &&
+ nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN,
+ cr->links[link].addr)))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, nested_mlo_links);
+ i++;
+ }
+ nla_nest_end(msg, nested);
+ }
+
genlmsg_end(msg, hdr);
genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
@@ -16742,11 +17840,32 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
{
struct sk_buff *msg;
void *hdr;
- const u8 *bssid = info->bss ? info->bss->bssid : info->bssid;
+ size_t link_info_size = 0;
+ unsigned int link;
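+ /* an MLO roam reports the AP MLD address, a non-MLO roam the BSSID */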
+ const u8 *connected_addr = info->ap_mld_addr ?
+ info->ap_mld_addr :
+ (info->links[0].bss ?
+ info->links[0].bss->bssid :
+ info->links[0].bssid);
+
+ if (info->valid_links) {
+ for_each_valid_link(info, link) {
+ /* Nested attribute header */
+ link_info_size += NLA_HDRLEN;
+ /* Link ID */
+ link_info_size += nla_total_size(sizeof(u8));
+ link_info_size += info->links[link].addr ?
+ nla_total_size(ETH_ALEN) : 0;
+ link_info_size += (info->links[link].bssid ||
+ info->links[link].bss) ?
+ nla_total_size(ETH_ALEN) : 0;
+ }
+ }
msg = nlmsg_new(100 + info->req_ie_len + info->resp_ie_len +
info->fils.kek_len + info->fils.pmk_len +
- (info->fils.pmkid ? WLAN_PMKID_LEN : 0), gfp);
+ (info->fils.pmkid ? WLAN_PMKID_LEN : 0) +
+ link_info_size, gfp);
if (!msg)
return;
@@ -16758,7 +17877,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
- nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid) ||
+ nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, connected_addr) ||
(info->req_ie &&
nla_put(msg, NL80211_ATTR_REQ_IE, info->req_ie_len,
info->req_ie)) ||
@@ -16777,6 +17896,38 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
nla_put(msg, NL80211_ATTR_PMKID, WLAN_PMKID_LEN, info->fils.pmkid)))
goto nla_put_failure;
+ if (info->valid_links) {
+ int i = 1;
+ struct nlattr *nested;
+
+ nested = nla_nest_start(msg, NL80211_ATTR_MLO_LINKS);
+ if (!nested)
+ goto nla_put_failure;
+
+ for_each_valid_link(info, link) {
+ struct nlattr *nested_mlo_links;
+ const u8 *bssid = info->links[link].bss ?
+ info->links[link].bss->bssid :
+ info->links[link].bssid;
+
+ nested_mlo_links = nla_nest_start(msg, i);
+ if (!nested_mlo_links)
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link) ||
+ (bssid &&
+ nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, bssid)) ||
+ (info->links[link].addr &&
+ nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN,
+ info->links[link].addr)))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, nested_mlo_links);
+ i++;
+ }
+ nla_nest_end(msg, nested);
+ }
+
genlmsg_end(msg, hdr);
genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
@@ -17069,7 +18220,7 @@ static void nl80211_send_remain_on_chan_event(
}
void cfg80211_assoc_comeback(struct net_device *netdev,
- struct cfg80211_bss *bss, u32 timeout)
+ const u8 *ap_addr, u32 timeout)
{
struct wireless_dev *wdev = netdev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
@@ -17077,7 +18228,7 @@ void cfg80211_assoc_comeback(struct net_device *netdev,
struct sk_buff *msg;
void *hdr;
- trace_cfg80211_assoc_comeback(wdev, bss->bssid, timeout);
+ trace_cfg80211_assoc_comeback(wdev, ap_addr, timeout);
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -17091,7 +18242,7 @@ void cfg80211_assoc_comeback(struct net_device *netdev,
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
- nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bss->bssid) ||
+ nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ap_addr) ||
nla_put_u32(msg, NL80211_ATTR_TIMEOUT, timeout))
goto nla_put_failure;
@@ -17315,14 +18466,13 @@ EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame);
int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev, u32 nlportid,
- int freq, int sig_dbm,
- const u8 *buf, size_t len, u32 flags, gfp_t gfp)
+ struct cfg80211_rx_info *info, gfp_t gfp)
{
struct net_device *netdev = wdev->netdev;
struct sk_buff *msg;
void *hdr;
- msg = nlmsg_new(100 + len, gfp);
+ msg = nlmsg_new(100 + info->len, gfp);
if (!msg)
return -ENOMEM;
@@ -17337,13 +18487,23 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
netdev->ifindex)) ||
nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
NL80211_ATTR_PAD) ||
- nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, KHZ_TO_MHZ(freq)) ||
- nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ_OFFSET, freq % 1000) ||
- (sig_dbm &&
- nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) ||
- nla_put(msg, NL80211_ATTR_FRAME, len, buf) ||
- (flags &&
- nla_put_u32(msg, NL80211_ATTR_RXMGMT_FLAGS, flags)))
+ (info->have_link_id &&
+ nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, info->link_id)) ||
+ nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, KHZ_TO_MHZ(info->freq)) ||
+ nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ_OFFSET, info->freq % 1000) ||
+ (info->sig_dbm &&
+ nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, info->sig_dbm)) ||
+ nla_put(msg, NL80211_ATTR_FRAME, info->len, info->buf) ||
+ (info->flags &&
+ nla_put_u32(msg, NL80211_ATTR_RXMGMT_FLAGS, info->flags)) ||
+ (info->rx_tstamp && nla_put_u64_64bit(msg,
+ NL80211_ATTR_RX_HW_TIMESTAMP,
+ info->rx_tstamp,
+ NL80211_ATTR_PAD)) ||
+ (info->ack_tstamp && nla_put_u64_64bit(msg,
+ NL80211_ATTR_TX_HW_TIMESTAMP,
+ info->ack_tstamp,
+ NL80211_ATTR_PAD)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -17355,8 +18515,8 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
return -ENOBUFS;
}
-static void nl80211_frame_tx_status(struct wireless_dev *wdev, u64 cookie,
- const u8 *buf, size_t len, bool ack,
+static void nl80211_frame_tx_status(struct wireless_dev *wdev,
+ struct cfg80211_tx_status *status,
gfp_t gfp, enum nl80211_commands command)
{
struct wiphy *wiphy = wdev->wiphy;
@@ -17366,11 +18526,13 @@ static void nl80211_frame_tx_status(struct wireless_dev *wdev, u64 cookie,
void *hdr;
if (command == NL80211_CMD_FRAME_TX_STATUS)
- trace_cfg80211_mgmt_tx_status(wdev, cookie, ack);
+ trace_cfg80211_mgmt_tx_status(wdev, status->cookie,
+ status->ack);
else
- trace_cfg80211_control_port_tx_status(wdev, cookie, ack);
+ trace_cfg80211_control_port_tx_status(wdev, status->cookie,
+ status->ack);
- msg = nlmsg_new(100 + len, gfp);
+ msg = nlmsg_new(100 + status->len, gfp);
if (!msg)
return;
@@ -17385,10 +18547,16 @@ static void nl80211_frame_tx_status(struct wireless_dev *wdev, u64 cookie,
netdev->ifindex)) ||
nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
NL80211_ATTR_PAD) ||
- nla_put(msg, NL80211_ATTR_FRAME, len, buf) ||
- nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie,
+ nla_put(msg, NL80211_ATTR_FRAME, status->len, status->buf) ||
+ nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, status->cookie,
NL80211_ATTR_PAD) ||
- (ack && nla_put_flag(msg, NL80211_ATTR_ACK)))
+ (status->ack && nla_put_flag(msg, NL80211_ATTR_ACK)) ||
+ (status->tx_tstamp &&
+ nla_put_u64_64bit(msg, NL80211_ATTR_TX_HW_TIMESTAMP,
+ status->tx_tstamp, NL80211_ATTR_PAD)) ||
+ (status->ack_tstamp &&
+ nla_put_u64_64bit(msg, NL80211_ATTR_RX_HW_TIMESTAMP,
+ status->ack_tstamp, NL80211_ATTR_PAD)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -17405,18 +18573,24 @@ void cfg80211_control_port_tx_status(struct wireless_dev *wdev, u64 cookie,
const u8 *buf, size_t len, bool ack,
gfp_t gfp)
{
- nl80211_frame_tx_status(wdev, cookie, buf, len, ack, gfp,
+ struct cfg80211_tx_status status = {
+ .cookie = cookie,
+ .buf = buf,
+ .len = len,
+ .ack = ack
+ };
+
+ nl80211_frame_tx_status(wdev, &status, gfp,
NL80211_CMD_CONTROL_PORT_FRAME_TX_STATUS);
}
EXPORT_SYMBOL(cfg80211_control_port_tx_status);
-void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
- const u8 *buf, size_t len, bool ack, gfp_t gfp)
+void cfg80211_mgmt_tx_status_ext(struct wireless_dev *wdev,
+ struct cfg80211_tx_status *status, gfp_t gfp)
{
- nl80211_frame_tx_status(wdev, cookie, buf, len, ack, gfp,
- NL80211_CMD_FRAME_TX_STATUS);
+ nl80211_frame_tx_status(wdev, status, gfp, NL80211_CMD_FRAME_TX_STATUS);
}
-EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
+EXPORT_SYMBOL(cfg80211_mgmt_tx_status_ext);
static int __nl80211_rx_control_port(struct net_device *dev,
struct sk_buff *skb,
@@ -17760,11 +18934,13 @@ EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify);
static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
+ unsigned int link_id,
struct cfg80211_chan_def *chandef,
gfp_t gfp,
enum nl80211_commands notif,
u8 count, bool quiet)
{
+ struct wireless_dev *wdev = netdev->ieee80211_ptr;
struct sk_buff *msg;
void *hdr;
@@ -17781,6 +18957,10 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex))
goto nla_put_failure;
+ if (wdev->valid_links &&
+ nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id))
+ goto nla_put_failure;
+
if (nl80211_send_chandef(msg, chandef))
goto nla_put_failure;
@@ -17803,41 +18983,63 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
}
void cfg80211_ch_switch_notify(struct net_device *dev,
- struct cfg80211_chan_def *chandef)
+ struct cfg80211_chan_def *chandef,
+ unsigned int link_id)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
ASSERT_WDEV_LOCK(wdev);
+ WARN_INVALID_LINK_ID(wdev, link_id);
- trace_cfg80211_ch_switch_notify(dev, chandef);
+ trace_cfg80211_ch_switch_notify(dev, chandef, link_id);
- wdev->chandef = *chandef;
- wdev->preset_chandef = *chandef;
-
- if (wdev->iftype == NL80211_IFTYPE_STATION &&
- !WARN_ON(!wdev->current_bss))
- cfg80211_update_assoc_bss_entry(wdev, chandef->chan);
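+ /*
+ * The current chandef now lives in per-interface-type (and, for
+ * AP/P2P-GO, per-link) state, so update the copy matching this iftype.
+ */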
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_STATION:
+ case NL80211_IFTYPE_P2P_CLIENT:
+ if (!WARN_ON(!wdev->links[link_id].client.current_bss))
+ cfg80211_update_assoc_bss_entry(wdev, link_id,
+ chandef->chan);
+ break;
+ case NL80211_IFTYPE_MESH_POINT:
+ wdev->u.mesh.chandef = *chandef;
+ wdev->u.mesh.preset_chandef = *chandef;
+ break;
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
+ wdev->links[link_id].ap.chandef = *chandef;
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ wdev->u.ibss.chandef = *chandef;
+ break;
+ default:
+ WARN_ON(1);
+ break;
+ }
cfg80211_sched_dfs_chan_update(rdev);
- nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL,
+ nl80211_ch_switch_notify(rdev, dev, link_id, chandef, GFP_KERNEL,
NL80211_CMD_CH_SWITCH_NOTIFY, 0, false);
}
EXPORT_SYMBOL(cfg80211_ch_switch_notify);
void cfg80211_ch_switch_started_notify(struct net_device *dev,
struct cfg80211_chan_def *chandef,
- u8 count, bool quiet)
+ unsigned int link_id, u8 count,
+ bool quiet)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
- trace_cfg80211_ch_switch_started_notify(dev, chandef);
+ ASSERT_WDEV_LOCK(wdev);
+ WARN_INVALID_LINK_ID(wdev, link_id);
+
+ trace_cfg80211_ch_switch_started_notify(dev, chandef, link_id);
- nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL,
+ nl80211_ch_switch_notify(rdev, dev, link_id, chandef, GFP_KERNEL,
NL80211_CMD_CH_SWITCH_STARTED_NOTIFY,
count, quiet);
}
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index d642e3be4ee7..855d540ddfb9 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Portions of this file
- * Copyright (C) 2018, 2020-2021 Intel Corporation
+ * Copyright (C) 2018, 2020-2022 Intel Corporation
*/
#ifndef __NET_WIRELESS_NL80211_H
#define __NET_WIRELESS_NL80211_H
@@ -60,9 +60,7 @@ void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev,
const u8 *buf, size_t len, gfp_t gfp);
void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
- const u8 *buf, size_t len, gfp_t gfp,
- int uapsd_queues,
- const u8 *req_ies, size_t req_ies_len);
+ struct cfg80211_rx_assoc_resp *data);
void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
const u8 *buf, size_t len,
@@ -107,8 +105,7 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev, u32 nlpid,
- int freq, int sig_dbm,
- const u8 *buf, size_t len, u32 flags, gfp_t gfp);
+ struct cfg80211_rx_info *info, gfp_t gfp);
void
nl80211_radar_notify(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/ocb.c b/net/wireless/ocb.c
index 2d26a6d980bf..27a1732264f9 100644
--- a/net/wireless/ocb.c
+++ b/net/wireless/ocb.c
@@ -4,6 +4,7 @@
*
* Copyright: (c) 2014 Czech Technical University in Prague
* (c) 2014 Volkswagen Group Research
+ * Copyright (C) 2022 Intel Corporation
* Author: Rostislav Lisovy <rostislav.lisovy@fel.cvut.cz>
* Funded by: Volkswagen Group Research
*/
@@ -34,7 +35,7 @@ int __cfg80211_join_ocb(struct cfg80211_registered_device *rdev,
err = rdev_join_ocb(rdev, dev, setup);
if (!err)
- wdev->chandef = setup->chandef;
+ wdev->u.ocb.chandef = setup->chandef;
return err;
}
@@ -69,7 +70,7 @@ int __cfg80211_leave_ocb(struct cfg80211_registered_device *rdev,
err = rdev_leave_ocb(rdev, dev);
if (!err)
- memset(&wdev->chandef, 0, sizeof(wdev->chandef));
+ memset(&wdev->u.ocb.chandef, 0, sizeof(wdev->u.ocb.chandef));
return err;
}
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
index 328cf54bda82..2bc647720cda 100644
--- a/net/wireless/pmsr.c
+++ b/net/wireless/pmsr.c
@@ -2,8 +2,6 @@
/*
* Copyright (C) 2018 - 2021 Intel Corporation
*/
-#ifndef __PMSR_H
-#define __PMSR_H
#include <net/cfg80211.h>
#include "core.h"
#include "nl80211.h"
@@ -661,5 +659,3 @@ void cfg80211_release_pmsr(struct wireless_dev *wdev, u32 portid)
}
spin_unlock_bh(&wdev->pmsr_lock);
}
-
-#endif /* __PMSR_H */
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 439bcf52369c..13b209a8db28 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1,4 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Portions of this file
+ * Copyright(c) 2016-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018, 2021-2022 Intel Corporation
+ */
#ifndef __CFG80211_RDEV_OPS
#define __CFG80211_RDEV_OPS
@@ -72,65 +77,69 @@ rdev_change_virtual_intf(struct cfg80211_registered_device *rdev,
}
static inline int rdev_add_key(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, u8 key_index,
- bool pairwise, const u8 *mac_addr,
+ struct net_device *netdev, int link_id,
+ u8 key_index, bool pairwise, const u8 *mac_addr,
struct key_params *params)
{
int ret;
- trace_rdev_add_key(&rdev->wiphy, netdev, key_index, pairwise,
+ trace_rdev_add_key(&rdev->wiphy, netdev, link_id, key_index, pairwise,
mac_addr, params->mode);
- ret = rdev->ops->add_key(&rdev->wiphy, netdev, key_index, pairwise,
- mac_addr, params);
+ ret = rdev->ops->add_key(&rdev->wiphy, netdev, link_id, key_index,
+ pairwise, mac_addr, params);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
static inline int
rdev_get_key(struct cfg80211_registered_device *rdev, struct net_device *netdev,
- u8 key_index, bool pairwise, const u8 *mac_addr, void *cookie,
+ int link_id, u8 key_index, bool pairwise, const u8 *mac_addr,
+ void *cookie,
void (*callback)(void *cookie, struct key_params*))
{
int ret;
- trace_rdev_get_key(&rdev->wiphy, netdev, key_index, pairwise, mac_addr);
- ret = rdev->ops->get_key(&rdev->wiphy, netdev, key_index, pairwise,
- mac_addr, cookie, callback);
+ trace_rdev_get_key(&rdev->wiphy, netdev, link_id, key_index, pairwise,
+ mac_addr);
+ ret = rdev->ops->get_key(&rdev->wiphy, netdev, link_id, key_index,
+ pairwise, mac_addr, cookie, callback);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
static inline int rdev_del_key(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, u8 key_index,
- bool pairwise, const u8 *mac_addr)
+ struct net_device *netdev, int link_id,
+ u8 key_index, bool pairwise, const u8 *mac_addr)
{
int ret;
- trace_rdev_del_key(&rdev->wiphy, netdev, key_index, pairwise, mac_addr);
- ret = rdev->ops->del_key(&rdev->wiphy, netdev, key_index, pairwise,
- mac_addr);
+ trace_rdev_del_key(&rdev->wiphy, netdev, link_id, key_index, pairwise,
+ mac_addr);
+ ret = rdev->ops->del_key(&rdev->wiphy, netdev, link_id, key_index,
+ pairwise, mac_addr);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
static inline int
rdev_set_default_key(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, u8 key_index, bool unicast,
- bool multicast)
+ struct net_device *netdev, int link_id, u8 key_index,
+ bool unicast, bool multicast)
{
int ret;
- trace_rdev_set_default_key(&rdev->wiphy, netdev, key_index,
+ trace_rdev_set_default_key(&rdev->wiphy, netdev, link_id, key_index,
unicast, multicast);
- ret = rdev->ops->set_default_key(&rdev->wiphy, netdev, key_index,
- unicast, multicast);
+ ret = rdev->ops->set_default_key(&rdev->wiphy, netdev, link_id,
+ key_index, unicast, multicast);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
static inline int
rdev_set_default_mgmt_key(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, u8 key_index)
+ struct net_device *netdev, int link_id, u8 key_index)
{
int ret;
- trace_rdev_set_default_mgmt_key(&rdev->wiphy, netdev, key_index);
- ret = rdev->ops->set_default_mgmt_key(&rdev->wiphy, netdev,
+ trace_rdev_set_default_mgmt_key(&rdev->wiphy, netdev, link_id,
+ key_index);
+ ret = rdev->ops->set_default_mgmt_key(&rdev->wiphy, netdev, link_id,
key_index);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
@@ -138,13 +147,15 @@ rdev_set_default_mgmt_key(struct cfg80211_registered_device *rdev,
static inline int
rdev_set_default_beacon_key(struct cfg80211_registered_device *rdev,
- struct net_device *netdev, u8 key_index)
+ struct net_device *netdev, int link_id,
+ u8 key_index)
{
int ret;
- trace_rdev_set_default_beacon_key(&rdev->wiphy, netdev, key_index);
- ret = rdev->ops->set_default_beacon_key(&rdev->wiphy, netdev,
- key_index);
+ trace_rdev_set_default_beacon_key(&rdev->wiphy, netdev, link_id,
+ key_index);
+ ret = rdev->ops->set_default_beacon_key(&rdev->wiphy, netdev, link_id,
+ key_index);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
@@ -172,11 +183,11 @@ static inline int rdev_change_beacon(struct cfg80211_registered_device *rdev,
}
static inline int rdev_stop_ap(struct cfg80211_registered_device *rdev,
- struct net_device *dev)
+ struct net_device *dev, unsigned int link_id)
{
int ret;
- trace_rdev_stop_ap(&rdev->wiphy, dev);
- ret = rdev->ops->stop_ap(&rdev->wiphy, dev);
+ trace_rdev_stop_ap(&rdev->wiphy, dev, link_id);
+ ret = rdev->ops->stop_ap(&rdev->wiphy, dev, link_id);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
@@ -464,18 +475,9 @@ static inline int rdev_assoc(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct cfg80211_assoc_request *req)
{
- const struct cfg80211_bss_ies *bss_ies;
int ret;
- /*
- * Note: we might trace not exactly the data that's processed,
- * due to races and the driver/mac80211 getting a newer copy.
- */
- rcu_read_lock();
- bss_ies = rcu_dereference(req->bss->ies);
- trace_rdev_assoc(&rdev->wiphy, dev, req, bss_ies);
- rcu_read_unlock();
-
+ trace_rdev_assoc(&rdev->wiphy, dev, req);
ret = rdev->ops->assoc(&rdev->wiphy, dev, req);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
@@ -651,12 +653,14 @@ static inline int rdev_testmode_dump(struct cfg80211_registered_device *rdev,
static inline int
rdev_set_bitrate_mask(struct cfg80211_registered_device *rdev,
- struct net_device *dev, const u8 *peer,
+ struct net_device *dev, unsigned int link_id,
+ const u8 *peer,
const struct cfg80211_bitrate_mask *mask)
{
int ret;
- trace_rdev_set_bitrate_mask(&rdev->wiphy, dev, peer, mask);
- ret = rdev->ops->set_bitrate_mask(&rdev->wiphy, dev, peer, mask);
+ trace_rdev_set_bitrate_mask(&rdev->wiphy, dev, link_id, peer, mask);
+ ret = rdev->ops->set_bitrate_mask(&rdev->wiphy, dev, link_id,
+ peer, mask);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
@@ -748,13 +752,14 @@ static inline int rdev_tx_control_port(struct cfg80211_registered_device *rdev,
struct net_device *dev,
const void *buf, size_t len,
const u8 *dest, __be16 proto,
- const bool noencrypt, u64 *cookie)
+ const bool noencrypt, int link,
+ u64 *cookie)
{
int ret;
trace_rdev_tx_control_port(&rdev->wiphy, dev, buf, len,
- dest, proto, noencrypt);
+ dest, proto, noencrypt, link);
ret = rdev->ops->tx_control_port(&rdev->wiphy, dev, buf, len,
- dest, proto, noencrypt, cookie);
+ dest, proto, noencrypt, link, cookie);
if (cookie)
trace_rdev_return_int_cookie(&rdev->wiphy, ret, *cookie);
else
@@ -944,12 +949,13 @@ static inline int rdev_set_noack_map(struct cfg80211_registered_device *rdev,
static inline int
rdev_get_channel(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev,
+ unsigned int link_id,
struct cfg80211_chan_def *chandef)
{
int ret;
- trace_rdev_get_channel(&rdev->wiphy, wdev);
- ret = rdev->ops->get_channel(&rdev->wiphy, wdev, chandef);
+ trace_rdev_get_channel(&rdev->wiphy, wdev, link_id);
+ ret = rdev->ops->get_channel(&rdev->wiphy, wdev, link_id, chandef);
trace_rdev_return_chandef(&rdev->wiphy, ret, chandef);
return ret;
@@ -1107,12 +1113,14 @@ static inline int rdev_set_qos_map(struct cfg80211_registered_device *rdev,
static inline int
rdev_set_ap_chanwidth(struct cfg80211_registered_device *rdev,
- struct net_device *dev, struct cfg80211_chan_def *chandef)
+ struct net_device *dev,
+ unsigned int link_id,
+ struct cfg80211_chan_def *chandef)
{
int ret;
- trace_rdev_set_ap_chanwidth(&rdev->wiphy, dev, chandef);
- ret = rdev->ops->set_ap_chanwidth(&rdev->wiphy, dev, chandef);
+ trace_rdev_set_ap_chanwidth(&rdev->wiphy, dev, link_id, chandef);
+ ret = rdev->ops->set_ap_chanwidth(&rdev->wiphy, dev, link_id, chandef);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
@@ -1412,4 +1420,78 @@ rdev_set_radar_background(struct cfg80211_registered_device *rdev,
return ret;
}
+static inline int
+rdev_add_intf_link(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev,
+ unsigned int link_id)
+{
+ int ret = 0;
+
+ trace_rdev_add_intf_link(&rdev->wiphy, wdev, link_id);
+ if (rdev->ops->add_intf_link)
+ ret = rdev->ops->add_intf_link(&rdev->wiphy, wdev, link_id);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+
+ return ret;
+}
+
+static inline void
+rdev_del_intf_link(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev,
+ unsigned int link_id)
+{
+ trace_rdev_del_intf_link(&rdev->wiphy, wdev, link_id);
+ if (rdev->ops->del_intf_link)
+ rdev->ops->del_intf_link(&rdev->wiphy, wdev, link_id);
+ trace_rdev_return_void(&rdev->wiphy);
+}
+
+static inline int
+rdev_add_link_station(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct link_station_parameters *params)
+{
+ int ret;
+
+ if (!rdev->ops->add_link_station)
+ return -EOPNOTSUPP;
+
+ trace_rdev_add_link_station(&rdev->wiphy, dev, params);
+ ret = rdev->ops->add_link_station(&rdev->wiphy, dev, params);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
+static inline int
+rdev_mod_link_station(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct link_station_parameters *params)
+{
+ int ret;
+
+ if (!rdev->ops->mod_link_station)
+ return -EOPNOTSUPP;
+
+ trace_rdev_mod_link_station(&rdev->wiphy, dev, params);
+ ret = rdev->ops->mod_link_station(&rdev->wiphy, dev, params);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
+static inline int
+rdev_del_link_station(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct link_station_del_parameters *params)
+{
+ int ret;
+
+ if (!rdev->ops->del_link_station)
+ return -EOPNOTSUPP;
+
+ trace_rdev_del_link_station(&rdev->wiphy, dev, params);
+ ret = rdev->ops->del_link_station(&rdev->wiphy, dev, params);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
#endif /* __CFG80211_RDEV_OPS */
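All of the new rdev_*_link_station() wrappers above share one pattern: bail out with -EOPNOTSUPP when the driver leaves the op NULL, trace the call, forward to the driver, trace the return. A hypothetical driver-side stub illustrating the contract (not from this patch):

/* Hypothetical driver op: return 0 on success or a negative errno.
 * Leaving the op NULL is also valid; the cfg80211 wrapper then
 * returns -EOPNOTSUPP without emitting the trace events.
 */
static int drv_mod_link_station(struct wiphy *wiphy,
                                struct net_device *dev,
                                struct link_station_parameters *params)
{
        /* apply the per-link parameters to the driver's station state */
        return 0;
}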
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index ec25924a1c26..c3d950d29432 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -5,7 +5,7 @@
* Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2021 Intel Corporation
+ * Copyright (C) 2018 - 2022 Intel Corporation
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -807,6 +807,8 @@ static int __init load_builtin_regdb_keys(void)
return 0;
}
+MODULE_FIRMWARE("regulatory.db.p7s");
+
static bool regdb_has_valid_signature(const u8 *data, unsigned int size)
{
const struct firmware *sig;
@@ -1078,8 +1080,12 @@ static void regdb_fw_cb(const struct firmware *fw, void *context)
release_firmware(fw);
}
+MODULE_FIRMWARE("regulatory.db");
+
static int query_regdb_file(const char *alpha2)
{
+ int err;
+
ASSERT_RTNL();
if (regdb)
@@ -1089,9 +1095,13 @@ static int query_regdb_file(const char *alpha2)
if (!alpha2)
return -ENOMEM;
- return request_firmware_nowait(THIS_MODULE, true, "regulatory.db",
- &reg_pdev->dev, GFP_KERNEL,
- (void *)alpha2, regdb_fw_cb);
+ err = request_firmware_nowait(THIS_MODULE, true, "regulatory.db",
+ &reg_pdev->dev, GFP_KERNEL,
+ (void *)alpha2, regdb_fw_cb);
+ if (err)
+ kfree(alpha2);
+
+ return err;
}
int reg_reload_regdb(void)
@@ -1238,6 +1248,8 @@ unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd,
{
unsigned int bw = reg_get_max_bandwidth_from_range(rd, rule);
+ if (rule->flags & NL80211_RRF_NO_320MHZ)
+ bw = min_t(unsigned int, bw, MHZ_TO_KHZ(160));
if (rule->flags & NL80211_RRF_NO_160MHZ)
bw = min_t(unsigned int, bw, MHZ_TO_KHZ(80));
if (rule->flags & NL80211_RRF_NO_80MHZ)
@@ -1611,6 +1623,8 @@ static u32 map_regdom_flags(u32 rd_flags)
channel_flags |= IEEE80211_CHAN_NO_160MHZ;
if (rd_flags & NL80211_RRF_NO_HE)
channel_flags |= IEEE80211_CHAN_NO_HE;
+ if (rd_flags & NL80211_RRF_NO_320MHZ)
+ channel_flags |= IEEE80211_CHAN_NO_320MHZ;
return channel_flags;
}
@@ -1773,6 +1787,8 @@ static uint32_t reg_rule_to_chan_bw_flags(const struct ieee80211_regdomain *regd
bw_flags |= IEEE80211_CHAN_NO_80MHZ;
if (max_bandwidth_khz < MHZ_TO_KHZ(160))
bw_flags |= IEEE80211_CHAN_NO_160MHZ;
+ if (max_bandwidth_khz < MHZ_TO_KHZ(320))
+ bw_flags |= IEEE80211_CHAN_NO_320MHZ;
}
return bw_flags;
}
@@ -2360,6 +2376,7 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev)
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
enum nl80211_iftype iftype;
bool ret;
+ int link;
wdev_lock(wdev);
iftype = wdev->iftype;
@@ -2368,62 +2385,87 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev)
if (!wdev->netdev || !netif_running(wdev->netdev))
goto wdev_inactive_unlock;
- switch (iftype) {
- case NL80211_IFTYPE_AP:
- case NL80211_IFTYPE_P2P_GO:
- case NL80211_IFTYPE_MESH_POINT:
- if (!wdev->beacon_interval)
- goto wdev_inactive_unlock;
- chandef = wdev->chandef;
- break;
- case NL80211_IFTYPE_ADHOC:
- if (!wdev->ssid_len)
- goto wdev_inactive_unlock;
- chandef = wdev->chandef;
- break;
- case NL80211_IFTYPE_STATION:
- case NL80211_IFTYPE_P2P_CLIENT:
- if (!wdev->current_bss ||
- !wdev->current_bss->pub.channel)
- goto wdev_inactive_unlock;
-
- if (!rdev->ops->get_channel ||
- rdev_get_channel(rdev, wdev, &chandef))
- cfg80211_chandef_create(&chandef,
- wdev->current_bss->pub.channel,
- NL80211_CHAN_NO_HT);
- break;
- case NL80211_IFTYPE_MONITOR:
- case NL80211_IFTYPE_AP_VLAN:
- case NL80211_IFTYPE_P2P_DEVICE:
- /* no enforcement required */
- break;
- default:
- /* others not implemented for now */
- WARN_ON(1);
- break;
- }
+ for (link = 0; link < ARRAY_SIZE(wdev->links); link++) {
+ struct ieee80211_channel *chan;
- wdev_unlock(wdev);
+ if (!wdev->valid_links && link > 0)
+ break;
+ if (wdev->valid_links && !(wdev->valid_links & BIT(link)))
+ continue;
+ switch (iftype) {
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
+ if (!wdev->links[link].ap.beacon_interval)
+ continue;
+ chandef = wdev->links[link].ap.chandef;
+ break;
+ case NL80211_IFTYPE_MESH_POINT:
+ if (!wdev->u.mesh.beacon_interval)
+ continue;
+ chandef = wdev->u.mesh.chandef;
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ if (!wdev->u.ibss.ssid_len)
+ continue;
+ chandef = wdev->u.ibss.chandef;
+ break;
+ case NL80211_IFTYPE_STATION:
+ case NL80211_IFTYPE_P2P_CLIENT:
+ /* Maybe we could consider disabling that link only? */
+ if (!wdev->links[link].client.current_bss)
+ continue;
- switch (iftype) {
- case NL80211_IFTYPE_AP:
- case NL80211_IFTYPE_P2P_GO:
- case NL80211_IFTYPE_ADHOC:
- case NL80211_IFTYPE_MESH_POINT:
- wiphy_lock(wiphy);
- ret = cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype);
- wiphy_unlock(wiphy);
+ chan = wdev->links[link].client.current_bss->pub.channel;
+ if (!chan)
+ continue;
- return ret;
- case NL80211_IFTYPE_STATION:
- case NL80211_IFTYPE_P2P_CLIENT:
- return cfg80211_chandef_usable(wiphy, &chandef,
- IEEE80211_CHAN_DISABLED);
- default:
- break;
+ if (!rdev->ops->get_channel ||
+ rdev_get_channel(rdev, wdev, link, &chandef))
+ cfg80211_chandef_create(&chandef, chan,
+ NL80211_CHAN_NO_HT);
+ break;
+ case NL80211_IFTYPE_MONITOR:
+ case NL80211_IFTYPE_AP_VLAN:
+ case NL80211_IFTYPE_P2P_DEVICE:
+ /* no enforcement required */
+ break;
+ default:
+ /* others not implemented for now */
+ WARN_ON(1);
+ break;
+ }
+
+ wdev_unlock(wdev);
+
+ switch (iftype) {
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
+ case NL80211_IFTYPE_ADHOC:
+ case NL80211_IFTYPE_MESH_POINT:
+ wiphy_lock(wiphy);
+ ret = cfg80211_reg_can_beacon_relax(wiphy, &chandef,
+ iftype);
+ wiphy_unlock(wiphy);
+
+ if (!ret)
+ return ret;
+ break;
+ case NL80211_IFTYPE_STATION:
+ case NL80211_IFTYPE_P2P_CLIENT:
+ ret = cfg80211_chandef_usable(wiphy, &chandef,
+ IEEE80211_CHAN_DISABLED);
+ if (!ret)
+ return ret;
+ break;
+ default:
+ break;
+ }
+
+ wdev_lock(wdev);
}
+ wdev_unlock(wdev);
+
return true;
wdev_inactive_unlock:
@@ -4205,8 +4247,17 @@ static void cfg80211_check_and_end_cac(struct cfg80211_registered_device *rdev)
* In both cases we should end the CAC on the wdev.
*/
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
- if (wdev->cac_started &&
- !cfg80211_chandef_dfs_usable(&rdev->wiphy, &wdev->chandef))
+ struct cfg80211_chan_def *chandef;
+
+ if (!wdev->cac_started)
+ continue;
+
+ /* FIXME: radar detection is tied to link 0 for now */
+ chandef = wdev_chandef(wdev, 0);
+ if (!chandef)
+ continue;
+
+ if (!cfg80211_chandef_dfs_usable(&rdev->wiphy, chandef))
rdev_end_cac(rdev, wdev->netdev);
}
}
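The NL80211_RRF_NO_320MHZ handling above extends the existing clamp chain: each NO_<bw>MHZ flag caps the rule's usable bandwidth at the next step down. The same logic in isolation (illustration only; values in kHz, mirroring the MHZ_TO_KHZ() uses in the hunk):

/* Clamp chain from reg_get_max_bandwidth(): 320 -> 160 -> 80 -> 40. */
static unsigned int clamp_rule_bw(u32 rd_flags, unsigned int bw)
{
        if (rd_flags & NL80211_RRF_NO_320MHZ)
                bw = min_t(unsigned int, bw, 160000);
        if (rd_flags & NL80211_RRF_NO_160MHZ)
                bw = min_t(unsigned int, bw, 80000);
        if (rd_flags & NL80211_RRF_NO_80MHZ)
                bw = min_t(unsigned int, bw, 40000);
        return bw;
}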
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index b888522f133b..da752b0cc752 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -5,7 +5,7 @@
* Copyright 2008 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2016 Intel Deutschland GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*/
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -143,18 +143,12 @@ static inline void bss_ref_get(struct cfg80211_registered_device *rdev,
lockdep_assert_held(&rdev->bss_lock);
bss->refcount++;
- if (bss->pub.hidden_beacon_bss) {
- bss = container_of(bss->pub.hidden_beacon_bss,
- struct cfg80211_internal_bss,
- pub);
- bss->refcount++;
- }
- if (bss->pub.transmitted_bss) {
- bss = container_of(bss->pub.transmitted_bss,
- struct cfg80211_internal_bss,
- pub);
- bss->refcount++;
- }
+
+ if (bss->pub.hidden_beacon_bss)
+ bss_from_pub(bss->pub.hidden_beacon_bss)->refcount++;
+
+ if (bss->pub.transmitted_bss)
+ bss_from_pub(bss->pub.transmitted_bss)->refcount++;
}
static inline void bss_ref_put(struct cfg80211_registered_device *rdev,
@@ -304,7 +298,8 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen,
tmp_old = cfg80211_find_ie(WLAN_EID_SSID, ie, ielen);
tmp_old = (tmp_old) ? tmp_old + tmp_old[1] + 2 : ie;
- while (tmp_old + tmp_old[1] + 2 - ie <= ielen) {
+ while (tmp_old + 2 - ie <= ielen &&
+ tmp_old + tmp_old[1] + 2 - ie <= ielen) {
if (tmp_old[0] == 0) {
tmp_old++;
continue;
@@ -364,7 +359,8 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen,
* copied to new ie, skip ssid, capability, bssid-index ie
*/
tmp_new = sub_copy;
- while (tmp_new + tmp_new[1] + 2 - sub_copy <= subie_len) {
+ while (tmp_new + 2 - sub_copy <= subie_len &&
+ tmp_new + tmp_new[1] + 2 - sub_copy <= subie_len) {
if (!(tmp_new[0] == WLAN_EID_NON_TX_BSSID_CAP ||
tmp_new[0] == WLAN_EID_SSID)) {
memcpy(pos, tmp_new, tmp_new[1] + 2);
@@ -427,6 +423,15 @@ cfg80211_add_nontrans_list(struct cfg80211_bss *trans_bss,
rcu_read_unlock();
+ /*
+ * This is a bit weird - it's not on the list, but already on another
+ * one! The only way that could happen is if there's some BSSID/SSID
+ * shared by multiple APs in their multi-BSSID profiles, potentially
+ * with hidden SSID mixed in ... ignore it.
+ */
+ if (!list_empty(&nontrans_bss->nontrans_list))
+ return -EINVAL;
+
/* add to the list */
list_add_tail(&nontrans_bss->nontrans_list, &trans_bss->nontrans_list);
return 0;
@@ -540,7 +545,7 @@ static int cfg80211_parse_ap_info(struct cfg80211_colocated_ap *entry,
memcpy(entry->bssid, pos, ETH_ALEN);
pos += ETH_ALEN;
- if (length == IEEE80211_TBTT_INFO_OFFSET_BSSID_SSSID_BSS_PARAM) {
+ if (length >= IEEE80211_TBTT_INFO_OFFSET_BSSID_SSSID_BSS_PARAM) {
memcpy(&entry->short_ssid, pos,
sizeof(entry->short_ssid));
entry->short_ssid_valid = true;
@@ -700,8 +705,12 @@ static bool cfg80211_find_ssid_match(struct cfg80211_colocated_ap *ap,
for (i = 0; i < request->n_ssids; i++) {
/* wildcard ssid in the scan request */
- if (!request->ssids[i].ssid_len)
+ if (!request->ssids[i].ssid_len) {
+ if (ap->multi_bss && !ap->transmitted_bssid)
+ continue;
+
return true;
+ }
if (ap->ssid_len &&
ap->ssid_len == request->ssids[i].ssid_len) {
@@ -827,6 +836,9 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
!cfg80211_find_ssid_match(ap, request))
continue;
+ if (!request->n_ssids && ap->multi_bss && !ap->transmitted_bssid)
+ continue;
+
cfg80211_scan_req_add_chan(request, chan, true);
memcpy(scan_6ghz_params->bssid, ap->bssid, ETH_ALEN);
scan_6ghz_params->short_ssid = ap->short_ssid;
@@ -1595,6 +1607,23 @@ struct cfg80211_non_tx_bss {
u8 bssid_index;
};
+static void cfg80211_update_hidden_bsses(struct cfg80211_internal_bss *known,
+ const struct cfg80211_bss_ies *new_ies,
+ const struct cfg80211_bss_ies *old_ies)
+{
+ struct cfg80211_internal_bss *bss;
+
+ /* Assign beacon IEs to all sub entries */
+ list_for_each_entry(bss, &known->hidden_list, hidden_list) {
+ const struct cfg80211_bss_ies *ies;
+
+ ies = rcu_access_pointer(bss->pub.beacon_ies);
+ WARN_ON(ies != old_ies);
+
+ rcu_assign_pointer(bss->pub.beacon_ies, new_ies);
+ }
+}
+
static bool
cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
struct cfg80211_internal_bss *known,
@@ -1618,7 +1647,6 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
} else if (rcu_access_pointer(new->pub.beacon_ies)) {
const struct cfg80211_bss_ies *old;
- struct cfg80211_internal_bss *bss;
if (known->pub.hidden_beacon_bss &&
!list_empty(&known->hidden_list)) {
@@ -1646,16 +1674,9 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
if (old == rcu_access_pointer(known->pub.ies))
rcu_assign_pointer(known->pub.ies, new->pub.beacon_ies);
- /* Assign beacon IEs to all sub entries */
- list_for_each_entry(bss, &known->hidden_list, hidden_list) {
- const struct cfg80211_bss_ies *ies;
-
- ies = rcu_access_pointer(bss->pub.beacon_ies);
- WARN_ON(ies != old);
-
- rcu_assign_pointer(bss->pub.beacon_ies,
- new->pub.beacon_ies);
- }
+ cfg80211_update_hidden_bsses(known,
+ rcu_access_pointer(new->pub.beacon_ies),
+ old);
if (old)
kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
@@ -1732,6 +1753,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
new->refcount = 1;
INIT_LIST_HEAD(&new->hidden_list);
INIT_LIST_HEAD(&new->pub.nontrans_list);
+ /* we'll set this later if it was non-NULL */
+ new->pub.transmitted_bss = NULL;
if (rcu_access_pointer(tmp->pub.proberesp_ies)) {
hidden = rb_find_bss(rdev, tmp, BSS_CMP_HIDE_ZLEN);
@@ -1822,7 +1845,7 @@ int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen,
if (tmp && tmp->datalen >= sizeof(struct ieee80211_s1g_oper_ie)) {
struct ieee80211_s1g_oper_ie *s1gop = (void *)tmp->data;
- return s1gop->primary_ch;
+ return s1gop->oper_ch;
}
} else {
tmp = cfg80211_find_elem(WLAN_EID_DS_PARAMS, ie, ielen);
@@ -2011,11 +2034,18 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
/* this is a nontransmitting bss, we need to add it to
* transmitting bss' list if it is not there
*/
+ spin_lock_bh(&rdev->bss_lock);
if (cfg80211_add_nontrans_list(non_tx_data->tx_bss,
&res->pub)) {
- if (__cfg80211_unlink_bss(rdev, res))
+ if (__cfg80211_unlink_bss(rdev, res)) {
rdev->bss_generation++;
+ res = NULL;
+ }
}
+ spin_unlock_bh(&rdev->bss_lock);
+
+ if (!res)
+ return NULL;
}
trace_cfg80211_return_bss(&res->pub);
@@ -2134,6 +2164,8 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy,
for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, ie, ielen) {
if (elem->datalen < 4)
continue;
+ if (elem->data[0] < 1 || (int)elem->data[0] > 8)
+ continue;
for_each_element(sub, elem->data + 1, elem->datalen - 1) {
u8 profile_len;
@@ -2270,7 +2302,7 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy,
size_t new_ie_len;
struct cfg80211_bss_ies *new_ies;
const struct cfg80211_bss_ies *old;
- u8 cpy_len;
+ size_t cpy_len;
lockdep_assert_held(&wiphy_to_rdev(wiphy)->bss_lock);
@@ -2337,6 +2369,8 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy,
} else {
old = rcu_access_pointer(nontrans_bss->beacon_ies);
rcu_assign_pointer(nontrans_bss->beacon_ies, new_ies);
+ cfg80211_update_hidden_bsses(bss_from_pub(nontrans_bss),
+ new_ies, old);
rcu_assign_pointer(nontrans_bss->ies, new_ies);
if (old)
kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
@@ -2608,7 +2642,8 @@ void cfg80211_bss_iter(struct wiphy *wiphy,
spin_lock_bh(&rdev->bss_lock);
list_for_each_entry(bss, &rdev->bss_list, list) {
- if (!chandef || cfg80211_is_sub_chan(chandef, bss->pub.channel))
+ if (!chandef || cfg80211_is_sub_chan(chandef, bss->pub.channel,
+ false))
iter(wiphy, &bss->pub, iter_data);
}
@@ -2617,11 +2652,12 @@ void cfg80211_bss_iter(struct wiphy *wiphy,
EXPORT_SYMBOL(cfg80211_bss_iter);
void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev,
+ unsigned int link_id,
struct ieee80211_channel *chan)
{
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
- struct cfg80211_internal_bss *cbss = wdev->current_bss;
+ struct cfg80211_internal_bss *cbss = wdev->links[link_id].client.current_bss;
struct cfg80211_internal_bss *new = NULL;
struct cfg80211_internal_bss *bss;
struct cfg80211_bss *nontrans_bss;
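bss_ref_get() above now leans on bss_from_pub() instead of open-coded container_of() blocks. The helper's shape, for reference (a sketch; the real definition lives in net/wireless/core.h):

/* struct cfg80211_bss is embedded in cfg80211_internal_bss as .pub,
 * so container_of() recovers the internal object from the public
 * pointer handed out to drivers.
 */
static inline struct cfg80211_internal_bss *
bss_from_pub(struct cfg80211_bss *pub)
{
        return container_of(pub, struct cfg80211_internal_bss, pub);
}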
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index ff4d48fcbfb2..d513536617bd 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -5,7 +5,7 @@
* (for nl80211's connect() and wext)
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2009, 2020 Intel Corporation. All rights reserved.
+ * Copyright (C) 2009, 2020, 2022 Intel Corporation. All rights reserved.
* Copyright 2017 Intel Deutschland GmbH
*/
@@ -147,6 +147,7 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev,
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_connect_params *params;
+ struct cfg80211_auth_request auth_req = {};
struct cfg80211_assoc_request req = {};
int err;
@@ -167,13 +168,19 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev,
if (WARN_ON(!rdev->ops->auth))
return -EOPNOTSUPP;
wdev->conn->state = CFG80211_CONN_AUTHENTICATING;
- return cfg80211_mlme_auth(rdev, wdev->netdev,
- params->channel, params->auth_type,
- params->bssid,
- params->ssid, params->ssid_len,
- NULL, 0,
- params->key, params->key_len,
- params->key_idx, NULL, 0);
+ auth_req.key = params->key;
+ auth_req.key_len = params->key_len;
+ auth_req.key_idx = params->key_idx;
+ auth_req.auth_type = params->auth_type;
+ auth_req.bss = cfg80211_get_bss(&rdev->wiphy, params->channel,
+ params->bssid,
+ params->ssid, params->ssid_len,
+ IEEE80211_BSS_TYPE_ESS,
+ IEEE80211_PRIVACY_ANY);
+ auth_req.link_id = -1;
+ err = cfg80211_mlme_auth(rdev, wdev->netdev, &auth_req);
+ cfg80211_put_bss(&rdev->wiphy, auth_req.bss);
+ return err;
case CFG80211_CONN_AUTH_FAILED_TIMEOUT:
*treason = NL80211_TIMEOUT_AUTH;
return -ENOTCONN;
@@ -192,10 +199,20 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev,
req.ht_capa_mask = params->ht_capa_mask;
req.vht_capa = params->vht_capa;
req.vht_capa_mask = params->vht_capa_mask;
+ req.link_id = -1;
+
+ req.bss = cfg80211_get_bss(&rdev->wiphy, params->channel,
+ params->bssid,
+ params->ssid, params->ssid_len,
+ IEEE80211_BSS_TYPE_ESS,
+ IEEE80211_PRIVACY_ANY);
+ if (!req.bss) {
+ err = -ENOENT;
+ } else {
+ err = cfg80211_mlme_assoc(rdev, wdev->netdev, &req);
+ cfg80211_put_bss(&rdev->wiphy, req.bss);
+ }
- err = cfg80211_mlme_assoc(rdev, wdev->netdev, params->channel,
- params->bssid, params->ssid,
- params->ssid_len, &req);
if (err)
cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
NULL, 0,
@@ -258,7 +275,7 @@ void cfg80211_conn_work(struct work_struct *work)
memset(&cr, 0, sizeof(cr));
cr.status = -1;
- cr.bssid = bssid;
+ cr.links[0].bssid = bssid;
cr.timeout_reason = treason;
__cfg80211_connect_result(wdev->netdev, &cr, false);
}
@@ -367,7 +384,7 @@ void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len)
memset(&cr, 0, sizeof(cr));
cr.status = status_code;
- cr.bssid = mgmt->bssid;
+ cr.links[0].bssid = mgmt->bssid;
cr.timeout_reason = NL80211_TIMEOUT_UNSPECIFIED;
__cfg80211_connect_result(wdev->netdev, &cr, false);
} else if (wdev->conn->state == CFG80211_CONN_AUTHENTICATING) {
@@ -454,6 +471,20 @@ void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev)
schedule_work(&rdev->conn_work);
}
+static void cfg80211_wdev_release_bsses(struct wireless_dev *wdev)
+{
+ unsigned int link;
+
+ for_each_valid_link(wdev, link) {
+ if (!wdev->links[link].client.current_bss)
+ continue;
+ cfg80211_unhold_bss(wdev->links[link].client.current_bss);
+ cfg80211_put_bss(wdev->wiphy,
+ &wdev->links[link].client.current_bss->pub);
+ wdev->links[link].client.current_bss = NULL;
+ }
+}
+
static int cfg80211_sme_get_conn_ies(struct wireless_dev *wdev,
const u8 *ies, size_t ies_len,
const u8 **out_ies, size_t *out_ies_len)
@@ -521,12 +552,11 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
if (!rdev->ops->auth || !rdev->ops->assoc)
return -EOPNOTSUPP;
- if (wdev->current_bss) {
- cfg80211_unhold_bss(wdev->current_bss);
- cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub);
- wdev->current_bss = NULL;
+ cfg80211_wdev_release_bsses(wdev);
+ if (wdev->connected) {
cfg80211_sme_free(wdev);
+ wdev->connected = false;
}
if (wdev->conn)
@@ -563,8 +593,8 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
wdev->conn->auto_auth = false;
}
- wdev->conn->params.ssid = wdev->ssid;
- wdev->conn->params.ssid_len = wdev->ssid_len;
+ wdev->conn->params.ssid = wdev->u.client.ssid;
+ wdev->conn->params.ssid_len = wdev->u.client.ssid_len;
/* see if we have the bss already */
bss = cfg80211_get_conn_bss(wdev);
@@ -648,7 +678,7 @@ static bool cfg80211_is_all_idle(void)
list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
wdev_lock(wdev);
- if (wdev->conn || wdev->current_bss ||
+ if (wdev->conn || wdev->connected ||
cfg80211_beaconing_iface_active(wdev))
is_all_idle = false;
wdev_unlock(wdev);
@@ -668,6 +698,19 @@ static void disconnect_work(struct work_struct *work)
DECLARE_WORK(cfg80211_disconnect_work, disconnect_work);
+static void
+cfg80211_connect_result_release_bsses(struct wireless_dev *wdev,
+ struct cfg80211_connect_resp_params *cr)
+{
+ unsigned int link;
+
+ for_each_valid_link(cr, link) {
+ if (!cr->links[link].bss)
+ continue;
+ cfg80211_unhold_bss(bss_from_pub(cr->links[link].bss));
+ cfg80211_put_bss(wdev->wiphy, cr->links[link].bss);
+ }
+}
/*
* API calls for drivers implementing connect/disconnect and
@@ -680,27 +723,42 @@ void __cfg80211_connect_result(struct net_device *dev,
bool wextev)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- const struct element *country_elem;
+ const struct element *country_elem = NULL;
const u8 *country_data;
u8 country_datalen;
#ifdef CONFIG_CFG80211_WEXT
union iwreq_data wrqu;
#endif
+ unsigned int link;
+ const u8 *connected_addr;
+ bool bss_not_found = false;
ASSERT_WDEV_LOCK(wdev);
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
- wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) {
- cfg80211_put_bss(wdev->wiphy, cr->bss);
- return;
+ wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
+ goto out;
+
+ if (cr->valid_links) {
+ if (WARN_ON(!cr->ap_mld_addr))
+ goto out;
+
+ for_each_valid_link(cr, link) {
+ if (WARN_ON(!cr->links[link].addr))
+ goto out;
+ }
+
+ if (WARN_ON(wdev->connect_keys))
+ goto out;
}
wdev->unprot_beacon_reported = 0;
nl80211_send_connect_result(wiphy_to_rdev(wdev->wiphy), dev, cr,
GFP_KERNEL);
+ connected_addr = cr->valid_links ? cr->ap_mld_addr : cr->links[0].bssid;
#ifdef CONFIG_CFG80211_WEXT
- if (wextev) {
+ if (wextev && !cr->valid_links) {
if (cr->req_ie && cr->status == WLAN_STATUS_SUCCESS) {
memset(&wrqu, 0, sizeof(wrqu));
wrqu.data.length = cr->req_ie_len;
@@ -717,54 +775,83 @@ void __cfg80211_connect_result(struct net_device *dev,
memset(&wrqu, 0, sizeof(wrqu));
wrqu.ap_addr.sa_family = ARPHRD_ETHER;
- if (cr->bssid && cr->status == WLAN_STATUS_SUCCESS) {
- memcpy(wrqu.ap_addr.sa_data, cr->bssid, ETH_ALEN);
- memcpy(wdev->wext.prev_bssid, cr->bssid, ETH_ALEN);
+ if (connected_addr && cr->status == WLAN_STATUS_SUCCESS) {
+ memcpy(wrqu.ap_addr.sa_data, connected_addr, ETH_ALEN);
+ memcpy(wdev->wext.prev_bssid, connected_addr, ETH_ALEN);
wdev->wext.prev_bssid_valid = true;
}
wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);
}
#endif
- if (!cr->bss && (cr->status == WLAN_STATUS_SUCCESS)) {
- WARN_ON_ONCE(!wiphy_to_rdev(wdev->wiphy)->ops->connect);
- cr->bss = cfg80211_get_bss(wdev->wiphy, NULL, cr->bssid,
- wdev->ssid, wdev->ssid_len,
- wdev->conn_bss_type,
- IEEE80211_PRIVACY_ANY);
- if (cr->bss)
- cfg80211_hold_bss(bss_from_pub(cr->bss));
- }
+ if (cr->status == WLAN_STATUS_SUCCESS) {
+ if (!wiphy_to_rdev(wdev->wiphy)->ops->connect) {
+ for_each_valid_link(cr, link) {
+ if (WARN_ON_ONCE(!cr->links[link].bss))
+ break;
+ }
+ }
+
+ for_each_valid_link(cr, link) {
+ if (cr->links[link].bss)
+ continue;
- if (wdev->current_bss) {
- cfg80211_unhold_bss(wdev->current_bss);
- cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub);
- wdev->current_bss = NULL;
+ cr->links[link].bss =
+ cfg80211_get_bss(wdev->wiphy, NULL,
+ cr->links[link].bssid,
+ wdev->u.client.ssid,
+ wdev->u.client.ssid_len,
+ wdev->conn_bss_type,
+ IEEE80211_PRIVACY_ANY);
+ if (!cr->links[link].bss) {
+ bss_not_found = true;
+ break;
+ }
+ cfg80211_hold_bss(bss_from_pub(cr->links[link].bss));
+ }
}
+ cfg80211_wdev_release_bsses(wdev);
+
if (cr->status != WLAN_STATUS_SUCCESS) {
kfree_sensitive(wdev->connect_keys);
wdev->connect_keys = NULL;
- wdev->ssid_len = 0;
+ wdev->u.client.ssid_len = 0;
wdev->conn_owner_nlportid = 0;
- if (cr->bss) {
- cfg80211_unhold_bss(bss_from_pub(cr->bss));
- cfg80211_put_bss(wdev->wiphy, cr->bss);
- }
+ cfg80211_connect_result_release_bsses(wdev, cr);
cfg80211_sme_free(wdev);
return;
}
- if (WARN_ON(!cr->bss))
+ if (WARN_ON(bss_not_found)) {
+ cfg80211_connect_result_release_bsses(wdev, cr);
return;
+ }
- wdev->current_bss = bss_from_pub(cr->bss);
+ memset(wdev->links, 0, sizeof(wdev->links));
+ wdev->valid_links = cr->valid_links;
+ for_each_valid_link(cr, link)
+ wdev->links[link].client.current_bss =
+ bss_from_pub(cr->links[link].bss);
+ wdev->connected = true;
+ ether_addr_copy(wdev->u.client.connected_addr, connected_addr);
+ if (cr->valid_links) {
+ for_each_valid_link(cr, link)
+ memcpy(wdev->links[link].addr, cr->links[link].addr,
+ ETH_ALEN);
+ }
if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP))
cfg80211_upload_connect_keys(wdev);
rcu_read_lock();
- country_elem = ieee80211_bss_get_elem(cr->bss, WLAN_EID_COUNTRY);
+ for_each_valid_link(cr, link) {
+ country_elem =
+ ieee80211_bss_get_elem(cr->links[link].bss,
+ WLAN_EID_COUNTRY);
+ if (country_elem)
+ break;
+ }
if (!country_elem) {
rcu_read_unlock();
return;
@@ -777,12 +864,60 @@ void __cfg80211_connect_result(struct net_device *dev,
if (!country_data)
return;
- regulatory_hint_country_ie(wdev->wiphy, cr->bss->channel->band,
+ regulatory_hint_country_ie(wdev->wiphy,
+ cr->links[link].bss->channel->band,
country_data, country_datalen);
kfree(country_data);
+
+ return;
+out:
+ for_each_valid_link(cr, link)
+ cfg80211_put_bss(wdev->wiphy, cr->links[link].bss);
}
-/* Consumes bss object one way or another */
+static void cfg80211_update_link_bss(struct wireless_dev *wdev,
+ struct cfg80211_bss **bss)
+{
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct cfg80211_internal_bss *ibss;
+
+ if (!*bss)
+ return;
+
+ ibss = bss_from_pub(*bss);
+ if (list_empty(&ibss->list)) {
+ struct cfg80211_bss *found = NULL, *tmp = *bss;
+
+ found = cfg80211_get_bss(wdev->wiphy, NULL,
+ (*bss)->bssid,
+ wdev->u.client.ssid,
+ wdev->u.client.ssid_len,
+ wdev->conn_bss_type,
+ IEEE80211_PRIVACY_ANY);
+ if (found) {
+ /* The same BSS is already updated so use it
+ * instead, as it has latest info.
+ */
+ *bss = found;
+ } else {
+ /* Update with BSS provided by driver, it will
+ * be freshly added and ref cnted, we can free
+ * the old one.
+ *
+ * signal_valid can be false, as we are not
+ * expecting the BSS to be found.
+ *
+ * keep the old timestamp to avoid confusion
+ */
+ cfg80211_bss_update(rdev, ibss, false,
+ ibss->ts);
+ }
+
+ cfg80211_put_bss(wdev->wiphy, tmp);
+ }
+}
+
+/* Consumes bss object(s) one way or another */
void cfg80211_connect_done(struct net_device *dev,
struct cfg80211_connect_resp_params *params,
gfp_t gfp)
@@ -792,55 +927,34 @@ void cfg80211_connect_done(struct net_device *dev,
struct cfg80211_event *ev;
unsigned long flags;
u8 *next;
+ size_t link_info_size = 0;
+ unsigned int link;
- if (params->bss) {
- struct cfg80211_internal_bss *ibss = bss_from_pub(params->bss);
-
- if (list_empty(&ibss->list)) {
- struct cfg80211_bss *found = NULL, *tmp = params->bss;
-
- found = cfg80211_get_bss(wdev->wiphy, NULL,
- params->bss->bssid,
- wdev->ssid, wdev->ssid_len,
- wdev->conn_bss_type,
- IEEE80211_PRIVACY_ANY);
- if (found) {
- /* The same BSS is already updated so use it
- * instead, as it has latest info.
- */
- params->bss = found;
- } else {
- /* Update with BSS provided by driver, it will
- * be freshly added and ref cnted, we can free
- * the old one.
- *
- * signal_valid can be false, as we are not
- * expecting the BSS to be found.
- *
- * keep the old timestamp to avoid confusion
- */
- cfg80211_bss_update(rdev, ibss, false,
- ibss->ts);
- }
-
- cfg80211_put_bss(wdev->wiphy, tmp);
- }
+ for_each_valid_link(params, link) {
+ cfg80211_update_link_bss(wdev, &params->links[link].bss);
+ link_info_size += params->links[link].bssid ? ETH_ALEN : 0;
+ link_info_size += params->links[link].addr ? ETH_ALEN : 0;
}
- ev = kzalloc(sizeof(*ev) + (params->bssid ? ETH_ALEN : 0) +
+ ev = kzalloc(sizeof(*ev) + (params->ap_mld_addr ? ETH_ALEN : 0) +
params->req_ie_len + params->resp_ie_len +
params->fils.kek_len + params->fils.pmk_len +
- (params->fils.pmkid ? WLAN_PMKID_LEN : 0), gfp);
+ (params->fils.pmkid ? WLAN_PMKID_LEN : 0) + link_info_size,
+ gfp);
+
if (!ev) {
- cfg80211_put_bss(wdev->wiphy, params->bss);
+ for_each_valid_link(params, link)
+ cfg80211_put_bss(wdev->wiphy,
+ params->links[link].bss);
return;
}
ev->type = EVENT_CONNECT_RESULT;
next = ((u8 *)ev) + sizeof(*ev);
- if (params->bssid) {
- ev->cr.bssid = next;
- memcpy((void *)ev->cr.bssid, params->bssid, ETH_ALEN);
+ if (params->ap_mld_addr) {
+ ev->cr.ap_mld_addr = next;
+ memcpy((void *)ev->cr.ap_mld_addr, params->ap_mld_addr,
+ ETH_ALEN);
next += ETH_ALEN;
}
if (params->req_ie_len) {
@@ -880,9 +994,28 @@ void cfg80211_connect_done(struct net_device *dev,
ev->cr.fils.update_erp_next_seq_num = params->fils.update_erp_next_seq_num;
if (params->fils.update_erp_next_seq_num)
ev->cr.fils.erp_next_seq_num = params->fils.erp_next_seq_num;
- if (params->bss)
- cfg80211_hold_bss(bss_from_pub(params->bss));
- ev->cr.bss = params->bss;
+ ev->cr.valid_links = params->valid_links;
+ for_each_valid_link(params, link) {
+ if (params->links[link].bss)
+ cfg80211_hold_bss(
+ bss_from_pub(params->links[link].bss));
+ ev->cr.links[link].bss = params->links[link].bss;
+
+ if (params->links[link].addr) {
+ ev->cr.links[link].addr = next;
+ memcpy((void *)ev->cr.links[link].addr,
+ params->links[link].addr,
+ ETH_ALEN);
+ next += ETH_ALEN;
+ }
+ if (params->links[link].bssid) {
+ ev->cr.links[link].bssid = next;
+ memcpy((void *)ev->cr.links[link].bssid,
+ params->links[link].bssid,
+ ETH_ALEN);
+ next += ETH_ALEN;
+ }
+ }
ev->cr.status = params->status;
ev->cr.timeout_reason = params->timeout_reason;
@@ -900,58 +1033,88 @@ void __cfg80211_roamed(struct wireless_dev *wdev,
#ifdef CONFIG_CFG80211_WEXT
union iwreq_data wrqu;
#endif
+ unsigned int link;
+ const u8 *connected_addr;
+
ASSERT_WDEV_LOCK(wdev);
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
goto out;
- if (WARN_ON(!wdev->current_bss))
+ if (WARN_ON(!wdev->connected))
goto out;
- cfg80211_unhold_bss(wdev->current_bss);
- cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub);
- wdev->current_bss = NULL;
+ if (info->valid_links) {
+ if (WARN_ON(!info->ap_mld_addr))
+ goto out;
- if (WARN_ON(!info->bss))
- return;
+ for_each_valid_link(info, link) {
+ if (WARN_ON(!info->links[link].addr))
+ goto out;
+ }
+ }
+
+ cfg80211_wdev_release_bsses(wdev);
- cfg80211_hold_bss(bss_from_pub(info->bss));
- wdev->current_bss = bss_from_pub(info->bss);
+ for_each_valid_link(info, link) {
+ if (WARN_ON(!info->links[link].bss))
+ goto out;
+ }
+
+ memset(wdev->links, 0, sizeof(wdev->links));
+ wdev->valid_links = info->valid_links;
+ for_each_valid_link(info, link) {
+ cfg80211_hold_bss(bss_from_pub(info->links[link].bss));
+ wdev->links[link].client.current_bss =
+ bss_from_pub(info->links[link].bss);
+ }
+ connected_addr = info->valid_links ?
+ info->ap_mld_addr :
+ info->links[0].bss->bssid;
+ ether_addr_copy(wdev->u.client.connected_addr, connected_addr);
+ if (info->valid_links) {
+ for_each_valid_link(info, link)
+ memcpy(wdev->links[link].addr, info->links[link].addr,
+ ETH_ALEN);
+ }
wdev->unprot_beacon_reported = 0;
nl80211_send_roamed(wiphy_to_rdev(wdev->wiphy),
wdev->netdev, info, GFP_KERNEL);
#ifdef CONFIG_CFG80211_WEXT
- if (info->req_ie) {
- memset(&wrqu, 0, sizeof(wrqu));
- wrqu.data.length = info->req_ie_len;
- wireless_send_event(wdev->netdev, IWEVASSOCREQIE,
- &wrqu, info->req_ie);
- }
+ if (!info->valid_links) {
+ if (info->req_ie) {
+ memset(&wrqu, 0, sizeof(wrqu));
+ wrqu.data.length = info->req_ie_len;
+ wireless_send_event(wdev->netdev, IWEVASSOCREQIE,
+ &wrqu, info->req_ie);
+ }
+
+ if (info->resp_ie) {
+ memset(&wrqu, 0, sizeof(wrqu));
+ wrqu.data.length = info->resp_ie_len;
+ wireless_send_event(wdev->netdev, IWEVASSOCRESPIE,
+ &wrqu, info->resp_ie);
+ }
- if (info->resp_ie) {
memset(&wrqu, 0, sizeof(wrqu));
- wrqu.data.length = info->resp_ie_len;
- wireless_send_event(wdev->netdev, IWEVASSOCRESPIE,
- &wrqu, info->resp_ie);
+ wrqu.ap_addr.sa_family = ARPHRD_ETHER;
+ memcpy(wrqu.ap_addr.sa_data, connected_addr, ETH_ALEN);
+ memcpy(wdev->wext.prev_bssid, connected_addr, ETH_ALEN);
+ wdev->wext.prev_bssid_valid = true;
+ wireless_send_event(wdev->netdev, SIOCGIWAP, &wrqu, NULL);
}
-
- memset(&wrqu, 0, sizeof(wrqu));
- wrqu.ap_addr.sa_family = ARPHRD_ETHER;
- memcpy(wrqu.ap_addr.sa_data, info->bss->bssid, ETH_ALEN);
- memcpy(wdev->wext.prev_bssid, info->bss->bssid, ETH_ALEN);
- wdev->wext.prev_bssid_valid = true;
- wireless_send_event(wdev->netdev, SIOCGIWAP, &wrqu, NULL);
#endif
return;
out:
- cfg80211_put_bss(wdev->wiphy, info->bss);
+ for_each_valid_link(info, link)
+ cfg80211_put_bss(wdev->wiphy, info->links[link].bss);
}
-/* Consumes info->bss object one way or another */
+/* Consumes info->links.bss object(s) one way or another */
void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
gfp_t gfp)
{
@@ -960,25 +1123,41 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
struct cfg80211_event *ev;
unsigned long flags;
u8 *next;
+ unsigned int link;
+ size_t link_info_size = 0;
+ bool bss_not_found = false;
+
+ for_each_valid_link(info, link) {
+ link_info_size += info->links[link].addr ? ETH_ALEN : 0;
+ link_info_size += info->links[link].bssid ? ETH_ALEN : 0;
+
+ if (info->links[link].bss)
+ continue;
- if (!info->bss) {
- info->bss = cfg80211_get_bss(wdev->wiphy, info->channel,
- info->bssid, wdev->ssid,
- wdev->ssid_len,
- wdev->conn_bss_type,
- IEEE80211_PRIVACY_ANY);
+ info->links[link].bss =
+ cfg80211_get_bss(wdev->wiphy,
+ info->links[link].channel,
+ info->links[link].bssid,
+ wdev->u.client.ssid,
+ wdev->u.client.ssid_len,
+ wdev->conn_bss_type,
+ IEEE80211_PRIVACY_ANY);
+
+ if (!info->links[link].bss) {
+ bss_not_found = true;
+ break;
+ }
}
- if (WARN_ON(!info->bss))
- return;
+ if (WARN_ON(bss_not_found))
+ goto out;
ev = kzalloc(sizeof(*ev) + info->req_ie_len + info->resp_ie_len +
info->fils.kek_len + info->fils.pmk_len +
- (info->fils.pmkid ? WLAN_PMKID_LEN : 0), gfp);
- if (!ev) {
- cfg80211_put_bss(wdev->wiphy, info->bss);
- return;
- }
+ (info->fils.pmkid ? WLAN_PMKID_LEN : 0) +
+ (info->ap_mld_addr ? ETH_ALEN : 0) + link_info_size, gfp);
+ if (!ev)
+ goto out;
ev->type = EVENT_ROAMED;
next = ((u8 *)ev) + sizeof(*ev);
@@ -1018,12 +1197,43 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
ev->rm.fils.update_erp_next_seq_num = info->fils.update_erp_next_seq_num;
if (info->fils.update_erp_next_seq_num)
ev->rm.fils.erp_next_seq_num = info->fils.erp_next_seq_num;
- ev->rm.bss = info->bss;
+ if (info->ap_mld_addr) {
+ ev->rm.ap_mld_addr = next;
+ memcpy((void *)ev->rm.ap_mld_addr, info->ap_mld_addr,
+ ETH_ALEN);
+ next += ETH_ALEN;
+ }
+ ev->rm.valid_links = info->valid_links;
+ for_each_valid_link(info, link) {
+ ev->rm.links[link].bss = info->links[link].bss;
+
+ if (info->links[link].addr) {
+ ev->rm.links[link].addr = next;
+ memcpy((void *)ev->rm.links[link].addr,
+ info->links[link].addr,
+ ETH_ALEN);
+ next += ETH_ALEN;
+ }
+
+ if (info->links[link].bssid) {
+ ev->rm.links[link].bssid = next;
+ memcpy((void *)ev->rm.links[link].bssid,
+ info->links[link].bssid,
+ ETH_ALEN);
+ next += ETH_ALEN;
+ }
+ }
spin_lock_irqsave(&wdev->event_lock, flags);
list_add_tail(&ev->list, &wdev->event_list);
spin_unlock_irqrestore(&wdev->event_lock, flags);
queue_work(cfg80211_wq, &rdev->event_work);
+
+ return;
+out:
+ for_each_valid_link(info, link)
+ cfg80211_put_bss(wdev->wiphy, info->links[link].bss);
}
EXPORT_SYMBOL(cfg80211_roamed);
@@ -1031,11 +1241,12 @@ void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid)
{
ASSERT_WDEV_LOCK(wdev);
- if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
+ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
+ wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
return;
- if (WARN_ON(!wdev->current_bss) ||
- WARN_ON(!ether_addr_equal(wdev->current_bss->pub.bssid, bssid)))
+ if (WARN_ON(!wdev->connected) ||
+ WARN_ON(!ether_addr_equal(wdev->u.client.connected_addr, bssid)))
return;
nl80211_send_port_authorized(wiphy_to_rdev(wdev->wiphy), wdev->netdev,
@@ -1087,13 +1298,9 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
return;
- if (wdev->current_bss) {
- cfg80211_unhold_bss(wdev->current_bss);
- cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub);
- }
-
- wdev->current_bss = NULL;
- wdev->ssid_len = 0;
+ cfg80211_wdev_release_bsses(wdev);
+ wdev->connected = false;
+ wdev->u.client.ssid_len = 0;
wdev->conn_owner_nlportid = 0;
kfree_sensitive(wdev->connect_keys);
wdev->connect_keys = NULL;
@@ -1121,7 +1328,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT))
max_key_idx = 7;
for (i = 0; i <= max_key_idx; i++)
- rdev_del_key(rdev, dev, i, false, NULL);
+ rdev_del_key(rdev, dev, -1, i, false, NULL);
}
rdev_set_qos_map(rdev, dev, NULL);
@@ -1182,19 +1389,20 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
* already connected, so reject a new SSID unless it's the
* same (which is the case for re-association.)
*/
- if (wdev->ssid_len &&
- (wdev->ssid_len != connect->ssid_len ||
- memcmp(wdev->ssid, connect->ssid, wdev->ssid_len)))
+ if (wdev->u.client.ssid_len &&
+ (wdev->u.client.ssid_len != connect->ssid_len ||
+ memcmp(wdev->u.client.ssid, connect->ssid, wdev->u.client.ssid_len)))
return -EALREADY;
/*
* If connected, reject (re-)association unless prev_bssid
* matches the current BSSID.
*/
- if (wdev->current_bss) {
+ if (wdev->connected) {
if (!prev_bssid)
return -EALREADY;
- if (!ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid))
+ if (!ether_addr_equal(prev_bssid,
+ wdev->u.client.connected_addr))
return -ENOTCONN;
}
@@ -1245,8 +1453,8 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
}
wdev->connect_keys = connkeys;
- memcpy(wdev->ssid, connect->ssid, connect->ssid_len);
- wdev->ssid_len = connect->ssid_len;
+ memcpy(wdev->u.client.ssid, connect->ssid, connect->ssid_len);
+ wdev->u.client.ssid_len = connect->ssid_len;
wdev->conn_bss_type = connect->pbss ? IEEE80211_BSS_TYPE_PBSS :
IEEE80211_BSS_TYPE_ESS;
@@ -1262,8 +1470,8 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
* This could be reassoc getting refused, don't clear
* ssid_len in that case.
*/
- if (!wdev->current_bss)
- wdev->ssid_len = 0;
+ if (!wdev->connected)
+ wdev->u.client.ssid_len = 0;
return err;
}
@@ -1287,7 +1495,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
err = cfg80211_sme_disconnect(wdev, reason);
else if (!rdev->ops->disconnect)
cfg80211_mlme_down(rdev, dev);
- else if (wdev->ssid_len)
+ else if (wdev->u.client.ssid_len)
err = rdev_disconnect(rdev, dev, reason);
/*
@@ -1295,8 +1503,8 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
* in which case cfg80211_disconnected() will take care of
* this later.
*/
- if (!wdev->current_bss)
- wdev->ssid_len = 0;
+ if (!wdev->connected)
+ wdev->u.client.ssid_len = 0;
return err;
}
@@ -1320,7 +1528,7 @@ void cfg80211_autodisconnect_wk(struct work_struct *work)
break;
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
- __cfg80211_stop_ap(rdev, wdev->netdev, false);
+ __cfg80211_stop_ap(rdev, wdev->netdev, -1, false);
break;
case NL80211_IFTYPE_MESH_POINT:
__cfg80211_leave_mesh(rdev, wdev->netdev);
@@ -1332,7 +1540,7 @@ void cfg80211_autodisconnect_wk(struct work_struct *work)
* ops->disconnect not implemented. Otherwise we can
* use cfg80211_disconnect.
*/
- if (rdev->ops->disconnect || wdev->current_bss)
+ if (rdev->ops->disconnect || wdev->connected)
cfg80211_disconnect(rdev, wdev->netdev,
WLAN_REASON_DEAUTH_LEAVING,
true);
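The sme.c changes iterate links with for_each_valid_link(): link 0 only when valid_links == 0 (non-MLO), otherwise every set bit. A sketch of the macro's semantics (the real definition is in include/net/cfg80211.h):

/* Semantics sketch: visit link 0 for non-MLO data, else each link
 * whose bit is set in ->valid_links.
 */
#define for_each_valid_link(link_info, link_id)                 \
        for (link_id = 0;                                       \
             link_id < ((link_info)->valid_links ?              \
                        ARRAY_SIZE((link_info)->links) : 1);    \
             link_id++)                                         \
                if (!(link_info)->valid_links ||                \
                    ((link_info)->valid_links & BIT(link_id)))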
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 228079d7690a..a405c3edbc47 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -434,13 +434,14 @@ TRACE_EVENT(rdev_change_virtual_intf,
);
DECLARE_EVENT_CLASS(key_handle,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index,
- bool pairwise, const u8 *mac_addr),
- TP_ARGS(wiphy, netdev, key_index, pairwise, mac_addr),
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id,
+ u8 key_index, bool pairwise, const u8 *mac_addr),
+ TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
MAC_ENTRY(mac_addr)
+ __field(int, link_id)
__field(u8, key_index)
__field(bool, pairwise)
),
@@ -448,34 +449,38 @@ DECLARE_EVENT_CLASS(key_handle,
WIPHY_ASSIGN;
NETDEV_ASSIGN;
MAC_ASSIGN(mac_addr, mac_addr);
+ __entry->link_id = link_id;
__entry->key_index = key_index;
__entry->pairwise = pairwise;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", key_index: %u, pairwise: %s, mac addr: " MAC_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->key_index,
- BOOL_TO_STR(__entry->pairwise), MAC_PR_ARG(mac_addr))
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, "
+ "key_index: %u, pairwise: %s, mac addr: " MAC_PR_FMT,
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id,
+ __entry->key_index, BOOL_TO_STR(__entry->pairwise),
+ MAC_PR_ARG(mac_addr))
);
DEFINE_EVENT(key_handle, rdev_get_key,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index,
- bool pairwise, const u8 *mac_addr),
- TP_ARGS(wiphy, netdev, key_index, pairwise, mac_addr)
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id,
+ u8 key_index, bool pairwise, const u8 *mac_addr),
+ TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr)
);
DEFINE_EVENT(key_handle, rdev_del_key,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index,
- bool pairwise, const u8 *mac_addr),
- TP_ARGS(wiphy, netdev, key_index, pairwise, mac_addr)
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id,
+ u8 key_index, bool pairwise, const u8 *mac_addr),
+ TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr)
);
TRACE_EVENT(rdev_add_key,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index,
- bool pairwise, const u8 *mac_addr, u8 mode),
- TP_ARGS(wiphy, netdev, key_index, pairwise, mac_addr, mode),
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id,
+ u8 key_index, bool pairwise, const u8 *mac_addr, u8 mode),
+ TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr, mode),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
MAC_ENTRY(mac_addr)
+ __field(int, link_id)
__field(u8, key_index)
__field(bool, pairwise)
__field(u8, mode)
@@ -484,24 +489,27 @@ TRACE_EVENT(rdev_add_key,
WIPHY_ASSIGN;
NETDEV_ASSIGN;
MAC_ASSIGN(mac_addr, mac_addr);
+ __entry->link_id = link_id;
__entry->key_index = key_index;
__entry->pairwise = pairwise;
__entry->mode = mode;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", key_index: %u, "
- "mode: %u, pairwise: %s, mac addr: " MAC_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->key_index,
- __entry->mode, BOOL_TO_STR(__entry->pairwise),
- MAC_PR_ARG(mac_addr))
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, "
+ "key_index: %u, mode: %u, pairwise: %s, "
+ "mac addr: " MAC_PR_FMT,
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id,
+ __entry->key_index, __entry->mode,
+ BOOL_TO_STR(__entry->pairwise), MAC_PR_ARG(mac_addr))
);
TRACE_EVENT(rdev_set_default_key,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index,
- bool unicast, bool multicast),
- TP_ARGS(wiphy, netdev, key_index, unicast, multicast),
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id,
+ u8 key_index, bool unicast, bool multicast),
+ TP_ARGS(wiphy, netdev, link_id, key_index, unicast, multicast),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
+ __field(int, link_id)
__field(u8, key_index)
__field(bool, unicast)
__field(bool, multicast)
@@ -509,48 +517,58 @@ TRACE_EVENT(rdev_set_default_key,
TP_fast_assign(
WIPHY_ASSIGN;
NETDEV_ASSIGN;
+ __entry->link_id = link_id;
__entry->key_index = key_index;
__entry->unicast = unicast;
__entry->multicast = multicast;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", key index: %u, unicast: %s, multicast: %s",
- WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->key_index,
- BOOL_TO_STR(__entry->unicast),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, "
+ "key index: %u, unicast: %s, multicast: %s",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id,
+ __entry->key_index, BOOL_TO_STR(__entry->unicast),
BOOL_TO_STR(__entry->multicast))
);
TRACE_EVENT(rdev_set_default_mgmt_key,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index),
- TP_ARGS(wiphy, netdev, key_index),
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id,
+ u8 key_index),
+ TP_ARGS(wiphy, netdev, link_id, key_index),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
+ __field(int, link_id)
__field(u8, key_index)
),
TP_fast_assign(
WIPHY_ASSIGN;
NETDEV_ASSIGN;
+ __entry->link_id = link_id;
__entry->key_index = key_index;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", key index: %u",
- WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->key_index)
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, "
+ "key index: %u", WIPHY_PR_ARG, NETDEV_PR_ARG,
+ __entry->link_id, __entry->key_index)
);
TRACE_EVENT(rdev_set_default_beacon_key,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index),
- TP_ARGS(wiphy, netdev, key_index),
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id,
+ u8 key_index),
+ TP_ARGS(wiphy, netdev, link_id, key_index),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
+ __field(int, link_id)
__field(u8, key_index)
),
TP_fast_assign(
WIPHY_ASSIGN;
NETDEV_ASSIGN;
+ __entry->link_id = link_id;
__entry->key_index = key_index;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", key index: %u",
- WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->key_index)
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, "
+ "key index: %u", WIPHY_PR_ARG, NETDEV_PR_ARG,
+ __entry->link_id, __entry->key_index)
);
TRACE_EVENT(rdev_start_ap,
@@ -569,6 +587,7 @@ TRACE_EVENT(rdev_start_ap,
__field(bool, privacy)
__field(enum nl80211_auth_type, auth_type)
__field(int, inactivity_timeout)
+ __field(unsigned int, link_id)
),
TP_fast_assign(
WIPHY_ASSIGN;
@@ -583,16 +602,17 @@ TRACE_EVENT(rdev_start_ap,
__entry->inactivity_timeout = settings->inactivity_timeout;
memset(__entry->ssid, 0, IEEE80211_MAX_SSID_LEN + 1);
memcpy(__entry->ssid, settings->ssid, settings->ssid_len);
+ __entry->link_id = settings->beacon.link_id;
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", AP settings - ssid: %s, "
CHAN_DEF_PR_FMT ", beacon interval: %d, dtim period: %d, "
"hidden ssid: %d, wpa versions: %u, privacy: %s, "
- "auth type: %d, inactivity timeout: %d",
+ "auth type: %d, inactivity timeout: %d, link_id: %d",
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->ssid, CHAN_DEF_PR_ARG,
__entry->beacon_interval, __entry->dtim_period,
__entry->hidden_ssid, __entry->wpa_ver,
BOOL_TO_STR(__entry->privacy), __entry->auth_type,
- __entry->inactivity_timeout)
+ __entry->inactivity_timeout, __entry->link_id)
);
TRACE_EVENT(rdev_change_beacon,
@@ -602,6 +622,7 @@ TRACE_EVENT(rdev_change_beacon,
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
+ __field(int, link_id)
__dynamic_array(u8, head, info ? info->head_len : 0)
__dynamic_array(u8, tail, info ? info->tail_len : 0)
__dynamic_array(u8, beacon_ies, info ? info->beacon_ies_len : 0)
@@ -615,6 +636,7 @@ TRACE_EVENT(rdev_change_beacon,
WIPHY_ASSIGN;
NETDEV_ASSIGN;
if (info) {
+ __entry->link_id = info->link_id;
if (info->head)
memcpy(__get_dynamic_array(head), info->head,
info->head_len);
@@ -635,9 +657,30 @@ TRACE_EVENT(rdev_change_beacon,
if (info->probe_resp)
memcpy(__get_dynamic_array(probe_resp),
info->probe_resp, info->probe_resp_len);
+ } else {
+ __entry->link_id = -1;
}
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG)
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id:%d",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id)
+);
+
+TRACE_EVENT(rdev_stop_ap,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ unsigned int link_id),
+ TP_ARGS(wiphy, netdev, link_id),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __field(unsigned int, link_id)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ __entry->link_id = link_id;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id)
);
DECLARE_EVENT_CLASS(wiphy_netdev_evt,
@@ -654,11 +697,6 @@ DECLARE_EVENT_CLASS(wiphy_netdev_evt,
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG)
);
-DEFINE_EVENT(wiphy_netdev_evt, rdev_stop_ap,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev),
- TP_ARGS(wiphy, netdev)
-);
-
DEFINE_EVENT(wiphy_netdev_evt, rdev_set_rekey_data,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev),
TP_ARGS(wiphy, netdev)
@@ -718,7 +756,7 @@ DECLARE_EVENT_CLASS(station_add_change,
__array(u8, vht_capa, (int)sizeof(struct ieee80211_vht_cap))
__array(char, vlan, IFNAMSIZ)
__dynamic_array(u8, supported_rates,
- params->supported_rates_len)
+ params->link_sta_params.supported_rates_len)
__dynamic_array(u8, ext_capab, params->ext_capab_len)
__dynamic_array(u8, supported_channels,
params->supported_channels_len)
@@ -738,20 +776,23 @@ DECLARE_EVENT_CLASS(station_add_change,
__entry->plink_state = params->plink_state;
__entry->uapsd_queues = params->uapsd_queues;
memset(__entry->ht_capa, 0, sizeof(struct ieee80211_ht_cap));
- if (params->ht_capa)
- memcpy(__entry->ht_capa, params->ht_capa,
+ if (params->link_sta_params.ht_capa)
+ memcpy(__entry->ht_capa,
+ params->link_sta_params.ht_capa,
sizeof(struct ieee80211_ht_cap));
memset(__entry->vht_capa, 0, sizeof(struct ieee80211_vht_cap));
- if (params->vht_capa)
- memcpy(__entry->vht_capa, params->vht_capa,
+ if (params->link_sta_params.vht_capa)
+ memcpy(__entry->vht_capa,
+ params->link_sta_params.vht_capa,
sizeof(struct ieee80211_vht_cap));
memset(__entry->vlan, 0, sizeof(__entry->vlan));
if (params->vlan)
memcpy(__entry->vlan, params->vlan->name, IFNAMSIZ);
- if (params->supported_rates && params->supported_rates_len)
+ if (params->link_sta_params.supported_rates &&
+ params->link_sta_params.supported_rates_len)
memcpy(__get_dynamic_array(supported_rates),
- params->supported_rates,
- params->supported_rates_len);
+ params->link_sta_params.supported_rates,
+ params->link_sta_params.supported_rates_len);
if (params->ext_capab && params->ext_capab_len)
memcpy(__get_dynamic_array(ext_capab),
params->ext_capab,
@@ -768,8 +809,9 @@ DECLARE_EVENT_CLASS(station_add_change,
params->supported_oper_classes_len);
__entry->max_sp = params->max_sp;
__entry->capability = params->capability;
- __entry->opmode_notif = params->opmode_notif;
- __entry->opmode_notif_used = params->opmode_notif_used;
+ __entry->opmode_notif = params->link_sta_params.opmode_notif;
+ __entry->opmode_notif_used =
+ params->link_sta_params.opmode_notif_used;
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: " MAC_PR_FMT
", station flags mask: %u, station flags set: %u, "
@@ -1208,9 +1250,8 @@ TRACE_EVENT(rdev_auth,
TRACE_EVENT(rdev_assoc,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
- struct cfg80211_assoc_request *req,
- const struct cfg80211_bss_ies *bss_ies),
- TP_ARGS(wiphy, netdev, req, bss_ies),
+ struct cfg80211_assoc_request *req),
+ TP_ARGS(wiphy, netdev, req),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
@@ -1218,9 +1259,6 @@ TRACE_EVENT(rdev_assoc,
MAC_ENTRY(prev_bssid)
__field(bool, use_mfp)
__field(u32, flags)
- __dynamic_array(u8, bss_elements, bss_ies->len)
- __field(bool, bss_elements_bcon)
- __field(u64, bss_elements_tsf)
__dynamic_array(u8, elements, req->ie_len)
__array(u8, ht_capa, sizeof(struct ieee80211_ht_cap))
__array(u8, ht_capa_mask, sizeof(struct ieee80211_ht_cap))
@@ -1240,11 +1278,6 @@ TRACE_EVENT(rdev_assoc,
MAC_ASSIGN(prev_bssid, req->prev_bssid);
__entry->use_mfp = req->use_mfp;
__entry->flags = req->flags;
- if (bss_ies->len)
- memcpy(__get_dynamic_array(bss_elements),
- bss_ies->data, bss_ies->len);
- __entry->bss_elements_bcon = bss_ies->from_beacon;
- __entry->bss_elements_tsf = bss_ies->tsf;
if (req->ie)
memcpy(__get_dynamic_array(elements),
req->ie, req->ie_len);
@@ -1303,10 +1336,7 @@ TRACE_EVENT(rdev_disassoc,
TP_fast_assign(
WIPHY_ASSIGN;
NETDEV_ASSIGN;
- if (req->bss)
- MAC_ASSIGN(bssid, req->bss->bssid);
- else
- eth_zero_addr(__entry->bssid);
+ MAC_ASSIGN(bssid, req->ap_addr);
__entry->reason_code = req->reason_code;
__entry->local_state_change = req->local_state_change;
),
@@ -1619,20 +1649,24 @@ TRACE_EVENT(rdev_testmode_dump,
TRACE_EVENT(rdev_set_bitrate_mask,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ unsigned int link_id,
const u8 *peer, const struct cfg80211_bitrate_mask *mask),
- TP_ARGS(wiphy, netdev, peer, mask),
+ TP_ARGS(wiphy, netdev, link_id, peer, mask),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
+ __field(unsigned int, link_id)
MAC_ENTRY(peer)
),
TP_fast_assign(
WIPHY_ASSIGN;
NETDEV_ASSIGN;
+ __entry->link_id = link_id;
MAC_ASSIGN(peer, peer);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer))
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, peer: " MAC_PR_FMT,
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id,
+ MAC_PR_ARG(peer))
);
TRACE_EVENT(rdev_update_mgmt_frame_registrations,
@@ -1999,14 +2033,15 @@ TRACE_EVENT(rdev_mgmt_tx,
TRACE_EVENT(rdev_tx_control_port,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
const u8 *buf, size_t len, const u8 *dest, __be16 proto,
- bool unencrypted),
- TP_ARGS(wiphy, netdev, buf, len, dest, proto, unencrypted),
+ bool unencrypted, int link_id),
+ TP_ARGS(wiphy, netdev, buf, len, dest, proto, unencrypted, link_id),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
MAC_ENTRY(dest)
__field(__be16, proto)
__field(bool, unencrypted)
+ __field(int, link_id)
),
TP_fast_assign(
WIPHY_ASSIGN;
@@ -2014,12 +2049,14 @@ TRACE_EVENT(rdev_tx_control_port,
MAC_ASSIGN(dest, dest);
__entry->proto = proto;
__entry->unencrypted = unencrypted;
+ __entry->link_id = link_id;
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ","
- " proto: 0x%x, unencrypted: %s",
+ " proto: 0x%x, unencrypted: %s, link: %d",
WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(dest),
be16_to_cpu(__entry->proto),
- BOOL_TO_STR(__entry->unencrypted))
+ BOOL_TO_STR(__entry->unencrypted),
+ __entry->link_id)
);
TRACE_EVENT(rdev_set_noack_map,
@@ -2040,9 +2077,28 @@ TRACE_EVENT(rdev_set_noack_map,
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->noack_map)
);
-DEFINE_EVENT(wiphy_wdev_evt, rdev_get_channel,
- TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
- TP_ARGS(wiphy, wdev)
+DECLARE_EVENT_CLASS(wiphy_wdev_link_evt,
+ TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+ unsigned int link_id),
+ TP_ARGS(wiphy, wdev, link_id),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ WDEV_ENTRY
+ __field(unsigned int, link_id)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ WDEV_ASSIGN;
+ __entry->link_id = link_id;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", link_id: %u",
+ WIPHY_PR_ARG, WDEV_PR_ARG, __entry->link_id)
+);
+
+DEFINE_EVENT(wiphy_wdev_link_evt, rdev_get_channel,
+ TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+ unsigned int link_id),
+ TP_ARGS(wiphy, wdev, link_id)
);
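
rdev_get_channel moves from the plain wiphy_wdev_evt class to the new wiphy_wdev_link_evt class, which rdev_add_intf_link and rdev_del_intf_link reuse further down. One DECLARE_EVENT_CLASS body serves any number of events; each DEFINE_EVENT only restates the prototype. A sketch of the pattern with hypothetical names, assuming this header's WDEV_* helpers:

DECLARE_EVENT_CLASS(demo_link_evt,
	TP_PROTO(struct wireless_dev *wdev, unsigned int link_id),
	TP_ARGS(wdev, link_id),
	TP_STRUCT__entry(
		WDEV_ENTRY
		__field(unsigned int, link_id)
	),
	TP_fast_assign(
		WDEV_ASSIGN;
		__entry->link_id = link_id;
	),
	TP_printk(WDEV_PR_FMT ", link_id: %u", WDEV_PR_ARG, __entry->link_id)
);

/* the body above is shared; only the name and prototype repeat */
DEFINE_EVENT(demo_link_evt, demo_link_added,
	TP_PROTO(struct wireless_dev *wdev, unsigned int link_id),
	TP_ARGS(wdev, link_id)
);
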
TRACE_EVENT(rdev_return_chandef,
@@ -2296,20 +2352,24 @@ TRACE_EVENT(rdev_set_qos_map,
TRACE_EVENT(rdev_set_ap_chanwidth,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ unsigned int link_id,
struct cfg80211_chan_def *chandef),
- TP_ARGS(wiphy, netdev, chandef),
+ TP_ARGS(wiphy, netdev, link_id, chandef),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
CHAN_DEF_ENTRY
+ __field(unsigned int, link_id)
),
TP_fast_assign(
WIPHY_ASSIGN;
NETDEV_ASSIGN;
CHAN_DEF_ASSIGN(chandef);
+ __entry->link_id = link_id;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG)
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG,
+ __entry->link_id)
);
TRACE_EVENT(rdev_add_tx_ts,
@@ -2645,6 +2705,155 @@ TRACE_EVENT(rdev_set_fils_aad,
__entry->kek_len)
);
+TRACE_EVENT(rdev_update_owe_info,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_update_owe_info *owe_info),
+ TP_ARGS(wiphy, netdev, owe_info),
+ TP_STRUCT__entry(WIPHY_ENTRY
+ NETDEV_ENTRY
+ MAC_ENTRY(peer)
+ __field(u16, status)
+ __dynamic_array(u8, ie, owe_info->ie_len)),
+ TP_fast_assign(WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ MAC_ASSIGN(peer, owe_info->peer);
+ __entry->status = owe_info->status;
+ memcpy(__get_dynamic_array(ie),
+ owe_info->ie, owe_info->ie_len);),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT
+ " status %d", WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer),
+ __entry->status)
+);
+
+TRACE_EVENT(rdev_probe_mesh_link,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ const u8 *dest, const u8 *buf, size_t len),
+ TP_ARGS(wiphy, netdev, dest, buf, len),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ MAC_ENTRY(dest)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ MAC_ASSIGN(dest, dest);
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT,
+ WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(dest))
+);
+
+TRACE_EVENT(rdev_set_tid_config,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_tid_config *tid_conf),
+ TP_ARGS(wiphy, netdev, tid_conf),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ MAC_ENTRY(peer)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ MAC_ASSIGN(peer, tid_conf->peer);
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT,
+ WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer))
+);
+
+TRACE_EVENT(rdev_reset_tid_config,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ const u8 *peer, u8 tids),
+ TP_ARGS(wiphy, netdev, peer, tids),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ MAC_ENTRY(peer)
+ __field(u8, tids)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ MAC_ASSIGN(peer, peer);
+ __entry->tids = tids;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT ", tids: 0x%x",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->tids)
+);
+
+TRACE_EVENT(rdev_set_sar_specs,
+ TP_PROTO(struct wiphy *wiphy, struct cfg80211_sar_specs *sar),
+ TP_ARGS(wiphy, sar),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ __field(u16, type)
+ __field(u16, num)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ __entry->type = sar->type;
+ __entry->num = sar->num_sub_specs;
+
+ ),
+ TP_printk(WIPHY_PR_FMT ", Set type:%d, num_specs:%d",
+ WIPHY_PR_ARG, __entry->type, __entry->num)
+);
+
+TRACE_EVENT(rdev_color_change,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_color_change_settings *params),
+ TP_ARGS(wiphy, netdev, params),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __field(u8, count)
+ __field(u16, bcn_ofs)
+ __field(u16, pres_ofs)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ __entry->count = params->count;
+ __entry->bcn_ofs = params->counter_offset_beacon;
+ __entry->pres_ofs = params->counter_offset_presp;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT
+ ", count: %u",
+ WIPHY_PR_ARG, NETDEV_PR_ARG,
+ __entry->count)
+);
+
+TRACE_EVENT(rdev_set_radar_background,
+ TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef),
+
+ TP_ARGS(wiphy, chandef),
+
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ CHAN_DEF_ENTRY
+ ),
+
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ CHAN_DEF_ASSIGN(chandef)
+ ),
+
+ TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT,
+ WIPHY_PR_ARG, CHAN_DEF_PR_ARG)
+);
+
+DEFINE_EVENT(wiphy_wdev_link_evt, rdev_add_intf_link,
+ TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+ unsigned int link_id),
+ TP_ARGS(wiphy, wdev, link_id)
+);
+
+DEFINE_EVENT(wiphy_wdev_link_evt, rdev_del_intf_link,
+ TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+ unsigned int link_id),
+ TP_ARGS(wiphy, wdev, link_id)
+);
+
/*************************************************************
* cfg80211 exported functions traces *
*************************************************************/
@@ -2699,20 +2908,20 @@ DEFINE_EVENT(netdev_evt_only, cfg80211_send_rx_auth,
);
TRACE_EVENT(cfg80211_send_rx_assoc,
- TP_PROTO(struct net_device *netdev, struct cfg80211_bss *bss),
- TP_ARGS(netdev, bss),
+ TP_PROTO(struct net_device *netdev,
+ struct cfg80211_rx_assoc_resp *data),
+ TP_ARGS(netdev, data),
TP_STRUCT__entry(
NETDEV_ENTRY
- MAC_ENTRY(bssid)
- CHAN_ENTRY
+ MAC_ENTRY(ap_addr)
),
TP_fast_assign(
NETDEV_ASSIGN;
- MAC_ASSIGN(bssid, bss->bssid);
- CHAN_ASSIGN(bss->channel);
+ MAC_ASSIGN(ap_addr,
+ data->ap_mld_addr ?: data->links[0].bss->bssid);
),
- TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT ", " CHAN_PR_FMT,
- NETDEV_PR_ARG, MAC_PR_ARG(bssid), CHAN_PR_ARG)
+ TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT,
+ NETDEV_PR_ARG, MAC_PR_ARG(ap_addr))
);
DECLARE_EVENT_CLASS(netdev_frame_event,
@@ -2781,9 +2990,22 @@ DEFINE_EVENT(netdev_mac_evt, cfg80211_send_auth_timeout,
TP_ARGS(netdev, mac)
);
-DEFINE_EVENT(netdev_mac_evt, cfg80211_send_assoc_timeout,
- TP_PROTO(struct net_device *netdev, const u8 *mac),
- TP_ARGS(netdev, mac)
+TRACE_EVENT(cfg80211_send_assoc_failure,
+ TP_PROTO(struct net_device *netdev,
+ struct cfg80211_assoc_failure *data),
+ TP_ARGS(netdev, data),
+ TP_STRUCT__entry(
+ NETDEV_ENTRY
+ MAC_ENTRY(ap_addr)
+ __field(bool, timeout)
+ ),
+ TP_fast_assign(
+ NETDEV_ASSIGN;
+ MAC_ASSIGN(ap_addr, data->ap_mld_addr ?: data->bss[0]->bssid);
+ __entry->timeout = data->timeout;
+ ),
+ TP_printk(NETDEV_PR_FMT ", mac: " MAC_PR_FMT ", timeout: %d",
+ NETDEV_PR_ARG, MAC_PR_ARG(ap_addr), __entry->timeout)
);
TRACE_EVENT(cfg80211_michael_mic_failure,
@@ -2892,8 +3114,8 @@ DEFINE_EVENT(cfg80211_netdev_mac_evt, cfg80211_del_sta,
);
TRACE_EVENT(cfg80211_rx_mgmt,
- TP_PROTO(struct wireless_dev *wdev, int freq, int sig_dbm),
- TP_ARGS(wdev, freq, sig_dbm),
+ TP_PROTO(struct wireless_dev *wdev, struct cfg80211_rx_info *info),
+ TP_ARGS(wdev, info),
TP_STRUCT__entry(
WDEV_ENTRY
__field(int, freq)
@@ -2901,8 +3123,8 @@ TRACE_EVENT(cfg80211_rx_mgmt,
),
TP_fast_assign(
WDEV_ASSIGN;
- __entry->freq = freq;
- __entry->sig_dbm = sig_dbm;
+ __entry->freq = info->freq;
+ __entry->sig_dbm = info->sig_dbm;
),
TP_printk(WDEV_PR_FMT ", freq: "KHZ_F", sig dbm: %d",
WDEV_PR_ARG, PR_KHZ(__entry->freq), __entry->sig_dbm)
@@ -3022,34 +3244,40 @@ TRACE_EVENT(cfg80211_chandef_dfs_required,
TRACE_EVENT(cfg80211_ch_switch_notify,
TP_PROTO(struct net_device *netdev,
- struct cfg80211_chan_def *chandef),
- TP_ARGS(netdev, chandef),
+ struct cfg80211_chan_def *chandef,
+ unsigned int link_id),
+ TP_ARGS(netdev, chandef, link_id),
TP_STRUCT__entry(
NETDEV_ENTRY
CHAN_DEF_ENTRY
+ __field(unsigned int, link_id)
),
TP_fast_assign(
NETDEV_ASSIGN;
CHAN_DEF_ASSIGN(chandef);
+ __entry->link_id = link_id;
),
- TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT,
- NETDEV_PR_ARG, CHAN_DEF_PR_ARG)
+ TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d",
+ NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id)
);
TRACE_EVENT(cfg80211_ch_switch_started_notify,
TP_PROTO(struct net_device *netdev,
- struct cfg80211_chan_def *chandef),
- TP_ARGS(netdev, chandef),
+ struct cfg80211_chan_def *chandef,
+ unsigned int link_id),
+ TP_ARGS(netdev, chandef, link_id),
TP_STRUCT__entry(
NETDEV_ENTRY
CHAN_DEF_ENTRY
+ __field(unsigned int, link_id)
),
TP_fast_assign(
NETDEV_ASSIGN;
CHAN_DEF_ASSIGN(chandef);
+ __entry->link_id = link_id;
),
- TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT,
- NETDEV_PR_ARG, CHAN_DEF_PR_ARG)
+ TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d",
+ NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id)
);
TRACE_EVENT(cfg80211_radar_event,
@@ -3520,26 +3748,6 @@ TRACE_EVENT(cfg80211_pmsr_complete,
(unsigned long long)__entry->cookie)
);
-TRACE_EVENT(rdev_update_owe_info,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
- struct cfg80211_update_owe_info *owe_info),
- TP_ARGS(wiphy, netdev, owe_info),
- TP_STRUCT__entry(WIPHY_ENTRY
- NETDEV_ENTRY
- MAC_ENTRY(peer)
- __field(u16, status)
- __dynamic_array(u8, ie, owe_info->ie_len)),
- TP_fast_assign(WIPHY_ASSIGN;
- NETDEV_ASSIGN;
- MAC_ASSIGN(peer, owe_info->peer);
- __entry->status = owe_info->status;
- memcpy(__get_dynamic_array(ie),
- owe_info->ie, owe_info->ie_len);),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT
- " status %d", WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer),
- __entry->status)
-);
-
TRACE_EVENT(cfg80211_update_owe_info_event,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
struct cfg80211_update_owe_info *owe_info),
@@ -3557,160 +3765,140 @@ TRACE_EVENT(cfg80211_update_owe_info_event,
WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer))
);
-TRACE_EVENT(rdev_probe_mesh_link,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
- const u8 *dest, const u8 *buf, size_t len),
- TP_ARGS(wiphy, netdev, dest, buf, len),
+TRACE_EVENT(cfg80211_bss_color_notify,
+ TP_PROTO(struct net_device *netdev,
+ enum nl80211_commands cmd,
+ u8 count, u64 color_bitmap),
+ TP_ARGS(netdev, cmd, count, color_bitmap),
TP_STRUCT__entry(
- WIPHY_ENTRY
NETDEV_ENTRY
- MAC_ENTRY(dest)
+ __field(u32, cmd)
+ __field(u8, count)
+ __field(u64, color_bitmap)
),
TP_fast_assign(
- WIPHY_ASSIGN;
NETDEV_ASSIGN;
- MAC_ASSIGN(dest, dest);
+ __entry->cmd = cmd;
+ __entry->count = count;
+ __entry->color_bitmap = color_bitmap;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(dest))
+ TP_printk(NETDEV_PR_FMT ", cmd: %x, count: %u, bitmap: %llx",
+ NETDEV_PR_ARG, __entry->cmd, __entry->count,
+ __entry->color_bitmap)
);
-TRACE_EVENT(rdev_set_tid_config,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
- struct cfg80211_tid_config *tid_conf),
- TP_ARGS(wiphy, netdev, tid_conf),
+TRACE_EVENT(cfg80211_assoc_comeback,
+ TP_PROTO(struct wireless_dev *wdev, const u8 *ap_addr, u32 timeout),
+ TP_ARGS(wdev, ap_addr, timeout),
TP_STRUCT__entry(
- WIPHY_ENTRY
- NETDEV_ENTRY
- MAC_ENTRY(peer)
+ WDEV_ENTRY
+ MAC_ENTRY(ap_addr)
+ __field(u32, timeout)
),
TP_fast_assign(
- WIPHY_ASSIGN;
- NETDEV_ASSIGN;
- MAC_ASSIGN(peer, tid_conf->peer);
+ WDEV_ASSIGN;
+ MAC_ASSIGN(ap_addr, ap_addr);
+ __entry->timeout = timeout;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer))
+ TP_printk(WDEV_PR_FMT ", " MAC_PR_FMT ", timeout: %u TUs",
+ WDEV_PR_ARG, MAC_PR_ARG(ap_addr), __entry->timeout)
);
-TRACE_EVENT(rdev_reset_tid_config,
+DECLARE_EVENT_CLASS(link_station_add_mod,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
- const u8 *peer, u8 tids),
- TP_ARGS(wiphy, netdev, peer, tids),
+ struct link_station_parameters *params),
+ TP_ARGS(wiphy, netdev, params),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
- MAC_ENTRY(peer)
- __field(u8, tids)
+ __array(u8, mld_mac, 6)
+ __array(u8, link_mac, 6)
+ __field(u32, link_id)
+ __dynamic_array(u8, supported_rates,
+ params->supported_rates_len)
+ __array(u8, ht_capa, (int)sizeof(struct ieee80211_ht_cap))
+ __array(u8, vht_capa, (int)sizeof(struct ieee80211_vht_cap))
+ __field(u8, opmode_notif)
+ __field(bool, opmode_notif_used)
+ __dynamic_array(u8, he_capa, params->he_capa_len)
+ __array(u8, he_6ghz_capa, (int)sizeof(struct ieee80211_he_6ghz_capa))
+ __dynamic_array(u8, eht_capa, params->eht_capa_len)
),
TP_fast_assign(
WIPHY_ASSIGN;
NETDEV_ASSIGN;
- MAC_ASSIGN(peer, peer);
- __entry->tids = tids;
+ memset(__entry->mld_mac, 0, 6);
+ memset(__entry->link_mac, 0, 6);
+ if (params->mld_mac)
+ memcpy(__entry->mld_mac, params->mld_mac, 6);
+ if (params->link_mac)
+ memcpy(__entry->link_mac, params->link_mac, 6);
+ __entry->link_id = params->link_id;
+ if (params->supported_rates && params->supported_rates_len)
+ memcpy(__get_dynamic_array(supported_rates),
+ params->supported_rates,
+ params->supported_rates_len);
+ memset(__entry->ht_capa, 0, sizeof(struct ieee80211_ht_cap));
+ if (params->ht_capa)
+ memcpy(__entry->ht_capa, params->ht_capa,
+ sizeof(struct ieee80211_ht_cap));
+ memset(__entry->vht_capa, 0, sizeof(struct ieee80211_vht_cap));
+ if (params->vht_capa)
+ memcpy(__entry->vht_capa, params->vht_capa,
+ sizeof(struct ieee80211_vht_cap));
+ __entry->opmode_notif = params->opmode_notif;
+ __entry->opmode_notif_used = params->opmode_notif_used;
+ if (params->he_capa && params->he_capa_len)
+ memcpy(__get_dynamic_array(he_capa), params->he_capa,
+ params->he_capa_len);
+ memset(__entry->he_6ghz_capa, 0, sizeof(struct ieee80211_he_6ghz_capa));
+ if (params->he_6ghz_capa)
+ memcpy(__entry->he_6ghz_capa, params->he_6ghz_capa,
+ sizeof(struct ieee80211_he_6ghz_capa));
+ if (params->eht_capa && params->eht_capa_len)
+ memcpy(__get_dynamic_array(eht_capa), params->eht_capa,
+ params->eht_capa_len);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT ", tids: 0x%x",
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->tids)
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: " MAC_PR_FMT
+ ", link mac: " MAC_PR_FMT ", link id: %u",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(mld_mac),
+ MAC_PR_ARG(link_mac), __entry->link_id)
);
-TRACE_EVENT(rdev_set_sar_specs,
- TP_PROTO(struct wiphy *wiphy, struct cfg80211_sar_specs *sar),
- TP_ARGS(wiphy, sar),
- TP_STRUCT__entry(
- WIPHY_ENTRY
- __field(u16, type)
- __field(u16, num)
- ),
- TP_fast_assign(
- WIPHY_ASSIGN;
- __entry->type = sar->type;
- __entry->num = sar->num_sub_specs;
+DEFINE_EVENT(link_station_add_mod, rdev_add_link_station,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct link_station_parameters *params),
+ TP_ARGS(wiphy, netdev, params)
+);
- ),
- TP_printk(WIPHY_PR_FMT ", Set type:%d, num_specs:%d",
- WIPHY_PR_ARG, __entry->type, __entry->num)
+DEFINE_EVENT(link_station_add_mod, rdev_mod_link_station,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct link_station_parameters *params),
+ TP_ARGS(wiphy, netdev, params)
);
-TRACE_EVENT(rdev_color_change,
+TRACE_EVENT(rdev_del_link_station,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
- struct cfg80211_color_change_settings *params),
+ struct link_station_del_parameters *params),
TP_ARGS(wiphy, netdev, params),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
- __field(u8, count)
- __field(u16, bcn_ofs)
- __field(u16, pres_ofs)
+ __array(u8, mld_mac, 6)
+ __field(u32, link_id)
),
TP_fast_assign(
WIPHY_ASSIGN;
NETDEV_ASSIGN;
- __entry->count = params->count;
- __entry->bcn_ofs = params->counter_offset_beacon;
- __entry->pres_ofs = params->counter_offset_presp;
+ memset(__entry->mld_mac, 0, 6);
+ if (params->mld_mac)
+ memcpy(__entry->mld_mac, params->mld_mac, 6);
+ __entry->link_id = params->link_id;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT
- ", count: %u",
- WIPHY_PR_ARG, NETDEV_PR_ARG,
- __entry->count)
-);
-
-TRACE_EVENT(cfg80211_bss_color_notify,
- TP_PROTO(struct net_device *netdev,
- enum nl80211_commands cmd,
- u8 count, u64 color_bitmap),
- TP_ARGS(netdev, cmd, count, color_bitmap),
- TP_STRUCT__entry(
- NETDEV_ENTRY
- __field(u32, cmd)
- __field(u8, count)
- __field(u64, color_bitmap)
- ),
- TP_fast_assign(
- NETDEV_ASSIGN;
- __entry->cmd = cmd;
- __entry->count = count;
- __entry->color_bitmap = color_bitmap;
- ),
- TP_printk(NETDEV_PR_FMT ", cmd: %x, count: %u, bitmap: %llx",
- NETDEV_PR_ARG, __entry->cmd, __entry->count,
- __entry->color_bitmap)
-);
-
-TRACE_EVENT(rdev_set_radar_background,
- TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef),
-
- TP_ARGS(wiphy, chandef),
-
- TP_STRUCT__entry(
- WIPHY_ENTRY
- CHAN_DEF_ENTRY
- ),
-
- TP_fast_assign(
- WIPHY_ASSIGN;
- CHAN_DEF_ASSIGN(chandef)
- ),
-
- TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT,
- WIPHY_PR_ARG, CHAN_DEF_PR_ARG)
-);
-
-TRACE_EVENT(cfg80211_assoc_comeback,
- TP_PROTO(struct wireless_dev *wdev, const u8 *bssid, u32 timeout),
- TP_ARGS(wdev, bssid, timeout),
- TP_STRUCT__entry(
- WDEV_ENTRY
- MAC_ENTRY(bssid)
- __field(u32, timeout)
- ),
- TP_fast_assign(
- WDEV_ASSIGN;
- MAC_ASSIGN(bssid, bssid);
- __entry->timeout = timeout;
- ),
- TP_printk(WDEV_PR_FMT ", " MAC_PR_FMT ", timeout: %u TUs",
- WDEV_PR_ARG, MAC_PR_ARG(bssid), __entry->timeout)
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: " MAC_PR_FMT
+ ", link id: %u",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(mld_mac),
+ __entry->link_id)
);
#endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 41ea65deb6e1..39680e7bad45 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -5,7 +5,7 @@
* Copyright 2007-2009 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*/
#include <linux/export.h>
#include <linux/bitops.h>
@@ -559,7 +559,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
return -1;
hdrlen = ieee80211_hdrlen(hdr->frame_control) + data_offset;
- if (skb->len < hdrlen + 8)
+ if (skb->len < hdrlen)
return -1;
/* convert IEEE 802.11 header + possible LLC headers into Ethernet
@@ -574,8 +574,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
memcpy(tmp.h_dest, ieee80211_get_DA(hdr), ETH_ALEN);
memcpy(tmp.h_source, ieee80211_get_SA(hdr), ETH_ALEN);
- if (iftype == NL80211_IFTYPE_MESH_POINT)
- skb_copy_bits(skb, hdrlen, &mesh_flags, 1);
+ if (iftype == NL80211_IFTYPE_MESH_POINT &&
+ skb_copy_bits(skb, hdrlen, &mesh_flags, 1) < 0)
+ return -1;
mesh_flags &= MESH_FLAGS_AE;
@@ -595,11 +596,12 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
if (iftype == NL80211_IFTYPE_MESH_POINT) {
if (mesh_flags == MESH_FLAGS_AE_A4)
return -1;
- if (mesh_flags == MESH_FLAGS_AE_A5_A6) {
- skb_copy_bits(skb, hdrlen +
- offsetof(struct ieee80211s_hdr, eaddr1),
- tmp.h_dest, 2 * ETH_ALEN);
- }
+ if (mesh_flags == MESH_FLAGS_AE_A5_A6 &&
+ skb_copy_bits(skb, hdrlen +
+ offsetof(struct ieee80211s_hdr, eaddr1),
+ tmp.h_dest, 2 * ETH_ALEN) < 0)
+ return -1;
+
hdrlen += __ieee80211_get_mesh_hdrlen(mesh_flags);
}
break;
@@ -613,10 +615,11 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
if (iftype == NL80211_IFTYPE_MESH_POINT) {
if (mesh_flags == MESH_FLAGS_AE_A5_A6)
return -1;
- if (mesh_flags == MESH_FLAGS_AE_A4)
- skb_copy_bits(skb, hdrlen +
- offsetof(struct ieee80211s_hdr, eaddr1),
- tmp.h_source, ETH_ALEN);
+ if (mesh_flags == MESH_FLAGS_AE_A4 &&
+ skb_copy_bits(skb, hdrlen +
+ offsetof(struct ieee80211s_hdr, eaddr1),
+ tmp.h_source, ETH_ALEN) < 0)
+ return -1;
hdrlen += __ieee80211_get_mesh_hdrlen(mesh_flags);
}
break;
@@ -628,18 +631,19 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
break;
}
- skb_copy_bits(skb, hdrlen, &payload, sizeof(payload));
- tmp.h_proto = payload.proto;
-
- if (likely((!is_amsdu && ether_addr_equal(payload.hdr, rfc1042_header) &&
- tmp.h_proto != htons(ETH_P_AARP) &&
- tmp.h_proto != htons(ETH_P_IPX)) ||
- ether_addr_equal(payload.hdr, bridge_tunnel_header)))
+ if (likely(skb_copy_bits(skb, hdrlen, &payload, sizeof(payload)) == 0 &&
+ ((!is_amsdu && ether_addr_equal(payload.hdr, rfc1042_header) &&
+ payload.proto != htons(ETH_P_AARP) &&
+ payload.proto != htons(ETH_P_IPX)) ||
+ ether_addr_equal(payload.hdr, bridge_tunnel_header)))) {
/* remove RFC1042 or Bridge-Tunnel encapsulation and
* replace EtherType */
hdrlen += ETH_ALEN + 2;
- else
+ tmp.h_proto = payload.proto;
+ skb_postpull_rcsum(skb, &payload, ETH_ALEN + 2);
+ } else {
tmp.h_proto = htons(skb->len - hdrlen);
+ }
pskb_pull(skb, hdrlen);
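
Every skb_copy_bits() call in this function now has its return value checked: the helper returns 0 on success and a negative error when the skb is shorter than offset + len, and ignoring that previously let truncated frames be parsed using uninitialized stack bytes. A condensed sketch of the hardened pattern (function name hypothetical):

static int demo_pull_mesh_flags(struct sk_buff *skb, int hdrlen, u8 *flags)
{
	/* fail closed: a frame too short for its mesh header is malformed */
	if (skb_copy_bits(skb, hdrlen, flags, 1) < 0)
		return -1;
	*flags &= MESH_FLAGS_AE;
	return 0;
}
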
@@ -933,13 +937,13 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) {
if (!wdev->connect_keys->params[i].cipher)
continue;
- if (rdev_add_key(rdev, dev, i, false, NULL,
+ if (rdev_add_key(rdev, dev, -1, i, false, NULL,
&wdev->connect_keys->params[i])) {
netdev_err(dev, "failed to set key %d\n", i);
continue;
}
if (wdev->connect_keys->def == i &&
- rdev_set_default_key(rdev, dev, i, true, true)) {
+ rdev_set_default_key(rdev, dev, -1, i, true, true)) {
netdev_err(dev, "failed to set defkey %d\n", i);
continue;
}
@@ -1039,7 +1043,6 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
return -EBUSY;
dev->ieee80211_ptr->use_4addr = false;
- dev->ieee80211_ptr->mesh_id_up_len = 0;
wdev_lock(dev->ieee80211_ptr);
rdev_set_qos_map(rdev, dev, NULL);
wdev_unlock(dev->ieee80211_ptr);
@@ -1047,7 +1050,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
switch (otype) {
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
- cfg80211_stop_ap(rdev, dev, true);
+ cfg80211_stop_ap(rdev, dev, -1, true);
break;
case NL80211_IFTYPE_ADHOC:
cfg80211_leave_ibss(rdev, dev, false);
@@ -1071,6 +1074,11 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
cfg80211_process_rdev_events(rdev);
cfg80211_mlme_purge_registrations(dev->ieee80211_ptr);
+
+ memset(&dev->ieee80211_ptr->u, 0,
+ sizeof(dev->ieee80211_ptr->u));
+ memset(&dev->ieee80211_ptr->links, 0,
+ sizeof(dev->ieee80211_ptr->links));
}
err = rdev_change_virtual_intf(rdev, dev, ntype, params);
@@ -1355,7 +1363,7 @@ static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate)
25599, /* 4.166666... */
17067, /* 2.777777... */
12801, /* 2.083333... */
- 11769, /* 1.851851... */
+ 11377, /* 1.851725... */
10239, /* 1.666666... */
8532, /* 1.388888... */
7680, /* 1.250000... */
@@ -1428,6 +1436,137 @@ static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate)
return result / 10000;
}
+static u32 cfg80211_calculate_bitrate_eht(struct rate_info *rate)
+{
+#define SCALE 6144
+ static const u32 mcs_divisors[16] = {
+ 102399, /* 16.666666... */
+ 51201, /* 8.333333... */
+ 34134, /* 5.555555... */
+ 25599, /* 4.166666... */
+ 17067, /* 2.777777... */
+ 12801, /* 2.083333... */
+ 11377, /* 1.851725... */
+ 10239, /* 1.666666... */
+ 8532, /* 1.388888... */
+ 7680, /* 1.250000... */
+ 6828, /* 1.111111... */
+ 6144, /* 1.000000... */
+ 5690, /* 0.926106... */
+ 5120, /* 0.833333... */
+ 409600, /* 66.666666... */
+ 204800, /* 33.333333... */
+ };
+ static const u32 rates_996[3] = { 480388888, 453700000, 408333333 };
+ static const u32 rates_484[3] = { 229411111, 216666666, 195000000 };
+ static const u32 rates_242[3] = { 114711111, 108333333, 97500000 };
+ static const u32 rates_106[3] = { 40000000, 37777777, 34000000 };
+ static const u32 rates_52[3] = { 18820000, 17777777, 16000000 };
+ static const u32 rates_26[3] = { 9411111, 8888888, 8000000 };
+ u64 tmp;
+ u32 result;
+
+ if (WARN_ON_ONCE(rate->mcs > 15))
+ return 0;
+ if (WARN_ON_ONCE(rate->eht_gi > NL80211_RATE_INFO_EHT_GI_3_2))
+ return 0;
+ if (WARN_ON_ONCE(rate->eht_ru_alloc >
+ NL80211_RATE_INFO_EHT_RU_ALLOC_4x996))
+ return 0;
+ if (WARN_ON_ONCE(rate->nss < 1 || rate->nss > 8))
+ return 0;
+
+ /* Bandwidth checks for MCS 14 */
+ if (rate->mcs == 14) {
+ if ((rate->bw != RATE_INFO_BW_EHT_RU &&
+ rate->bw != RATE_INFO_BW_80 &&
+ rate->bw != RATE_INFO_BW_160 &&
+ rate->bw != RATE_INFO_BW_320) ||
+ (rate->bw == RATE_INFO_BW_EHT_RU &&
+ rate->eht_ru_alloc != NL80211_RATE_INFO_EHT_RU_ALLOC_996 &&
+ rate->eht_ru_alloc != NL80211_RATE_INFO_EHT_RU_ALLOC_2x996 &&
+ rate->eht_ru_alloc != NL80211_RATE_INFO_EHT_RU_ALLOC_4x996)) {
+ WARN(1, "invalid EHT BW for MCS 14: bw:%d, ru:%d\n",
+ rate->bw, rate->eht_ru_alloc);
+ return 0;
+ }
+ }
+
+ if (rate->bw == RATE_INFO_BW_320 ||
+ (rate->bw == RATE_INFO_BW_EHT_RU &&
+ rate->eht_ru_alloc == NL80211_RATE_INFO_EHT_RU_ALLOC_4x996))
+ result = 4 * rates_996[rate->eht_gi];
+ else if (rate->bw == RATE_INFO_BW_EHT_RU &&
+ rate->eht_ru_alloc == NL80211_RATE_INFO_EHT_RU_ALLOC_3x996P484)
+ result = 3 * rates_996[rate->eht_gi] + rates_484[rate->eht_gi];
+ else if (rate->bw == RATE_INFO_BW_EHT_RU &&
+ rate->eht_ru_alloc == NL80211_RATE_INFO_EHT_RU_ALLOC_3x996)
+ result = 3 * rates_996[rate->eht_gi];
+ else if (rate->bw == RATE_INFO_BW_EHT_RU &&
+ rate->eht_ru_alloc == NL80211_RATE_INFO_EHT_RU_ALLOC_2x996P484)
+ result = 2 * rates_996[rate->eht_gi] + rates_484[rate->eht_gi];
+ else if (rate->bw == RATE_INFO_BW_160 ||
+ (rate->bw == RATE_INFO_BW_EHT_RU &&
+ rate->eht_ru_alloc == NL80211_RATE_INFO_EHT_RU_ALLOC_2x996))
+ result = 2 * rates_996[rate->eht_gi];
+ else if (rate->bw == RATE_INFO_BW_EHT_RU &&
+ rate->eht_ru_alloc ==
+ NL80211_RATE_INFO_EHT_RU_ALLOC_996P484P242)
+ result = rates_996[rate->eht_gi] + rates_484[rate->eht_gi]
+ + rates_242[rate->eht_gi];
+ else if (rate->bw == RATE_INFO_BW_EHT_RU &&
+ rate->eht_ru_alloc == NL80211_RATE_INFO_EHT_RU_ALLOC_996P484)
+ result = rates_996[rate->eht_gi] + rates_484[rate->eht_gi];
+ else if (rate->bw == RATE_INFO_BW_80 ||
+ (rate->bw == RATE_INFO_BW_EHT_RU &&
+ rate->eht_ru_alloc == NL80211_RATE_INFO_EHT_RU_ALLOC_996))
+ result = rates_996[rate->eht_gi];
+ else if (rate->bw == RATE_INFO_BW_EHT_RU &&
+ rate->eht_ru_alloc == NL80211_RATE_INFO_EHT_RU_ALLOC_484P242)
+ result = rates_484[rate->eht_gi] + rates_242[rate->eht_gi];
+ else if (rate->bw == RATE_INFO_BW_40 ||
+ (rate->bw == RATE_INFO_BW_EHT_RU &&
+ rate->eht_ru_alloc == NL80211_RATE_INFO_EHT_RU_ALLOC_484))
+ result = rates_484[rate->eht_gi];
+ else if (rate->bw == RATE_INFO_BW_20 ||
+ (rate->bw == RATE_INFO_BW_EHT_RU &&
+ rate->eht_ru_alloc == NL80211_RATE_INFO_EHT_RU_ALLOC_242))
+ result = rates_242[rate->eht_gi];
+ else if (rate->bw == RATE_INFO_BW_EHT_RU &&
+ rate->eht_ru_alloc == NL80211_RATE_INFO_EHT_RU_ALLOC_106P26)
+ result = rates_106[rate->eht_gi] + rates_26[rate->eht_gi];
+ else if (rate->bw == RATE_INFO_BW_EHT_RU &&
+ rate->eht_ru_alloc == NL80211_RATE_INFO_EHT_RU_ALLOC_106)
+ result = rates_106[rate->eht_gi];
+ else if (rate->bw == RATE_INFO_BW_EHT_RU &&
+ rate->eht_ru_alloc == NL80211_RATE_INFO_EHT_RU_ALLOC_52P26)
+ result = rates_52[rate->eht_gi] + rates_26[rate->eht_gi];
+ else if (rate->bw == RATE_INFO_BW_EHT_RU &&
+ rate->eht_ru_alloc == NL80211_RATE_INFO_EHT_RU_ALLOC_52)
+ result = rates_52[rate->eht_gi];
+ else if (rate->bw == RATE_INFO_BW_EHT_RU &&
+ rate->eht_ru_alloc == NL80211_RATE_INFO_EHT_RU_ALLOC_26)
+ result = rates_26[rate->eht_gi];
+ else {
+ WARN(1, "invalid EHT MCS: bw:%d, ru:%d\n",
+ rate->bw, rate->eht_ru_alloc);
+ return 0;
+ }
+
+ /* now scale to the appropriate MCS */
+ tmp = result;
+ tmp *= SCALE;
+ do_div(tmp, mcs_divisors[rate->mcs]);
+
+ /* and take NSS */
+ tmp *= rate->nss;
+ do_div(tmp, 8);
+
+ result = tmp;
+
+ return result / 10000;
+}
+
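
The rate tables make the EHT arithmetic easy to check by hand. A standalone sketch for one data point, 160 MHz with 0.8 us GI at MCS 11 and NSS 2, using the constants from the function above:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* EHT, 160 MHz, 0.8us GI, MCS 11, NSS 2; constants copied from above */
	const uint64_t rate_996_gi08 = 480388888;	/* one 996-tone RU, GI index 0 */
	const uint64_t scale = 6144, mcs11_divisor = 6144, nss = 2;
	uint64_t tmp = 2 * rate_996_gi08;		/* RATE_INFO_BW_160 counts as 2 x 996 */

	tmp = tmp * scale / mcs11_divisor;		/* now scale to the MCS */
	tmp = tmp * nss / 8;				/* and take NSS */
	printf("%llu (units of 100 kbit/s)\n",
	       (unsigned long long)(tmp / 10000));	/* 24019, i.e. ~2401.9 Mbit/s */
	return 0;
}
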
u32 cfg80211_calculate_bitrate(struct rate_info *rate)
{
if (rate->flags & RATE_INFO_FLAGS_MCS)
@@ -1442,6 +1581,8 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate)
return cfg80211_calculate_bitrate_vht(rate);
if (rate->flags & RATE_INFO_FLAGS_HE_MCS)
return cfg80211_calculate_bitrate_he(rate);
+ if (rate->flags & RATE_INFO_FLAGS_EHT_MCS)
+ return cfg80211_calculate_bitrate_eht(rate);
return rate->legacy;
}
@@ -1797,6 +1938,24 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef,
}
EXPORT_SYMBOL(ieee80211_chandef_to_operating_class);
+static int cfg80211_wdev_bi(struct wireless_dev *wdev)
+{
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
+ WARN_ON(wdev->valid_links);
+ return wdev->links[0].ap.beacon_interval;
+ case NL80211_IFTYPE_MESH_POINT:
+ return wdev->u.mesh.beacon_interval;
+ case NL80211_IFTYPE_ADHOC:
+ return wdev->u.ibss.beacon_interval;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
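
cfg80211_wdev_bi() feeds each interface's beacon interval into a running GCD in the function below; that is how the core decides whether all beaconing interfaces can share a common schedule. The same computation as a standalone sketch:

#include <stdio.h>

static unsigned int gcd(unsigned int a, unsigned int b)
{
	while (b) {
		unsigned int t = a % b;
		a = b;
		b = t;
	}
	return a;
}

int main(void)
{
	/* e.g. an AP beaconing at 100 TU and a mesh point at 240 TU */
	unsigned int intervals[] = { 100, 240 };
	unsigned int g = 0, i;

	for (i = 0; i < 2; i++)
		g = g ? gcd(g, intervals[i]) : intervals[i];
	printf("beacon_int_gcd = %u\n", g);	/* prints 20 */
	return 0;
}
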
static void cfg80211_calculate_bi_data(struct wiphy *wiphy, u32 new_beacon_int,
u32 *beacon_int_gcd,
bool *beacon_int_different)
@@ -1807,19 +1966,27 @@ static void cfg80211_calculate_bi_data(struct wiphy *wiphy, u32 new_beacon_int,
*beacon_int_different = false;
list_for_each_entry(wdev, &wiphy->wdev_list, list) {
- if (!wdev->beacon_interval)
+ int wdev_bi;
+
+ /* this feature isn't supported with MLO */
+ if (wdev->valid_links)
+ continue;
+
+ wdev_bi = cfg80211_wdev_bi(wdev);
+
+ if (!wdev_bi)
continue;
if (!*beacon_int_gcd) {
- *beacon_int_gcd = wdev->beacon_interval;
+ *beacon_int_gcd = wdev_bi;
continue;
}
- if (wdev->beacon_interval == *beacon_int_gcd)
+ if (wdev_bi == *beacon_int_gcd)
continue;
*beacon_int_different = true;
- *beacon_int_gcd = gcd(*beacon_int_gcd, wdev->beacon_interval);
+ *beacon_int_gcd = gcd(*beacon_int_gcd, wdev_bi);
}
if (new_beacon_int && *beacon_int_gcd != new_beacon_int) {
@@ -2151,7 +2318,7 @@ void cfg80211_send_layer2_update(struct net_device *dev, const u8 *addr)
skb->dev = dev;
skb->protocol = eth_type_trans(skb, dev);
memset(skb->cb, 0, sizeof(skb->cb));
- netif_rx_ni(skb);
+ netif_rx(skb);
}
EXPORT_SYMBOL(cfg80211_send_layer2_update);
@@ -2284,3 +2451,60 @@ bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype,
return false;
}
EXPORT_SYMBOL(cfg80211_iftype_allowed);
+
+void cfg80211_remove_link(struct wireless_dev *wdev, unsigned int link_id)
+{
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+
+ ASSERT_WDEV_LOCK(wdev);
+
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
+ __cfg80211_stop_ap(rdev, wdev->netdev, link_id, true);
+ break;
+ default:
+ /* per-link not relevant */
+ break;
+ }
+
+ wdev->valid_links &= ~BIT(link_id);
+
+ rdev_del_intf_link(rdev, wdev, link_id);
+
+ eth_zero_addr(wdev->links[link_id].addr);
+}
+
+void cfg80211_remove_links(struct wireless_dev *wdev)
+{
+ unsigned int link_id;
+
+ wdev_lock(wdev);
+ if (wdev->valid_links) {
+ for_each_valid_link(wdev, link_id)
+ cfg80211_remove_link(wdev, link_id);
+ }
+ wdev_unlock(wdev);
+}
+
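
for_each_valid_link() (defined elsewhere) is effectively a walk over the set bits of wdev->valid_links, and cfg80211_remove_link() clears each bit as the link is torn down. A userspace sketch of the same loop shape:

#include <stdio.h>

int main(void)
{
	unsigned int valid_links = 0x5;	/* links 0 and 2 active */
	unsigned int link_id;

	for (link_id = 0; link_id < 16; link_id++) {
		if (!(valid_links & (1U << link_id)))
			continue;
		printf("tearing down link %u\n", link_id);
		valid_links &= ~(1U << link_id);	/* as the remove path does */
	}
	return 0;
}
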
+int cfg80211_remove_virtual_intf(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev)
+{
+ cfg80211_remove_links(wdev);
+
+ return rdev_del_virtual_intf(rdev, wdev);
+}
+
+const struct wiphy_iftype_ext_capab *
+cfg80211_get_iftype_ext_capa(struct wiphy *wiphy, enum nl80211_iftype type)
+{
+ int i;
+
+ for (i = 0; i < wiphy->num_iftype_ext_capab; i++) {
+ if (wiphy->iftype_ext_capab[i].iftype == type)
+ return &wiphy->iftype_ext_capab[i];
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL(cfg80211_get_iftype_ext_capa);
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index a32065d600a1..ddf340bfa07a 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -7,7 +7,7 @@
* we directly assign the wireless handlers of wireless interfaces.
*
* Copyright 2008-2009 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2019-2021 Intel Corporation
+ * Copyright (C) 2019-2022 Intel Corporation
*/
#include <linux/export.h>
@@ -415,6 +415,9 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
int err, i;
bool rejoin = false;
+ if (wdev->valid_links)
+ return -EINVAL;
+
if (pairwise && !addr)
return -EINVAL;
@@ -437,7 +440,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
return -EOPNOTSUPP;
if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC) {
- if (!wdev->current_bss)
+ if (!wdev->connected)
return -ENOLINK;
if (!rdev->ops->set_default_mgmt_key)
@@ -450,7 +453,9 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
if (remove) {
err = 0;
- if (wdev->current_bss) {
+ if (wdev->connected ||
+ (wdev->iftype == NL80211_IFTYPE_ADHOC &&
+ wdev->u.ibss.current_bss)) {
/*
* If removing the current TX key, we will need to
* join a new IBSS without the privacy bit clear.
@@ -465,7 +470,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
!(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN))
err = -ENOENT;
else
- err = rdev_del_key(rdev, dev, idx, pairwise,
+ err = rdev_del_key(rdev, dev, -1, idx, pairwise,
addr);
}
wdev->wext.connect.privacy = false;
@@ -501,8 +506,10 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
return -EINVAL;
err = 0;
- if (wdev->current_bss)
- err = rdev_add_key(rdev, dev, idx, pairwise, addr, params);
+ if (wdev->connected ||
+ (wdev->iftype == NL80211_IFTYPE_ADHOC &&
+ wdev->u.ibss.current_bss))
+ err = rdev_add_key(rdev, dev, -1, idx, pairwise, addr, params);
else if (params->cipher != WLAN_CIPHER_SUITE_WEP40 &&
params->cipher != WLAN_CIPHER_SUITE_WEP104)
return -EINVAL;
@@ -526,7 +533,9 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
if ((params->cipher == WLAN_CIPHER_SUITE_WEP40 ||
params->cipher == WLAN_CIPHER_SUITE_WEP104) &&
(tx_key || (!addr && wdev->wext.default_key == -1))) {
- if (wdev->current_bss) {
+ if (wdev->connected ||
+ (wdev->iftype == NL80211_IFTYPE_ADHOC &&
+ wdev->u.ibss.current_bss)) {
/*
* If we are getting a new TX key from not having
* had one before we need to join a new IBSS with
@@ -537,7 +546,8 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
__cfg80211_leave_ibss(rdev, wdev->netdev, true);
rejoin = true;
}
- err = rdev_set_default_key(rdev, dev, idx, true, true);
+ err = rdev_set_default_key(rdev, dev, -1, idx, true,
+ true);
}
if (!err) {
wdev->wext.default_key = idx;
@@ -549,8 +559,10 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC &&
(tx_key || (!addr && wdev->wext.default_mgmt_key == -1))) {
- if (wdev->current_bss)
- err = rdev_set_default_mgmt_key(rdev, dev, idx);
+ if (wdev->connected ||
+ (wdev->iftype == NL80211_IFTYPE_ADHOC &&
+ wdev->u.ibss.current_bss))
+ err = rdev_set_default_mgmt_key(rdev, dev, -1, idx);
if (!err)
wdev->wext.default_mgmt_key = idx;
return err;
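
The condition "connected, or an IBSS interface that has a current BSS" now repeats several times in this file, since wdev->current_bss was split into per-link state and the IBSS case moved into the wdev->u union. A hypothetical helper, not part of this patch, would capture it:

static bool demo_wdev_has_connection(struct wireless_dev *wdev)
{
	return wdev->connected ||
	       (wdev->iftype == NL80211_IFTYPE_ADHOC &&
		wdev->u.ibss.current_bss);
}
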
@@ -595,6 +607,11 @@ static int cfg80211_wext_siwencode(struct net_device *dev,
return -EOPNOTSUPP;
wiphy_lock(&rdev->wiphy);
+ if (wdev->valid_links) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
idx = erq->flags & IW_ENCODE_INDEX;
if (idx == 0) {
idx = wdev->wext.default_key;
@@ -613,8 +630,10 @@ static int cfg80211_wext_siwencode(struct net_device *dev,
/* No key data - just set the default TX key index */
err = 0;
wdev_lock(wdev);
- if (wdev->current_bss)
- err = rdev_set_default_key(rdev, dev, idx, true,
+ if (wdev->connected ||
+ (wdev->iftype == NL80211_IFTYPE_ADHOC &&
+ wdev->u.ibss.current_bss))
+ err = rdev_set_default_key(rdev, dev, -1, idx, true,
true);
if (!err)
wdev->wext.default_key = idx;
@@ -667,6 +686,13 @@ static int cfg80211_wext_siwencodeext(struct net_device *dev,
!rdev->ops->set_default_key)
return -EOPNOTSUPP;
+ wdev_lock(wdev);
+ if (wdev->valid_links) {
+ wdev_unlock(wdev);
+ return -EOPNOTSUPP;
+ }
+ wdev_unlock(wdev);
+
switch (ext->alg) {
case IW_ENCODE_ALG_NONE:
remove = true;
@@ -865,7 +891,7 @@ static int cfg80211_wext_giwfreq(struct net_device *dev,
break;
}
- ret = rdev_get_channel(rdev, wdev, &chandef);
+ ret = rdev_get_channel(rdev, wdev, 0, &chandef);
if (ret)
break;
freq->m = chandef.chan->center_freq;
@@ -1270,7 +1296,10 @@ static int cfg80211_wext_siwrate(struct net_device *dev,
return -EINVAL;
wiphy_lock(&rdev->wiphy);
- ret = rdev_set_bitrate_mask(rdev, dev, NULL, &mask);
+ if (dev->ieee80211_ptr->valid_links)
+ ret = -EOPNOTSUPP;
+ else
+ ret = rdev_set_bitrate_mask(rdev, dev, 0, NULL, &mask);
wiphy_unlock(&rdev->wiphy);
return ret;
@@ -1294,8 +1323,9 @@ static int cfg80211_wext_giwrate(struct net_device *dev,
err = 0;
wdev_lock(wdev);
- if (wdev->current_bss)
- memcpy(addr, wdev->current_bss->pub.bssid, ETH_ALEN);
+ if (!wdev->valid_links && wdev->links[0].client.current_bss)
+ memcpy(addr, wdev->links[0].client.current_bss->pub.bssid,
+ ETH_ALEN);
else
err = -EOPNOTSUPP;
wdev_unlock(wdev);
@@ -1339,11 +1369,11 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
/* Grab BSSID of current BSS, if any */
wdev_lock(wdev);
- if (!wdev->current_bss) {
+ if (wdev->valid_links || !wdev->links[0].client.current_bss) {
wdev_unlock(wdev);
return NULL;
}
- memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN);
+ memcpy(bssid, wdev->links[0].client.current_bss->pub.bssid, ETH_ALEN);
wdev_unlock(wdev);
memset(&sinfo, 0, sizeof(sinfo));
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 76a80a41615b..fe8765c4075d 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -468,6 +468,7 @@ void wireless_send_event(struct net_device * dev,
struct __compat_iw_event *compat_event;
struct compat_iw_point compat_wrqu;
struct sk_buff *compskb;
+ int ptr_len;
#endif
/*
@@ -582,6 +583,9 @@ void wireless_send_event(struct net_device * dev,
nlmsg_end(skb, nlh);
#ifdef CONFIG_COMPAT
hdr_len = compat_event_type_size[descr->header_type];
+
+ /* ptr_len is remaining size in event header apart from LCP */
+ ptr_len = hdr_len - IW_EV_COMPAT_LCP_LEN;
event_len = hdr_len + extra_len;
compskb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
@@ -612,16 +616,15 @@ void wireless_send_event(struct net_device * dev,
if (descr->header_type == IW_HEADER_TYPE_POINT) {
compat_wrqu.length = wrqu->data.length;
compat_wrqu.flags = wrqu->data.flags;
- memcpy(&compat_event->pointer,
- ((char *) &compat_wrqu) + IW_EV_COMPAT_POINT_OFF,
- hdr_len - IW_EV_COMPAT_LCP_LEN);
+ memcpy(compat_event->ptr_bytes,
+ ((char *)&compat_wrqu) + IW_EV_COMPAT_POINT_OFF,
+ ptr_len);
if (extra_len)
- memcpy(((char *) compat_event) + hdr_len,
- extra, extra_len);
+ memcpy(&compat_event->ptr_bytes[ptr_len],
+ extra, extra_len);
} else {
/* extra_len must be zero, so no if (extra) needed */
- memcpy(&compat_event->pointer, wrqu,
- hdr_len - IW_EV_COMPAT_LCP_LEN);
+ memcpy(compat_event->ptr_bytes, wrqu, ptr_len);
}
nlmsg_end(compskb, nlh);
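
The compat path now copies into a flexible-array member (ptr_bytes) instead of writing past a fixed-size pointer field, so fortified memcpy() sees one variable-size destination covering both the header remainder and the payload, with ptr_len = hdr_len - IW_EV_COMPAT_LCP_LEN sizing the first copy. Illustrative struct shape only; the real __compat_iw_event layout differs:

struct demo_compat_event {
	__u16	len;		/* LCP: total event length */
	__u16	cmd;		/* LCP: wireless request id */
	__u8	ptr_bytes[];	/* everything after the LCP, sized at runtime */
};
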
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index cd09a9042261..68f45afc352d 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -3,7 +3,7 @@
* cfg80211 wext compat for managed mode.
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2009, 2020-2021 Intel Corporation.
+ * Copyright (C) 2009, 2020-2022 Intel Corporation
*/
#include <linux/export.h>
@@ -124,9 +124,12 @@ int cfg80211_mgd_wext_giwfreq(struct net_device *dev,
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
return -EINVAL;
+ if (wdev->valid_links)
+ return -EOPNOTSUPP;
+
wdev_lock(wdev);
- if (wdev->current_bss)
- chan = wdev->current_bss->pub.channel;
+ if (wdev->links[0].client.current_bss)
+ chan = wdev->links[0].client.current_bss->pub.channel;
else if (wdev->wext.connect.channel)
chan = wdev->wext.connect.channel;
wdev_unlock(wdev);
@@ -208,15 +211,19 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev,
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
return -EINVAL;
+ if (wdev->valid_links)
+ return -EINVAL;
+
data->flags = 0;
wdev_lock(wdev);
- if (wdev->current_bss) {
+ if (wdev->links[0].client.current_bss) {
const struct element *ssid_elem;
rcu_read_lock();
- ssid_elem = ieee80211_bss_get_elem(&wdev->current_bss->pub,
- WLAN_EID_SSID);
+ ssid_elem = ieee80211_bss_get_elem(
+ &wdev->links[0].client.current_bss->pub,
+ WLAN_EID_SSID);
if (ssid_elem) {
data->flags = 1;
data->length = ssid_elem->datalen;
@@ -300,8 +307,14 @@ int cfg80211_mgd_wext_giwap(struct net_device *dev,
ap_addr->sa_family = ARPHRD_ETHER;
wdev_lock(wdev);
- if (wdev->current_bss)
- memcpy(ap_addr->sa_data, wdev->current_bss->pub.bssid, ETH_ALEN);
+ if (wdev->valid_links) {
+ wdev_unlock(wdev);
+ return -EOPNOTSUPP;
+ }
+ if (wdev->links[0].client.current_bss)
+ memcpy(ap_addr->sa_data,
+ wdev->links[0].client.current_bss->pub.bssid,
+ ETH_ALEN);
else
eth_zero_addr(ap_addr->sa_data);
wdev_unlock(wdev);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 3583354a7d7f..3b55502b2965 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -719,6 +719,11 @@ static int x25_wait_for_connection_establishment(struct sock *sk)
sk->sk_socket->state = SS_UNCONNECTED;
break;
}
+ rc = -ENOTCONN;
+ if (sk->sk_state == TCP_CLOSE) {
+ sk->sk_socket->state = SS_UNCONNECTED;
+ break;
+ }
rc = 0;
if (sk->sk_state != TCP_ESTABLISHED) {
release_sock(sk);
@@ -1315,8 +1320,7 @@ static int x25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
} else {
/* Now we can treat all alike */
release_sock(sk);
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &rc);
+ skb = skb_recv_datagram(sk, flags, &rc);
lock_sock(sk);
if (!skb)
goto out;
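
This follows the tree-wide change that folded the separate noblock argument into flags, so the signature is now skb_recv_datagram(sk, flags, &err) and callers no longer split MSG_DONTWAIT out by hand. A sketch of a caller under the new API:

static struct sk_buff *demo_recv(struct sock *sk, bool nonblock, int *err)
{
	unsigned int flags = nonblock ? MSG_DONTWAIT : 0;

	/* the old (flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &rc) split is gone */
	return skb_recv_datagram(sk, flags, err);
}
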
@@ -1765,10 +1769,15 @@ void x25_kill_by_neigh(struct x25_neigh *nb)
write_lock_bh(&x25_list_lock);
- sk_for_each(s, &x25_list)
- if (x25_sk(s)->neighbour == nb)
+ sk_for_each(s, &x25_list) {
+ if (x25_sk(s)->neighbour == nb) {
+ write_unlock_bh(&x25_list_lock);
+ lock_sock(s);
x25_disconnect(s, ENETUNREACH, 0, 0);
-
+ release_sock(s);
+ write_lock_bh(&x25_list_lock);
+ }
+ }
write_unlock_bh(&x25_list_lock);
/* Remove any related forwards */
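
lock_sock() can sleep, so it must not be called while holding the BH-disabled write lock; the fix above drops the list lock around the disconnect and re-takes it afterwards. Note that resuming sk_for_each() after re-locking assumes the list was not reshuffled in between. A condensed sketch of the pattern with hypothetical names:

static void demo_kill_by_neigh(struct demo_neigh *nb)
{
	struct sock *s;

	write_lock_bh(&demo_list_lock);
	sk_for_each(s, &demo_list) {
		if (demo_sk(s)->neighbour != nb)
			continue;
		/* lock_sock() may sleep: leave the BH-disabled lock first */
		write_unlock_bh(&demo_list_lock);
		lock_sock(s);
		demo_disconnect(s, ENETUNREACH);
		release_sock(s);
		write_lock_bh(&demo_list_lock);
	}
	write_unlock_bh(&demo_list_lock);
}
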
diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c
index 3bddcbdf2e40..0412814a2295 100644
--- a/net/x25/x25_proc.c
+++ b/net/x25/x25_proc.c
@@ -79,7 +79,6 @@ static int x25_seq_socket_show(struct seq_file *seq, void *v)
{
struct sock *s;
struct x25_sock *x25;
- struct net_device *dev;
const char *devname;
if (v == SEQ_START_TOKEN) {
@@ -91,7 +90,7 @@ static int x25_seq_socket_show(struct seq_file *seq, void *v)
s = sk_entry(v);
x25 = x25_sk(s);
- if (!x25->neighbour || (dev = x25->neighbour->dev) == NULL)
+ if (!x25->neighbour || !x25->neighbour->dev)
devname = "???";
else
devname = x25->neighbour->dev->name;
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index f01ef6bda390..4681e8e8ad94 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -19,8 +19,6 @@
#include "xdp_umem.h"
#include "xsk_queue.h"
-#define XDP_UMEM_MIN_CHUNK_SIZE 2048
-
static DEFINE_IDA(umem_ida);
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
@@ -57,7 +55,7 @@ static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages,
static void xdp_umem_release(struct xdp_umem *umem)
{
umem->zc = false;
- ida_simple_remove(&umem_ida, umem->id);
+ ida_free(&umem_ida, umem->id);
xdp_umem_addr_unmap(umem);
xdp_umem_unpin_pages(umem);
@@ -242,7 +240,7 @@ struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
if (!umem)
return ERR_PTR(-ENOMEM);
- err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL);
+ err = ida_alloc(&umem_ida, GFP_KERNEL);
if (err < 0) {
kfree(umem);
return ERR_PTR(err);
@@ -251,7 +249,7 @@ struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
err = xdp_umem_reg(umem, mr);
if (err) {
- ida_simple_remove(&umem_ida, umem->id);
+ ida_free(&umem_ida, umem->id);
kfree(umem);
return ERR_PTR(err);
}
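
ida_simple_get()/ida_simple_remove() are the deprecated spellings of the IDA API; ida_alloc() returns the smallest free ID (or a negative errno) and ida_free() releases it. A minimal kernel-style sketch:

static DEFINE_IDA(demo_ida);

static int demo_get_id(void)
{
	return ida_alloc(&demo_ida, GFP_KERNEL);	/* >= 0, or -errno */
}

static void demo_put_id(int id)
{
	ida_free(&demo_ida, id);
}
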
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 28ef3f4465ae..9f0561b67c12 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -184,7 +184,7 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
xsk_xdp = xsk_buff_alloc(xs->pool);
if (!xsk_xdp) {
xs->rx_dropped++;
- return -ENOSPC;
+ return -ENOMEM;
}
xsk_copy_xdp(xsk_xdp, xdp, len);
@@ -217,7 +217,7 @@ static bool xsk_is_bound(struct xdp_sock *xs)
static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp)
{
if (!xsk_is_bound(xs))
- return -EINVAL;
+ return -ENXIO;
if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
return -EINVAL;
@@ -343,9 +343,9 @@ out:
}
EXPORT_SYMBOL(xsk_tx_peek_desc);
-static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, struct xdp_desc *descs,
- u32 max_entries)
+static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, u32 max_entries)
{
+ struct xdp_desc *descs = pool->tx_descs;
u32 nb_pkts = 0;
while (nb_pkts < max_entries && xsk_tx_peek_desc(pool, &descs[nb_pkts]))
@@ -355,17 +355,15 @@ static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, struct xdp_d
return nb_pkts;
}
-u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *descs,
- u32 max_entries)
+u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 nb_pkts)
{
struct xdp_sock *xs;
- u32 nb_pkts;
rcu_read_lock();
if (!list_is_singular(&pool->xsk_tx_list)) {
/* Fallback to the non-batched version */
rcu_read_unlock();
- return xsk_tx_peek_release_fallback(pool, descs, max_entries);
+ return xsk_tx_peek_release_fallback(pool, nb_pkts);
}
xs = list_first_or_null_rcu(&pool->xsk_tx_list, struct xdp_sock, tx_list);
@@ -374,11 +372,7 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *
goto out;
}
- nb_pkts = xskq_cons_peek_desc_batch(xs->tx, descs, pool, max_entries);
- if (!nb_pkts) {
- xs->tx->queue_empty_descs++;
- goto out;
- }
+ nb_pkts = xskq_cons_nb_entries(xs->tx, nb_pkts);
/* This is the backpressure mechanism for the Tx path. Try to
	 * reserve space in the completion queue for all packets, but
	 * if there are fewer available than the batch size, just send those
	 * packets. This avoids having to implement any buffering in
* packets. This avoids having to implement any buffering in
* the Tx path.
*/
- nb_pkts = xskq_prod_reserve_addr_batch(pool->cq, descs, nb_pkts);
+ nb_pkts = xskq_prod_nb_free(pool->cq, nb_pkts);
if (!nb_pkts)
goto out;
- xskq_cons_release_n(xs->tx, nb_pkts);
+ nb_pkts = xskq_cons_read_desc_batch(xs->tx, pool, nb_pkts);
+ if (!nb_pkts) {
+ xs->tx->queue_empty_descs++;
+ goto out;
+ }
+
__xskq_cons_release(xs->tx);
+ xskq_prod_write_addr_batch(pool->cq, pool->tx_descs, nb_pkts);
xs->sk.sk_write_space(&xs->sk);
out:
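
The batched path is reordered so both rings are sized before any descriptor data is touched: first cap the batch at what the Tx ring holds, then at what the completion ring can absorb, and only then read the descriptors and publish their addresses. A condensed sketch of that flow, assuming the helper names used in the hunk above:

static u32 demo_tx_batch(struct xsk_buff_pool *pool, struct xdp_sock *xs,
			 u32 nb_pkts)
{
	nb_pkts = xskq_cons_nb_entries(xs->tx, nb_pkts);	/* what Tx holds */
	nb_pkts = xskq_prod_nb_free(pool->cq, nb_pkts);		/* what cq can take */
	if (!nb_pkts)
		return 0;					/* backpressure */

	nb_pkts = xskq_cons_read_desc_batch(xs->tx, pool, nb_pkts);
	if (nb_pkts)
		xskq_prod_write_addr_batch(pool->cq, pool->tx_descs, nb_pkts);
	return nb_pkts;
}
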
@@ -403,18 +403,8 @@ EXPORT_SYMBOL(xsk_tx_peek_release_desc_batch);
static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
{
struct net_device *dev = xs->dev;
- int err;
- rcu_read_lock();
- err = dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
- rcu_read_unlock();
-
- return err;
-}
-
-static int xsk_zc_xmit(struct xdp_sock *xs)
-{
- return xsk_wakeup(xs, XDP_WAKEUP_TX);
+ return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
}
static void xsk_destruct_skb(struct sk_buff *skb)
@@ -533,6 +523,12 @@ static int xsk_generic_xmit(struct sock *sk)
mutex_lock(&xs->mutex);
+ /* Since we dropped the RCU read lock, the socket state might have changed. */
+ if (unlikely(!xsk_is_bound(xs))) {
+ err = -ENXIO;
+ goto out;
+ }
+
if (xs->queue_id >= xs->dev->real_num_tx_queues)
goto out;
@@ -542,12 +538,6 @@ static int xsk_generic_xmit(struct sock *sk)
goto out;
}
- skb = xsk_build_skb(xs, &desc);
- if (IS_ERR(skb)) {
- err = PTR_ERR(skb);
- goto out;
- }
-
/* This is the backpressure mechanism for the Tx path.
* Reserve space in the completion queue and only proceed
* if there is space in it. This avoids having to implement
@@ -556,11 +546,19 @@ static int xsk_generic_xmit(struct sock *sk)
spin_lock_irqsave(&xs->pool->cq_lock, flags);
if (xskq_prod_reserve(xs->pool->cq)) {
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
- kfree_skb(skb);
goto out;
}
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ skb = xsk_build_skb(xs, &desc);
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
+ xskq_prod_cancel(xs->pool->cq);
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ goto out;
+ }
+
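
Building the skb can fail under memory pressure, so the completion-queue slot is now reserved first and explicitly cancelled on failure; previously the skb was built before the reservation and had to be freed on the no-space path. A condensed sketch of the new ordering (error handling simplified, names as in the hunk):

static int demo_xmit_one(struct xdp_sock *xs, struct xdp_desc *desc)
{
	struct sk_buff *skb;
	unsigned long flags;

	/* 1) reserve a completion slot up front */
	spin_lock_irqsave(&xs->pool->cq_lock, flags);
	if (xskq_prod_reserve(xs->pool->cq)) {
		spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
		return -EAGAIN;			/* backpressure, retry later */
	}
	spin_unlock_irqrestore(&xs->pool->cq_lock, flags);

	/* 2) only then build the skb; on failure, return the slot */
	skb = xsk_build_skb(xs, desc);
	if (IS_ERR(skb)) {
		spin_lock_irqsave(&xs->pool->cq_lock, flags);
		xskq_prod_cancel(xs->pool->cq);
		spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
		return PTR_ERR(skb);
	}

	return __dev_direct_xmit(skb, xs->queue_id);
}
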
err = __dev_direct_xmit(skb, xs->queue_id);
if (err == NETDEV_TX_BUSY) {
/* Tell user-space to retry the send */
@@ -596,16 +594,26 @@ out:
return err;
}
-static int __xsk_sendmsg(struct sock *sk)
+static int xsk_xmit(struct sock *sk)
{
struct xdp_sock *xs = xdp_sk(sk);
+ int ret;
if (unlikely(!(xs->dev->flags & IFF_UP)))
return -ENETDOWN;
if (unlikely(!xs->tx))
return -ENOBUFS;
- return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk);
+ if (xs->zc)
+ return xsk_wakeup(xs, XDP_WAKEUP_TX);
+
+ /* Drop the RCU lock since the SKB path might sleep. */
+ rcu_read_unlock();
+ ret = xsk_generic_xmit(sk);
+	/* Reacquire RCU lock before going into common code. */
+ rcu_read_lock();
+
+ return ret;
}
static bool xsk_no_wakeup(struct sock *sk)
@@ -619,7 +627,7 @@ static bool xsk_no_wakeup(struct sock *sk)
#endif
}
-static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
+static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
struct sock *sk = sock->sk;
@@ -631,19 +639,33 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
if (unlikely(need_wait))
return -EOPNOTSUPP;
- if (sk_can_busy_loop(sk))
+ if (sk_can_busy_loop(sk)) {
+ if (xs->zc)
+ __sk_mark_napi_id_once(sk, xsk_pool_get_napi_id(xs->pool));
sk_busy_loop(sk, 1); /* only support non-blocking sockets */
+ }
- if (xsk_no_wakeup(sk))
+ if (xs->zc && xsk_no_wakeup(sk))
return 0;
pool = xs->pool;
if (pool->cached_need_wakeup & XDP_WAKEUP_TX)
- return __xsk_sendmsg(sk);
+ return xsk_xmit(sk);
return 0;
}
-static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
+static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
+{
+ int ret;
+
+ rcu_read_lock();
+ ret = __xsk_sendmsg(sock, m, total_len);
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static int __xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
{
bool need_wait = !(flags & MSG_DONTWAIT);
struct sock *sk = sock->sk;
@@ -669,6 +691,17 @@ static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
return 0;
}
+static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
+{
+ int ret;
+
+ rcu_read_lock();
+ ret = __xsk_recvmsg(sock, m, len, flags);
+ rcu_read_unlock();
+
+ return ret;
+}
+
static __poll_t xsk_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait)
{
@@ -679,8 +712,11 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
sock_poll_wait(file, sock, wait);
- if (unlikely(!xsk_is_bound(xs)))
+ rcu_read_lock();
+ if (unlikely(!xsk_is_bound(xs))) {
+ rcu_read_unlock();
return mask;
+ }
pool = xs->pool;
@@ -689,7 +725,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
xsk_wakeup(xs, pool->cached_need_wakeup);
else
/* Poll needs to drive Tx also in copy mode */
- __xsk_sendmsg(sk);
+ xsk_xmit(sk);
}
if (xs->rx && !xskq_prod_is_empty(xs->rx))
@@ -697,6 +733,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
if (xs->tx && xsk_tx_writeable(xs))
mask |= EPOLLOUT | EPOLLWRNORM;
+ rcu_read_unlock();
return mask;
}
@@ -728,7 +765,6 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
/* Wait for driver to stop using the xdp socket. */
xp_del_xsk(xs->pool, xs);
- xs->dev = NULL;
synchronize_net();
dev_put(dev);
}
@@ -918,8 +954,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
goto out_unlock;
}
- err = xp_assign_dev_shared(xs->pool, umem_xs->umem,
- dev, qid);
+ err = xp_assign_dev_shared(xs->pool, umem_xs, dev,
+ qid);
if (err) {
xp_destroy(xs->pool);
xs->pool = NULL;
@@ -937,6 +973,19 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xp_get_pool(umem_xs->pool);
xs->pool = umem_xs->pool;
+
+ /* If underlying shared umem was created without Tx
+ * ring, allocate Tx descs array that Tx batching API
+ * utilizes
+ */
+ if (xs->tx && !xs->pool->tx_descs) {
+ err = xp_alloc_tx_descs(xs->pool, xs);
+ if (err) {
+ xp_put_pool(xs->pool);
+ sockfd_put(sock);
+ goto out_unlock;
+ }
+ }
}
xdp_get_umem(umem_xs->umem);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index fd39bb660ebc..ed6c71826d31 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -37,10 +37,21 @@ void xp_destroy(struct xsk_buff_pool *pool)
if (!pool)
return;
+ kvfree(pool->tx_descs);
kvfree(pool->heads);
kvfree(pool);
}
+int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs)
+{
+ pool->tx_descs = kvcalloc(xs->tx->nentries, sizeof(*pool->tx_descs),
+ GFP_KERNEL);
+ if (!pool->tx_descs)
+ return -ENOMEM;
+
+ return 0;
+}
+
struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
struct xdp_umem *umem)
{
@@ -58,6 +69,10 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
if (!pool->heads)
goto out;
+ if (xs->tx)
+ if (xp_alloc_tx_descs(pool, xs))
+ goto out;
+
pool->chunk_mask = ~((u64)umem->chunk_size - 1);
pool->addrs_cnt = umem->size;
pool->heads_cnt = umem->chunks;
@@ -197,17 +212,18 @@ err_unreg_pool:
return err;
}
-int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
+int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs,
struct net_device *dev, u16 queue_id)
{
u16 flags;
+ struct xdp_umem *umem = umem_xs->umem;
/* One fill and completion ring required for each queue id. */
if (!pool->fq || !pool->cq)
return -EINVAL;
flags = umem->zc ? XDP_ZEROCOPY : XDP_COPY;
- if (pool->uses_need_wakeup)
+ if (umem_xs->pool->uses_need_wakeup)
flags |= XDP_USE_NEED_WAKEUP;
return xp_assign_dev(pool, dev, queue_id, flags);
@@ -317,6 +333,7 @@ static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
for (i = 0; i < dma_map->dma_pages_cnt; i++) {
dma = &dma_map->dma_pages[i];
if (*dma) {
+ *dma &= ~XSK_NEXT_PG_CONTIG_MASK;
dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE,
DMA_BIDIRECTIONAL, attrs);
*dma = 0;
@@ -363,6 +380,16 @@ static void xp_check_dma_contiguity(struct xsk_dma_map *dma_map)
static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_map)
{
+ if (!pool->unaligned) {
+ u32 i;
+
+ for (i = 0; i < pool->heads_cnt; i++) {
+ struct xdp_buff_xsk *xskb = &pool->heads[i];
+
+ xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr);
+ }
+ }
+
pool->dma_pages = kvcalloc(dma_map->dma_pages_cnt, sizeof(*pool->dma_pages), GFP_KERNEL);
if (!pool->dma_pages)
return -ENOMEM;
@@ -412,12 +439,6 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
if (pool->unaligned)
xp_check_dma_contiguity(dma_map);
- else
- for (i = 0; i < pool->heads_cnt; i++) {
- struct xdp_buff_xsk *xskb = &pool->heads[i];
-
- xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr);
- }
err = xp_init_dma_info(pool, dma_map);
if (err) {
@@ -584,9 +605,13 @@ u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
u32 nb_entries1 = 0, nb_entries2;
if (unlikely(pool->dma_need_sync)) {
+ struct xdp_buff *buff;
+
/* Slow path */
- *xdp = xp_alloc(pool);
- return !!*xdp;
+ buff = xp_alloc(pool);
+ if (buff)
+ *xdp = buff;
+ return !!buff;
}
if (unlikely(pool->free_list_cnt)) {
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index e9aa2c236356..c6fb6b763658 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -205,11 +205,16 @@ static inline bool xskq_cons_read_desc(struct xsk_queue *q,
return false;
}
-static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q,
- struct xdp_desc *descs,
- struct xsk_buff_pool *pool, u32 max)
+static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt)
+{
+ q->cached_cons += cnt;
+}
+
+static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
+ u32 max)
{
u32 cached_cons = q->cached_cons, nb_entries = 0;
+ struct xdp_desc *descs = pool->tx_descs;
while (cached_cons != q->cached_prod && nb_entries < max) {
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
@@ -226,6 +231,8 @@ static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q,
cached_cons++;
}
+ /* Release valid plus any invalid entries */
+ xskq_cons_release_n(q, cached_cons - q->cached_cons);
return nb_entries;
}
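
With the release folded into the batch read, callers no longer need a separate peek/release pair around invalid descriptors. A sketch of how a batching caller consumes the Tx ring after this change, modelled loosely on xsk_tx_peek_release_desc_batch() (illustrative, not the verbatim function):

static u32 example_tx_batch(struct xdp_sock *xs, struct xsk_buff_pool *pool,
			    u32 max)
{
	u32 nb_pkts;

	nb_pkts = xskq_cons_nb_entries(xs->tx, max);	/* refresh cached view */
	nb_pkts = xskq_cons_read_desc_batch(xs->tx, pool, nb_pkts);

	/* Valid descriptors now sit in pool->tx_descs[0..nb_pkts-1] and
	 * cached_cons has already been advanced past valid and invalid
	 * entries alike, so no explicit release is needed here.
	 */
	return nb_pkts;
}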
@@ -263,7 +270,7 @@ static inline u32 xskq_cons_nb_entries(struct xsk_queue *q, u32 max)
static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt)
{
- return xskq_cons_nb_entries(q, cnt) >= cnt ? true : false;
+ return xskq_cons_nb_entries(q, cnt) >= cnt;
}
static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr)
@@ -282,14 +289,6 @@ static inline bool xskq_cons_peek_desc(struct xsk_queue *q,
return xskq_cons_read_desc(q, desc, pool);
}
-static inline u32 xskq_cons_peek_desc_batch(struct xsk_queue *q, struct xdp_desc *descs,
- struct xsk_buff_pool *pool, u32 max)
-{
- u32 entries = xskq_cons_nb_entries(q, max);
-
- return xskq_cons_read_desc_batch(q, descs, pool, entries);
-}
-
/* To improve performance in the xskq_cons_release functions, only update local state here.
* Reflect this to global state when we get new entries from the ring in
* xskq_cons_get_entries() and whenever Rx or Tx processing are completed in the NAPI loop.
@@ -299,18 +298,6 @@ static inline void xskq_cons_release(struct xsk_queue *q)
q->cached_cons++;
}
-static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt)
-{
- q->cached_cons += cnt;
-}
-
-static inline bool xskq_cons_is_full(struct xsk_queue *q)
-{
- /* No barriers needed since data is not accessed */
- return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer) ==
- q->nentries;
-}
-
static inline u32 xskq_cons_present_entries(struct xsk_queue *q)
{
/* No barriers needed since data is not accessed */
@@ -365,21 +352,17 @@ static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr)
return 0;
}
-static inline u32 xskq_prod_reserve_addr_batch(struct xsk_queue *q, struct xdp_desc *descs,
- u32 max)
+static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_desc *descs,
+ u32 nb_entries)
{
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
- u32 nb_entries, i, cached_prod;
-
- nb_entries = xskq_prod_nb_free(q, max);
+ u32 i, cached_prod;
/* A, matches D */
cached_prod = q->cached_prod;
for (i = 0; i < nb_entries; i++)
ring->desc[cached_prod++ & q->ring_mask] = descs[i].addr;
q->cached_prod = cached_prod;
-
- return nb_entries;
}
static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
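
xskq_prod_write_addr_batch() deliberately stops at the cached producer index: the slots were reserved up front by the backpressure check, and they only become visible to user space when the ring's producer index proper is published. A sketch of the pairing, assuming the existing xskq_prod_submit_n() helper (in the real flow the submit happens later, from xsk_tx_completed(), once the driver has sent the packets):

static void example_complete_tx(struct xsk_buff_pool *pool, u32 nb_pkts)
{
	/* Fill previously reserved CQ slots; local bookkeeping only. */
	xskq_prod_write_addr_batch(pool->cq, pool->tx_descs, nb_pkts);

	/* Publish: an smp_store_release() of the producer index makes
	 * the completions visible to user space.
	 */
	xskq_prod_submit_n(pool->cq, nb_pkts);
}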
@@ -389,7 +372,7 @@ static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
u32 idx;
if (xskq_prod_is_full(q))
- return -ENOSPC;
+ return -ENOBUFS;
/* A, matches D */
idx = q->cached_prod++ & q->ring_mask;
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 65b53fb3de13..acc8e52a4f5f 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -9,6 +9,7 @@
#include <net/xdp_sock.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/btf_ids.h>
#include "xsk.h"
@@ -254,7 +255,7 @@ static bool xsk_map_meta_equal(const struct bpf_map *meta0,
bpf_map_meta_equal(meta0, meta1);
}
-static int xsk_map_btf_id;
+BTF_ID_LIST_SINGLE(xsk_map_btf_ids, struct, xsk_map)
const struct bpf_map_ops xsk_map_ops = {
.map_meta_equal = xsk_map_meta_equal,
.map_alloc = xsk_map_alloc,
@@ -266,7 +267,6 @@ const struct bpf_map_ops xsk_map_ops = {
.map_update_elem = xsk_map_update_elem,
.map_delete_elem = xsk_map_delete_elem,
.map_check_btf = map_check_no_btf,
- .map_btf_name = "xsk_map",
- .map_btf_id = &xsk_map_btf_id,
+ .map_btf_id = &xsk_map_btf_ids[0],
.map_redirect = xsk_map_redirect,
};
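
This xskmap hunk is part of a tree-wide conversion from name-based BTF id lookup at run time (map_btf_name) to ids resolved at build time. Per include/linux/btf_ids.h, BTF_ID_LIST_SINGLE() roughly expands to a one-element u32 array placed in a dedicated section that the resolve_btfids build step patches with the real id; an approximate paraphrase, not the exact macro:

/* BTF_ID_LIST_SINGLE(xsk_map_btf_ids, struct, xsk_map) is roughly: */
u32 xsk_map_btf_ids[1];	/* value filled in by resolve_btfids at link time */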
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index 1f08ebf7d80c..29a540dcb5a7 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -91,7 +91,7 @@ static void espintcp_rcv(struct strparser *strp, struct sk_buff *skb)
}
/* remove header, leave non-ESP marker/SPI */
- if (!__pskb_pull(skb, rxm->offset + 2)) {
+ if (!pskb_pull(skb, rxm->offset + 2)) {
XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINERROR);
kfree_skb(skb);
return;
@@ -131,7 +131,7 @@ static int espintcp_parse(struct strparser *strp, struct sk_buff *skb)
}
static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct espintcp_ctx *ctx = espintcp_getctx(sk);
struct sk_buff *skb;
@@ -139,8 +139,6 @@ static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int copied;
int off = 0;
- flags |= nonblock ? MSG_DONTWAIT : 0;
-
skb = __skb_recv_datagram(sk, &ctx->ike_queue, flags, &off, &err);
if (!skb) {
if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN)
@@ -170,7 +168,7 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb)
{
struct espintcp_ctx *ctx = espintcp_getctx(sk);
- if (skb_queue_len(&ctx->out_queue) >= netdev_max_backlog)
+ if (skb_queue_len(&ctx->out_queue) >= READ_ONCE(netdev_max_backlog))
return -ENOBUFS;
__skb_queue_tail(&ctx->out_queue, skb);
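
netdev_max_backlog is a sysctl that can be rewritten at any moment with no lock held, so lockless readers like this one must annotate the load. A self-contained sketch of the reader side (the writer lives in the sysctl handler, not in this diff):

static int example_enqueue(struct sk_buff_head *q, struct sk_buff *skb)
{
	/* READ_ONCE() prevents load tearing/refetching of a value that a
	 * concurrent sysctl write may change, and documents the data race
	 * as intentional for KCSAN.
	 */
	if (skb_queue_len(q) >= READ_ONCE(netdev_max_backlog))
		return -ENOBUFS;

	__skb_queue_tail(q, skb);
	return 0;
}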
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 3fa066419d37..5f5aafd418af 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -117,7 +117,7 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features
sp = skb_sec_path(skb);
x = sp->xvec[sp->len - 1];
- if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND)
+ if (xo->flags & XFRM_GRO || x->xso.dir == XFRM_DEV_OFFLOAD_IN)
return skb;
/* This skb was already validated on the upper/virtual dev */
@@ -143,7 +143,7 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features
segs = skb_gso_segment(skb, esp_features);
if (IS_ERR(segs)) {
kfree_skb(skb);
- atomic_long_inc(&dev->tx_dropped);
+ dev_core_stats_tx_dropped_inc(dev);
return NULL;
} else {
consume_skb(skb);
@@ -207,21 +207,31 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features
EXPORT_SYMBOL_GPL(validate_xmit_xfrm);
int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
- struct xfrm_user_offload *xuo)
+ struct xfrm_user_offload *xuo,
+ struct netlink_ext_ack *extack)
{
int err;
struct dst_entry *dst;
struct net_device *dev;
- struct xfrm_state_offload *xso = &x->xso;
+ struct xfrm_dev_offload *xso = &x->xso;
xfrm_address_t *saddr;
xfrm_address_t *daddr;
- if (!x->type_offload)
+ if (!x->type_offload) {
+ NL_SET_ERR_MSG(extack, "Type doesn't support offload");
return -EINVAL;
+ }
/* We don't yet support UDP encapsulation and TFC padding. */
- if (x->encap || x->tfcpad)
+ if (x->encap || x->tfcpad) {
+ NL_SET_ERR_MSG(extack, "Encapsulation and TFC padding can't be offloaded");
return -EINVAL;
+ }
+
+ if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND)) {
+ NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request");
+ return -EINVAL;
+ }
dev = dev_get_by_index(net, xuo->ifindex);
if (!dev) {
@@ -253,6 +263,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
if (x->props.flags & XFRM_STATE_ESN &&
!dev->xfrmdev_ops->xdo_dev_state_advance_esn) {
+ NL_SET_ERR_MSG(extack, "Device doesn't support offload with ESN");
xso->dev = NULL;
dev_put(dev);
return -EINVAL;
@@ -261,19 +272,23 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
xso->dev = dev;
netdev_tracker_alloc(dev, &xso->dev_tracker, GFP_ATOMIC);
xso->real_dev = dev;
- xso->num_exthdrs = 1;
- xso->flags = xuo->flags;
+
+ if (xuo->flags & XFRM_OFFLOAD_INBOUND)
+ xso->dir = XFRM_DEV_OFFLOAD_IN;
+ else
+ xso->dir = XFRM_DEV_OFFLOAD_OUT;
err = dev->xfrmdev_ops->xdo_dev_state_add(x);
if (err) {
- xso->num_exthdrs = 0;
- xso->flags = 0;
xso->dev = NULL;
+ xso->dir = 0;
xso->real_dev = NULL;
- dev_put_track(dev, &xso->dev_tracker);
+ netdev_put(dev, &xso->dev_tracker);
- if (err != -EOPNOTSUPP)
+ if (err != -EOPNOTSUPP) {
+ NL_SET_ERR_MSG(extack, "Device failed to offload this state");
return err;
+ }
}
return 0;
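
This is the extack pattern being threaded through xfrm in this series: the errno still travels up the call chain, while NL_SET_ERR_MSG() records a human-readable reason on the netlink ack for user space (e.g. `ip xfrm`) to print. Paraphrased from include/linux/netlink.h for context, the macro is just a cheap pointer store that tolerates a NULL extack:

#define NL_SET_ERR_MSG(extack, msg) do {		\
	static const char __msg[] = msg;		\
	struct netlink_ext_ack *__extack = (extack);	\
							\
	if (__extack)					\
		__extack->_msg = __msg;			\
} while (0)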
@@ -380,16 +395,6 @@ static int xfrm_api_check(struct net_device *dev)
return NOTIFY_DONE;
}
-static int xfrm_dev_register(struct net_device *dev)
-{
- return xfrm_api_check(dev);
-}
-
-static int xfrm_dev_feat_change(struct net_device *dev)
-{
- return xfrm_api_check(dev);
-}
-
static int xfrm_dev_down(struct net_device *dev)
{
if (dev->features & NETIF_F_HW_ESP)
@@ -404,10 +409,10 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
switch (event) {
case NETDEV_REGISTER:
- return xfrm_dev_register(dev);
+ return xfrm_api_check(dev);
case NETDEV_FEAT_CHANGE:
- return xfrm_dev_feat_change(dev);
+ return xfrm_api_check(dev);
case NETDEV_DOWN:
case NETDEV_UNREGISTER:
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 144238a50f3d..97074f6f2bde 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -20,11 +20,13 @@
#include <net/xfrm.h>
#include <net/ip_tunnels.h>
#include <net/ip6_tunnel.h>
+#include <net/dst_metadata.h>
#include "xfrm_inout.h"
struct xfrm_trans_tasklet {
- struct tasklet_struct tasklet;
+ struct work_struct work;
+ spinlock_t queue_lock;
struct sk_buff_head queue;
};
@@ -669,7 +671,6 @@ resume:
x->curlft.bytes += skb->len;
x->curlft.packets++;
- x->curlft.use_time = ktime_get_real_seconds();
spin_unlock(&x->lock);
@@ -720,7 +721,8 @@ resume:
sp = skb_sec_path(skb);
if (sp)
sp->olen = 0;
- skb_dst_drop(skb);
+ if (skb_valid_dst(skb))
+ skb_dst_drop(skb);
gro_cells_receive(&gro_cells, skb);
return 0;
} else {
@@ -738,7 +740,8 @@ resume:
sp = skb_sec_path(skb);
if (sp)
sp->olen = 0;
- skb_dst_drop(skb);
+ if (skb_valid_dst(skb))
+ skb_dst_drop(skb);
gro_cells_receive(&gro_cells, skb);
return err;
}
@@ -761,18 +764,22 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
}
EXPORT_SYMBOL(xfrm_input_resume);
-static void xfrm_trans_reinject(struct tasklet_struct *t)
+static void xfrm_trans_reinject(struct work_struct *work)
{
- struct xfrm_trans_tasklet *trans = from_tasklet(trans, t, tasklet);
+ struct xfrm_trans_tasklet *trans = container_of(work, struct xfrm_trans_tasklet, work);
struct sk_buff_head queue;
struct sk_buff *skb;
__skb_queue_head_init(&queue);
+ spin_lock_bh(&trans->queue_lock);
skb_queue_splice_init(&trans->queue, &queue);
+ spin_unlock_bh(&trans->queue_lock);
+ local_bh_disable();
while ((skb = __skb_dequeue(&queue)))
XFRM_TRANS_SKB_CB(skb)->finish(XFRM_TRANS_SKB_CB(skb)->net,
NULL, skb);
+ local_bh_enable();
}
int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
@@ -783,15 +790,17 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
trans = this_cpu_ptr(&xfrm_trans_tasklet);
- if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
+ if (skb_queue_len(&trans->queue) >= READ_ONCE(netdev_max_backlog))
return -ENOBUFS;
BUILD_BUG_ON(sizeof(struct xfrm_trans_cb) > sizeof(skb->cb));
XFRM_TRANS_SKB_CB(skb)->finish = finish;
XFRM_TRANS_SKB_CB(skb)->net = net;
+ spin_lock_bh(&trans->queue_lock);
__skb_queue_tail(&trans->queue, skb);
- tasklet_schedule(&trans->tasklet);
+ spin_unlock_bh(&trans->queue_lock);
+ schedule_work(&trans->work);
return 0;
}
EXPORT_SYMBOL(xfrm_trans_queue_net);
@@ -818,7 +827,8 @@ void __init xfrm_input_init(void)
struct xfrm_trans_tasklet *trans;
trans = &per_cpu(xfrm_trans_tasklet, i);
+ spin_lock_init(&trans->queue_lock);
__skb_queue_head_init(&trans->queue);
- tasklet_setup(&trans->tasklet, xfrm_trans_reinject);
+ INIT_WORK(&trans->work, xfrm_trans_reinject);
}
}
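
The hunks above are a textbook tasklet-to-workqueue migration. Because the handler now runs in process context rather than softirq, producers (which may still run in softirq context) and the worker must share a spinlock around the queue, and the resubmit loop brackets itself with local_bh_disable() so the finish() callbacks keep running in the BH context they were written for. A generic, self-contained skeleton of the pattern under those assumptions:

struct deferred_rx {
	struct work_struct work;
	spinlock_t queue_lock;	/* queue is shared with softirq producers */
	struct sk_buff_head queue;
};

static void deferred_rx_work(struct work_struct *work)
{
	struct deferred_rx *d = container_of(work, struct deferred_rx, work);
	struct sk_buff_head q;
	struct sk_buff *skb;

	__skb_queue_head_init(&q);

	spin_lock_bh(&d->queue_lock);
	skb_queue_splice_init(&d->queue, &q);
	spin_unlock_bh(&d->queue_lock);

	local_bh_disable();	/* per-skb callbacks expect BH context */
	while ((skb = __skb_dequeue(&q)))
		kfree_skb(skb);	/* stand-in for the real per-skb handler */
	local_bh_enable();
}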
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 57448fc519fc..5a67b120c4db 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -41,6 +41,7 @@
#include <net/addrconf.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
+#include <net/dst_metadata.h>
#include <net/netns/generic.h>
#include <linux/etherdevice.h>
@@ -56,6 +57,89 @@ static const struct net_device_ops xfrmi_netdev_ops;
struct xfrmi_net {
/* lists for storing interfaces in use */
struct xfrm_if __rcu *xfrmi[XFRMI_HASH_SIZE];
+ struct xfrm_if __rcu *collect_md_xfrmi;
+};
+
+static const struct nla_policy xfrm_lwt_policy[LWT_XFRM_MAX + 1] = {
+ [LWT_XFRM_IF_ID] = NLA_POLICY_MIN(NLA_U32, 1),
+ [LWT_XFRM_LINK] = NLA_POLICY_MIN(NLA_U32, 1),
+};
+
+static void xfrmi_destroy_state(struct lwtunnel_state *lwt)
+{
+}
+
+static int xfrmi_build_state(struct net *net, struct nlattr *nla,
+ unsigned int family, const void *cfg,
+ struct lwtunnel_state **ts,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[LWT_XFRM_MAX + 1];
+ struct lwtunnel_state *new_state;
+ struct xfrm_md_info *info;
+ int ret;
+
+ ret = nla_parse_nested(tb, LWT_XFRM_MAX, nla, xfrm_lwt_policy, extack);
+ if (ret < 0)
+ return ret;
+
+ if (!tb[LWT_XFRM_IF_ID]) {
+ NL_SET_ERR_MSG(extack, "if_id must be set");
+ return -EINVAL;
+ }
+
+ new_state = lwtunnel_state_alloc(sizeof(*info));
+ if (!new_state) {
+ NL_SET_ERR_MSG(extack, "failed to create encap info");
+ return -ENOMEM;
+ }
+
+ new_state->type = LWTUNNEL_ENCAP_XFRM;
+
+ info = lwt_xfrm_info(new_state);
+
+ info->if_id = nla_get_u32(tb[LWT_XFRM_IF_ID]);
+
+ if (tb[LWT_XFRM_LINK])
+ info->link = nla_get_u32(tb[LWT_XFRM_LINK]);
+
+ *ts = new_state;
+ return 0;
+}
+
+static int xfrmi_fill_encap_info(struct sk_buff *skb,
+ struct lwtunnel_state *lwt)
+{
+ struct xfrm_md_info *info = lwt_xfrm_info(lwt);
+
+ if (nla_put_u32(skb, LWT_XFRM_IF_ID, info->if_id) ||
+ (info->link && nla_put_u32(skb, LWT_XFRM_LINK, info->link)))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int xfrmi_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+ return nla_total_size(sizeof(u32)) + /* LWT_XFRM_IF_ID */
+ nla_total_size(sizeof(u32)); /* LWT_XFRM_LINK */
+}
+
+static int xfrmi_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+ struct xfrm_md_info *a_info = lwt_xfrm_info(a);
+ struct xfrm_md_info *b_info = lwt_xfrm_info(b);
+
+ return memcmp(a_info, b_info, sizeof(*a_info));
+}
+
+static const struct lwtunnel_encap_ops xfrmi_encap_ops = {
+ .build_state = xfrmi_build_state,
+ .destroy_state = xfrmi_destroy_state,
+ .fill_encap = xfrmi_fill_encap_info,
+ .get_encap_size = xfrmi_encap_nlsize,
+ .cmp_encap = xfrmi_encap_cmp,
+ .owner = THIS_MODULE,
};
#define for_each_xfrmi_rcu(start, xi) \
@@ -77,17 +161,23 @@ static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x)
return xi;
}
+ xi = rcu_dereference(xfrmn->collect_md_xfrmi);
+ if (xi && (xi->dev->flags & IFF_UP))
+ return xi;
+
return NULL;
}
-static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb,
- unsigned short family)
+static bool xfrmi_decode_session(struct sk_buff *skb,
+ unsigned short family,
+ struct xfrm_if_decode_session_result *res)
{
struct net_device *dev;
+ struct xfrm_if *xi;
int ifindex = 0;
if (!secpath_exists(skb) || !skb->dev)
- return NULL;
+ return false;
switch (family) {
case AF_INET6:
@@ -107,11 +197,18 @@ static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb,
}
if (!dev || !(dev->flags & IFF_UP))
- return NULL;
+ return false;
if (dev->netdev_ops != &xfrmi_netdev_ops)
- return NULL;
+ return false;
+
+ xi = netdev_priv(dev);
+ res->net = xi->net;
- return netdev_priv(dev);
+ if (xi->p.collect_md)
+ res->if_id = xfrm_input_state(skb)->if_id;
+ else
+ res->if_id = xi->p.if_id;
+ return true;
}
static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
@@ -157,7 +254,10 @@ static int xfrmi_create(struct net_device *dev)
if (err < 0)
goto out;
- xfrmi_link(xfrmn, xi);
+ if (xi->p.collect_md)
+ rcu_assign_pointer(xfrmn->collect_md_xfrmi, xi);
+ else
+ xfrmi_link(xfrmn, xi);
return 0;
@@ -185,12 +285,15 @@ static void xfrmi_dev_uninit(struct net_device *dev)
struct xfrm_if *xi = netdev_priv(dev);
struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id);
- xfrmi_unlink(xfrmn, xi);
+ if (xi->p.collect_md)
+ RCU_INIT_POINTER(xfrmn->collect_md_xfrmi, NULL);
+ else
+ xfrmi_unlink(xfrmn, xi);
}
static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
{
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
skb->pkt_type = PACKET_HOST;
skb->skb_iif = 0;
skb->ignore_df = 0;
@@ -214,6 +317,7 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
struct xfrm_state *x;
struct xfrm_if *xi;
bool xnet;
+ int link;
if (err && !secpath_exists(skb))
return 0;
@@ -224,6 +328,7 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
if (!xi)
return 1;
+ link = skb->dev->ifindex;
dev = xi->dev;
skb->dev = dev;
@@ -254,6 +359,17 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
}
xfrmi_scrub_packet(skb, xnet);
+ if (xi->p.collect_md) {
+ struct metadata_dst *md_dst;
+
+ md_dst = metadata_dst_alloc(0, METADATA_XFRM, GFP_ATOMIC);
+ if (!md_dst)
+ return -ENOMEM;
+
+ md_dst->u.xfrm_info.if_id = x->if_id;
+ md_dst->u.xfrm_info.link = link;
+ skb_dst_set(skb, (struct dst_entry *)md_dst);
+ }
dev_sw_netstats_rx_add(dev, skb->len);
return 0;
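
Once the rx path has attached a METADATA_XFRM dst as above, downstream consumers can recover the if_id and underlay link from the skb. An illustrative reader, assuming the accessors from net/dst_metadata.h; it is not part of this patch:

static u32 example_xfrm_if_id(const struct sk_buff *skb)
{
	struct metadata_dst *md_dst = skb_metadata_dst(skb);

	/* Only meaningful when the dst really is XFRM metadata. */
	if (md_dst && md_dst->type == METADATA_XFRM)
		return md_dst->u.xfrm_info.if_id;

	return 0;	/* no collect_md metadata attached */
}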
@@ -269,10 +385,23 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
struct net_device *tdev;
struct xfrm_state *x;
int err = -1;
+ u32 if_id;
int mtu;
+ if (xi->p.collect_md) {
+ struct xfrm_md_info *md_info = skb_xfrm_md_info(skb);
+
+ if (unlikely(!md_info))
+ return -EINVAL;
+
+ if_id = md_info->if_id;
+ fl->flowi_oif = md_info->link;
+ } else {
+ if_id = xi->p.if_id;
+ }
+
dst_hold(dst);
- dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, xi->p.if_id);
+ dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, if_id);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
dst = NULL;
@@ -283,7 +412,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
if (!x)
goto tx_err_link_failure;
- if (x->if_id != xi->p.if_id)
+ if (x->if_id != if_id)
goto tx_err_link_failure;
tdev = dst->dev;
@@ -304,7 +433,10 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ if (skb->len > 1280)
+ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ else
+ goto xmit;
} else {
if (!(ip_hdr(skb)->frag_off & htons(IP_DF)))
goto xmit;
@@ -630,6 +762,9 @@ static void xfrmi_netlink_parms(struct nlattr *data[],
if (data[IFLA_XFRM_IF_ID])
parms->if_id = nla_get_u32(data[IFLA_XFRM_IF_ID]);
+
+ if (data[IFLA_XFRM_COLLECT_METADATA])
+ parms->collect_md = true;
}
static int xfrmi_newlink(struct net *src_net, struct net_device *dev,
@@ -642,14 +777,27 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev,
int err;
xfrmi_netlink_parms(data, &p);
- if (!p.if_id) {
- NL_SET_ERR_MSG(extack, "if_id must be non zero");
- return -EINVAL;
- }
+ if (p.collect_md) {
+ struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
- xi = xfrmi_locate(net, &p);
- if (xi)
- return -EEXIST;
+ if (p.link || p.if_id) {
+ NL_SET_ERR_MSG(extack, "link and if_id must be zero");
+ return -EINVAL;
+ }
+
+ if (rtnl_dereference(xfrmn->collect_md_xfrmi))
+ return -EEXIST;
+
+ } else {
+ if (!p.if_id) {
+ NL_SET_ERR_MSG(extack, "if_id must be non zero");
+ return -EINVAL;
+ }
+
+ xi = xfrmi_locate(net, &p);
+ if (xi)
+ return -EEXIST;
+ }
xi = netdev_priv(dev);
xi->p = p;
@@ -673,18 +821,28 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
struct net *net = xi->net;
struct xfrm_if_parms p = {};
+ xfrmi_netlink_parms(data, &p);
if (!p.if_id) {
NL_SET_ERR_MSG(extack, "if_id must be non zero");
return -EINVAL;
}
- xfrmi_netlink_parms(data, &p);
+ if (p.collect_md) {
+ NL_SET_ERR_MSG(extack, "collect_md can't be changed");
+ return -EINVAL;
+ }
+
xi = xfrmi_locate(net, &p);
if (!xi) {
xi = netdev_priv(dev);
} else {
if (xi->dev != dev)
return -EEXIST;
+ if (xi->p.collect_md) {
+ NL_SET_ERR_MSG(extack,
+ "device can't be changed to collect_md");
+ return -EINVAL;
+ }
}
return xfrmi_update(xi, &p);
@@ -697,6 +855,8 @@ static size_t xfrmi_get_size(const struct net_device *dev)
nla_total_size(4) +
/* IFLA_XFRM_IF_ID */
nla_total_size(4) +
+ /* IFLA_XFRM_COLLECT_METADATA */
+ nla_total_size(0) +
0;
}
@@ -706,7 +866,8 @@ static int xfrmi_fill_info(struct sk_buff *skb, const struct net_device *dev)
struct xfrm_if_parms *parm = &xi->p;
if (nla_put_u32(skb, IFLA_XFRM_LINK, parm->link) ||
- nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id))
+ nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id) ||
+ (xi->p.collect_md && nla_put_flag(skb, IFLA_XFRM_COLLECT_METADATA)))
goto nla_put_failure;
return 0;
@@ -722,8 +883,10 @@ static struct net *xfrmi_get_link_net(const struct net_device *dev)
}
static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {
- [IFLA_XFRM_LINK] = { .type = NLA_U32 },
- [IFLA_XFRM_IF_ID] = { .type = NLA_U32 },
+ [IFLA_XFRM_UNSPEC] = { .strict_start_type = IFLA_XFRM_COLLECT_METADATA },
+ [IFLA_XFRM_LINK] = { .type = NLA_U32 },
+ [IFLA_XFRM_IF_ID] = { .type = NLA_U32 },
+ [IFLA_XFRM_COLLECT_METADATA] = { .type = NLA_FLAG },
};
static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
@@ -759,6 +922,9 @@ static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list)
xip = &xi->next)
unregister_netdevice_queue(xi->dev, &list);
}
+ xi = rtnl_dereference(xfrmn->collect_md_xfrmi);
+ if (xi)
+ unregister_netdevice_queue(xi->dev, &list);
}
unregister_netdevice_many(&list);
rtnl_unlock();
@@ -996,6 +1162,8 @@ static int __init xfrmi_init(void)
if (err < 0)
goto rtnl_link_failed;
+ lwtunnel_encap_add_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM);
+
xfrm_if_register_cb(&xfrm_if_cb);
return err;
@@ -1014,6 +1182,7 @@ pernet_dev_failed:
static void __exit xfrmi_fini(void)
{
xfrm_if_unregister_cb();
+ lwtunnel_encap_del_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM);
rtnl_link_unregister(&xfrmi_link_ops);
xfrmi4_fini();
xfrmi6_fini();
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index cb40ff0ff28d..80143360bf09 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -203,6 +203,7 @@ static void ipcomp_free_scratches(void)
vfree(*per_cpu_ptr(scratches, i));
free_percpu(scratches);
+ ipcomp_scratches = NULL;
}
static void * __percpu *ipcomp_alloc_scratches(void)
@@ -325,18 +326,22 @@ void ipcomp_destroy(struct xfrm_state *x)
}
EXPORT_SYMBOL_GPL(ipcomp_destroy);
-int ipcomp_init_state(struct xfrm_state *x)
+int ipcomp_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack)
{
int err;
struct ipcomp_data *ipcd;
struct xfrm_algo_desc *calg_desc;
err = -EINVAL;
- if (!x->calg)
+ if (!x->calg) {
+ NL_SET_ERR_MSG(extack, "Missing required compression algorithm");
goto out;
+ }
- if (x->encap)
+ if (x->encap) {
+ NL_SET_ERR_MSG(extack, "IPComp is not compatible with encapsulation");
goto out;
+ }
err = -ENOMEM;
ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index d4935b3b9983..9a5e79a38c67 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -273,6 +273,7 @@ static int xfrm4_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
*/
static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
+ bool small_ipv6 = (skb->protocol == htons(ETH_P_IPV6)) && (skb->len <= IPV6_MIN_MTU);
struct dst_entry *dst = skb_dst(skb);
struct iphdr *top_iph;
int flags;
@@ -303,7 +304,7 @@ static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
if (flags & XFRM_STATE_NOECN)
IP_ECN_clear(top_iph);
- top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
+ top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) || small_ipv6 ?
0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst));
@@ -533,7 +534,6 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
x->curlft.bytes += skb->len;
x->curlft.packets++;
- x->curlft.use_time = ktime_get_real_seconds();
spin_unlock_bh(&x->lock);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index dccb8f3318ef..e392d8d05e0c 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -31,6 +31,7 @@
#include <linux/if_tunnel.h>
#include <net/dst.h>
#include <net/flow.h>
+#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/gre.h>
@@ -1888,7 +1889,7 @@ EXPORT_SYMBOL(xfrm_policy_walk_done);
*/
static int xfrm_policy_match(const struct xfrm_policy *pol,
const struct flowi *fl,
- u8 type, u16 family, int dir, u32 if_id)
+ u8 type, u16 family, u32 if_id)
{
const struct xfrm_selector *sel = &pol->selector;
int ret = -ESRCH;
@@ -2013,7 +2014,7 @@ static struct xfrm_policy *
__xfrm_policy_eval_candidates(struct hlist_head *chain,
struct xfrm_policy *prefer,
const struct flowi *fl,
- u8 type, u16 family, int dir, u32 if_id)
+ u8 type, u16 family, u32 if_id)
{
u32 priority = prefer ? prefer->priority : ~0u;
struct xfrm_policy *pol;
@@ -2027,7 +2028,7 @@ __xfrm_policy_eval_candidates(struct hlist_head *chain,
if (pol->priority > priority)
break;
- err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
+ err = xfrm_policy_match(pol, fl, type, family, if_id);
if (err) {
if (err != -ESRCH)
return ERR_PTR(err);
@@ -2052,7 +2053,7 @@ static struct xfrm_policy *
xfrm_policy_eval_candidates(struct xfrm_pol_inexact_candidates *cand,
struct xfrm_policy *prefer,
const struct flowi *fl,
- u8 type, u16 family, int dir, u32 if_id)
+ u8 type, u16 family, u32 if_id)
{
struct xfrm_policy *tmp;
int i;
@@ -2060,8 +2061,7 @@ xfrm_policy_eval_candidates(struct xfrm_pol_inexact_candidates *cand,
for (i = 0; i < ARRAY_SIZE(cand->res); i++) {
tmp = __xfrm_policy_eval_candidates(cand->res[i],
prefer,
- fl, type, family, dir,
- if_id);
+ fl, type, family, if_id);
if (!tmp)
continue;
@@ -2100,7 +2100,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
ret = NULL;
hlist_for_each_entry_rcu(pol, chain, bydst) {
- err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
+ err = xfrm_policy_match(pol, fl, type, family, if_id);
if (err) {
if (err == -ESRCH)
continue;
@@ -2119,7 +2119,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
goto skip_inexact;
pol = xfrm_policy_eval_candidates(&cand, ret, fl, type,
- family, dir, if_id);
+ family, if_id);
if (pol) {
ret = pol;
if (IS_ERR(pol))
@@ -2592,12 +2592,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
__u32 mark = 0;
+ int oif;
if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m)
mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
family = xfrm[i]->props.family;
- dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
+ oif = fl->flowi_oif ? : fl->flowi_l3mdev;
+ dst = xfrm_dst_lookup(xfrm[i], tos, oif,
&saddr, &daddr, family, mark);
err = PTR_ERR(dst);
if (IS_ERR(dst))
@@ -2675,8 +2677,10 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
*num_xfrms = 0;
return 0;
}
- if (IS_ERR(pols[0]))
+ if (IS_ERR(pols[0])) {
+ *num_pols = 0;
return PTR_ERR(pols[0]);
+ }
*num_xfrms = pols[0]->xfrm_nr;
@@ -2691,6 +2695,7 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
if (pols[1]) {
if (IS_ERR(pols[1])) {
xfrm_pols_put(pols, *num_pols);
+ *num_pols = 0;
return PTR_ERR(pols[1]);
}
(*num_pols)++;
@@ -3156,8 +3161,8 @@ ok:
return dst;
nopol:
- if (!(dst_orig->dev->flags & IFF_LOOPBACK) &&
- !xfrm_default_allow(net, dir)) {
+ if ((!dst_orig->dev || !(dst_orig->dev->flags & IFF_LOOPBACK)) &&
+ net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
err = -EPERM;
goto error;
}
@@ -3295,7 +3300,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse)
fl4->flowi4_proto = iph->protocol;
fl4->daddr = reverse ? iph->saddr : iph->daddr;
fl4->saddr = reverse ? iph->daddr : iph->saddr;
- fl4->flowi4_tos = iph->tos;
+ fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK;
if (!ip_is_fragment(iph)) {
switch (iph->protocol) {
@@ -3510,17 +3515,17 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
int xerr_idx = -1;
const struct xfrm_if_cb *ifcb;
struct sec_path *sp;
- struct xfrm_if *xi;
u32 if_id = 0;
rcu_read_lock();
ifcb = xfrm_if_get_cb();
if (ifcb) {
- xi = ifcb->decode_session(skb, family);
- if (xi) {
- if_id = xi->p.if_id;
- net = xi->net;
+ struct xfrm_if_decode_session_result r;
+
+ if (ifcb->decode_session(skb, family, &r)) {
+ if_id = r.if_id;
+ net = r.net;
}
}
rcu_read_unlock();
@@ -3568,7 +3573,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
}
if (!pol) {
- if (!xfrm_default_allow(net, dir)) {
+ if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
return 0;
}
@@ -3593,6 +3598,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
if (pols[1]) {
if (IS_ERR(pols[1])) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
+ xfrm_pol_put(pols[0]);
return 0;
}
pols[1]->curlft.use_time = ktime_get_real_seconds();
@@ -3628,7 +3634,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
}
xfrm_nr = ti;
- if (!xfrm_default_allow(net, dir) && !xfrm_nr) {
+ if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK &&
+ !xfrm_nr) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
goto reject;
}
@@ -3740,7 +3747,7 @@ static int stale_bundle(struct dst_entry *dst)
void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
while ((dst = xfrm_dst_child(dst)) && dst->xfrm && dst->dev == dev) {
- dst->dev = dev_net(dev)->loopback_dev;
+ dst->dev = blackhole_netdev;
dev_hold(dst->dev);
dev_put(dev);
}
@@ -4117,6 +4124,9 @@ static int __net_init xfrm_net_init(struct net *net)
spin_lock_init(&net->xfrm.xfrm_policy_lock);
seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock);
mutex_init(&net->xfrm.xfrm_cfg_mutex);
+ net->xfrm.policy_default[XFRM_POLICY_IN] = XFRM_USERPOLICY_ACCEPT;
+ net->xfrm.policy_default[XFRM_POLICY_FWD] = XFRM_USERPOLICY_ACCEPT;
+ net->xfrm.policy_default[XFRM_POLICY_OUT] = XFRM_USERPOLICY_ACCEPT;
rv = xfrm_statistics_init(net);
if (rv < 0)
@@ -4255,7 +4265,7 @@ static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
}
static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
- u8 dir, u8 type, struct net *net)
+ u8 dir, u8 type, struct net *net, u32 if_id)
{
struct xfrm_policy *pol, *ret = NULL;
struct hlist_head *chain;
@@ -4264,7 +4274,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
hlist_for_each_entry(pol, chain, bydst) {
- if (xfrm_migrate_selector_match(sel, &pol->selector) &&
+ if ((if_id == 0 || pol->if_id == if_id) &&
+ xfrm_migrate_selector_match(sel, &pol->selector) &&
pol->type == type) {
ret = pol;
priority = ret->priority;
@@ -4276,7 +4287,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
if ((pol->priority >= priority) && ret)
break;
- if (xfrm_migrate_selector_match(sel, &pol->selector) &&
+ if ((if_id == 0 || pol->if_id == if_id) &&
+ xfrm_migrate_selector_match(sel, &pol->selector) &&
pol->type == type) {
ret = pol;
break;
@@ -4392,7 +4404,7 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
struct xfrm_migrate *m, int num_migrate,
struct xfrm_kmaddress *k, struct net *net,
- struct xfrm_encap_tmpl *encap)
+ struct xfrm_encap_tmpl *encap, u32 if_id)
{
int i, err, nx_cur = 0, nx_new = 0;
struct xfrm_policy *pol = NULL;
@@ -4411,14 +4423,14 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
}
/* Stage 1 - find policy */
- if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) {
+ if ((pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id)) == NULL) {
err = -ENOENT;
goto out;
}
/* Stage 2 - find and update state(s) */
for (i = 0, mp = m; i < num_migrate; i++, mp++) {
- if ((x = xfrm_migrate_state_find(mp, net))) {
+ if ((x = xfrm_migrate_state_find(mp, net, if_id))) {
x_cur[nx_cur] = x;
nx_cur++;
xc = xfrm_state_migrate(x, mp, encap);
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 9277d81b344c..9f4d42eb090f 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -766,18 +766,22 @@ int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
}
#endif
-int xfrm_init_replay(struct xfrm_state *x)
+int xfrm_init_replay(struct xfrm_state *x, struct netlink_ext_ack *extack)
{
struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
if (replay_esn) {
if (replay_esn->replay_window >
- replay_esn->bmp_len * sizeof(__u32) * 8)
+ replay_esn->bmp_len * sizeof(__u32) * 8) {
+ NL_SET_ERR_MSG(extack, "ESN replay window is too large for the chosen bitmap size");
return -EINVAL;
+ }
if (x->props.flags & XFRM_STATE_ESN) {
- if (replay_esn->replay_window == 0)
+ if (replay_esn->replay_window == 0) {
+ NL_SET_ERR_MSG(extack, "ESN replay window must be > 0");
return -EINVAL;
+ }
x->repl_mode = XFRM_REPLAY_MODE_ESN;
} else {
x->repl_mode = XFRM_REPLAY_MODE_BMP;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index ca6bee18346d..3d2fe7712ac5 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -751,7 +751,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
for (i = 0; i <= net->xfrm.state_hmask; i++) {
struct xfrm_state *x;
- struct xfrm_state_offload *xso;
+ struct xfrm_dev_offload *xso;
hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
xso = &x->xso;
@@ -835,7 +835,7 @@ int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid)
err = -ESRCH;
for (i = 0; i <= net->xfrm.state_hmask; i++) {
struct xfrm_state *x;
- struct xfrm_state_offload *xso;
+ struct xfrm_dev_offload *xso;
restart:
hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
xso = &x->xso;
@@ -1579,9 +1579,6 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
memcpy(&x->mark, &orig->mark, sizeof(x->mark));
memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark));
- if (xfrm_init_state(x) < 0)
- goto error;
-
x->props.flags = orig->props.flags;
x->props.extra_flags = orig->props.extra_flags;
@@ -1595,6 +1592,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
x->replay = orig->replay;
x->preplay = orig->preplay;
x->mapping_maxage = orig->mapping_maxage;
+ x->lastused = orig->lastused;
x->new_mapping = 0;
x->new_mapping_sport = 0;
@@ -1606,7 +1604,8 @@ out:
return NULL;
}
-struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net)
+struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net,
+ u32 if_id)
{
unsigned int h;
struct xfrm_state *x = NULL;
@@ -1622,6 +1621,8 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n
continue;
if (m->reqid && x->props.reqid != m->reqid)
continue;
+ if (if_id != 0 && x->if_id != if_id)
+ continue;
if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
m->old_family) ||
!xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
@@ -1637,6 +1638,8 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n
if (x->props.mode != m->mode ||
x->id.proto != m->proto)
continue;
+ if (if_id != 0 && x->if_id != if_id)
+ continue;
if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
m->old_family) ||
!xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
@@ -1663,6 +1666,11 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
if (!xc)
return NULL;
+ xc->props.family = m->new_family;
+
+ if (xfrm_init_state(xc) < 0)
+ goto error;
+
memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
@@ -2064,7 +2072,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
} else {
u32 spi = 0;
for (h = 0; h < high-low+1; h++) {
- spi = low + prandom_u32()%(high-low+1);
+ spi = low + prandom_u32_max(high - low + 1);
x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);
if (x0 == NULL) {
newspi = htonl(spi);
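
prandom_u32_max() replaces the open-coded "low + prandom_u32() % range" mainly to avoid a division on this path; like the modulo, its result is close to uniform over [0, ep_ro). Paraphrased from include/linux/prandom.h for context:

static inline u32 prandom_u32_max(u32 ep_ro)
{
	/* Multiply-and-shift maps a 32-bit random value into [0, ep_ro). */
	return (u32)(((u64)prandom_u32() * ep_ro) >> 32);
}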
@@ -2474,22 +2482,20 @@ EXPORT_SYMBOL(xfrm_user_policy);
static DEFINE_SPINLOCK(xfrm_km_lock);
-int xfrm_register_km(struct xfrm_mgr *km)
+void xfrm_register_km(struct xfrm_mgr *km)
{
spin_lock_bh(&xfrm_km_lock);
list_add_tail_rcu(&km->list, &xfrm_km_list);
spin_unlock_bh(&xfrm_km_lock);
- return 0;
}
EXPORT_SYMBOL(xfrm_register_km);
-int xfrm_unregister_km(struct xfrm_mgr *km)
+void xfrm_unregister_km(struct xfrm_mgr *km)
{
spin_lock_bh(&xfrm_km_lock);
list_del_rcu(&km->list);
spin_unlock_bh(&xfrm_km_lock);
synchronize_rcu();
- return 0;
}
EXPORT_SYMBOL(xfrm_unregister_km);
@@ -2572,7 +2578,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x)
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
-u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu)
+u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
const struct xfrm_type *type = READ_ONCE(x->type);
struct crypto_aead *aead;
@@ -2603,19 +2609,10 @@ u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu)
return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
net_adj) & ~(blksize - 1)) + net_adj - 2;
}
-EXPORT_SYMBOL_GPL(__xfrm_state_mtu);
+EXPORT_SYMBOL_GPL(xfrm_state_mtu);
-u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
-{
- mtu = __xfrm_state_mtu(x, mtu);
-
- if (x->props.family == AF_INET6 && mtu < IPV6_MIN_MTU)
- return IPV6_MIN_MTU;
-
- return mtu;
-}
-
-int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
+int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload,
+ struct netlink_ext_ack *extack)
{
const struct xfrm_mode *inner_mode;
const struct xfrm_mode *outer_mode;
@@ -2623,19 +2620,23 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
int err;
if (family == AF_INET &&
- xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)
+ READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc))
x->props.flags |= XFRM_STATE_NOPMTUDISC;
err = -EPROTONOSUPPORT;
if (x->sel.family != AF_UNSPEC) {
inner_mode = xfrm_get_mode(x->props.mode, x->sel.family);
- if (inner_mode == NULL)
+ if (inner_mode == NULL) {
+ NL_SET_ERR_MSG(extack, "Requested mode not found");
goto error;
+ }
if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
- family != x->sel.family)
+ family != x->sel.family) {
+ NL_SET_ERR_MSG(extack, "Only tunnel modes can accommodate a change of family");
goto error;
+ }
x->inner_mode = *inner_mode;
} else {
@@ -2643,11 +2644,15 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
int iafamily = AF_INET;
inner_mode = xfrm_get_mode(x->props.mode, x->props.family);
- if (inner_mode == NULL)
+ if (inner_mode == NULL) {
+ NL_SET_ERR_MSG(extack, "Requested mode not found");
goto error;
+ }
- if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL))
+ if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) {
+ NL_SET_ERR_MSG(extack, "Only tunnel modes can accommodate an AF_UNSPEC selector");
goto error;
+ }
x->inner_mode = *inner_mode;
@@ -2662,24 +2667,27 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
}
x->type = xfrm_get_type(x->id.proto, family);
- if (x->type == NULL)
+ if (x->type == NULL) {
+ NL_SET_ERR_MSG(extack, "Requested type not found");
goto error;
+ }
x->type_offload = xfrm_get_type_offload(x->id.proto, family, offload);
- err = x->type->init_state(x);
+ err = x->type->init_state(x, extack);
if (err)
goto error;
outer_mode = xfrm_get_mode(x->props.mode, family);
if (!outer_mode) {
+ NL_SET_ERR_MSG(extack, "Requested mode not found");
err = -EPROTONOSUPPORT;
goto error;
}
x->outer_mode = *outer_mode;
if (init_replay) {
- err = xfrm_init_replay(x);
+ err = xfrm_init_replay(x, extack);
if (err)
goto error;
}
@@ -2694,7 +2702,7 @@ int xfrm_init_state(struct xfrm_state *x)
{
int err;
- err = __xfrm_init_state(x, true, false);
+ err = __xfrm_init_state(x, true, false, NULL);
if (!err)
x->km.state = XFRM_STATE_VALID;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 8cd6c8129004..e73f9efc54c1 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -35,7 +35,8 @@
#endif
#include <asm/unaligned.h>
-static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type)
+static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type,
+ struct netlink_ext_ack *extack)
{
struct nlattr *rt = attrs[type];
struct xfrm_algo *algp;
@@ -44,8 +45,10 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type)
return 0;
algp = nla_data(rt);
- if (nla_len(rt) < (int)xfrm_alg_len(algp))
+ if (nla_len(rt) < (int)xfrm_alg_len(algp)) {
+ NL_SET_ERR_MSG(extack, "Invalid AUTH/CRYPT/COMP attribute length");
return -EINVAL;
+ }
switch (type) {
case XFRMA_ALG_AUTH:
@@ -54,6 +57,7 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type)
break;
default:
+ NL_SET_ERR_MSG(extack, "Invalid algorithm attribute type");
return -EINVAL;
}
@@ -61,7 +65,8 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type)
return 0;
}
-static int verify_auth_trunc(struct nlattr **attrs)
+static int verify_auth_trunc(struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
{
struct nlattr *rt = attrs[XFRMA_ALG_AUTH_TRUNC];
struct xfrm_algo_auth *algp;
@@ -70,14 +75,16 @@ static int verify_auth_trunc(struct nlattr **attrs)
return 0;
algp = nla_data(rt);
- if (nla_len(rt) < (int)xfrm_alg_auth_len(algp))
+ if (nla_len(rt) < (int)xfrm_alg_auth_len(algp)) {
+ NL_SET_ERR_MSG(extack, "Invalid AUTH_TRUNC attribute length");
return -EINVAL;
+ }
algp->alg_name[sizeof(algp->alg_name) - 1] = '\0';
return 0;
}
-static int verify_aead(struct nlattr **attrs)
+static int verify_aead(struct nlattr **attrs, struct netlink_ext_ack *extack)
{
struct nlattr *rt = attrs[XFRMA_ALG_AEAD];
struct xfrm_algo_aead *algp;
@@ -86,8 +93,10 @@ static int verify_aead(struct nlattr **attrs)
return 0;
algp = nla_data(rt);
- if (nla_len(rt) < (int)aead_len(algp))
+ if (nla_len(rt) < (int)aead_len(algp)) {
+ NL_SET_ERR_MSG(extack, "Invalid AEAD attribute length");
return -EINVAL;
+ }
algp->alg_name[sizeof(algp->alg_name) - 1] = '\0';
return 0;
@@ -102,7 +111,7 @@ static void verify_one_addr(struct nlattr **attrs, enum xfrm_attr_type_t type,
*addrp = nla_data(rt);
}
-static inline int verify_sec_ctx_len(struct nlattr **attrs)
+static inline int verify_sec_ctx_len(struct nlattr **attrs, struct netlink_ext_ack *extack)
{
struct nlattr *rt = attrs[XFRMA_SEC_CTX];
struct xfrm_user_sec_ctx *uctx;
@@ -112,42 +121,59 @@ static inline int verify_sec_ctx_len(struct nlattr **attrs)
uctx = nla_data(rt);
if (uctx->len > nla_len(rt) ||
- uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len))
+ uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len)) {
+ NL_SET_ERR_MSG(extack, "Invalid security context length");
return -EINVAL;
+ }
return 0;
}
static inline int verify_replay(struct xfrm_usersa_info *p,
- struct nlattr **attrs)
+ struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
{
struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL];
struct xfrm_replay_state_esn *rs;
- if (!rt)
- return (p->flags & XFRM_STATE_ESN) ? -EINVAL : 0;
+ if (!rt) {
+ if (p->flags & XFRM_STATE_ESN) {
+ NL_SET_ERR_MSG(extack, "Missing required attribute for ESN");
+ return -EINVAL;
+ }
+ return 0;
+ }
rs = nla_data(rt);
- if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8)
+ if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) {
+ NL_SET_ERR_MSG(extack, "ESN bitmap length must be <= 128");
return -EINVAL;
+ }
if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) &&
- nla_len(rt) != sizeof(*rs))
+ nla_len(rt) != sizeof(*rs)) {
+ NL_SET_ERR_MSG(extack, "ESN attribute is too short to fit the full bitmap length");
return -EINVAL;
+ }
/* As only ESP and AH support ESN feature. */
- if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH))
+ if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH)) {
+ NL_SET_ERR_MSG(extack, "ESN only supported for ESP and AH");
return -EINVAL;
+ }
- if (p->replay_window != 0)
+ if (p->replay_window != 0) {
+ NL_SET_ERR_MSG(extack, "ESN not compatible with legacy replay_window");
return -EINVAL;
+ }
return 0;
}
static int verify_newsa_info(struct xfrm_usersa_info *p,
- struct nlattr **attrs)
+ struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
{
int err;
@@ -161,10 +187,12 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
break;
#else
err = -EAFNOSUPPORT;
+ NL_SET_ERR_MSG(extack, "IPv6 support disabled");
goto out;
#endif
default:
+ NL_SET_ERR_MSG(extack, "Invalid address family");
goto out;
}
@@ -173,65 +201,98 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
break;
case AF_INET:
- if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32)
+ if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix length in selector (must be <= 32 for IPv4)");
goto out;
+ }
break;
case AF_INET6:
#if IS_ENABLED(CONFIG_IPV6)
- if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128)
+ if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix length in selector (must be <= 128 for IPv6)");
goto out;
+ }
break;
#else
+ NL_SET_ERR_MSG(extack, "IPv6 support disabled");
err = -EAFNOSUPPORT;
goto out;
#endif
default:
+ NL_SET_ERR_MSG(extack, "Invalid address family in selector");
goto out;
}
err = -EINVAL;
switch (p->id.proto) {
case IPPROTO_AH:
- if ((!attrs[XFRMA_ALG_AUTH] &&
- !attrs[XFRMA_ALG_AUTH_TRUNC]) ||
- attrs[XFRMA_ALG_AEAD] ||
+ if (!attrs[XFRMA_ALG_AUTH] &&
+ !attrs[XFRMA_ALG_AUTH_TRUNC]) {
+ NL_SET_ERR_MSG(extack, "Missing required attribute for AH: AUTH_TRUNC or AUTH");
+ goto out;
+ }
+
+ if (attrs[XFRMA_ALG_AEAD] ||
attrs[XFRMA_ALG_CRYPT] ||
attrs[XFRMA_ALG_COMP] ||
- attrs[XFRMA_TFCPAD])
+ attrs[XFRMA_TFCPAD]) {
+ NL_SET_ERR_MSG(extack, "Invalid attributes for AH: AEAD, CRYPT, COMP, TFCPAD");
goto out;
+ }
break;
case IPPROTO_ESP:
- if (attrs[XFRMA_ALG_COMP])
+ if (attrs[XFRMA_ALG_COMP]) {
+ NL_SET_ERR_MSG(extack, "Invalid attribute for ESP: COMP");
goto out;
+ }
+
if (!attrs[XFRMA_ALG_AUTH] &&
!attrs[XFRMA_ALG_AUTH_TRUNC] &&
!attrs[XFRMA_ALG_CRYPT] &&
- !attrs[XFRMA_ALG_AEAD])
+ !attrs[XFRMA_ALG_AEAD]) {
+ NL_SET_ERR_MSG(extack, "Missing required attribute for ESP: at least one of AUTH, AUTH_TRUNC, CRYPT, AEAD");
goto out;
+ }
+
if ((attrs[XFRMA_ALG_AUTH] ||
attrs[XFRMA_ALG_AUTH_TRUNC] ||
attrs[XFRMA_ALG_CRYPT]) &&
- attrs[XFRMA_ALG_AEAD])
+ attrs[XFRMA_ALG_AEAD]) {
+ NL_SET_ERR_MSG(extack, "Invalid attribute combination for ESP: AEAD can't be used with AUTH, AUTH_TRUNC, CRYPT");
goto out;
+ }
+
if (attrs[XFRMA_TFCPAD] &&
- p->mode != XFRM_MODE_TUNNEL)
+ p->mode != XFRM_MODE_TUNNEL) {
+ NL_SET_ERR_MSG(extack, "TFC padding can only be used in tunnel mode");
goto out;
+ }
break;
case IPPROTO_COMP:
- if (!attrs[XFRMA_ALG_COMP] ||
- attrs[XFRMA_ALG_AEAD] ||
+ if (!attrs[XFRMA_ALG_COMP]) {
+ NL_SET_ERR_MSG(extack, "Missing required attribute for COMP: COMP");
+ goto out;
+ }
+
+ if (attrs[XFRMA_ALG_AEAD] ||
attrs[XFRMA_ALG_AUTH] ||
attrs[XFRMA_ALG_AUTH_TRUNC] ||
attrs[XFRMA_ALG_CRYPT] ||
- attrs[XFRMA_TFCPAD] ||
- (ntohl(p->id.spi) >= 0x10000))
+ attrs[XFRMA_TFCPAD]) {
+ NL_SET_ERR_MSG(extack, "Invalid attributes for COMP: AEAD, AUTH, AUTH_TRUNC, CRYPT, TFCPAD");
+ goto out;
+ }
+
+ if (ntohl(p->id.spi) >= 0x10000) {
+ NL_SET_ERR_MSG(extack, "SPI is too large for COMP (must be < 0x10000)");
goto out;
+ }
break;
#if IS_ENABLED(CONFIG_IPV6)
@@ -244,29 +305,36 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
attrs[XFRMA_ALG_CRYPT] ||
attrs[XFRMA_ENCAP] ||
attrs[XFRMA_SEC_CTX] ||
- attrs[XFRMA_TFCPAD] ||
- !attrs[XFRMA_COADDR])
+ attrs[XFRMA_TFCPAD]) {
+ NL_SET_ERR_MSG(extack, "Invalid attributes for DSTOPTS/ROUTING");
goto out;
+ }
+
+ if (!attrs[XFRMA_COADDR]) {
+ NL_SET_ERR_MSG(extack, "Missing required COADDR attribute for DSTOPTS/ROUTING");
+ goto out;
+ }
break;
#endif
default:
+ NL_SET_ERR_MSG(extack, "Unsupported protocol");
goto out;
}
- if ((err = verify_aead(attrs)))
+ if ((err = verify_aead(attrs, extack)))
goto out;
- if ((err = verify_auth_trunc(attrs)))
+ if ((err = verify_auth_trunc(attrs, extack)))
goto out;
- if ((err = verify_one_alg(attrs, XFRMA_ALG_AUTH)))
+ if ((err = verify_one_alg(attrs, XFRMA_ALG_AUTH, extack)))
goto out;
- if ((err = verify_one_alg(attrs, XFRMA_ALG_CRYPT)))
+ if ((err = verify_one_alg(attrs, XFRMA_ALG_CRYPT, extack)))
goto out;
- if ((err = verify_one_alg(attrs, XFRMA_ALG_COMP)))
+ if ((err = verify_one_alg(attrs, XFRMA_ALG_COMP, extack)))
goto out;
- if ((err = verify_sec_ctx_len(attrs)))
+ if ((err = verify_sec_ctx_len(attrs, extack)))
goto out;
- if ((err = verify_replay(p, attrs)))
+ if ((err = verify_replay(p, attrs, extack)))
goto out;
err = -EINVAL;
@@ -278,14 +346,19 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
break;
default:
+ NL_SET_ERR_MSG(extack, "Unsupported mode");
goto out;
}
err = 0;
- if (attrs[XFRMA_MTIMER_THRESH])
- if (!attrs[XFRMA_ENCAP])
+ if (attrs[XFRMA_MTIMER_THRESH]) {
+ if (!attrs[XFRMA_ENCAP]) {
+ NL_SET_ERR_MSG(extack, "MTIMER_THRESH attribute can only be set on ENCAP states");
err = -EINVAL;
+ goto out;
+ }
+ }
out:
return err;
@@ -293,7 +366,7 @@ out:
static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
struct xfrm_algo_desc *(*get_byname)(const char *, int),
- struct nlattr *rta)
+ struct nlattr *rta, struct netlink_ext_ack *extack)
{
struct xfrm_algo *p, *ualg;
struct xfrm_algo_desc *algo;
@@ -304,8 +377,10 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
ualg = nla_data(rta);
algo = get_byname(ualg->alg_name, 1);
- if (!algo)
+ if (!algo) {
+ NL_SET_ERR_MSG(extack, "Requested COMP algorithm not found");
return -ENOSYS;
+ }
*props = algo->desc.sadb_alg_id;
p = kmemdup(ualg, xfrm_alg_len(ualg), GFP_KERNEL);
@@ -317,7 +392,8 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
return 0;
}
-static int attach_crypt(struct xfrm_state *x, struct nlattr *rta)
+static int attach_crypt(struct xfrm_state *x, struct nlattr *rta,
+ struct netlink_ext_ack *extack)
{
struct xfrm_algo *p, *ualg;
struct xfrm_algo_desc *algo;
@@ -328,8 +404,10 @@ static int attach_crypt(struct xfrm_state *x, struct nlattr *rta)
ualg = nla_data(rta);
algo = xfrm_ealg_get_byname(ualg->alg_name, 1);
- if (!algo)
+ if (!algo) {
+ NL_SET_ERR_MSG(extack, "Requested CRYPT algorithm not found");
return -ENOSYS;
+ }
x->props.ealgo = algo->desc.sadb_alg_id;
p = kmemdup(ualg, xfrm_alg_len(ualg), GFP_KERNEL);
@@ -343,7 +421,7 @@ static int attach_crypt(struct xfrm_state *x, struct nlattr *rta)
}
static int attach_auth(struct xfrm_algo_auth **algpp, u8 *props,
- struct nlattr *rta)
+ struct nlattr *rta, struct netlink_ext_ack *extack)
{
struct xfrm_algo *ualg;
struct xfrm_algo_auth *p;
@@ -355,8 +433,10 @@ static int attach_auth(struct xfrm_algo_auth **algpp, u8 *props,
ualg = nla_data(rta);
algo = xfrm_aalg_get_byname(ualg->alg_name, 1);
- if (!algo)
+ if (!algo) {
+ NL_SET_ERR_MSG(extack, "Requested AUTH algorithm not found");
return -ENOSYS;
+ }
*props = algo->desc.sadb_alg_id;
p = kmalloc(sizeof(*p) + (ualg->alg_key_len + 7) / 8, GFP_KERNEL);
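Aside: the (ualg->alg_key_len + 7) / 8 above is the usual bits-to-bytes round-up, since userspace expresses key lengths in bits. A worked instance (this helper is illustrative; the kernel's BITS_TO_BYTES() computes the same thing):

	/* 160-bit HMAC-SHA1 key -> 20 bytes; 6 bits -> 1 byte. */
	static inline unsigned int key_bytes(unsigned int key_bits)
	{
		return (key_bits + 7) / 8;
	}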
@@ -373,7 +453,7 @@ static int attach_auth(struct xfrm_algo_auth **algpp, u8 *props,
}
static int attach_auth_trunc(struct xfrm_algo_auth **algpp, u8 *props,
- struct nlattr *rta)
+ struct nlattr *rta, struct netlink_ext_ack *extack)
{
struct xfrm_algo_auth *p, *ualg;
struct xfrm_algo_desc *algo;
@@ -384,10 +464,14 @@ static int attach_auth_trunc(struct xfrm_algo_auth **algpp, u8 *props,
ualg = nla_data(rta);
algo = xfrm_aalg_get_byname(ualg->alg_name, 1);
- if (!algo)
+ if (!algo) {
+ NL_SET_ERR_MSG(extack, "Requested AUTH_TRUNC algorithm not found");
return -ENOSYS;
- if (ualg->alg_trunc_len > algo->uinfo.auth.icv_fullbits)
+ }
+ if (ualg->alg_trunc_len > algo->uinfo.auth.icv_fullbits) {
+ NL_SET_ERR_MSG(extack, "Invalid length requested for truncated ICV");
return -EINVAL;
+ }
*props = algo->desc.sadb_alg_id;
p = kmemdup(ualg, xfrm_alg_auth_len(ualg), GFP_KERNEL);
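Aside: the new extack message fires when the requested truncated ICV is wider than the algorithm's full digest. As a worked example with RFC 4868 numbers (illustrative, not mandated by the patch): hmac(sha256) has icv_fullbits == 256, so alg_trunc_len == 128 passes and 384 is rejected.

	/* The same bound, reduced to a predicate (sketch). */
	static bool trunc_len_ok(u32 alg_trunc_len, u32 icv_fullbits)
	{
		return alg_trunc_len <= icv_fullbits; /* 128 <= 256 ok; 384 not */
	}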
@@ -402,7 +486,8 @@ static int attach_auth_trunc(struct xfrm_algo_auth **algpp, u8 *props,
return 0;
}
-static int attach_aead(struct xfrm_state *x, struct nlattr *rta)
+static int attach_aead(struct xfrm_state *x, struct nlattr *rta,
+ struct netlink_ext_ack *extack)
{
struct xfrm_algo_aead *p, *ualg;
struct xfrm_algo_desc *algo;
@@ -413,8 +498,10 @@ static int attach_aead(struct xfrm_state *x, struct nlattr *rta)
ualg = nla_data(rta);
algo = xfrm_aead_get_byname(ualg->alg_name, ualg->alg_icv_len, 1);
- if (!algo)
+ if (!algo) {
+ NL_SET_ERR_MSG(extack, "Requested AEAD algorithm not found");
return -ENOSYS;
+ }
x->props.ealgo = algo->desc.sadb_alg_id;
p = kmemdup(ualg, aead_len(ualg), GFP_KERNEL);
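Aside: unlike the other attach helpers, xfrm_aead_get_byname() keys the lookup on both the algorithm name and the ICV width. A userspace-side sketch of the XFRMA_ALG_AEAD payload it consumes; the algorithm and sizes are examples only:

	#include <linux/xfrm.h>

	/* rfc4106 GCM: alg_key_len counts bits (128-bit key + 32-bit salt),
	 * and alg_icv_len is part of the lookup key, per the hunk above. */
	struct xfrm_algo_aead aead = {
		.alg_name    = "rfc4106(gcm(aes))",
		.alg_key_len = 160,
		.alg_icv_len = 128,
	};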
@@ -579,7 +666,8 @@ static void xfrm_smark_init(struct nlattr **attrs, struct xfrm_mark *m)
static struct xfrm_state *xfrm_state_construct(struct net *net,
struct xfrm_usersa_info *p,
struct nlattr **attrs,
- int *errp)
+ int *errp,
+ struct netlink_ext_ack *extack)
{
struct xfrm_state *x = xfrm_state_alloc(net);
int err = -ENOMEM;
@@ -606,21 +694,21 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
if (attrs[XFRMA_SA_EXTRA_FLAGS])
x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);
- if ((err = attach_aead(x, attrs[XFRMA_ALG_AEAD])))
+ if ((err = attach_aead(x, attrs[XFRMA_ALG_AEAD], extack)))
goto error;
if ((err = attach_auth_trunc(&x->aalg, &x->props.aalgo,
- attrs[XFRMA_ALG_AUTH_TRUNC])))
+ attrs[XFRMA_ALG_AUTH_TRUNC], extack)))
goto error;
if (!x->props.aalgo) {
if ((err = attach_auth(&x->aalg, &x->props.aalgo,
- attrs[XFRMA_ALG_AUTH])))
+ attrs[XFRMA_ALG_AUTH], extack)))
goto error;
}
- if ((err = attach_crypt(x, attrs[XFRMA_ALG_CRYPT])))
+ if ((err = attach_crypt(x, attrs[XFRMA_ALG_CRYPT], extack)))
goto error;
if ((err = attach_one_algo(&x->calg, &x->props.calgo,
xfrm_calg_get_byname,
- attrs[XFRMA_ALG_COMP])))
+ attrs[XFRMA_ALG_COMP], extack)))
goto error;
if (attrs[XFRMA_TFCPAD])
@@ -630,15 +718,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
xfrm_smark_init(attrs, &x->props.smark);
- if (attrs[XFRMA_IF_ID]) {
+ if (attrs[XFRMA_IF_ID])
x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
- if (!x->if_id) {
- err = -EINVAL;
- goto error;
- }
- }
- err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);
+ err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack);
if (err)
goto error;
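Aside: this hunk stops rejecting XFRMA_IF_ID == 0, so absent and zero now mean the same thing, a state bound to no xfrm interface. What remains is the plain optional-u32 idiom (restated as a sketch):

	u32 if_id = 0;	/* 0 is now a legal default, not an error */

	if (attrs[XFRMA_IF_ID])
		if_id = nla_get_u32(attrs[XFRMA_IF_ID]);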
@@ -658,7 +741,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
/* sysctl_xfrm_aevent_etime is in 100ms units */
x->replay_maxage = (net->xfrm.sysctl_aevent_etime*HZ)/XFRM_AE_ETH_M;
- if ((err = xfrm_init_replay(x)))
+ if ((err = xfrm_init_replay(x, extack)))
goto error;
/* override default values from above */
@@ -667,7 +750,8 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
/* configure the hardware if offload is requested */
if (attrs[XFRMA_OFFLOAD_DEV]) {
err = xfrm_dev_state_add(net, x,
- nla_data(attrs[XFRMA_OFFLOAD_DEV]));
+ nla_data(attrs[XFRMA_OFFLOAD_DEV]),
+ extack);
if (err)
goto error;
}
@@ -683,7 +767,7 @@ error_no_put:
}
static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs, struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct xfrm_usersa_info *p = nlmsg_data(nlh);
@@ -691,11 +775,11 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
int err;
struct km_event c;
- err = verify_newsa_info(p, attrs);
+ err = verify_newsa_info(p, attrs, extack);
if (err)
return err;
- x = xfrm_state_construct(net, p, attrs, &err);
+ x = xfrm_state_construct(net, p, attrs, &err, extack);
if (!x)
return err;
@@ -762,7 +846,7 @@ static struct xfrm_state *xfrm_user_state_lookup(struct net *net,
}
static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs, struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct xfrm_state *x;
@@ -845,7 +929,7 @@ static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb)
return 0;
}
-static int copy_user_offload(struct xfrm_state_offload *xso, struct sk_buff *skb)
+static int copy_user_offload(struct xfrm_dev_offload *xso, struct sk_buff *skb)
{
struct xfrm_user_offload *xuo;
struct nlattr *attr;
@@ -857,7 +941,8 @@ static int copy_user_offload(struct xfrm_state_offload *xso, struct sk_buff *skb
xuo = nla_data(attr);
memset(xuo, 0, sizeof(*xuo));
xuo->ifindex = xso->dev->ifindex;
- xuo->flags = xso->flags;
+ if (xso->dir == XFRM_DEV_OFFLOAD_IN)
+ xuo->flags = XFRM_OFFLOAD_INBOUND;
return 0;
}
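Aside: with struct xfrm_dev_offload the direction lives in xso->dir rather than in a saved copy of the userspace flags, and only the inbound case is reflected back in dumps. A hedged userspace-side decode of what this now reports:

	#include <stdio.h>
	#include <linux/xfrm.h>

	/* Sketch: interpret the XFRMA_OFFLOAD_DEV attribute of a dumped
	 * state; XFRM_OFFLOAD_INBOUND is the only flag this path sets. */
	static void print_offload(const struct xfrm_user_offload *xuo)
	{
		printf("offloaded on ifindex %d (%s)\n", xuo->ifindex,
		       (xuo->flags & XFRM_OFFLOAD_INBOUND) ? "in" : "out");
	}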
@@ -1258,7 +1343,8 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net,
}
static int xfrm_set_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct xfrmu_spdhthresh *thresh4 = NULL;
@@ -1303,7 +1389,8 @@ static int xfrm_set_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
}
static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct sk_buff *r_skb;
@@ -1362,7 +1449,8 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net,
}
static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct sk_buff *r_skb;
@@ -1382,7 +1470,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
}
static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs, struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct xfrm_usersa_id *p = nlmsg_data(nlh);
@@ -1406,7 +1494,8 @@ out_noput:
}
static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct xfrm_state *x;
@@ -1432,13 +1521,8 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
mark = xfrm_mark_get(attrs, &m);
- if (attrs[XFRMA_IF_ID]) {
+ if (attrs[XFRMA_IF_ID])
if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
- if (!if_id) {
- err = -EINVAL;
- goto out_noput;
- }
- }
if (p->info.seq) {
x = xfrm_find_acq_byseq(net, mark, p->info.seq);
@@ -1486,7 +1570,7 @@ out_noput:
return err;
}
-static int verify_policy_dir(u8 dir)
+static int verify_policy_dir(u8 dir, struct netlink_ext_ack *extack)
{
switch (dir) {
case XFRM_POLICY_IN:
@@ -1495,13 +1579,14 @@ static int verify_policy_dir(u8 dir)
break;
default:
+ NL_SET_ERR_MSG(extack, "Invalid policy direction");
return -EINVAL;
}
return 0;
}
-static int verify_policy_type(u8 type)
+static int verify_policy_type(u8 type, struct netlink_ext_ack *extack)
{
switch (type) {
case XFRM_POLICY_TYPE_MAIN:
@@ -1511,13 +1596,15 @@ static int verify_policy_type(u8 type)
break;
default:
+ NL_SET_ERR_MSG(extack, "Invalid policy type");
return -EINVAL;
}
return 0;
}
-static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
+static int verify_newpolicy_info(struct xfrm_userpolicy_info *p,
+ struct netlink_ext_ack *extack)
{
int ret;
@@ -1529,6 +1616,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
break;
default:
+ NL_SET_ERR_MSG(extack, "Invalid policy share");
return -EINVAL;
}
@@ -1538,35 +1626,44 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
break;
default:
+ NL_SET_ERR_MSG(extack, "Invalid policy action");
return -EINVAL;
}
switch (p->sel.family) {
case AF_INET:
- if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32)
+ if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix length in selector (must be <= 32 for IPv4)");
return -EINVAL;
+ }
break;
case AF_INET6:
#if IS_ENABLED(CONFIG_IPV6)
- if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128)
+ if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix length in selector (must be <= 128 for IPv6)");
return -EINVAL;
+ }
break;
#else
+ NL_SET_ERR_MSG(extack, "IPv6 support disabled");
return -EAFNOSUPPORT;
#endif
default:
+ NL_SET_ERR_MSG(extack, "Invalid selector family");
return -EINVAL;
}
- ret = verify_policy_dir(p->dir);
+ ret = verify_policy_dir(p->dir, extack);
if (ret)
return ret;
- if (p->index && (xfrm_policy_id2dir(p->index) != p->dir))
+ if (p->index && (xfrm_policy_id2dir(p->index) != p->dir)) {
+ NL_SET_ERR_MSG(extack, "Policy index doesn't match direction");
return -EINVAL;
+ }
return 0;
}
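Aside: the selector checks bound the prefix lengths by address family, so a 10.0.0.0/24 selector passes the AF_INET branch while a nonsensical /40 would trip the new message. Folded into one predicate (sketch only):

	#include <linux/socket.h>	/* AF_INET, AF_INET6 */

	static bool sel_prefixlen_ok(u16 family, u8 plen_s, u8 plen_d)
	{
		u8 max = (family == AF_INET) ? 32 : 128;

		return plen_s <= max && plen_d <= max;
	}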
@@ -1608,13 +1705,16 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
}
}
-static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
+static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family,
+ struct netlink_ext_ack *extack)
{
u16 prev_family;
int i;
- if (nr > XFRM_MAX_DEPTH)
+ if (nr > XFRM_MAX_DEPTH) {
+ NL_SET_ERR_MSG(extack, "Template count must be <= XFRM_MAX_DEPTH (" __stringify(XFRM_MAX_DEPTH) ")");
return -EINVAL;
+ }
prev_family = family;
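Aside: the bound quoted in the message stays in sync with the code because __stringify() macro-expands its argument and then turns the expansion into a string literal, which concatenates with the adjacent literals at compile time. Assuming XFRM_MAX_DEPTH is 6 (its current value, cited here only for illustration):

	#include <linux/stringify.h>

	/* Expands to: "Template count must be <= XFRM_MAX_DEPTH (6)" */
	static const char example_msg[] =
		"Template count must be <= XFRM_MAX_DEPTH ("
		__stringify(XFRM_MAX_DEPTH) ")";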
@@ -1634,12 +1734,16 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
case XFRM_MODE_BEET:
break;
default:
- if (ut[i].family != prev_family)
+ if (ut[i].family != prev_family) {
+ NL_SET_ERR_MSG(extack, "Mode in template doesn't support a family change");
return -EINVAL;
+ }
break;
}
- if (ut[i].mode >= XFRM_MODE_MAX)
+ if (ut[i].mode >= XFRM_MODE_MAX) {
+ NL_SET_ERR_MSG(extack, "Mode in template must be < XFRM_MODE_MAX (" __stringify(XFRM_MODE_MAX) ")");
return -EINVAL;
+ }
prev_family = ut[i].family;
@@ -1651,17 +1755,21 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
break;
#endif
default:
+ NL_SET_ERR_MSG(extack, "Invalid family in template");
return -EINVAL;
}
- if (!xfrm_id_proto_valid(ut[i].id.proto))
+ if (!xfrm_id_proto_valid(ut[i].id.proto)) {
+ NL_SET_ERR_MSG(extack, "Invalid XFRM protocol in template");
return -EINVAL;
+ }
}
return 0;
}
-static int copy_from_user_tmpl(struct xfrm_policy *pol, struct nlattr **attrs)
+static int copy_from_user_tmpl(struct xfrm_policy *pol, struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
{
struct nlattr *rt = attrs[XFRMA_TMPL];
@@ -1672,7 +1780,7 @@ static int copy_from_user_tmpl(struct xfrm_policy *pol, struct nlattr **attrs)
int nr = nla_len(rt) / sizeof(*utmpl);
int err;
- err = validate_tmpl(nr, utmpl, pol->family);
+ err = validate_tmpl(nr, utmpl, pol->family, extack);
if (err)
return err;
@@ -1681,7 +1789,8 @@ static int copy_from_user_tmpl(struct xfrm_policy *pol, struct nlattr **attrs)
return 0;
}
-static int copy_from_user_policy_type(u8 *tp, struct nlattr **attrs)
+static int copy_from_user_policy_type(u8 *tp, struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
{
struct nlattr *rt = attrs[XFRMA_POLICY_TYPE];
struct xfrm_userpolicy_type *upt;
@@ -1693,7 +1802,7 @@ static int copy_from_user_policy_type(u8 *tp, struct nlattr **attrs)
type = upt->type;
}
- err = verify_policy_type(type);
+ err = verify_policy_type(type, extack);
if (err)
return err;
@@ -1728,7 +1837,11 @@ static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_i
p->share = XFRM_SHARE_ANY; /* XXX xp->share */
}
-static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_userpolicy_info *p, struct nlattr **attrs, int *errp)
+static struct xfrm_policy *xfrm_policy_construct(struct net *net,
+ struct xfrm_userpolicy_info *p,
+ struct nlattr **attrs,
+ int *errp,
+ struct netlink_ext_ack *extack)
{
struct xfrm_policy *xp = xfrm_policy_alloc(net, GFP_KERNEL);
int err;
@@ -1740,24 +1853,19 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_us
copy_from_user_policy(xp, p);
- err = copy_from_user_policy_type(&xp->type, attrs);
+ err = copy_from_user_policy_type(&xp->type, attrs, extack);
if (err)
goto error;
- if (!(err = copy_from_user_tmpl(xp, attrs)))
+ if (!(err = copy_from_user_tmpl(xp, attrs, extack)))
err = copy_from_user_sec_ctx(xp, attrs);
if (err)
goto error;
xfrm_mark_get(attrs, &xp->mark);
- if (attrs[XFRMA_IF_ID]) {
+ if (attrs[XFRMA_IF_ID])
xp->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
- if (!xp->if_id) {
- err = -EINVAL;
- goto error;
- }
- }
return xp;
error:
@@ -1768,7 +1876,8 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_us
}
static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct xfrm_userpolicy_info *p = nlmsg_data(nlh);
@@ -1777,14 +1886,14 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
int err;
int excl;
- err = verify_newpolicy_info(p);
+ err = verify_newpolicy_info(p, extack);
if (err)
return err;
- err = verify_sec_ctx_len(attrs);
+ err = verify_sec_ctx_len(attrs, extack);
if (err)
return err;
- xp = xfrm_policy_construct(net, p, attrs, &err);
+ xp = xfrm_policy_construct(net, p, attrs, &err, extack);
if (!xp)
return err;
@@ -2009,12 +2118,9 @@ static int xfrm_notify_userpolicy(struct net *net)
}
up = nlmsg_data(nlh);
- up->in = net->xfrm.policy_default & XFRM_POL_DEFAULT_IN ?
- XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
- up->fwd = net->xfrm.policy_default & XFRM_POL_DEFAULT_FWD ?
- XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
- up->out = net->xfrm.policy_default & XFRM_POL_DEFAULT_OUT ?
- XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+ up->in = net->xfrm.policy_default[XFRM_POLICY_IN];
+ up->fwd = net->xfrm.policy_default[XFRM_POLICY_FWD];
+ up->out = net->xfrm.policy_default[XFRM_POLICY_OUT];
nlmsg_end(skb, nlh);
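Aside: the storage behind these reads changes shape in this series, from one bitmask with a bit per direction (set meaning block) to a three-slot array holding the uapi value directly, which is why the ternary decoding collapses into plain array reads. Conceptually (a sketch; the actual struct netns_xfrm diff lives outside this file):

	/* XFRM_POLICY_MAX is 3: IN, OUT, FWD. */
	static u8 default_for_dir(const u8 policy_default[XFRM_POLICY_MAX], int dir)
	{
		return policy_default[dir]; /* XFRM_USERPOLICY_ACCEPT or _BLOCK */
	}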
@@ -2025,26 +2131,26 @@ static int xfrm_notify_userpolicy(struct net *net)
return err;
}
+static bool xfrm_userpolicy_is_valid(__u8 policy)
+{
+ return policy == XFRM_USERPOLICY_BLOCK ||
+ policy == XFRM_USERPOLICY_ACCEPT;
+}
+
static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs, struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct xfrm_userpolicy_default *up = nlmsg_data(nlh);
- if (up->in == XFRM_USERPOLICY_BLOCK)
- net->xfrm.policy_default |= XFRM_POL_DEFAULT_IN;
- else if (up->in == XFRM_USERPOLICY_ACCEPT)
- net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_IN;
+ if (xfrm_userpolicy_is_valid(up->in))
+ net->xfrm.policy_default[XFRM_POLICY_IN] = up->in;
- if (up->fwd == XFRM_USERPOLICY_BLOCK)
- net->xfrm.policy_default |= XFRM_POL_DEFAULT_FWD;
- else if (up->fwd == XFRM_USERPOLICY_ACCEPT)
- net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_FWD;
+ if (xfrm_userpolicy_is_valid(up->fwd))
+ net->xfrm.policy_default[XFRM_POLICY_FWD] = up->fwd;
- if (up->out == XFRM_USERPOLICY_BLOCK)
- net->xfrm.policy_default |= XFRM_POL_DEFAULT_OUT;
- else if (up->out == XFRM_USERPOLICY_ACCEPT)
- net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_OUT;
+ if (xfrm_userpolicy_is_valid(up->out))
+ net->xfrm.policy_default[XFRM_POLICY_OUT] = up->out;
rt_genid_bump_all(net);
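Aside: a behavioural nuance of the rewritten setter is that per-direction values failing xfrm_userpolicy_is_valid() are silently skipped rather than rejected, so userspace can update one direction and leave the rest alone. A hedged sketch of such an XFRM_MSG_SETDEFAULT payload:

	#include <linux/xfrm.h>

	/* Block inbound only; XFRM_USERPOLICY_UNSPEC (0) fails the validity
	 * check above, so fwd and out are left untouched. */
	struct xfrm_userpolicy_default up = {
		.in  = XFRM_USERPOLICY_BLOCK,
		.fwd = XFRM_USERPOLICY_UNSPEC,
		.out = XFRM_USERPOLICY_UNSPEC,
	};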
@@ -2053,7 +2159,7 @@ static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh,
}
static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs, struct netlink_ext_ack *extack)
{
struct sk_buff *r_skb;
struct nlmsghdr *r_nlh;
@@ -2074,20 +2180,17 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh,
}
r_up = nlmsg_data(r_nlh);
-
- r_up->in = net->xfrm.policy_default & XFRM_POL_DEFAULT_IN ?
- XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
- r_up->fwd = net->xfrm.policy_default & XFRM_POL_DEFAULT_FWD ?
- XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
- r_up->out = net->xfrm.policy_default & XFRM_POL_DEFAULT_OUT ?
- XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+ r_up->in = net->xfrm.policy_default[XFRM_POLICY_IN];
+ r_up->fwd = net->xfrm.policy_default[XFRM_POLICY_FWD];
+ r_up->out = net->xfrm.policy_default[XFRM_POLICY_OUT];
nlmsg_end(r_skb, r_nlh);
return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid);
}
static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct xfrm_policy *xp;
@@ -2102,11 +2205,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
p = nlmsg_data(nlh);
delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY;
- err = copy_from_user_policy_type(&type, attrs);
+ err = copy_from_user_policy_type(&type, attrs, extack);
if (err)
return err;
- err = verify_policy_dir(p->dir);
+ err = verify_policy_dir(p->dir, extack);
if (err)
return err;
@@ -2122,7 +2225,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
struct nlattr *rt = attrs[XFRMA_SEC_CTX];
struct xfrm_sec_ctx *ctx;
- err = verify_sec_ctx_len(attrs);
+ err = verify_sec_ctx_len(attrs, extack);
if (err)
return err;
@@ -2170,7 +2273,8 @@ out:
}
static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct km_event c;
@@ -2270,7 +2374,7 @@ out_cancel:
}
static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs, struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct xfrm_state *x;
@@ -2314,7 +2418,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
}
static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs, struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct xfrm_state *x;
@@ -2365,14 +2469,15 @@ out:
}
static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct km_event c;
u8 type = XFRM_POLICY_TYPE_MAIN;
int err;
- err = copy_from_user_policy_type(&type, attrs);
+ err = copy_from_user_policy_type(&type, attrs, extack);
if (err)
return err;
@@ -2393,7 +2498,8 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
}
static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct xfrm_policy *xp;
@@ -2404,11 +2510,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
struct xfrm_mark m;
u32 if_id = 0;
- err = copy_from_user_policy_type(&type, attrs);
+ err = copy_from_user_policy_type(&type, attrs, extack);
if (err)
return err;
- err = verify_policy_dir(p->dir);
+ err = verify_policy_dir(p->dir, extack);
if (err)
return err;
@@ -2424,7 +2530,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
struct nlattr *rt = attrs[XFRMA_SEC_CTX];
struct xfrm_sec_ctx *ctx;
- err = verify_sec_ctx_len(attrs);
+ err = verify_sec_ctx_len(attrs, extack);
if (err)
return err;
@@ -2459,7 +2565,8 @@ out:
}
static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct xfrm_state *x;
@@ -2493,7 +2600,8 @@ out:
}
static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct xfrm_policy *xp;
@@ -2511,15 +2619,15 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
xfrm_mark_get(attrs, &mark);
- err = verify_newpolicy_info(&ua->policy);
+ err = verify_newpolicy_info(&ua->policy, extack);
if (err)
goto free_state;
- err = verify_sec_ctx_len(attrs);
+ err = verify_sec_ctx_len(attrs, extack);
if (err)
goto free_state;
/* build an XP */
- xp = xfrm_policy_construct(net, &ua->policy, attrs, &err);
+ xp = xfrm_policy_construct(net, &ua->policy, attrs, &err, extack);
if (!xp)
goto free_state;
@@ -2598,7 +2706,7 @@ static int copy_from_user_migrate(struct xfrm_migrate *ma,
}
static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs, struct netlink_ext_ack *extack)
{
struct xfrm_userpolicy_id *pi = nlmsg_data(nlh);
struct xfrm_migrate m[XFRM_MAX_DEPTH];
@@ -2608,13 +2716,14 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
int n = 0;
struct net *net = sock_net(skb->sk);
struct xfrm_encap_tmpl *encap = NULL;
+ u32 if_id = 0;
if (attrs[XFRMA_MIGRATE] == NULL)
return -EINVAL;
kmp = attrs[XFRMA_KMADDRESS] ? &km : NULL;
- err = copy_from_user_policy_type(&type, attrs);
+ err = copy_from_user_policy_type(&type, attrs, extack);
if (err)
return err;
@@ -2632,7 +2741,10 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
return -ENOMEM;
}
- err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap);
+ if (attrs[XFRMA_IF_ID])
+ if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+
+ err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap, if_id);
kfree(encap);
@@ -2640,7 +2752,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
}
#else
static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attrs)
+ struct nlattr **attrs, struct netlink_ext_ack *extack)
{
return -ENOPROTOOPT;
}
@@ -2836,7 +2948,8 @@ static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
};
static const struct xfrm_link {
- int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);
+ int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **,
+ struct netlink_ext_ack *);
int (*start)(struct netlink_callback *);
int (*dump)(struct sk_buff *, struct netlink_callback *);
int (*done)(struct netlink_callback *);
@@ -2938,7 +3051,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err;
}
- err = link->doit(skb, nlh, attrs);
+ err = link->doit(skb, nlh, attrs, extack);
/* We need to free skb allocated in xfrm_alloc_compat() before
* returning from this function, because consume_skb() won't take
@@ -3289,11 +3402,11 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
*dir = -EINVAL;
if (len < sizeof(*p) ||
- verify_newpolicy_info(p))
+ verify_newpolicy_info(p, NULL))
return NULL;
nr = ((len - sizeof(*p)) / sizeof(*ut));
- if (validate_tmpl(nr, ut, p->sel.family))
+ if (validate_tmpl(nr, ut, p->sel.family, NULL))
return NULL;
if (p->dir > XFRM_POLICY_OUT)
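Aside: the socket-policy path above has no netlink request to hang an ACK on, so it passes NULL, which is safe because NL_SET_ERR_MSG() only writes through a non-NULL ack. The guard, reduced to its essence (an approximation, not the real macro body):

	static void set_extack_msg(struct netlink_ext_ack *extack, const char *msg)
	{
		if (extack)	/* the same NULL tolerance the macro provides */
			extack->_msg = msg;
	}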
@@ -3650,10 +3763,8 @@ static int __init xfrm_user_init(void)
rv = register_pernet_subsys(&xfrm_user_net_ops);
if (rv < 0)
return rv;
- rv = xfrm_register_km(&netlink_mgr);
- if (rv < 0)
- unregister_pernet_subsys(&xfrm_user_net_ops);
- return rv;
+ xfrm_register_km(&netlink_mgr);
+ return 0;
}
static void __exit xfrm_user_exit(void)
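Aside: the init-path simplification is possible because xfrm_register_km() was changed elsewhere in this series to return void; registering a key manager is just a list insertion and cannot fail, so the error unwind had become dead weight. Paraphrased (the lock and list names follow the xfrm core; behaviour sketched, not quoted):

	void xfrm_register_km(struct xfrm_mgr *km)
	{
		spin_lock_bh(&xfrm_km_lock);
		list_add_tail_rcu(&km->list, &xfrm_km_list);
		spin_unlock_bh(&xfrm_km_lock);
	}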