Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/core.c1
-rw-r--r--net/8021q/vlan.c3
-rw-r--r--net/8021q/vlan.h2
-rw-r--r--net/8021q/vlan_dev.c10
-rw-r--r--net/8021q/vlan_netlink.c7
-rw-r--r--net/8021q/vlanproc.c2
-rw-r--r--net/9p/Kconfig7
-rw-r--r--net/9p/Makefile5
-rw-r--r--net/9p/client.c7
-rw-r--r--net/9p/mod.c15
-rw-r--r--net/9p/trans_fd.c14
-rw-r--r--net/9p/trans_virtio.c4
-rw-r--r--net/9p/trans_xen.c23
-rw-r--r--net/Kconfig13
-rw-r--r--net/Kconfig.debug7
-rw-r--r--net/appletalk/ddp.c3
-rw-r--r--net/atm/common.c4
-rw-r--r--net/atm/proc.c4
-rw-r--r--net/ax25/af_ax25.c103
-rw-r--r--net/ax25/ax25_dev.c43
-rw-r--r--net/ax25/ax25_route.c18
-rw-r--r--net/ax25/ax25_subr.c20
-rw-r--r--net/batman-adv/bat_iv_ogm.c2
-rw-r--r--net/batman-adv/bat_v_elp.c2
-rw-r--r--net/batman-adv/bat_v_ogm.c2
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c7
-rw-r--r--net/batman-adv/distributed-arp-table.c2
-rw-r--r--net/batman-adv/fragmentation.c11
-rw-r--r--net/batman-adv/gateway_client.c1
-rw-r--r--net/batman-adv/hard-interface.c37
-rw-r--r--net/batman-adv/main.c2
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/multicast.c3
-rw-r--r--net/batman-adv/network-coding.c2
-rw-r--r--net/batman-adv/originator.c2
-rw-r--r--net/batman-adv/send.c2
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/batman-adv/tp_meter.c2
-rw-r--r--net/batman-adv/translation-table.c14
-rw-r--r--net/batman-adv/tvlv.c2
-rw-r--r--net/bluetooth/6lowpan.c3
-rw-r--r--net/bluetooth/af_bluetooth.c19
-rw-r--r--net/bluetooth/bnep/core.c4
-rw-r--r--net/bluetooth/cmtp/core.c2
-rw-r--r--net/bluetooth/eir.c31
-rw-r--r--net/bluetooth/eir.h24
-rw-r--r--net/bluetooth/hci_conn.c42
-rw-r--r--net/bluetooth/hci_core.c17
-rw-r--r--net/bluetooth/hci_event.c192
-rw-r--r--net/bluetooth/hci_request.c6
-rw-r--r--net/bluetooth/hci_sock.c3
-rw-r--r--net/bluetooth/hci_sync.c208
-rw-r--r--net/bluetooth/hidp/core.c2
-rw-r--r--net/bluetooth/l2cap_core.c2
-rw-r--r--net/bluetooth/mgmt.c327
-rw-r--r--net/bluetooth/mgmt_util.c5
-rw-r--r--net/bluetooth/msft.c183
-rw-r--r--net/bluetooth/sco.c23
-rw-r--r--net/bpf/bpf_dummy_struct_ops.c30
-rw-r--r--net/bpf/test_run.c698
-rw-r--r--net/bpfilter/bpfilter_kern.c2
-rw-r--r--net/bridge/Makefile2
-rw-r--r--net/bridge/br.c20
-rw-r--r--net/bridge/br_arp_nd_proxy.c4
-rw-r--r--net/bridge/br_device.c1
-rw-r--r--net/bridge/br_fdb.c160
-rw-r--r--net/bridge/br_forward.c2
-rw-r--r--net/bridge/br_if.c15
-rw-r--r--net/bridge/br_input.c35
-rw-r--r--net/bridge/br_mdb.c12
-rw-r--r--net/bridge/br_mst.c357
-rw-r--r--net/bridge/br_multicast.c4
-rw-r--r--net/bridge/br_netfilter_hooks.c21
-rw-r--r--net/bridge/br_netlink.c59
-rw-r--r--net/bridge/br_private.h88
-rw-r--r--net/bridge/br_stp.c6
-rw-r--r--net/bridge/br_switchdev.c144
-rw-r--r--net/bridge/br_sysfs_br.c6
-rw-r--r--net/bridge/br_vlan.c146
-rw-r--r--net/bridge/br_vlan_options.c20
-rw-r--r--net/bridge/netfilter/nf_conntrack_bridge.c7
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c5
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c9
-rw-r--r--net/caif/caif_dev.c2
-rw-r--r--net/caif/caif_socket.c2
-rw-r--r--net/caif/chnl_net.c2
-rw-r--r--net/can/af_can.c2
-rw-r--r--net/can/bcm.c27
-rw-r--r--net/can/gw.c25
-rw-r--r--net/can/isotp.c453
-rw-r--r--net/can/j1939/socket.c4
-rw-r--r--net/can/j1939/transport.c2
-rw-r--r--net/can/proc.c2
-rw-r--r--net/can/raw.c20
-rw-r--r--net/ceph/buffer.c4
-rw-r--r--net/ceph/ceph_common.c52
-rw-r--r--net/ceph/crush/mapper.c5
-rw-r--r--net/ceph/crypto.c2
-rw-r--r--net/ceph/messenger.c21
-rw-r--r--net/ceph/messenger_v1.c54
-rw-r--r--net/ceph/messenger_v2.c244
-rw-r--r--net/ceph/osd_client.c317
-rw-r--r--net/ceph/osdmap.c12
-rw-r--r--net/core/bpf_sk_storage.c34
-rw-r--r--net/core/datagram.c7
-rw-r--r--net/core/datagram.h15
-rw-r--r--net/core/dev.c955
-rw-r--r--net/core/dev.h112
-rw-r--r--net/core/dev_addr_lists.c2
-rw-r--r--net/core/dev_ioctl.c2
-rw-r--r--net/core/devlink.c887
-rw-r--r--net/core/drop_monitor.c113
-rw-r--r--net/core/filter.c542
-rw-r--r--net/core/flow_dissector.c41
-rw-r--r--net/core/flow_offload.c6
-rw-r--r--net/core/gro.c49
-rw-r--r--net/core/gro_cells.c38
-rw-r--r--net/core/link_watch.c7
-rw-r--r--net/core/lwt_bpf.c7
-rw-r--r--net/core/neighbour.c34
-rw-r--r--net/core/net-procfs.c40
-rw-r--r--net/core/net-sysfs.c31
-rw-r--r--net/core/net_namespace.c24
-rw-r--r--net/core/of_net.c33
-rw-r--r--net/core/page_pool.c183
-rw-r--r--net/core/pktgen.c6
-rw-r--r--net/core/ptp_classifier.c12
-rw-r--r--net/core/rtnetlink.c1005
-rw-r--r--net/core/secure_seq.c16
-rw-r--r--net/core/skbuff.c158
-rw-r--r--net/core/skmsg.c46
-rw-r--r--net/core/sock.c155
-rw-r--r--net/core/sock_map.c87
-rw-r--r--net/core/sysctl_net_core.c49
-rw-r--r--net/core/utils.c4
-rw-r--r--net/core/xdp.c82
-rw-r--r--net/dcb/dcbnl.c44
-rw-r--r--net/dccp/dccp.h9
-rw-r--r--net/dccp/ipv4.c7
-rw-r--r--net/dccp/ipv6.c6
-rw-r--r--net/dccp/minisocks.c1
-rw-r--r--net/dccp/proto.c7
-rw-r--r--net/decnet/dn_dev.c4
-rw-r--r--net/decnet/dn_neigh.c3
-rw-r--r--net/decnet/dn_nsp_out.c3
-rw-r--r--net/decnet/dn_route.c6
-rw-r--r--net/dsa/dsa.c70
-rw-r--r--net/dsa/dsa2.c229
-rw-r--r--net/dsa/dsa_priv.h144
-rw-r--r--net/dsa/master.c34
-rw-r--r--net/dsa/port.c669
-rw-r--r--net/dsa/slave.c804
-rw-r--r--net/dsa/switch.c542
-rw-r--r--net/dsa/tag_8021q.c333
-rw-r--r--net/dsa/tag_dsa.c19
-rw-r--r--net/dsa/tag_hellcreek.c8
-rw-r--r--net/dsa/tag_lan9303.c21
-rw-r--r--net/dsa/tag_ocelot_8021q.c11
-rw-r--r--net/dsa/tag_qca.c85
-rw-r--r--net/dsa/tag_rtl8_4.c152
-rw-r--r--net/dsa/tag_sja1105.c28
-rw-r--r--net/ethernet/eth.c2
-rw-r--r--net/ethtool/common.c3
-rw-r--r--net/ethtool/eeprom.c2
-rw-r--r--net/ethtool/netlink.h2
-rw-r--r--net/ethtool/rings.c62
-rw-r--r--net/hsr/hsr_debugfs.c40
-rw-r--r--net/hsr/hsr_device.c12
-rw-r--r--net/hsr/hsr_forward.c7
-rw-r--r--net/hsr/hsr_framereg.c209
-rw-r--r--net/hsr/hsr_framereg.h14
-rw-r--r--net/hsr/hsr_main.h30
-rw-r--r--net/hsr/hsr_netlink.c4
-rw-r--r--net/ieee802154/6lowpan/core.c1
-rw-r--r--net/ieee802154/6lowpan/reassembly.c1
-rw-r--r--net/ieee802154/nl-phy.c4
-rw-r--r--net/ieee802154/nl802154.c8
-rw-r--r--net/ieee802154/socket.c12
-rw-r--r--net/ipv4/Kconfig1
-rw-r--r--net/ipv4/af_inet.c20
-rw-r--r--net/ipv4/arp.c18
-rw-r--r--net/ipv4/bpf_tcp_ca.c28
-rw-r--r--net/ipv4/cipso_ipv4.c12
-rw-r--r--net/ipv4/datagram.c7
-rw-r--r--net/ipv4/devinet.c13
-rw-r--r--net/ipv4/esp4.c12
-rw-r--r--net/ipv4/esp4_offload.c6
-rw-r--r--net/ipv4/fib_frontend.c49
-rw-r--r--net/ipv4/fib_lookup.h10
-rw-r--r--net/ipv4/fib_rules.c21
-rw-r--r--net/ipv4/fib_semantics.c155
-rw-r--r--net/ipv4/fib_trie.c89
-rw-r--r--net/ipv4/fou.c1
-rw-r--r--net/ipv4/icmp.c188
-rw-r--r--net/ipv4/igmp.c13
-rw-r--r--net/ipv4/inet_connection_sock.c20
-rw-r--r--net/ipv4/inet_diag.c5
-rw-r--r--net/ipv4/inet_fragment.c11
-rw-r--r--net/ipv4/inet_hashtables.c189
-rw-r--r--net/ipv4/inet_timewait_sock.c18
-rw-r--r--net/ipv4/inetpeer.c12
-rw-r--r--net/ipv4/ip_forward.c15
-rw-r--r--net/ipv4/ip_fragment.c4
-rw-r--r--net/ipv4/ip_gre.c91
-rw-r--r--net/ipv4/ip_input.c33
-rw-r--r--net/ipv4/ip_options.c31
-rw-r--r--net/ipv4/ip_output.c48
-rw-r--r--net/ipv4/ip_tunnel.c9
-rw-r--r--net/ipv4/ip_tunnel_core.c2
-rw-r--r--net/ipv4/ipmr.c24
-rw-r--r--net/ipv4/netfilter.c3
-rw-r--r--net/ipv4/netfilter/Kconfig4
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c6
-rw-r--r--net/ipv4/netfilter/nf_flow_table_ipv4.c0
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c8
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c24
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c10
-rw-r--r--net/ipv4/netfilter/nft_dup_ipv4.c1
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c6
-rw-r--r--net/ipv4/netfilter/nft_reject_ipv4.c1
-rw-r--r--net/ipv4/nexthop.c17
-rw-r--r--net/ipv4/ping.c73
-rw-r--r--net/ipv4/proc.c4
-rw-r--r--net/ipv4/raw.c19
-rw-r--r--net/ipv4/route.c126
-rw-r--r--net/ipv4/syncookies.c10
-rw-r--r--net/ipv4/sysctl_net_ipv4.c55
-rw-r--r--net/ipv4/tcp.c184
-rw-r--r--net/ipv4/tcp_bbr.c40
-rw-r--r--net/ipv4/tcp_bic.c14
-rw-r--r--net/ipv4/tcp_bpf.c32
-rw-r--r--net/ipv4/tcp_cdg.c30
-rw-r--r--net/ipv4/tcp_cong.c32
-rw-r--r--net/ipv4/tcp_cubic.c43
-rw-r--r--net/ipv4/tcp_dctcp.c29
-rw-r--r--net/ipv4/tcp_highspeed.c18
-rw-r--r--net/ipv4/tcp_htcp.c10
-rw-r--r--net/ipv4/tcp_hybla.c18
-rw-r--r--net/ipv4/tcp_illinois.c12
-rw-r--r--net/ipv4/tcp_input.c244
-rw-r--r--net/ipv4/tcp_ipv4.c212
-rw-r--r--net/ipv4/tcp_lp.c6
-rw-r--r--net/ipv4/tcp_metrics.c12
-rw-r--r--net/ipv4/tcp_minisocks.c9
-rw-r--r--net/ipv4/tcp_nv.c24
-rw-r--r--net/ipv4/tcp_output.c113
-rw-r--r--net/ipv4/tcp_rate.c13
-rw-r--r--net/ipv4/tcp_recovery.c15
-rw-r--r--net/ipv4/tcp_scalable.c4
-rw-r--r--net/ipv4/tcp_vegas.c21
-rw-r--r--net/ipv4/tcp_veno.c24
-rw-r--r--net/ipv4/tcp_westwood.c3
-rw-r--r--net/ipv4/tcp_yeah.c30
-rw-r--r--net/ipv4/udp.c44
-rw-r--r--net/ipv4/udp_bpf.c17
-rw-r--r--net/ipv4/udp_impl.h4
-rw-r--r--net/ipv4/udp_tunnel_nic.c2
-rw-r--r--net/ipv4/xfrm4_policy.c4
-rw-r--r--net/ipv4/xfrm4_protocol.c1
-rw-r--r--net/ipv6/addrconf.c336
-rw-r--r--net/ipv6/af_inet6.c31
-rw-r--r--net/ipv6/datagram.c10
-rw-r--r--net/ipv6/esp6.c15
-rw-r--r--net/ipv6/esp6_offload.c6
-rw-r--r--net/ipv6/exthdrs.c52
-rw-r--r--net/ipv6/fib6_rules.c30
-rw-r--r--net/ipv6/icmp.c95
-rw-r--r--net/ipv6/inet6_hashtables.c20
-rw-r--r--net/ipv6/ioam6.c19
-rw-r--r--net/ipv6/ioam6_iptunnel.c59
-rw-r--r--net/ipv6/ip6_fib.c23
-rw-r--r--net/ipv6/ip6_flowlabel.c4
-rw-r--r--net/ipv6/ip6_gre.c77
-rw-r--r--net/ipv6/ip6_input.c44
-rw-r--r--net/ipv6/ip6_offload.c63
-rw-r--r--net/ipv6/ip6_output.c191
-rw-r--r--net/ipv6/ip6_tunnel.c18
-rw-r--r--net/ipv6/ip6mr.c55
-rw-r--r--net/ipv6/ipv6_sockglue.c6
-rw-r--r--net/ipv6/mcast.c42
-rw-r--r--net/ipv6/ndisc.c85
-rw-r--r--net/ipv6/netfilter.c14
-rw-r--r--net/ipv6/netfilter/Kconfig4
-rw-r--r--net/ipv6/netfilter/Makefile3
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c1
-rw-r--r--net/ipv6/netfilter/nf_flow_table_ipv6.c0
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c4
-rw-r--r--net/ipv6/netfilter/nft_dup_ipv6.c1
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c6
-rw-r--r--net/ipv6/netfilter/nft_reject_ipv6.c1
-rw-r--r--net/ipv6/ping.c31
-rw-r--r--net/ipv6/raw.c6
-rw-r--r--net/ipv6/reassembly.c1
-rw-r--r--net/ipv6/route.c111
-rw-r--r--net/ipv6/seg6_hmac.c2
-rw-r--r--net/ipv6/seg6_iptunnel.c5
-rw-r--r--net/ipv6/seg6_local.c3
-rw-r--r--net/ipv6/sit.c10
-rw-r--r--net/ipv6/syncookies.c3
-rw-r--r--net/ipv6/sysctl_net_ipv6.c6
-rw-r--r--net/ipv6/tcp_ipv6.c104
-rw-r--r--net/ipv6/udp.c135
-rw-r--r--net/ipv6/udp_impl.h4
-rw-r--r--net/ipv6/xfrm6_output.c16
-rw-r--r--net/ipv6/xfrm6_policy.c4
-rw-r--r--net/iucv/af_iucv.c3
-rw-r--r--net/iucv/iucv.c2
-rw-r--r--net/key/af_key.c18
-rw-r--r--net/l2tp/l2tp_ip.c8
-rw-r--r--net/l2tp/l2tp_ip6.c17
-rw-r--r--net/l2tp/l2tp_ppp.c3
-rw-r--r--net/l3mdev/l3mdev.c45
-rw-r--r--net/llc/af_llc.c51
-rw-r--r--net/mac80211/Makefile3
-rw-r--r--net/mac80211/agg-rx.c26
-rw-r--r--net/mac80211/agg-tx.c16
-rw-r--r--net/mac80211/airtime.c17
-rw-r--r--net/mac80211/cfg.c164
-rw-r--r--net/mac80211/chan.c20
-rw-r--r--net/mac80211/debugfs.c3
-rw-r--r--net/mac80211/debugfs_key.c2
-rw-r--r--net/mac80211/debugfs_netdev.c6
-rw-r--r--net/mac80211/debugfs_sta.c14
-rw-r--r--net/mac80211/eht.c76
-rw-r--r--net/mac80211/ethtool.c4
-rw-r--r--net/mac80211/he.c8
-rw-r--r--net/mac80211/ht.c8
-rw-r--r--net/mac80211/ibss.c26
-rw-r--r--net/mac80211/ieee80211_i.h52
-rw-r--r--net/mac80211/iface.c2
-rw-r--r--net/mac80211/key.c9
-rw-r--r--net/mac80211/main.c18
-rw-r--r--net/mac80211/mesh.c9
-rw-r--r--net/mac80211/mesh_hwmp.c2
-rw-r--r--net/mac80211/mesh_plink.c24
-rw-r--r--net/mac80211/mlme.c551
-rw-r--r--net/mac80211/ocb.c2
-rw-r--r--net/mac80211/offchannel.c2
-rw-r--r--net/mac80211/rate.c8
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c179
-rw-r--r--net/mac80211/rc80211_minstrel_ht.h2
-rw-r--r--net/mac80211/rx.c151
-rw-r--r--net/mac80211/s1g.c4
-rw-r--r--net/mac80211/scan.c20
-rw-r--r--net/mac80211/sta_info.c113
-rw-r--r--net/mac80211/sta_info.h155
-rw-r--r--net/mac80211/status.c144
-rw-r--r--net/mac80211/tdls.c26
-rw-r--r--net/mac80211/trace.h4
-rw-r--r--net/mac80211/tx.c88
-rw-r--r--net/mac80211/util.c342
-rw-r--r--net/mac80211/vht.c110
-rw-r--r--net/mac80211/wme.c4
-rw-r--r--net/mac80211/wpa.c103
-rw-r--r--net/mac802154/cfg.c1
-rw-r--r--net/mac802154/ieee802154_i.h2
-rw-r--r--net/mac802154/main.c54
-rw-r--r--net/mac802154/util.c22
-rw-r--r--net/mctp/af_mctp.c239
-rw-r--r--net/mctp/device.c36
-rw-r--r--net/mctp/neigh.c2
-rw-r--r--net/mctp/route.c176
-rw-r--r--net/mctp/test/route-test.c169
-rw-r--r--net/mctp/test/utils.c1
-rw-r--r--net/mpls/af_mpls.c5
-rw-r--r--net/mptcp/Makefile4
-rw-r--r--net/mptcp/bpf.c21
-rw-r--r--net/mptcp/ctrl.c21
-rw-r--r--net/mptcp/mib.c11
-rw-r--r--net/mptcp/mib.h13
-rw-r--r--net/mptcp/mptcp_diag.c105
-rw-r--r--net/mptcp/options.c187
-rw-r--r--net/mptcp/pm.c128
-rw-r--r--net/mptcp/pm_netlink.c544
-rw-r--r--net/mptcp/pm_userspace.c454
-rw-r--r--net/mptcp/protocol.c189
-rw-r--r--net/mptcp/protocol.h187
-rw-r--r--net/mptcp/sockopt.c23
-rw-r--r--net/mptcp/subflow.c290
-rw-r--r--net/ncsi/ncsi-manage.c7
-rw-r--r--net/netfilter/Makefile5
-rw-r--r--net/netfilter/core.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c6
-rw-r--r--net/netfilter/nf_conncount.c11
-rw-r--r--net/netfilter/nf_conntrack_acct.c19
-rw-r--r--net/netfilter/nf_conntrack_bpf.c266
-rw-r--r--net/netfilter/nf_conntrack_core.c524
-rw-r--r--net/netfilter/nf_conntrack_ecache.c225
-rw-r--r--net/netfilter/nf_conntrack_extend.c148
-rw-r--r--net/netfilter/nf_conntrack_helper.c26
-rw-r--r--net/netfilter/nf_conntrack_labels.c20
-rw-r--r--net/netfilter/nf_conntrack_netbios_ns.c5
-rw-r--r--net/netfilter/nf_conntrack_netlink.c174
-rw-r--r--net/netfilter/nf_conntrack_pptp.c60
-rw-r--r--net/netfilter/nf_conntrack_proto.c10
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c9
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c137
-rw-r--r--net/netfilter/nf_conntrack_seqadj.c16
-rw-r--r--net/netfilter/nf_conntrack_standalone.c7
-rw-r--r--net/netfilter/nf_conntrack_timeout.c57
-rw-r--r--net/netfilter/nf_conntrack_timestamp.c20
-rw-r--r--net/netfilter/nf_dup_netdev.c27
-rw-r--r--net/netfilter/nf_flow_table_core.c90
-rw-r--r--net/netfilter/nf_flow_table_inet.c17
-rw-r--r--net/netfilter/nf_flow_table_ip.c103
-rw-r--r--net/netfilter/nf_flow_table_offload.c39
-rw-r--r--net/netfilter/nf_log_syslog.c144
-rw-r--r--net/netfilter/nf_nat_core.c71
-rw-r--r--net/netfilter/nf_nat_masquerade.c5
-rw-r--r--net/netfilter/nf_queue.c36
-rw-r--r--net/netfilter/nf_synproxy_core.c24
-rw-r--r--net/netfilter/nf_tables_api.c424
-rw-r--r--net/netfilter/nf_tables_core.c42
-rw-r--r--net/netfilter/nf_tables_offload.c26
-rw-r--r--net/netfilter/nf_tables_trace.c44
-rw-r--r--net/netfilter/nfnetlink.c26
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c73
-rw-r--r--net/netfilter/nfnetlink_log.c6
-rw-r--r--net/netfilter/nfnetlink_queue.c33
-rw-r--r--net/netfilter/nft_bitwise.c41
-rw-r--r--net/netfilter/nft_byteorder.c11
-rw-r--r--net/netfilter/nft_cmp.c105
-rw-r--r--net/netfilter/nft_compat.c10
-rw-r--r--net/netfilter/nft_connlimit.c16
-rw-r--r--net/netfilter/nft_counter.c3
-rw-r--r--net/netfilter/nft_ct.c56
-rw-r--r--net/netfilter/nft_dup_netdev.c7
-rw-r--r--net/netfilter/nft_dynset.c1
-rw-r--r--net/netfilter/nft_exthdr.c131
-rw-r--r--net/netfilter/nft_fib.c46
-rw-r--r--net/netfilter/nft_fib_inet.c1
-rw-r--r--net/netfilter/nft_fib_netdev.c1
-rw-r--r--net/netfilter/nft_flow_offload.c52
-rw-r--r--net/netfilter/nft_fwd_netdev.c10
-rw-r--r--net/netfilter/nft_hash.c36
-rw-r--r--net/netfilter/nft_immediate.c24
-rw-r--r--net/netfilter/nft_last.c5
-rw-r--r--net/netfilter/nft_limit.c26
-rw-r--r--net/netfilter/nft_log.c1
-rw-r--r--net/netfilter/nft_lookup.c12
-rw-r--r--net/netfilter/nft_masq.c3
-rw-r--r--net/netfilter/nft_meta.c32
-rw-r--r--net/netfilter/nft_nat.c5
-rw-r--r--net/netfilter/nft_numgen.c34
-rw-r--r--net/netfilter/nft_objref.c2
-rw-r--r--net/netfilter/nft_osf.c25
-rw-r--r--net/netfilter/nft_payload.c21
-rw-r--r--net/netfilter/nft_queue.c2
-rw-r--r--net/netfilter/nft_quota.c5
-rw-r--r--net/netfilter/nft_range.c1
-rw-r--r--net/netfilter/nft_redir.c3
-rw-r--r--net/netfilter/nft_reject_inet.c1
-rw-r--r--net/netfilter/nft_reject_netdev.c1
-rw-r--r--net/netfilter/nft_rt.c1
-rw-r--r--net/netfilter/nft_set_hash.c2
-rw-r--r--net/netfilter/nft_set_pipapo.c48
-rw-r--r--net/netfilter/nft_set_rbtree.c6
-rw-r--r--net/netfilter/nft_socket.c87
-rw-r--r--net/netfilter/nft_synproxy.c5
-rw-r--r--net/netfilter/nft_tproxy.c1
-rw-r--r--net/netfilter/nft_tunnel.c28
-rw-r--r--net/netfilter/nft_xfrm.c28
-rw-r--r--net/netfilter/x_tables.c10
-rw-r--r--net/netfilter/xt_hashlimit.c18
-rw-r--r--net/netfilter/xt_recent.c4
-rw-r--r--net/netfilter/xt_socket.c4
-rw-r--r--net/netlabel/netlabel_kapi.c2
-rw-r--r--net/netlink/af_netlink.c13
-rw-r--r--net/netrom/af_netrom.c3
-rw-r--r--net/nfc/core.c34
-rw-r--r--net/nfc/llcp.h1
-rw-r--r--net/nfc/llcp_core.c9
-rw-r--r--net/nfc/llcp_sock.c57
-rw-r--r--net/nfc/nci/core.c4
-rw-r--r--net/nfc/nci/data.c2
-rw-r--r--net/nfc/nci/hci.c4
-rw-r--r--net/nfc/nci/uart.c5
-rw-r--r--net/nfc/netlink.c4
-rw-r--r--net/nfc/rawsock.c3
-rw-r--r--net/openvswitch/actions.c58
-rw-r--r--net/openvswitch/conntrack.c122
-rw-r--r--net/openvswitch/datapath.c18
-rw-r--r--net/openvswitch/datapath.h2
-rw-r--r--net/openvswitch/flow.c145
-rw-r--r--net/openvswitch/flow.h14
-rw-r--r--net/openvswitch/flow_netlink.c142
-rw-r--r--net/openvswitch/vport.c2
-rw-r--r--net/packet/af_packet.c62
-rw-r--r--net/phonet/af_phonet.c8
-rw-r--r--net/phonet/datagram.c4
-rw-r--r--net/phonet/pep.c7
-rw-r--r--net/qrtr/af_qrtr.c3
-rw-r--r--net/qrtr/mhi.c2
-rw-r--r--net/rds/ib.c4
-rw-r--r--net/rds/tcp.c18
-rw-r--r--net/rds/tcp.h2
-rw-r--r--net/rds/tcp_connect.c5
-rw-r--r--net/rds/tcp_listen.c5
-rw-r--r--net/rfkill/core.c48
-rw-r--r--net/rose/af_rose.c3
-rw-r--r--net/rose/rose_route.c29
-rw-r--r--net/rose/rose_timer.c34
-rw-r--r--net/rxrpc/af_rxrpc.c2
-rw-r--r--net/rxrpc/ar-internal.h53
-rw-r--r--net/rxrpc/call_accept.c10
-rw-r--r--net/rxrpc/call_event.c17
-rw-r--r--net/rxrpc/call_object.c96
-rw-r--r--net/rxrpc/conn_client.c30
-rw-r--r--net/rxrpc/conn_object.c51
-rw-r--r--net/rxrpc/conn_service.c8
-rw-r--r--net/rxrpc/input.c62
-rw-r--r--net/rxrpc/local_object.c71
-rw-r--r--net/rxrpc/net_ns.c9
-rw-r--r--net/rxrpc/output.c22
-rw-r--r--net/rxrpc/peer_object.c40
-rw-r--r--net/rxrpc/proc.c85
-rw-r--r--net/rxrpc/recvmsg.c8
-rw-r--r--net/rxrpc/sendmsg.c6
-rw-r--r--net/rxrpc/server_key.c7
-rw-r--r--net/rxrpc/skbuff.c1
-rw-r--r--net/rxrpc/sysctl.c4
-rw-r--r--net/sched/act_api.c43
-rw-r--r--net/sched/act_bpf.c2
-rw-r--r--net/sched/act_csum.c3
-rw-r--r--net/sched/act_ct.c162
-rw-r--r--net/sched/act_gact.c13
-rw-r--r--net/sched/act_gate.c3
-rw-r--r--net/sched/act_mirred.c4
-rw-r--r--net/sched/act_mpls.c10
-rw-r--r--net/sched/act_pedit.c34
-rw-r--r--net/sched/act_police.c76
-rw-r--r--net/sched/act_sample.c3
-rw-r--r--net/sched/act_skbedit.c65
-rw-r--r--net/sched/act_tunnel_key.c4
-rw-r--r--net/sched/act_vlan.c17
-rw-r--r--net/sched/cls_api.c84
-rw-r--r--net/sched/cls_bpf.c2
-rw-r--r--net/sched/cls_flower.c230
-rw-r--r--net/sched/cls_matchall.c19
-rw-r--r--net/sched/cls_u32.c24
-rw-r--r--net/sched/em_meta.c7
-rw-r--r--net/sched/sch_api.c26
-rw-r--r--net/sched/sch_generic.c42
-rw-r--r--net/sched/sch_htb.c20
-rw-r--r--net/sched/sch_netem.c4
-rw-r--r--net/sched/sch_taprio.c3
-rw-r--r--net/sctp/diag.c9
-rw-r--r--net/sctp/input.c4
-rw-r--r--net/sctp/ipv6.c4
-rw-r--r--net/sctp/output.c3
-rw-r--r--net/sctp/outqueue.c6
-rw-r--r--net/sctp/sm_sideeffect.c4
-rw-r--r--net/sctp/sm_statefuns.c12
-rw-r--r--net/sctp/socket.c20
-rw-r--r--net/sctp/stream_sched.c9
-rw-r--r--net/sctp/ulpevent.c2
-rw-r--r--net/smc/Makefile1
-rw-r--r--net/smc/af_smc.c573
-rw-r--r--net/smc/smc.h67
-rw-r--r--net/smc/smc_cdc.c29
-rw-r--r--net/smc/smc_clc.c8
-rw-r--r--net/smc/smc_close.c8
-rw-r--r--net/smc/smc_core.c144
-rw-r--r--net/smc/smc_core.h12
-rw-r--r--net/smc/smc_diag.c8
-rw-r--r--net/smc/smc_ib.c1
-rw-r--r--net/smc/smc_netlink.c15
-rw-r--r--net/smc/smc_pnet.c57
-rw-r--r--net/smc/smc_pnet.h2
-rw-r--r--net/smc/smc_rx.c4
-rw-r--r--net/smc/smc_sysctl.c70
-rw-r--r--net/smc/smc_sysctl.h33
-rw-r--r--net/smc/smc_tx.c171
-rw-r--r--net/smc/smc_tx.h3
-rw-r--r--net/smc/smc_wr.c5
-rw-r--r--net/smc/smc_wr.h4
-rw-r--r--net/socket.c149
-rw-r--r--net/sunrpc/auth.c8
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c36
-rw-r--r--net/sunrpc/auth_gss/auth_gss_internal.h2
-rw-r--r--net/sunrpc/auth_gss/gss_generic_token.c6
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c10
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seqnum.c4
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c4
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_upcall.c1
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c8
-rw-r--r--net/sunrpc/auth_unix.c16
-rw-r--r--net/sunrpc/backchannel_rqst.c8
-rw-r--r--net/sunrpc/cache.c42
-rw-r--r--net/sunrpc/clnt.c77
-rw-r--r--net/sunrpc/debugfs.c3
-rw-r--r--net/sunrpc/fail.h2
-rw-r--r--net/sunrpc/rpc_pipe.c6
-rw-r--r--net/sunrpc/rpcb_clnt.c4
-rw-r--r--net/sunrpc/sched.c61
-rw-r--r--net/sunrpc/socklib.c7
-rw-r--r--net/sunrpc/stats.c2
-rw-r--r--net/sunrpc/svc.c249
-rw-r--r--net/sunrpc/svc_xprt.c103
-rw-r--r--net/sunrpc/svcauth.c2
-rw-r--r--net/sunrpc/svcauth_unix.c60
-rw-r--r--net/sunrpc/svcsock.c34
-rw-r--r--net/sunrpc/sysfs.c120
-rw-r--r--net/sunrpc/xdr.c37
-rw-r--r--net/sunrpc/xprt.c57
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c4
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c8
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c9
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c2
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c1
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c4
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c2
-rw-r--r--net/sunrpc/xprtrdma/transport.c14
-rw-r--r--net/sunrpc/xprtrdma/verbs.c30
-rw-r--r--net/sunrpc/xprtsock.c282
-rw-r--r--net/switchdev/switchdev.c232
-rw-r--r--net/tipc/bearer.c17
-rw-r--r--net/tipc/core.c3
-rw-r--r--net/tipc/crypto.c2
-rw-r--r--net/tipc/link.c14
-rw-r--r--net/tipc/monitor.c2
-rw-r--r--net/tipc/msg.h23
-rw-r--r--net/tipc/name_distr.c2
-rw-r--r--net/tipc/name_table.c2
-rw-r--r--net/tipc/node.c54
-rw-r--r--net/tipc/socket.c6
-rw-r--r--net/tls/tls_device.c136
-rw-r--r--net/tls/tls_main.c79
-rw-r--r--net/tls/tls_sw.c497
-rw-r--r--net/unix/af_unix.c293
-rw-r--r--net/unix/garbage.c14
-rw-r--r--net/unix/scm.c6
-rw-r--r--net/unix/unix_bpf.c5
-rw-r--r--net/vmw_vsock/af_vsock.c10
-rw-r--r--net/vmw_vsock/hyperv_transport.c21
-rw-r--r--net/vmw_vsock/virtio_transport.c207
-rw-r--r--net/vmw_vsock/vmci_transport.c10
-rw-r--r--net/wireless/Makefile4
-rw-r--r--net/wireless/chan.c184
-rw-r--r--net/wireless/core.c17
-rw-r--r--net/wireless/core.h14
-rw-r--r--net/wireless/ibss.c4
-rw-r--r--net/wireless/nl80211.c590
-rw-r--r--net/wireless/pmsr.c4
-rw-r--r--net/wireless/reg.c10
-rw-r--r--net/wireless/scan.c13
-rw-r--r--net/wireless/sme.c3
-rw-r--r--net/wireless/util.c141
-rw-r--r--net/x25/af_x25.c14
-rw-r--r--net/x25/x25_proc.c3
-rw-r--r--net/xdp/xsk.c120
-rw-r--r--net/xdp/xsk_buff_pool.c24
-rw-r--r--net/xdp/xsk_queue.h25
-rw-r--r--net/xdp/xskmap.c6
-rw-r--r--net/xfrm/espintcp.c4
-rw-r--r--net/xfrm/xfrm_device.c33
-rw-r--r--net/xfrm/xfrm_interface.c9
-rw-r--r--net/xfrm/xfrm_output.c3
-rw-r--r--net/xfrm/xfrm_policy.c33
-rw-r--r--net/xfrm/xfrm_state.c33
-rw-r--r--net/xfrm/xfrm_user.c75
663 files changed, 24705 insertions, 10522 deletions
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index a068757eabaf..7b3341cef926 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -5,6 +5,7 @@
* (C) 2015 Pengutronix, Alexander Aring <aar@pengutronix.de>
*/
+#include <linux/if_arp.h>
#include <linux/module.h>
#include <net/6lowpan.h>
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 788076b002b3..e40aa3e3641c 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -319,8 +319,7 @@ static void vlan_transfer_features(struct net_device *dev,
{
struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
- netif_set_gso_max_size(vlandev, dev->gso_max_size);
- netif_set_gso_max_segs(vlandev, dev->gso_max_segs);
+ netif_inherit_tso_max(vlandev, dev);
if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto))
vlandev->hard_header_len = dev->hard_header_len;
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 1a705a4ef7fa..5eaf38875554 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -129,6 +129,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev,
u32 skb_prio, u16 vlan_prio);
int vlan_dev_set_egress_priority(const struct net_device *dev,
u32 skb_prio, u16 vlan_prio);
+void vlan_dev_free_egress_priority(const struct net_device *dev);
int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask);
void vlan_dev_get_realdev_name(const struct net_device *dev, char *result,
size_t size);
@@ -139,7 +140,6 @@ int vlan_check_real_dev(struct net_device *real_dev,
void vlan_setup(struct net_device *dev);
int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack);
void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
-void vlan_dev_uninit(struct net_device *dev);
bool vlan_dev_inherit_address(struct net_device *dev,
struct net_device *real_dev);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 26d031a43cc1..839f2020b015 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -573,8 +573,7 @@ static int vlan_dev_init(struct net_device *dev)
NETIF_F_ALL_FCOE;
dev->features |= dev->hw_features | NETIF_F_LLTX;
- netif_set_gso_max_size(dev, real_dev->gso_max_size);
- netif_set_gso_max_segs(dev, real_dev->gso_max_segs);
+ netif_inherit_tso_max(dev, real_dev);
if (dev->features & NETIF_F_VLAN_FEATURES)
netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n");
@@ -622,7 +621,7 @@ static int vlan_dev_init(struct net_device *dev)
}
/* Note: this function might be called multiple times for the same device. */
-void vlan_dev_uninit(struct net_device *dev)
+void vlan_dev_free_egress_priority(const struct net_device *dev)
{
struct vlan_priority_tci_mapping *pm;
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
@@ -636,6 +635,11 @@ void vlan_dev_uninit(struct net_device *dev)
}
}
+static void vlan_dev_uninit(struct net_device *dev)
+{
+ vlan_dev_free_egress_priority(dev);
+}
+
static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
netdev_features_t features)
{
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 0db85aeb119b..214532173536 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -182,11 +182,16 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
else if (dev->mtu > max_mtu)
return -EINVAL;
+ /* Note: If this initial vlan_changelink() fails, we need
+ * to call vlan_dev_free_egress_priority() to free memory.
+ */
err = vlan_changelink(dev, tb, data, extack);
+
if (!err)
err = register_vlan_dev(dev, extack);
+
if (err)
- vlan_dev_uninit(dev);
+ vlan_dev_free_egress_priority(dev);
return err;
}
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 08bf6c839e25..7825c129742a 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -280,7 +280,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
const struct vlan_priority_tci_mapping *mp
= vlan->egress_priority_map[i];
while (mp) {
- seq_printf(seq, "%u:%hu ",
+ seq_printf(seq, "%u:%d ",
mp->priority, ((mp->vlan_qos >> 13) & 0x7));
mp = mp->next;
}
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index 64468c49791f..deabbd376cb1 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -15,6 +15,13 @@ menuconfig NET_9P
if NET_9P
+config NET_9P_FD
+ default NET_9P
+ tristate "9P FD Transport"
+ help
+ This builds support for transports over TCP, Unix sockets and
+ filedescriptors.
+
config NET_9P_VIRTIO
depends on VIRTIO
tristate "9P Virtio Transport"
diff --git a/net/9p/Makefile b/net/9p/Makefile
index aa0a5641e5d0..1df9b344c30b 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_NET_9P) := 9pnet.o
+obj-$(CONFIG_NET_9P_FD) += 9pnet_fd.o
obj-$(CONFIG_NET_9P_XEN) += 9pnet_xen.o
obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
@@ -9,9 +10,11 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
client.o \
error.o \
protocol.o \
- trans_fd.o \
trans_common.o \
+9pnet_fd-objs := \
+ trans_fd.o \
+
9pnet_virtio-objs := \
trans_virtio.o \
diff --git a/net/9p/client.c b/net/9p/client.c
index d062f1e5bfb0..8bba0d9cf975 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1038,8 +1038,13 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
if (err)
goto put_trans;
- if (clnt->msize > clnt->trans_mod->maxsize)
+ if (clnt->msize > clnt->trans_mod->maxsize) {
clnt->msize = clnt->trans_mod->maxsize;
+ pr_info("Limiting 'msize' to %d as this is the maximum "
+ "supported by transport %s\n",
+ clnt->msize, clnt->trans_mod->name
+ );
+ }
if (clnt->msize < 4096) {
p9_debug(P9_DEBUG_ERROR,
diff --git a/net/9p/mod.c b/net/9p/mod.c
index c37fc201a944..55576c1866fa 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -83,7 +83,7 @@ void v9fs_unregister_trans(struct p9_trans_module *m)
}
EXPORT_SYMBOL(v9fs_unregister_trans);
-static struct p9_trans_module *_p9_get_trans_by_name(char *s)
+static struct p9_trans_module *_p9_get_trans_by_name(const char *s)
{
struct p9_trans_module *t, *found = NULL;
@@ -106,7 +106,7 @@ static struct p9_trans_module *_p9_get_trans_by_name(char *s)
* @s: string identifying transport
*
*/
-struct p9_trans_module *v9fs_get_trans_by_name(char *s)
+struct p9_trans_module *v9fs_get_trans_by_name(const char *s)
{
struct p9_trans_module *found = NULL;
@@ -123,6 +123,10 @@ struct p9_trans_module *v9fs_get_trans_by_name(char *s)
}
EXPORT_SYMBOL(v9fs_get_trans_by_name);
+static const char * const v9fs_default_transports[] = {
+ "virtio", "tcp", "fd", "unix", "xen", "rdma",
+};
+
/**
* v9fs_get_default_trans - get the default transport
*
@@ -131,6 +135,7 @@ EXPORT_SYMBOL(v9fs_get_trans_by_name);
struct p9_trans_module *v9fs_get_default_trans(void)
{
struct p9_trans_module *t, *found = NULL;
+ int i;
spin_lock(&v9fs_trans_lock);
@@ -148,6 +153,10 @@ struct p9_trans_module *v9fs_get_default_trans(void)
}
spin_unlock(&v9fs_trans_lock);
+
+ for (i = 0; !found && i < ARRAY_SIZE(v9fs_default_transports); i++)
+ found = v9fs_get_trans_by_name(v9fs_default_transports[i]);
+
return found;
}
EXPORT_SYMBOL(v9fs_get_default_trans);
@@ -177,7 +186,6 @@ static int __init init_p9(void)
p9_error_init();
pr_info("Installing 9P2000 support\n");
- p9_trans_fd_init();
return ret;
}
@@ -191,7 +199,6 @@ static void __exit exit_p9(void)
{
pr_info("Unloading 9P2000 support\n");
- p9_trans_fd_exit();
p9_client_exit();
}
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 827c47620fc0..8f8f95e39b03 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1090,6 +1090,7 @@ static struct p9_trans_module p9_tcp_trans = {
.show_options = p9_fd_show_options,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_9P("tcp");
static struct p9_trans_module p9_unix_trans = {
.name = "unix",
@@ -1103,6 +1104,7 @@ static struct p9_trans_module p9_unix_trans = {
.show_options = p9_fd_show_options,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_9P("unix");
static struct p9_trans_module p9_fd_trans = {
.name = "fd",
@@ -1116,6 +1118,7 @@ static struct p9_trans_module p9_fd_trans = {
.show_options = p9_fd_show_options,
.owner = THIS_MODULE,
};
+MODULE_ALIAS_9P("fd");
/**
* p9_poll_workfn - poll worker thread
@@ -1149,7 +1152,7 @@ static void p9_poll_workfn(struct work_struct *work)
p9_debug(P9_DEBUG_TRANS, "finish\n");
}
-int p9_trans_fd_init(void)
+static int __init p9_trans_fd_init(void)
{
v9fs_register_trans(&p9_tcp_trans);
v9fs_register_trans(&p9_unix_trans);
@@ -1158,10 +1161,17 @@ int p9_trans_fd_init(void)
return 0;
}
-void p9_trans_fd_exit(void)
+static void __exit p9_trans_fd_exit(void)
{
flush_work(&p9_poll_work);
v9fs_unregister_trans(&p9_tcp_trans);
v9fs_unregister_trans(&p9_unix_trans);
v9fs_unregister_trans(&p9_fd_trans);
}
+
+module_init(p9_trans_fd_init);
+module_exit(p9_trans_fd_exit);
+
+MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
+MODULE_DESCRIPTION("Filedescriptor Transport for 9P");
+MODULE_LICENSE("GPL");
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index bd5a89c4960d..b24a4fb0f0a2 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -648,7 +648,7 @@ fail:
* @args: args passed from sys_mount() for per-transport options (unused)
*
* This sets up a transport channel for 9p communication. Right now
- * we only match the first available channel, but eventually we couldlook up
+ * we only match the first available channel, but eventually we could look up
* alternate channels by matching devname versus a virtio_config entry.
* We use a simple reference count mechanism to ensure that only a single
* mount has a channel open at a time.
@@ -721,7 +721,7 @@ static void p9_virtio_remove(struct virtio_device *vdev)
mutex_unlock(&virtio_9p_lock);
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
vdev->config->del_vqs(vdev);
sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 2418fa0b58f3..833cd3792c51 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -279,13 +279,13 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
grant_ref_t ref;
ref = priv->rings[i].intf->ref[j];
- gnttab_end_foreign_access(ref, 0, 0);
+ gnttab_end_foreign_access(ref, NULL);
}
- free_pages((unsigned long)priv->rings[i].data.in,
- priv->rings[i].intf->ring_order -
- (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ free_pages_exact(priv->rings[i].data.in,
+ 1UL << (priv->rings[i].intf->ring_order +
+ XEN_PAGE_SHIFT));
}
- gnttab_end_foreign_access(priv->rings[i].ref, 0, 0);
+ gnttab_end_foreign_access(priv->rings[i].ref, NULL);
free_page((unsigned long)priv->rings[i].intf);
}
kfree(priv->rings);
@@ -322,8 +322,8 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
if (ret < 0)
goto out;
ring->ref = ret;
- bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- order - (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ bytes = alloc_pages_exact(1UL << (order + XEN_PAGE_SHIFT),
+ GFP_KERNEL | __GFP_ZERO);
if (!bytes) {
ret = -ENOMEM;
goto out;
@@ -353,12 +353,10 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
out:
if (bytes) {
for (i--; i >= 0; i--)
- gnttab_end_foreign_access(ring->intf->ref[i], 0, 0);
- free_pages((unsigned long)bytes,
- ring->intf->ring_order -
- (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ gnttab_end_foreign_access(ring->intf->ref[i], NULL);
+ free_pages_exact(bytes, 1UL << (order + XEN_PAGE_SHIFT));
}
- gnttab_end_foreign_access(ring->ref, 0, 0);
+ gnttab_end_foreign_access(ring->ref, NULL);
free_page((unsigned long)ring->intf);
return ret;
}
@@ -538,6 +536,7 @@ static void p9_trans_xen_exit(void)
}
module_exit(p9_trans_xen_exit);
+MODULE_ALIAS("xen:9pfs");
MODULE_AUTHOR("Stefano Stabellini <stefano@aporeto.com>");
MODULE_DESCRIPTION("Xen Transport for 9P");
MODULE_LICENSE("GPL");
diff --git a/net/Kconfig b/net/Kconfig
index 8a1f9d0287de..6b78f695caa6 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -434,6 +434,19 @@ config NET_DEVLINK
config PAGE_POOL
bool
+config PAGE_POOL_STATS
+ default n
+ bool "Page pool stats"
+ depends on PAGE_POOL
+ help
+ Enable page pool statistics to track page allocation and recycling
+ in page pools. This option incurs additional CPU cost in allocation
+ and recycle paths and additional memory cost to store the statistics.
+ These statistics are only available if this option is enabled and if
+ the driver using the page pool supports exporting this data.
+
+ If unsure, say N.
+
config FAILOVER
tristate "Generic failover module"
help
diff --git a/net/Kconfig.debug b/net/Kconfig.debug
index 2f50611df858..e6ae11cc2fb7 100644
--- a/net/Kconfig.debug
+++ b/net/Kconfig.debug
@@ -17,3 +17,10 @@ config NET_NS_REFCNT_TRACKER
help
Enable debugging feature to track netns references.
This adds memory and cpu costs.
+
+config DEBUG_NET
+ bool "Add generic networking debug"
+ depends on DEBUG_KERNEL && NET
+ help
+ Enable extra sanity checks in networking.
+ This is mostly used by fuzzers, but is safe to select.
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index bf5736c1d458..a06f4d4a6f47 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1753,8 +1753,7 @@ static int atalk_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int err = 0;
struct sk_buff *skb;
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
lock_sock(sk);
if (!skb)
diff --git a/net/atm/common.c b/net/atm/common.c
index 1cfa9bf1d187..f7019df41c3e 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -540,7 +540,7 @@ int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
!test_bit(ATM_VF_READY, &vcc->flags))
return 0;
- skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &error);
+ skb = skb_recv_datagram(sk, flags, &error);
if (!skb)
return error;
@@ -553,7 +553,7 @@ int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
error = skb_copy_datagram_msg(skb, 0, msg, copied);
if (error)
return error;
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (!(flags & MSG_PEEK)) {
pr_debug("%d -= %d\n", atomic_read(&sk->sk_rmem_alloc),
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 4369ffa3302a..9bf736290e48 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -108,7 +108,7 @@ out:
static inline void *vcc_walk(struct seq_file *seq, loff_t l)
{
struct vcc_state *state = seq->private;
- int family = (uintptr_t)(PDE_DATA(file_inode(seq->file)));
+ int family = (uintptr_t)(pde_data(file_inode(seq->file)));
return __vcc_walk(&state->sk, family, &state->bucket, l) ?
state : NULL;
@@ -324,7 +324,7 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf,
page = get_zeroed_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
- dev = PDE_DATA(file_inode(file));
+ dev = pde_data(file_inode(file));
if (!dev->ops->proc_read)
length = -EINVAL;
else {
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 02f43f3e2c56..4c7030ed8d33 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -62,12 +62,12 @@ static void ax25_free_sock(struct sock *sk)
*/
static void ax25_cb_del(ax25_cb *ax25)
{
+ spin_lock_bh(&ax25_list_lock);
if (!hlist_unhashed(&ax25->ax25_node)) {
- spin_lock_bh(&ax25_list_lock);
hlist_del_init(&ax25->ax25_node);
- spin_unlock_bh(&ax25_list_lock);
ax25_cb_put(ax25);
}
+ spin_unlock_bh(&ax25_list_lock);
}
/*
@@ -77,21 +77,38 @@ static void ax25_kill_by_device(struct net_device *dev)
{
ax25_dev *ax25_dev;
ax25_cb *s;
+ struct sock *sk;
if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
return;
+ ax25_dev->device_up = false;
spin_lock_bh(&ax25_list_lock);
again:
ax25_for_each(s, &ax25_list) {
if (s->ax25_dev == ax25_dev) {
+ sk = s->sk;
+ if (!sk) {
+ spin_unlock_bh(&ax25_list_lock);
+ ax25_disconnect(s, ENETUNREACH);
+ s->ax25_dev = NULL;
+ ax25_cb_del(s);
+ spin_lock_bh(&ax25_list_lock);
+ goto again;
+ }
+ sock_hold(sk);
spin_unlock_bh(&ax25_list_lock);
- lock_sock(s->sk);
- s->ax25_dev = NULL;
- release_sock(s->sk);
+ lock_sock(sk);
ax25_disconnect(s, ENETUNREACH);
+ s->ax25_dev = NULL;
+ if (sk->sk_socket) {
+ dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker);
+ ax25_dev_put(ax25_dev);
+ }
+ ax25_cb_del(s);
+ release_sock(sk);
spin_lock_bh(&ax25_list_lock);
-
+ sock_put(sk);
/* The entry could have been deleted from the
* list meanwhile and thus the next pointer is
* no longer valid. Play it safe and restart
@@ -355,21 +372,25 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
if (copy_from_user(&ax25_ctl, arg, sizeof(ax25_ctl)))
return -EFAULT;
- if ((ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr)) == NULL)
- return -ENODEV;
-
if (ax25_ctl.digi_count > AX25_MAX_DIGIS)
return -EINVAL;
if (ax25_ctl.arg > ULONG_MAX / HZ && ax25_ctl.cmd != AX25_KILL)
return -EINVAL;
+ ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr);
+ if (!ax25_dev)
+ return -ENODEV;
+
digi.ndigi = ax25_ctl.digi_count;
for (k = 0; k < digi.ndigi; k++)
digi.calls[k] = ax25_ctl.digi_addr[k];
- if ((ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev)) == NULL)
+ ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev);
+ if (!ax25) {
+ ax25_dev_put(ax25_dev);
return -ENOTCONN;
+ }
switch (ax25_ctl.cmd) {
case AX25_KILL:
@@ -436,6 +457,7 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
}
out_put:
+ ax25_dev_put(ax25_dev);
ax25_cb_put(ax25);
return ret;
@@ -962,21 +984,25 @@ static int ax25_release(struct socket *sock)
{
struct sock *sk = sock->sk;
ax25_cb *ax25;
+ ax25_dev *ax25_dev;
if (sk == NULL)
return 0;
sock_hold(sk);
- sock_orphan(sk);
lock_sock(sk);
+ sock_orphan(sk);
ax25 = sk_to_ax25(sk);
+ ax25_dev = ax25->ax25_dev;
if (sk->sk_type == SOCK_SEQPACKET) {
switch (ax25->state) {
case AX25_STATE_0:
- release_sock(sk);
- ax25_disconnect(ax25, 0);
- lock_sock(sk);
+ if (!sock_flag(ax25->sk, SOCK_DEAD)) {
+ release_sock(sk);
+ ax25_disconnect(ax25, 0);
+ lock_sock(sk);
+ }
ax25_destroy_socket(ax25);
break;
@@ -1031,6 +1057,17 @@ static int ax25_release(struct socket *sock)
sk->sk_state_change(sk);
ax25_destroy_socket(ax25);
}
+ if (ax25_dev) {
+ if (!ax25_dev->device_up) {
+ del_timer_sync(&ax25->timer);
+ del_timer_sync(&ax25->t1timer);
+ del_timer_sync(&ax25->t2timer);
+ del_timer_sync(&ax25->t3timer);
+ del_timer_sync(&ax25->idletimer);
+ }
+ dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker);
+ ax25_dev_put(ax25_dev);
+ }
sock->sk = NULL;
release_sock(sk);
@@ -1107,8 +1144,10 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
}
}
- if (ax25_dev != NULL)
+ if (ax25_dev) {
ax25_fillin_cb(ax25, ax25_dev);
+ dev_hold_track(ax25_dev->dev, &ax25_dev->dev_tracker, GFP_ATOMIC);
+ }
done:
ax25_cb_add(ax25);
@@ -1622,9 +1661,12 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
struct sock *sk = sock->sk;
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
+ struct sk_buff_head *sk_queue;
int copied;
int err = 0;
+ int off = 0;
+ long timeo;
lock_sock(sk);
/*
@@ -1636,11 +1678,29 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
goto out;
}
- /* Now we can treat all alike */
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &err);
- if (skb == NULL)
- goto out;
+ /* We need support for non-blocking reads. */
+ sk_queue = &sk->sk_receive_queue;
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off, &err, &last);
+ /* If no packet is available, release_sock(sk) and try again. */
+ if (!skb) {
+ if (err != -EAGAIN)
+ goto out;
+ release_sock(sk);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ while (timeo && !__skb_wait_for_more_packets(sk, sk_queue, &err,
+ &timeo, last)) {
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off,
+ &err, &last);
+ if (skb)
+ break;
+
+ if (err != -EAGAIN)
+ goto done;
+ }
+ if (!skb)
+ goto done;
+ lock_sock(sk);
+ }
if (!sk_to_ax25(sk)->pidincl)
skb_pull(skb, 1); /* Remove PID */
@@ -1687,6 +1747,7 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
out:
release_sock(sk);
+done:
return err;
}
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index 256fadb94df3..95a76d571c44 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -37,6 +37,7 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr)
for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next)
if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) {
res = ax25_dev;
+ ax25_dev_hold(ax25_dev);
}
spin_unlock_bh(&ax25_dev_lock);
@@ -56,10 +57,12 @@ void ax25_dev_device_up(struct net_device *dev)
return;
}
+ refcount_set(&ax25_dev->refcount, 1);
dev->ax25_ptr = ax25_dev;
ax25_dev->dev = dev;
dev_hold_track(dev, &ax25_dev->dev_tracker, GFP_ATOMIC);
ax25_dev->forward = NULL;
+ ax25_dev->device_up = true;
ax25_dev->values[AX25_VALUES_IPDEFMODE] = AX25_DEF_IPDEFMODE;
ax25_dev->values[AX25_VALUES_AXDEFMODE] = AX25_DEF_AXDEFMODE;
@@ -84,6 +87,7 @@ void ax25_dev_device_up(struct net_device *dev)
ax25_dev->next = ax25_dev_list;
ax25_dev_list = ax25_dev;
spin_unlock_bh(&ax25_dev_lock);
+ ax25_dev_hold(ax25_dev);
ax25_register_dev_sysctl(ax25_dev);
}
@@ -112,27 +116,28 @@ void ax25_dev_device_down(struct net_device *dev)
if ((s = ax25_dev_list) == ax25_dev) {
ax25_dev_list = s->next;
- spin_unlock_bh(&ax25_dev_lock);
- dev->ax25_ptr = NULL;
- dev_put_track(dev, &ax25_dev->dev_tracker);
- kfree(ax25_dev);
- return;
+ goto unlock_put;
}
while (s != NULL && s->next != NULL) {
if (s->next == ax25_dev) {
s->next = ax25_dev->next;
- spin_unlock_bh(&ax25_dev_lock);
- dev->ax25_ptr = NULL;
- dev_put_track(dev, &ax25_dev->dev_tracker);
- kfree(ax25_dev);
- return;
+ goto unlock_put;
}
s = s->next;
}
spin_unlock_bh(&ax25_dev_lock);
dev->ax25_ptr = NULL;
+ ax25_dev_put(ax25_dev);
+ return;
+
+unlock_put:
+ spin_unlock_bh(&ax25_dev_lock);
+ ax25_dev_put(ax25_dev);
+ dev->ax25_ptr = NULL;
+ dev_put_track(dev, &ax25_dev->dev_tracker);
+ ax25_dev_put(ax25_dev);
}
int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd)
@@ -144,20 +149,32 @@ int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd)
switch (cmd) {
case SIOCAX25ADDFWD:
- if ((fwd_dev = ax25_addr_ax25dev(&fwd->port_to)) == NULL)
+ fwd_dev = ax25_addr_ax25dev(&fwd->port_to);
+ if (!fwd_dev) {
+ ax25_dev_put(ax25_dev);
return -EINVAL;
- if (ax25_dev->forward != NULL)
+ }
+ if (ax25_dev->forward) {
+ ax25_dev_put(fwd_dev);
+ ax25_dev_put(ax25_dev);
return -EINVAL;
+ }
ax25_dev->forward = fwd_dev->dev;
+ ax25_dev_put(fwd_dev);
+ ax25_dev_put(ax25_dev);
break;
case SIOCAX25DELFWD:
- if (ax25_dev->forward == NULL)
+ if (!ax25_dev->forward) {
+ ax25_dev_put(ax25_dev);
return -EINVAL;
+ }
ax25_dev->forward = NULL;
+ ax25_dev_put(ax25_dev);
break;
default:
+ ax25_dev_put(ax25_dev);
return -EINVAL;
}
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index d0b2e094bd55..b7c4d656a94b 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -75,11 +75,13 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
ax25_dev *ax25_dev;
int i;
- if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL)
- return -EINVAL;
if (route->digi_count > AX25_MAX_DIGIS)
return -EINVAL;
+ ax25_dev = ax25_addr_ax25dev(&route->port_addr);
+ if (!ax25_dev)
+ return -EINVAL;
+
write_lock_bh(&ax25_route_lock);
ax25_rt = ax25_route_list;
@@ -91,6 +93,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
if (route->digi_count != 0) {
if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return -ENOMEM;
}
ax25_rt->digipeat->lastrepeat = -1;
@@ -101,6 +104,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
}
}
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return 0;
}
ax25_rt = ax25_rt->next;
@@ -108,10 +112,10 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
if ((ax25_rt = kmalloc(sizeof(ax25_route), GFP_ATOMIC)) == NULL) {
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return -ENOMEM;
}
- refcount_set(&ax25_rt->refcount, 1);
ax25_rt->callsign = route->dest_addr;
ax25_rt->dev = ax25_dev->dev;
ax25_rt->digipeat = NULL;
@@ -120,6 +124,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
write_unlock_bh(&ax25_route_lock);
kfree(ax25_rt);
+ ax25_dev_put(ax25_dev);
return -ENOMEM;
}
ax25_rt->digipeat->lastrepeat = -1;
@@ -132,6 +137,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
ax25_rt->next = ax25_route_list;
ax25_route_list = ax25_rt;
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return 0;
}
@@ -160,12 +166,12 @@ static int ax25_rt_del(struct ax25_routes_struct *route)
ax25cmp(&route->dest_addr, &s->callsign) == 0) {
if (ax25_route_list == s) {
ax25_route_list = s->next;
- ax25_put_route(s);
+ __ax25_put_route(s);
} else {
for (t = ax25_route_list; t != NULL; t = t->next) {
if (t->next == s) {
t->next = s->next;
- ax25_put_route(s);
+ __ax25_put_route(s);
break;
}
}
@@ -173,6 +179,7 @@ static int ax25_rt_del(struct ax25_routes_struct *route)
}
}
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return 0;
}
@@ -215,6 +222,7 @@ static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option)
out:
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return err;
}
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 15ab812c4fe4..9ff98f46dc6b 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -261,12 +261,20 @@ void ax25_disconnect(ax25_cb *ax25, int reason)
{
ax25_clear_queues(ax25);
- if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY))
- ax25_stop_heartbeat(ax25);
- ax25_stop_t1timer(ax25);
- ax25_stop_t2timer(ax25);
- ax25_stop_t3timer(ax25);
- ax25_stop_idletimer(ax25);
+ if (reason == ENETUNREACH) {
+ del_timer_sync(&ax25->timer);
+ del_timer_sync(&ax25->t1timer);
+ del_timer_sync(&ax25->t2timer);
+ del_timer_sync(&ax25->t3timer);
+ del_timer_sync(&ax25->idletimer);
+ } else {
+ if (ax25->sk && !sock_flag(ax25->sk, SOCK_DESTROY))
+ ax25_stop_heartbeat(ax25);
+ ax25_stop_t1timer(ax25);
+ ax25_stop_t2timer(ax25);
+ ax25_stop_t3timer(ax25);
+ ax25_stop_idletimer(ax25);
+ }
ax25->state = AX25_STATE_0;
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index f94f538fa382..7f6a7c96ac92 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -13,13 +13,13 @@
#include <linux/bug.h>
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/init.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 71999e13f729..b6db999abf75 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -10,13 +10,13 @@
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/minmax.h>
#include <linux/netdevice.h>
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 1d750f3cb2e4..033639df96d8 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -9,12 +9,12 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 2ed9496fc41f..37ce6cfb3520 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -10,6 +10,7 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
#include <linux/compiler.h>
+#include <linux/container_of.h>
#include <linux/crc16.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
@@ -64,7 +65,7 @@ batadv_bla_send_announce(struct batadv_priv *bat_priv,
*/
static inline u32 batadv_choose_claim(const void *data, u32 size)
{
- struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
+ const struct batadv_bla_claim *claim = data;
u32 hash = 0;
hash = jhash(&claim->addr, sizeof(claim->addr), hash);
@@ -85,7 +86,7 @@ static inline u32 batadv_choose_backbone_gw(const void *data, u32 size)
const struct batadv_bla_backbone_gw *gw;
u32 hash = 0;
- gw = (struct batadv_bla_backbone_gw *)data;
+ gw = data;
hash = jhash(&gw->orig, sizeof(gw->orig), hash);
hash = jhash(&gw->vid, sizeof(gw->vid), hash);
@@ -443,7 +444,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, const u8 *mac,
batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES,
skb->len + ETH_HLEN);
- netif_rx_any_context(skb);
+ netif_rx(skb);
out:
batadv_hardif_put(primary_if);
}
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 2f008e329007..fefb51a5f606 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -11,6 +11,7 @@
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
@@ -20,7 +21,6 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/netlink.h>
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 0899a729a23f..c120c7c6d25f 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -475,6 +475,17 @@ int batadv_frag_send_packet(struct sk_buff *skb,
goto free_skb;
}
+ /* GRO might have added fragments to the fragment list instead of
+ * frags[]. But this is not handled by skb_split and must be
+ * linearized to avoid incorrect length information after all
+ * batman-adv fragments were created and submitted to the
+ * hard-interface
+ */
+ if (skb_has_frag_list(skb) && __skb_linearize(skb)) {
+ ret = -ENOMEM;
+ goto free_skb;
+ }
+
/* Create one header to be copied to all fragments */
frag_header.packet_type = BATADV_UNICAST_FRAG;
frag_header.version = BATADV_COMPAT_VERSION;
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index b7466136e292..d26124bc27e1 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -9,6 +9,7 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 8a2b78f9c4b2..b8f8da7ee3de 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -9,11 +9,11 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/gfp.h>
#include <linux/if.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/limits.h>
#include <linux/list.h>
@@ -149,25 +149,28 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
struct net *net = dev_net(net_dev);
struct net_device *parent_dev;
struct net *parent_net;
+ int iflink;
bool ret;
/* check if this is a batman-adv mesh interface */
if (batadv_softif_is_valid(net_dev))
return true;
- /* no more parents..stop recursion */
- if (dev_get_iflink(net_dev) == 0 ||
- dev_get_iflink(net_dev) == net_dev->ifindex)
+ iflink = dev_get_iflink(net_dev);
+ if (iflink == 0)
return false;
parent_net = batadv_getlink_net(net_dev, net);
+ /* iflink to itself, most likely physical device */
+ if (net == parent_net && iflink == net_dev->ifindex)
+ return false;
+
/* recurse over the parent device */
- parent_dev = __dev_get_by_index((struct net *)parent_net,
- dev_get_iflink(net_dev));
- /* if we got a NULL parent_dev there is something broken.. */
+ parent_dev = __dev_get_by_index((struct net *)parent_net, iflink);
if (!parent_dev) {
- pr_err("Cannot find parent device\n");
+ pr_warn("Cannot find parent device. Skipping batadv-on-batadv check for %s\n",
+ net_dev->name);
return false;
}
@@ -214,14 +217,15 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
struct net_device *real_netdev = NULL;
struct net *real_net;
struct net *net;
- int ifindex;
+ int iflink;
ASSERT_RTNL();
if (!netdev)
return NULL;
- if (netdev->ifindex == dev_get_iflink(netdev)) {
+ iflink = dev_get_iflink(netdev);
+ if (iflink == 0) {
dev_hold(netdev);
return netdev;
}
@@ -231,9 +235,16 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
goto out;
net = dev_net(hard_iface->soft_iface);
- ifindex = dev_get_iflink(netdev);
real_net = batadv_getlink_net(netdev, net);
- real_netdev = dev_get_by_index(real_net, ifindex);
+
+ /* iflink to itself, most likely physical device */
+ if (net == real_net && netdev->ifindex == iflink) {
+ real_netdev = netdev;
+ dev_hold(real_netdev);
+ goto out;
+ }
+
+ real_netdev = dev_get_by_index(real_net, iflink);
out:
batadv_hardif_put(hard_iface);
@@ -296,9 +307,11 @@ static bool batadv_is_cfg80211_netdev(struct net_device *net_device)
if (!net_device)
return false;
+#if IS_ENABLED(CONFIG_CFG80211)
/* cfg80211 drivers have to set ieee80211_ptr */
if (net_device->ieee80211_ptr)
return true;
+#endif
return false;
}
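
Both iflink hunks above stop the parent walk with the same rule: treat the interface as physical when it reports no iflink at all, or when its iflink refers back to itself within the same namespace. A compact restatement of that rule as a sketch; example_no_parent() is a hypothetical name, and only dev_get_iflink() and the ifindex field from the hunks are assumed.

	/* Sketch only: true when there is no distinct parent to recurse to */
	static bool example_no_parent(const struct net_device *dev,
				      const struct net *net,
				      const struct net *parent_net)
	{
		int iflink = dev_get_iflink(dev);

		return iflink == 0 ||
		       (net == parent_net && iflink == dev->ifindex);
	}
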
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 5207cd8d6ad8..e8a449915566 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -9,6 +9,7 @@
#include <linux/atomic.h>
#include <linux/build_bug.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/crc32c.h>
#include <linux/device.h>
#include <linux/errno.h>
@@ -132,7 +133,6 @@ static void __exit batadv_exit(void)
rtnl_link_unregister(&batadv_link_ops);
unregister_netdevice_notifier(&batadv_hard_if_notifier);
- flush_workqueue(batadv_event_workqueue);
destroy_workqueue(batadv_event_workqueue);
batadv_event_workqueue = NULL;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 494d1ebecac2..23f3d53f4b51 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2022.0"
+#define BATADV_SOURCE_VERSION "2022.2"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index f4004cf0ff6f..b238455913df 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -11,6 +11,7 @@
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
@@ -134,7 +135,7 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev)
{
struct inet6_dev *in6_dev = __in6_dev_get(dev);
- if (in6_dev && in6_dev->cnf.mc_forwarding)
+ if (in6_dev && atomic_read(&in6_dev->cnf.mc_forwarding))
return BATADV_NO_FLAGS;
else
return BATADV_MCAST_WANT_NO_RTR6;
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 974d726fabb9..5f4aeeb60dc4 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -11,6 +11,7 @@
#include <linux/bitops.h>
#include <linux/byteorder/generic.h>
#include <linux/compiler.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
@@ -19,7 +20,6 @@
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index aadc653ca1d8..34903df4fe93 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -8,11 +8,11 @@
#include "main.h"
#include <linux/atomic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 477d85a3b558..0379b126865d 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -10,13 +10,13 @@
#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/if.h>
#include <linux/if_ether.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/netdevice.h>
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 2dbbe6c19609..0f5c0679b55a 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -11,6 +11,7 @@
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
#include <linux/compiler.h>
+#include <linux/container_of.h>
#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
@@ -19,7 +20,6 @@
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 93730d30af54..7f3dd3c393e0 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -12,13 +12,13 @@
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
#include <linux/compiler.h>
+#include <linux/container_of.h>
#include <linux/err.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/init.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/kthread.h>
#include <linux/limits.h>
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 4b7ad6684bc4..01d30c1e412c 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -13,6 +13,7 @@
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
#include <linux/compiler.h>
+#include <linux/container_of.h>
#include <linux/crc32c.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
@@ -21,7 +22,6 @@
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
@@ -103,10 +103,10 @@ static bool batadv_compare_tt(const struct hlist_node *node, const void *data2)
*/
static inline u32 batadv_choose_tt(const void *data, u32 size)
{
- struct batadv_tt_common_entry *tt;
+ const struct batadv_tt_common_entry *tt;
u32 hash = 0;
- tt = (struct batadv_tt_common_entry *)data;
+ tt = data;
hash = jhash(&tt->addr, ETH_ALEN, hash);
hash = jhash(&tt->vid, sizeof(tt->vid), hash);
@@ -2766,7 +2766,7 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
u32 i;
tt_tot = batadv_tt_entries(tt_len);
- tt_change = (struct batadv_tvlv_tt_change *)tvlv_buff;
+ tt_change = tvlv_buff;
if (!valid_cb)
return;
@@ -3994,7 +3994,7 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
if (tvlv_value_len < sizeof(*tt_data))
return;
- tt_data = (struct batadv_tvlv_tt_data *)tvlv_value;
+ tt_data = tvlv_value;
tvlv_value_len -= sizeof(*tt_data);
num_vlan = ntohs(tt_data->num_vlan);
@@ -4037,7 +4037,7 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
if (tvlv_value_len < sizeof(*tt_data))
return NET_RX_SUCCESS;
- tt_data = (struct batadv_tvlv_tt_data *)tvlv_value;
+ tt_data = tvlv_value;
tvlv_value_len -= sizeof(*tt_data);
tt_vlan_len = sizeof(struct batadv_tvlv_tt_vlan_data);
@@ -4129,7 +4129,7 @@ static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
goto out;
batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX);
- roaming_adv = (struct batadv_tvlv_roam_adv *)tvlv_value;
+ roaming_adv = tvlv_value;
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Received ROAMING_ADV from %pM (client %pM)\n",
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 0cb58eb04093..7ec2e2343884 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -7,10 +7,10 @@
#include "main.h"
#include <linux/byteorder/generic.h>
+#include <linux/container_of.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 133d7ea063fb..215af9b3b589 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -240,7 +240,7 @@ static int give_skb_to_upper(struct sk_buff *skb, struct net_device *dev)
if (!skb_cp)
return NET_RX_DROP;
- return netif_rx_ni(skb_cp);
+ return netif_rx(skb_cp);
}
static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev,
@@ -641,7 +641,6 @@ static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan,
return NULL;
peer->chan = chan;
- memset(&peer->peer_addr, 0, sizeof(struct in6_addr));
baswap((void *)peer->lladdr, &chan->dst);
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 1661979b6a6e..b506409bb498 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -251,7 +251,6 @@ EXPORT_SYMBOL(bt_accept_dequeue);
int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags)
{
- int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
struct sk_buff *skb;
size_t copied;
@@ -263,7 +262,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (flags & MSG_OOB)
return -EOPNOTSUPP;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb) {
if (sk->sk_shutdown & RCV_SHUTDOWN)
return 0;
@@ -281,7 +280,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
skb_reset_transport_header(skb);
err = skb_copy_datagram_msg(skb, 0, msg, copied);
if (err == 0) {
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (msg->msg_name && bt_sk(sk)->skb_msg_name)
bt_sk(sk)->skb_msg_name(skb, msg->msg_name,
@@ -385,7 +384,7 @@ int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg,
copied += chunk;
size -= chunk;
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (!(flags & MSG_PEEK)) {
int skb_len = skb_headlen(skb);
@@ -568,7 +567,7 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo)
EXPORT_SYMBOL(bt_sock_wait_state);
/* This function expects the sk lock to be held when called */
-int bt_sock_wait_ready(struct sock *sk, unsigned long flags)
+int bt_sock_wait_ready(struct sock *sk, unsigned int msg_flags)
{
DECLARE_WAITQUEUE(wait, current);
unsigned long timeo;
@@ -576,7 +575,7 @@ int bt_sock_wait_ready(struct sock *sk, unsigned long flags)
BT_DBG("sk %p", sk);
- timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
+ timeo = sock_sndtimeo(sk, !!(msg_flags & MSG_DONTWAIT));
add_wait_queue(sk_sleep(sk), &wait);
set_current_state(TASK_INTERRUPTIBLE);
@@ -611,7 +610,7 @@ EXPORT_SYMBOL(bt_sock_wait_ready);
static void *bt_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(seq->private->l->lock)
{
- struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
+ struct bt_sock_list *l = pde_data(file_inode(seq->file));
read_lock(&l->lock);
return seq_hlist_start_head(&l->head, *pos);
@@ -619,7 +618,7 @@ static void *bt_seq_start(struct seq_file *seq, loff_t *pos)
static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
+ struct bt_sock_list *l = pde_data(file_inode(seq->file));
return seq_hlist_next(v, &l->head, pos);
}
@@ -627,14 +626,14 @@ static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void bt_seq_stop(struct seq_file *seq, void *v)
__releases(seq->private->l->lock)
{
- struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
+ struct bt_sock_list *l = pde_data(file_inode(seq->file));
read_unlock(&l->lock);
}
static int bt_seq_show(struct seq_file *seq, void *v)
{
- struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
+ struct bt_sock_list *l = pde_data(file_inode(seq->file));
if (v == SEQ_START_TOKEN) {
seq_puts(seq, "sk RefCnt Rmem Wmem User Inode Parent");
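
The bt_sock_wait_ready() change above is a semantic fix rather than a rename: the function is handed sendmsg() flags, and MSG_DONTWAIT is a different bit than the file-status flag O_NONBLOCK, so the old mask could never detect a non-blocking send. A small userspace illustration of the mismatch (the printed values are whatever the local headers define):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/socket.h>

	int main(void)
	{
		int msg_flags = MSG_DONTWAIT;

		printf("O_NONBLOCK=%#x MSG_DONTWAIT=%#x\n", O_NONBLOCK, MSG_DONTWAIT);
		printf("old check: %d  new check: %d\n",
		       !!(msg_flags & O_NONBLOCK), !!(msg_flags & MSG_DONTWAIT));
		return 0;
	}
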
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index c9add7753b9f..5a6a49885ab6 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -400,7 +400,7 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
dev->stats.rx_packets++;
nskb->ip_summed = CHECKSUM_NONE;
nskb->protocol = eth_type_trans(nskb, dev);
- netif_rx_ni(nskb);
+ netif_rx(nskb);
return 0;
badframe:
@@ -535,7 +535,7 @@ static int bnep_session(void *arg)
up_write(&bnep_session_sem);
free_netdev(dev);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
return 0;
}
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 83eb84e8e688..90d130588a3e 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -323,7 +323,7 @@ static int cmtp_session(void *arg)
up_write(&cmtp_session_sem);
kfree(session);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
return 0;
}
diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c
index 7e930f77ecab..7d77fb00c2bf 100644
--- a/net/bluetooth/eir.c
+++ b/net/bluetooth/eir.c
@@ -55,6 +55,19 @@ u8 eir_append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
return eir_append_le16(ptr, ad_len, EIR_APPEARANCE, hdev->appearance);
}
+u8 eir_append_service_data(u8 *eir, u16 eir_len, u16 uuid, u8 *data,
+ u8 data_len)
+{
+ eir[eir_len++] = sizeof(u8) + sizeof(uuid) + data_len;
+ eir[eir_len++] = EIR_SERVICE_DATA;
+ put_unaligned_le16(uuid, &eir[eir_len]);
+ eir_len += sizeof(uuid);
+ memcpy(&eir[eir_len], data, data_len);
+ eir_len += data_len;
+
+ return eir_len;
+}
+
static u8 *create_uuid16_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len)
{
u8 *ptr = data, *uuids_start = NULL;
@@ -333,3 +346,21 @@ u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr)
return scan_rsp_len;
}
+
+void *eir_get_service_data(u8 *eir, size_t eir_len, u16 uuid, size_t *len)
+{
+ while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, len))) {
+ u16 value = get_unaligned_le16(eir);
+
+ if (uuid == value) {
+ if (len)
+ *len -= 2;
+ return &eir[2];
+ }
+
+ eir += *len;
+ eir_len -= *len;
+ }
+
+ return NULL;
+}
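
eir_append_service_data() and eir_get_service_data() above share one TLV layout: a length octet that counts the type byte, the 16-bit UUID and the payload, then the Service Data (16-bit UUID) type octet, the little-endian UUID and the data. A standalone userspace sketch of the encoder; append_service_data(), the UUID 0xfef3 and the three payload bytes are made up for illustration, and 0x16 is assumed to be the EIR_SERVICE_DATA type value.

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	static size_t append_service_data(uint8_t *eir, size_t eir_len, uint16_t uuid,
					  const uint8_t *data, uint8_t data_len)
	{
		eir[eir_len++] = 1 + sizeof(uuid) + data_len;	/* type + uuid + payload */
		eir[eir_len++] = 0x16;				/* Service Data - 16-bit UUID */
		eir[eir_len++] = uuid & 0xff;			/* UUID, little endian */
		eir[eir_len++] = uuid >> 8;
		memcpy(&eir[eir_len], data, data_len);
		return eir_len + data_len;
	}

	int main(void)
	{
		uint8_t buf[16] = { 0 }, payload[] = { 0x01, 0x02, 0x03 };
		size_t len = append_service_data(buf, 0, 0xfef3, payload, sizeof(payload));

		for (size_t i = 0; i < len; i++)
			printf("%02x ", buf[i]);	/* prints: 06 16 f3 fe 01 02 03 */
		printf("\n");
		return 0;
	}
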
diff --git a/net/bluetooth/eir.h b/net/bluetooth/eir.h
index 05e2e917fc25..62f2374078f2 100644
--- a/net/bluetooth/eir.h
+++ b/net/bluetooth/eir.h
@@ -14,6 +14,13 @@ u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr);
u8 eir_append_local_name(struct hci_dev *hdev, u8 *eir, u8 ad_len);
u8 eir_append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len);
+u8 eir_append_service_data(u8 *eir, u16 eir_len, u16 uuid, u8 *data,
+ u8 data_len);
+
+static inline u16 eir_precalc_len(u8 data_len)
+{
+ return sizeof(u8) * 2 + data_len;
+}
static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type,
u8 *data, u8 data_len)
@@ -36,6 +43,21 @@ static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data)
return eir_len;
}
+static inline u16 eir_skb_put_data(struct sk_buff *skb, u8 type, u8 *data, u8 data_len)
+{
+ u8 *eir;
+ u16 eir_len;
+
+ eir_len = eir_precalc_len(data_len);
+ eir = skb_put(skb, eir_len);
+ WARN_ON(sizeof(type) + data_len > U8_MAX);
+ eir[0] = sizeof(type) + data_len;
+ eir[1] = type;
+ memcpy(&eir[2], data, data_len);
+
+ return eir_len;
+}
+
static inline void *eir_get_data(u8 *eir, size_t eir_len, u8 type,
size_t *data_len)
{
@@ -72,3 +94,5 @@ static inline void *eir_get_data(u8 *eir, size_t eir_len, u8 type,
return NULL;
}
+
+void *eir_get_service_data(u8 *eir, size_t eir_len, u16 uuid, size_t *len);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 04ebe901e86f..ac06c9724c7f 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -481,7 +481,7 @@ static bool hci_setup_sync_conn(struct hci_conn *conn, __u16 handle)
bool hci_setup_sync(struct hci_conn *conn, __u16 handle)
{
- if (enhanced_sco_capable(conn->hdev))
+ if (enhanced_sync_conn_capable(conn->hdev))
return hci_enhanced_setup_sync_conn(conn, handle);
return hci_setup_sync_conn(conn, handle);
@@ -669,7 +669,9 @@ static void le_conn_timeout(struct work_struct *work)
if (conn->role == HCI_ROLE_SLAVE) {
/* Disable LE Advertising */
le_disable_advertising(hdev);
- hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT);
+ hci_dev_lock(hdev);
+ hci_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT);
+ hci_dev_unlock(hdev);
return;
}
@@ -689,6 +691,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
bacpy(&conn->dst, dst);
bacpy(&conn->src, &hdev->bdaddr);
+ conn->handle = HCI_CONN_HANDLE_UNSET;
conn->hdev = hdev;
conn->type = type;
conn->role = role;
@@ -870,7 +873,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type)
EXPORT_SYMBOL(hci_get_route);
/* This function requires the caller holds hdev->lock */
-void hci_le_conn_failed(struct hci_conn *conn, u8 status)
+static void hci_le_conn_failed(struct hci_conn *conn, u8 status)
{
struct hci_dev *hdev = conn->hdev;
struct hci_conn_params *params;
@@ -883,8 +886,6 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
params->conn = NULL;
}
- conn->state = BT_CLOSED;
-
/* If the status indicates successful cancellation of
* the attempt (i.e. Unknown Connection Id) there's no point of
* notifying failure since we'll go back to keep trying to
@@ -896,10 +897,6 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
mgmt_connect_failed(hdev, &conn->dst, conn->type,
conn->dst_type, status);
- hci_connect_cfm(conn, status);
-
- hci_conn_del(conn);
-
/* Since we may have temporarily stopped the background scanning in
* favor of connection establishment, we should restart it.
*/
@@ -911,6 +908,28 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
hci_enable_advertising(hdev);
}
+/* This function requires the caller holds hdev->lock */
+void hci_conn_failed(struct hci_conn *conn, u8 status)
+{
+ struct hci_dev *hdev = conn->hdev;
+
+ bt_dev_dbg(hdev, "status 0x%2.2x", status);
+
+ switch (conn->type) {
+ case LE_LINK:
+ hci_le_conn_failed(conn, status);
+ break;
+ case ACL_LINK:
+ mgmt_connect_failed(hdev, &conn->dst, conn->type,
+ conn->dst_type, status);
+ break;
+ }
+
+ conn->state = BT_CLOSED;
+ hci_connect_cfm(conn, status);
+ hci_conn_del(conn);
+}
+
static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err)
{
struct hci_conn *conn = data;
@@ -924,10 +943,11 @@ static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_err(hdev, "request failed to create LE connection: err %d", err);
- if (!conn)
+ /* Check if connection is still pending */
+ if (conn != hci_lookup_le_connect(hdev))
goto done;
- hci_le_conn_failed(conn, err);
+ hci_conn_failed(conn, err);
done:
hci_dev_unlock(hdev);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 2b7bd3655b07..a0f99baafd35 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -571,6 +571,7 @@ int hci_dev_close(__u16 dev)
goto done;
}
+ cancel_work_sync(&hdev->power_on);
if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF))
cancel_delayed_work(&hdev->power_off);
@@ -2153,7 +2154,7 @@ int hci_bdaddr_list_add_with_flags(struct list_head *list, bdaddr_t *bdaddr,
bacpy(&entry->bdaddr, bdaddr);
entry->bdaddr_type = type;
- bitmap_from_u64(entry->flags, flags);
+ entry->flags = flags;
list_add(&entry->list, list);
@@ -2503,6 +2504,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
INIT_LIST_HEAD(&hdev->conn_hash.list);
INIT_LIST_HEAD(&hdev->adv_instances);
INIT_LIST_HEAD(&hdev->blocked_keys);
+ INIT_LIST_HEAD(&hdev->monitored_devices);
INIT_LIST_HEAD(&hdev->local_codecs);
INIT_WORK(&hdev->rx_work, hci_rx_work);
@@ -2554,10 +2556,10 @@ int hci_register_dev(struct hci_dev *hdev)
*/
switch (hdev->dev_type) {
case HCI_PRIMARY:
- id = ida_simple_get(&hci_index_ida, 0, 0, GFP_KERNEL);
+ id = ida_simple_get(&hci_index_ida, 0, HCI_MAX_ID, GFP_KERNEL);
break;
case HCI_AMP:
- id = ida_simple_get(&hci_index_ida, 1, 0, GFP_KERNEL);
+ id = ida_simple_get(&hci_index_ida, 1, HCI_MAX_ID, GFP_KERNEL);
break;
default:
return -EINVAL;
@@ -2566,7 +2568,7 @@ int hci_register_dev(struct hci_dev *hdev)
if (id < 0)
return id;
- sprintf(hdev->name, "hci%d", id);
+ snprintf(hdev->name, sizeof(hdev->name), "hci%d", id);
hdev->id = id;
BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
@@ -2633,7 +2635,7 @@ int hci_register_dev(struct hci_dev *hdev)
* callback.
*/
if (hdev->wakeup)
- set_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, hdev->conn_flags);
+ hdev->conn_flags |= HCI_CONN_FLAG_REMOTE_WAKEUP;
hci_sock_dev_event(hdev, HCI_DEV_REG);
hci_dev_hold(hdev);
@@ -2738,6 +2740,7 @@ void hci_release_dev(struct hci_dev *hdev)
hci_dev_unlock(hdev);
ida_simple_remove(&hci_index_ida, hdev->id);
+ kfree_skb(hdev->sent_cmd);
kfree(hdev);
}
EXPORT_SYMBOL(hci_release_dev);
@@ -3666,8 +3669,8 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
sco_recv_scodata(conn, skb);
return;
} else {
- bt_dev_err(hdev, "SCO packet for unknown connection handle %d",
- handle);
+ bt_dev_err_ratelimited(hdev, "SCO packet for unknown connection handle %d",
+ handle);
}
kfree_skb(skb);
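
Two of the hci_register_dev() changes above act together: the ida range is capped so the numeric id stays small, and snprintf() bounds the write into hdev->name regardless. A sketch of the combined pattern; that hdev->name is a short fixed-size array and that HCI_MAX_ID is chosen so "hci%d" always fits are assumptions, not shown in this excerpt.

	/* Sketch only: allocate an id in [0, HCI_MAX_ID) and format the name
	 * with an explicit bound, so neither step can overflow hdev->name.
	 */
	id = ida_simple_get(&hci_index_ida, 0, HCI_MAX_ID, GFP_KERNEL);
	if (id < 0)
		return id;

	snprintf(hdev->name, sizeof(hdev->name), "hci%d", id);
	hdev->id = id;
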
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index fc30f4c03d29..af17dfb20e01 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1835,7 +1835,9 @@ static u8 hci_cc_le_clear_accept_list(struct hci_dev *hdev, void *data,
if (rp->status)
return rp->status;
+ hci_dev_lock(hdev);
hci_bdaddr_list_clear(&hdev->le_accept_list);
+ hci_dev_unlock(hdev);
return rp->status;
}
@@ -1855,8 +1857,10 @@ static u8 hci_cc_le_add_to_accept_list(struct hci_dev *hdev, void *data,
if (!sent)
return rp->status;
+ hci_dev_lock(hdev);
hci_bdaddr_list_add(&hdev->le_accept_list, &sent->bdaddr,
sent->bdaddr_type);
+ hci_dev_unlock(hdev);
return rp->status;
}
@@ -1876,8 +1880,10 @@ static u8 hci_cc_le_del_from_accept_list(struct hci_dev *hdev, void *data,
if (!sent)
return rp->status;
+ hci_dev_lock(hdev);
hci_bdaddr_list_del(&hdev->le_accept_list, &sent->bdaddr,
sent->bdaddr_type);
+ hci_dev_unlock(hdev);
return rp->status;
}
@@ -1949,9 +1955,11 @@ static u8 hci_cc_le_add_to_resolv_list(struct hci_dev *hdev, void *data,
if (!sent)
return rp->status;
+ hci_dev_lock(hdev);
hci_bdaddr_list_add_with_irk(&hdev->le_resolv_list, &sent->bdaddr,
sent->bdaddr_type, sent->peer_irk,
sent->local_irk);
+ hci_dev_unlock(hdev);
return rp->status;
}
@@ -1971,8 +1979,10 @@ static u8 hci_cc_le_del_from_resolv_list(struct hci_dev *hdev, void *data,
if (!sent)
return rp->status;
+ hci_dev_lock(hdev);
hci_bdaddr_list_del_with_irk(&hdev->le_resolv_list, &sent->bdaddr,
sent->bdaddr_type);
+ hci_dev_unlock(hdev);
return rp->status;
}
@@ -1987,7 +1997,9 @@ static u8 hci_cc_le_clear_resolv_list(struct hci_dev *hdev, void *data,
if (rp->status)
return rp->status;
+ hci_dev_lock(hdev);
hci_bdaddr_list_clear(&hdev->le_resolv_list);
+ hci_dev_unlock(hdev);
return rp->status;
}
@@ -2834,7 +2846,7 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status)
bt_dev_dbg(hdev, "status 0x%2.2x", status);
/* All connection failure handling is taken care of by the
- * hci_le_conn_failed function which is triggered by the HCI
+ * hci_conn_failed function which is triggered by the HCI
* request completion callbacks used for connecting.
*/
if (status)
@@ -2859,7 +2871,7 @@ static void hci_cs_le_ext_create_conn(struct hci_dev *hdev, u8 status)
bt_dev_dbg(hdev, "status 0x%2.2x", status);
/* All connection failure handling is taken care of by the
- * hci_le_conn_failed function which is triggered by the HCI
+ * hci_conn_failed function which is triggered by the HCI
* request completion callbacks used for connecting.
*/
if (status)
@@ -3067,13 +3079,20 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
{
struct hci_ev_conn_complete *ev = data;
struct hci_conn *conn;
+ u8 status = ev->status;
- bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
+ bt_dev_dbg(hdev, "status 0x%2.2x", status);
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr);
if (!conn) {
+ /* In case of an error status, and there is no connection pending,
+ * just unlock as there is nothing to clean up.
+ */
+ if (ev->status)
+ goto unlock;
+
/* Connection may not exist if auto-connected. Check the bredr
* allowlist to see if this device is allowed to auto connect.
* If link is an ACL type, create a connection class
@@ -3106,8 +3125,25 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
}
}
- if (!ev->status) {
+ /* The HCI_Connection_Complete event is only sent once per connection.
+ * Processing it more than once per connection can corrupt kernel memory.
+ *
+ * As the connection handle is set here for the first time, it indicates
+ * whether the connection is already set up.
+ */
+ if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+ bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for existing connection");
+ goto unlock;
+ }
+
+ if (!status) {
conn->handle = __le16_to_cpu(ev->handle);
+ if (conn->handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
+ conn->handle, HCI_CONN_HANDLE_MAX);
+ status = HCI_ERROR_INVALID_PARAMETERS;
+ goto done;
+ }
if (conn->type == ACL_LINK) {
conn->state = BT_CONFIG;
@@ -3148,19 +3184,14 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, sizeof(cp),
&cp);
}
- } else {
- conn->state = BT_CLOSED;
- if (conn->type == ACL_LINK)
- mgmt_connect_failed(hdev, &conn->dst, conn->type,
- conn->dst_type, ev->status);
}
if (conn->type == ACL_LINK)
hci_sco_setup(conn, ev->status);
- if (ev->status) {
- hci_connect_cfm(conn, ev->status);
- hci_conn_del(conn);
+done:
+ if (status) {
+ hci_conn_failed(conn, status);
} else if (ev->link_type == SCO_LINK) {
switch (conn->setting & SCO_AIRMODE_MASK) {
case SCO_AIRMODE_CVSD:
@@ -3169,7 +3200,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
break;
}
- hci_connect_cfm(conn, ev->status);
+ hci_connect_cfm(conn, status);
}
unlock:
@@ -3206,10 +3237,12 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data,
return;
}
+ hci_dev_lock(hdev);
+
if (hci_bdaddr_list_lookup(&hdev->reject_list, &ev->bdaddr,
BDADDR_BREDR)) {
hci_reject_conn(hdev, &ev->bdaddr);
- return;
+ goto unlock;
}
/* Require HCI_CONNECTABLE or an accept list entry to accept the
@@ -3221,13 +3254,11 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data,
!hci_bdaddr_list_lookup_with_flags(&hdev->accept_list, &ev->bdaddr,
BDADDR_BREDR)) {
hci_reject_conn(hdev, &ev->bdaddr);
- return;
+ goto unlock;
}
/* Connection accepted */
- hci_dev_lock(hdev);
-
ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr);
if (ie)
memcpy(ie->data.dev_class, ev->dev_class, 3);
@@ -3239,8 +3270,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data,
HCI_ROLE_SLAVE);
if (!conn) {
bt_dev_err(hdev, "no memory for new connection");
- hci_dev_unlock(hdev);
- return;
+ goto unlock;
}
}
@@ -3280,6 +3310,10 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data,
conn->state = BT_CONNECT2;
hci_connect_cfm(conn, 0);
}
+
+ return;
+unlock:
+ hci_dev_unlock(hdev);
}
static u8 hci_to_mgmt_reason(u8 err)
@@ -4534,7 +4568,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata,
if (!info) {
bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x",
HCI_EV_INQUIRY_RESULT_WITH_RSSI);
- return;
+ goto unlock;
}
bacpy(&data.bdaddr, &info->bdaddr);
@@ -4565,7 +4599,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata,
if (!info) {
bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x",
HCI_EV_INQUIRY_RESULT_WITH_RSSI);
- return;
+ goto unlock;
}
bacpy(&data.bdaddr, &info->bdaddr);
@@ -4587,7 +4621,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata,
bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x",
HCI_EV_INQUIRY_RESULT_WITH_RSSI);
}
-
+unlock:
hci_dev_unlock(hdev);
}
@@ -4660,8 +4694,22 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
{
struct hci_ev_sync_conn_complete *ev = data;
struct hci_conn *conn;
+ u8 status = ev->status;
- bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
+ switch (ev->link_type) {
+ case SCO_LINK:
+ case ESCO_LINK:
+ break;
+ default:
+ /* As per Core 5.3 Vol 4 Part E 7.7.35 (p.2219), Link_Type
+ * for HCI_Synchronous_Connection_Complete is limited to
+ * either SCO or eSCO
+ */
+ bt_dev_err(hdev, "Ignoring connect complete event for invalid link type");
+ return;
+ }
+
+ bt_dev_dbg(hdev, "status 0x%2.2x", status);
hci_dev_lock(hdev);
@@ -4684,24 +4732,28 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
goto unlock;
}
- switch (ev->status) {
+ /* The HCI_Synchronous_Connection_Complete event is only sent once per connection.
+ * Processing it more than once per connection can corrupt kernel memory.
+ *
+ * As the connection handle is set here for the first time, it indicates
+ * whether the connection is already set up.
+ */
+ if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+ bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete event for existing connection");
+ goto unlock;
+ }
+
+ switch (status) {
case 0x00:
- /* The synchronous connection complete event should only be
- * sent once per new connection. Receiving a successful
- * complete event when the connection status is already
- * BT_CONNECTED means that the device is misbehaving and sent
- * multiple complete event packets for the same new connection.
- *
- * Registering the device more than once can corrupt kernel
- * memory, hence upon detecting this invalid event, we report
- * an error and ignore the packet.
- */
- if (conn->state == BT_CONNECTED) {
- bt_dev_err(hdev, "Ignoring connect complete event for existing connection");
- goto unlock;
+ conn->handle = __le16_to_cpu(ev->handle);
+ if (conn->handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
+ conn->handle, HCI_CONN_HANDLE_MAX);
+ status = HCI_ERROR_INVALID_PARAMETERS;
+ conn->state = BT_CLOSED;
+ break;
}
- conn->handle = __le16_to_cpu(ev->handle);
conn->state = BT_CONNECTED;
conn->type = ev->link_type;
@@ -4745,8 +4797,8 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
}
}
- hci_connect_cfm(conn, ev->status);
- if (ev->status)
+ hci_connect_cfm(conn, status);
+ if (status)
hci_conn_del(conn);
unlock:
@@ -5423,8 +5475,9 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
- if (hcon) {
+ if (hcon && hcon->type == AMP_LINK) {
hcon->state = BT_CLOSED;
+ hci_disconn_cfm(hcon, ev->reason);
hci_conn_del(hcon);
}
@@ -5505,6 +5558,12 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
conn = hci_lookup_le_connect(hdev);
if (!conn) {
+ /* In case of an error status, and there is no connection pending,
+ * just unlock as there is nothing to clean up.
+ */
+ if (status)
+ goto unlock;
+
conn = hci_conn_add(hdev, LE_LINK, bdaddr, role);
if (!conn) {
bt_dev_err(hdev, "no memory for new connection");
@@ -5537,6 +5596,17 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
cancel_delayed_work(&conn->le_conn_timeout);
}
+ /* The HCI_LE_Connection_Complete event is only sent once per connection.
+ * Processing it more than once per connection can corrupt kernel memory.
+ *
+ * As the connection handle is set here for the first time, it indicates
+ * whether the connection is already set up.
+ */
+ if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+ bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for existing connection");
+ goto unlock;
+ }
+
le_conn_update_addr(conn, bdaddr, bdaddr_type, local_rpa);
/* Lookup the identity address from the stored connection
@@ -5556,11 +5626,19 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
conn->dst_type = ev_bdaddr_type(hdev, conn->dst_type, NULL);
- if (status) {
- hci_le_conn_failed(conn, status);
- goto unlock;
+ if (handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x", handle,
+ HCI_CONN_HANDLE_MAX);
+ status = HCI_ERROR_INVALID_PARAMETERS;
}
+ /* All connection failure handling is taken care of by the
+ * hci_conn_failed function which is triggered by the HCI
+ * request completion callbacks used for connecting.
+ */
+ if (status)
+ goto unlock;
+
if (conn->dst_type == ADDR_LE_DEV_PUBLIC)
addr_type = BDADDR_LE_PUBLIC;
else
@@ -5670,8 +5748,6 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
- adv = hci_find_adv_instance(hdev, ev->handle);
-
/* The Bluetooth Core 5.3 specification clearly states that this event
* shall not be sent when the Host disables the advertising set. So in
* case of HCI_ERROR_CANCELLED_BY_HOST, just ignore the event.
@@ -5684,9 +5760,13 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, void *data,
return;
}
+ hci_dev_lock(hdev);
+
+ adv = hci_find_adv_instance(hdev, ev->handle);
+
if (ev->status) {
if (!adv)
- return;
+ goto unlock;
/* Remove advertising as it has been terminated */
hci_remove_adv_instance(hdev, ev->handle);
@@ -5694,12 +5774,12 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, void *data,
list_for_each_entry_safe(adv, n, &hdev->adv_instances, list) {
if (adv->enabled)
- return;
+ goto unlock;
}
/* We are no longer advertising, clear HCI_LE_ADV */
hci_dev_clear_flag(hdev, HCI_LE_ADV);
- return;
+ goto unlock;
}
if (adv)
@@ -5714,16 +5794,19 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, void *data,
if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM ||
bacmp(&conn->resp_addr, BDADDR_ANY))
- return;
+ goto unlock;
if (!ev->handle) {
bacpy(&conn->resp_addr, &hdev->random_addr);
- return;
+ goto unlock;
}
if (adv)
bacpy(&conn->resp_addr, &adv->random_addr);
}
+
+unlock:
+ hci_dev_unlock(hdev);
}
static void hci_le_conn_update_complete_evt(struct hci_dev *hdev, void *data,
@@ -6798,7 +6881,7 @@ static const struct hci_ev {
HCI_EV(HCI_EV_NUM_COMP_BLOCKS, hci_num_comp_blocks_evt,
sizeof(struct hci_ev_num_comp_blocks)),
/* [0xff = HCI_EV_VENDOR] */
- HCI_EV(HCI_EV_VENDOR, msft_vendor_evt, 0),
+ HCI_EV_VL(HCI_EV_VENDOR, msft_vendor_evt, 0, HCI_MAX_EVENT_SIZE),
};
static void hci_event_func(struct hci_dev *hdev, u8 event, struct sk_buff *skb,
@@ -6823,8 +6906,9 @@ static void hci_event_func(struct hci_dev *hdev, u8 event, struct sk_buff *skb,
* decide if that is acceptable.
*/
if (skb->len > ev->max_len)
- bt_dev_warn(hdev, "unexpected event 0x%2.2x length: %u > %u",
- event, skb->len, ev->max_len);
+ bt_dev_warn_ratelimited(hdev,
+ "unexpected event 0x%2.2x length: %u > %u",
+ event, skb->len, ev->max_len);
data = hci_ev_skb_pull(hdev, skb, event, ev->min_len);
if (!data)
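
The three connection-complete handlers patched above (ACL, synchronous and LE) now share one duplicate-event guard: conn->handle starts life as the HCI_CONN_HANDLE_UNSET sentinel, is assigned exactly once, and is range-checked before use. A condensed sketch of that guard, assuming it runs under hdev's lock as in the hunks; the error string is illustrative, not the exact kernel message.

	if (conn->handle != HCI_CONN_HANDLE_UNSET) {
		bt_dev_err(hdev, "Ignoring duplicate connection complete event");
		goto unlock;
	}

	conn->handle = __le16_to_cpu(ev->handle);
	if (conn->handle > HCI_CONN_HANDLE_MAX) {
		bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
			   conn->handle, HCI_CONN_HANDLE_MAX);
		status = HCI_ERROR_INVALID_PARAMETERS;
	}
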
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 42c8047a9897..38ecaf9264ee 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -261,7 +261,7 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
if (skb_queue_empty(&req->cmd_q))
bt_cb(skb)->hci.req_flags |= HCI_REQ_START;
- bt_cb(skb)->hci.req_event = event;
+ hci_skb_event(skb) = event;
skb_queue_tail(&req->cmd_q, skb);
}
@@ -482,7 +482,7 @@ static int add_to_accept_list(struct hci_request *req,
/* During suspend, only wakeable devices can be in accept list */
if (hdev->suspended &&
- !test_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, params->flags))
+ !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP))
return 0;
*num_entries += 1;
@@ -2260,6 +2260,7 @@ static int active_scan(struct hci_request *req, unsigned long opt)
if (err < 0)
own_addr_type = ADDR_LE_DEV_PUBLIC;
+ hci_dev_lock(hdev);
if (hci_is_adv_monitoring(hdev)) {
/* Duplicate filter should be disabled when some advertisement
* monitor is activated, otherwise AdvMon can only receive one
@@ -2276,6 +2277,7 @@ static int active_scan(struct hci_request *req, unsigned long opt)
*/
filter_dup = LE_SCAN_FILTER_DUP_DISABLE;
}
+ hci_dev_unlock(hdev);
hci_req_start_scan(req, LE_SCAN_ACTIVE, interval,
hdev->le_scan_window_discovery, own_addr_type,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 33b3c0ffc339..189e3115c8c6 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1453,7 +1453,6 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg,
static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
- int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
struct sk_buff *skb;
int copied, err;
@@ -1470,7 +1469,7 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
if (sk->sk_state == BT_CLOSED)
return 0;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
return err;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 0feb68f12545..1739e8cb3291 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -276,40 +276,37 @@ EXPORT_SYMBOL(__hci_cmd_sync_status);
static void hci_cmd_sync_work(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_sync_work);
- struct hci_cmd_sync_work_entry *entry;
- hci_cmd_sync_work_func_t func;
- hci_cmd_sync_work_destroy_t destroy;
- void *data;
bt_dev_dbg(hdev, "");
- mutex_lock(&hdev->cmd_sync_work_lock);
- entry = list_first_entry(&hdev->cmd_sync_work_list,
- struct hci_cmd_sync_work_entry, list);
- if (entry) {
- list_del(&entry->list);
- func = entry->func;
- data = entry->data;
- destroy = entry->destroy;
- kfree(entry);
- } else {
- func = NULL;
- data = NULL;
- destroy = NULL;
- }
- mutex_unlock(&hdev->cmd_sync_work_lock);
+ /* Dequeue all entries and run them */
+ while (1) {
+ struct hci_cmd_sync_work_entry *entry;
- if (func) {
- int err;
+ mutex_lock(&hdev->cmd_sync_work_lock);
+ entry = list_first_entry_or_null(&hdev->cmd_sync_work_list,
+ struct hci_cmd_sync_work_entry,
+ list);
+ if (entry)
+ list_del(&entry->list);
+ mutex_unlock(&hdev->cmd_sync_work_lock);
- hci_req_sync_lock(hdev);
+ if (!entry)
+ break;
- err = func(hdev, data);
+ bt_dev_dbg(hdev, "entry %p", entry);
- if (destroy)
- destroy(hdev, data, err);
+ if (entry->func) {
+ int err;
+
+ hci_req_sync_lock(hdev);
+ err = entry->func(hdev, entry->data);
+ if (entry->destroy)
+ entry->destroy(hdev, entry->data, err);
+ hci_req_sync_unlock(hdev);
+ }
- hci_req_sync_unlock(hdev);
+ kfree(entry);
}
}
@@ -382,6 +379,9 @@ int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
{
struct hci_cmd_sync_work_entry *entry;
+ if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+ return -ENODEV;
+
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return -ENOMEM;
@@ -1637,7 +1637,7 @@ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
* indicates that LL Privacy has been enabled and
* HCI_OP_LE_SET_PRIVACY_MODE is supported.
*/
- if (!test_bit(HCI_CONN_FLAG_DEVICE_PRIVACY, params->flags))
+ if (!(params->flags & HCI_CONN_FLAG_DEVICE_PRIVACY))
return 0;
irk = hci_find_irk_by_addr(hdev, &params->addr, params->addr_type);
@@ -1664,20 +1664,19 @@ static int hci_le_add_accept_list_sync(struct hci_dev *hdev,
struct hci_cp_le_add_to_accept_list cp;
int err;
+ /* During suspend, only wakeable devices can be in acceptlist */
+ if (hdev->suspended &&
+ !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP))
+ return 0;
+
/* Select filter policy to accept all advertising */
if (*num_entries >= hdev->le_accept_list_size)
return -ENOSPC;
/* Accept list can not be used with RPAs */
if (!use_ll_privacy(hdev) &&
- hci_find_irk_by_addr(hdev, &params->addr, params->addr_type)) {
+ hci_find_irk_by_addr(hdev, &params->addr, params->addr_type))
return -EINVAL;
- }
-
- /* During suspend, only wakeable devices can be in acceptlist */
- if (hdev->suspended &&
- !test_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, params->flags))
- return 0;
/* Attempt to program the device in the resolving list first to avoid
* having to rollback in case it fails since the resolving list is
@@ -1841,6 +1840,7 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
struct bdaddr_list *b, *t;
u8 num_entries = 0;
bool pend_conn, pend_report;
+ u8 filter_policy;
int err;
/* Pause advertising if resolving list can be used as controllers are
@@ -1927,6 +1927,8 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
err = -EINVAL;
done:
+ filter_policy = err ? 0x00 : 0x01;
+
/* Enable address resolution when LL Privacy is enabled. */
err = hci_le_set_addr_resolution_enable_sync(hdev, 0x01);
if (err)
@@ -1937,7 +1939,7 @@ done:
hci_resume_advertising_sync(hdev);
/* Select filter policy to use accept list */
- return err ? 0x00 : 0x01;
+ return filter_policy;
}
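
The filter_policy variable introduced above exists because err is reused past the done label: hci_le_set_addr_resolution_enable_sync() can overwrite it, so the accept-list outcome must be captured first. A sketch of the intended ordering, using only calls visible in this hunk.

	filter_policy = err ? 0x00 : 0x01;	/* decided by the accept-list result */

	/* err may be overwritten below; filter_policy is not */
	err = hci_le_set_addr_resolution_enable_sync(hdev, 0x01);

	hci_resume_advertising_sync(hdev);

	return filter_policy;
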
/* Returns true if an le connection is in the scanning state */
@@ -2806,6 +2808,9 @@ static int hci_set_event_filter_sync(struct hci_dev *hdev, u8 flt_type,
if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
return 0;
+ if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks))
+ return 0;
+
memset(&cp, 0, sizeof(cp));
cp.flt_type = flt_type;
@@ -2826,6 +2831,13 @@ static int hci_clear_event_filter_sync(struct hci_dev *hdev)
if (!hci_dev_test_flag(hdev, HCI_EVENT_FILTER_CONFIGURED))
return 0;
+ /* In theory the state machine should not reach here unless
+ * a hci_set_event_filter_sync() call succeeds, but we do
+ * the check both for parity and as a future reminder.
+ */
+ if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks))
+ return 0;
+
return hci_set_event_filter_sync(hdev, HCI_FLT_CLEAR_ALL, 0x00,
BDADDR_ANY, 0x00);
}
@@ -3262,10 +3274,10 @@ static int hci_le_set_event_mask_sync(struct hci_dev *hdev)
if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT)
events[0] |= 0x40; /* LE Data Length Change */
- /* If the controller supports LL Privacy feature, enable
- * the corresponding event.
+ /* If the controller supports LL Privacy feature or LE Extended Adv,
+ * enable the corresponding event.
*/
- if (hdev->le_features[0] & HCI_LE_LL_PRIVACY)
+ if (use_enhanced_conn_complete(hdev))
events[1] |= 0x02; /* LE Enhanced Connection Complete */
/* If the controller supports Extended Scanner Filter
@@ -3812,6 +3824,30 @@ static int hci_init_sync(struct hci_dev *hdev)
return 0;
}
+#define HCI_QUIRK_BROKEN(_quirk, _desc) { HCI_QUIRK_BROKEN_##_quirk, _desc }
+
+static const struct {
+ unsigned long quirk;
+ const char *desc;
+} hci_broken_table[] = {
+ HCI_QUIRK_BROKEN(LOCAL_COMMANDS,
+ "HCI Read Local Supported Commands not supported"),
+ HCI_QUIRK_BROKEN(STORED_LINK_KEY,
+ "HCI Delete Stored Link Key command is advertised, "
+ "but not supported."),
+ HCI_QUIRK_BROKEN(ERR_DATA_REPORTING,
+ "HCI Read Default Erroneous Data Reporting command is "
+ "advertised, but not supported."),
+ HCI_QUIRK_BROKEN(READ_TRANSMIT_POWER,
+ "HCI Read Transmit Power Level command is advertised, "
+ "but not supported."),
+ HCI_QUIRK_BROKEN(FILTER_CLEAR_ALL,
+ "HCI Set Event Filter command not supported."),
+ HCI_QUIRK_BROKEN(ENHANCED_SETUP_SYNC_CONN,
+ "HCI Enhanced Setup Synchronous Connection command is "
+ "advertised, but not supported.")
+};
+
int hci_dev_open_sync(struct hci_dev *hdev)
{
int ret = 0;
@@ -3873,12 +3909,19 @@ int hci_dev_open_sync(struct hci_dev *hdev)
if (hci_dev_test_flag(hdev, HCI_SETUP) ||
test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks)) {
bool invalid_bdaddr;
+ size_t i;
hci_sock_dev_event(hdev, HCI_DEV_SETUP);
if (hdev->setup)
ret = hdev->setup(hdev);
+ for (i = 0; i < ARRAY_SIZE(hci_broken_table); i++) {
+ if (test_bit(hci_broken_table[i].quirk, &hdev->quirks))
+ bt_dev_warn(hdev, "%s",
+ hci_broken_table[i].desc);
+ }
+
/* The transport driver can set the quirk to mark the
* BD_ADDR invalid before creating the HCI device or in
* its setup callback.
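
The HCI_QUIRK_BROKEN() macro, the table and the loop above form a small table-driven pattern: one entry per known-broken controller command, each warned about once during setup when the matching quirk bit is set. A self-contained userspace analogue; QUIRK_BROKEN, broken_table, the bit numbers and the messages are illustrative, not the kernel's values.

	#include <stdio.h>

	#define QUIRK_BROKEN(_bit, _desc)	{ (_bit), (_desc) }

	static const struct {
		unsigned long bit;
		const char *desc;
	} broken_table[] = {
		QUIRK_BROKEN(0, "Read Local Supported Commands not supported"),
		QUIRK_BROKEN(3, "Set Event Filter command not supported"),
	};

	int main(void)
	{
		unsigned long quirks = 1UL << 3;	/* pretend one quirk is set */

		for (size_t i = 0; i < sizeof(broken_table) / sizeof(broken_table[0]); i++)
			if (quirks & (1UL << broken_table[i].bit))
				printf("warn: %s\n", broken_table[i].desc);
		return 0;
	}
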
@@ -4106,9 +4149,9 @@ int hci_dev_close_sync(struct hci_dev *hdev)
hci_inquiry_cache_flush(hdev);
hci_pend_le_actions_clear(hdev);
hci_conn_hash_flush(hdev);
- hci_dev_unlock(hdev);
-
+ /* Prevent data races on hdev->smp_data or hdev->smp_bredr_data */
smp_unregister(hdev);
+ hci_dev_unlock(hdev);
hci_sock_dev_event(hdev, HCI_DEV_DOWN);
@@ -4395,12 +4438,21 @@ static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
static int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
u8 reason)
{
+ int err;
+
switch (conn->state) {
case BT_CONNECTED:
case BT_CONFIG:
return hci_disconnect_sync(hdev, conn, reason);
case BT_CONNECT:
- return hci_connect_cancel_sync(hdev, conn);
+ err = hci_connect_cancel_sync(hdev, conn);
+ /* Clean up the hci_conn object if it cannot be cancelled as it
+ * likely means the controller and host stack are out of sync.
+ */
+ if (err)
+ hci_conn_failed(conn, err);
+
+ return err;
case BT_CONNECT2:
return hci_reject_conn_sync(hdev, conn, reason);
default:
@@ -4422,7 +4474,7 @@ static int hci_disconnect_all_sync(struct hci_dev *hdev, u8 reason)
return err;
}
- return err;
+ return 0;
}
/* This function perform power off HCI command sequence as follows:
@@ -4825,11 +4877,17 @@ static int hci_update_event_filter_sync(struct hci_dev *hdev)
if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
return 0;
+ /* Some fake CSR controllers lock up after setting this type of
+ * filter, so avoid sending the request altogether.
+ */
+ if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks))
+ return 0;
+
/* Always clear event filter when starting */
hci_clear_event_filter_sync(hdev);
list_for_each_entry(b, &hdev->accept_list, list) {
- if (!test_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, b->flags))
+ if (!(b->flags & HCI_CONN_FLAG_REMOTE_WAKEUP))
continue;
bt_dev_dbg(hdev, "Adding event filters for %pMR", &b->bdaddr);
@@ -4853,10 +4911,28 @@ static int hci_update_event_filter_sync(struct hci_dev *hdev)
return 0;
}
+/* This function disables scanning (BR and LE) and marks it as paused */
+static int hci_pause_scan_sync(struct hci_dev *hdev)
+{
+ if (hdev->scanning_paused)
+ return 0;
+
+ /* Disable page scan if enabled */
+ if (test_bit(HCI_PSCAN, &hdev->flags))
+ hci_write_scan_enable_sync(hdev, SCAN_DISABLED);
+
+ hci_scan_disable_sync(hdev);
+
+ hdev->scanning_paused = true;
+
+ return 0;
+}
+
/* This function performs the HCI suspend procedures in the following order:
*
* Pause discovery (active scanning/inquiry)
* Pause Directed Advertising/Advertising
+ * Pause Scanning (passive scanning in case discovery was not active)
* Disconnect all connections
* Set suspend_status to BT_SUSPEND_DISCONNECT if hdev cannot wakeup
* otherwise:
@@ -4882,15 +4958,11 @@ int hci_suspend_sync(struct hci_dev *hdev)
/* Pause other advertisements */
hci_pause_advertising_sync(hdev);
- /* Disable page scan if enabled */
- if (test_bit(HCI_PSCAN, &hdev->flags))
- hci_write_scan_enable_sync(hdev, SCAN_DISABLED);
-
/* Suspend monitor filters */
hci_suspend_monitor_sync(hdev);
/* Prevent disconnects from causing scanning to be re-enabled */
- hdev->scanning_paused = true;
+ hci_pause_scan_sync(hdev);
/* Soft disconnect everything (power off) */
err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_POWER_OFF);
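
With hci_pause_scan_sync() factored out, the top of hci_suspend_sync() follows the ordering listed in the comment block above almost literally. A sketch using only the helpers visible in this hunk; pausing discovery happens earlier in the real function and is left as a placeholder comment here.

	/* pause discovery first (active scanning / inquiry) */
	hci_pause_advertising_sync(hdev);	/* directed and undirected advertising */
	hci_suspend_monitor_sync(hdev);		/* advertisement monitors */
	hci_pause_scan_sync(hdev);		/* page scan and passive LE scan */
	err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_POWER_OFF);
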
@@ -4961,6 +5033,22 @@ static void hci_resume_monitor_sync(struct hci_dev *hdev)
}
}
+/* This function resumes scanning and resets the paused flag */
+static int hci_resume_scan_sync(struct hci_dev *hdev)
+{
+ if (!hdev->scanning_paused)
+ return 0;
+
+ hci_update_scan_sync(hdev);
+
+ /* Reset passive scanning to normal */
+ hci_update_passive_scan_sync(hdev);
+
+ hdev->scanning_paused = false;
+
+ return 0;
+}
+
/* This function performs the HCI resume procedures in the following order:
*
* Restore event mask
@@ -4983,10 +5071,9 @@ int hci_resume_sync(struct hci_dev *hdev)
/* Clear any event filters and restore scan state */
hci_clear_event_filter_sync(hdev);
- hci_update_scan_sync(hdev);
- /* Reset passive scanning to normal */
- hci_update_passive_scan_sync(hdev);
+ /* Resume scanning */
+ hci_resume_scan_sync(hdev);
/* Resume monitor filters */
hci_resume_monitor_sync(hdev);
@@ -5140,8 +5227,8 @@ static void set_ext_conn_params(struct hci_conn *conn,
p->max_ce_len = cpu_to_le16(0x0000);
}
-int hci_le_ext_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
- u8 own_addr_type)
+static int hci_le_ext_create_conn_sync(struct hci_dev *hdev,
+ struct hci_conn *conn, u8 own_addr_type)
{
struct hci_cp_le_ext_create_conn *cp;
struct hci_cp_le_ext_conn_param *p;
@@ -5185,7 +5272,7 @@ int hci_le_ext_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
return __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_EXT_CREATE_CONN,
plen, data,
HCI_EV_LE_ENHANCED_CONN_COMPLETE,
- HCI_CMD_TIMEOUT, NULL);
+ conn->conn_timeout, NULL);
}
int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn)
@@ -5270,9 +5357,18 @@ int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn)
cp.min_ce_len = cpu_to_le16(0x0000);
cp.max_ce_len = cpu_to_le16(0x0000);
+ /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 2261:
+ *
+ * If this event is unmasked and the HCI_LE_Connection_Complete event
+ * is unmasked, only the HCI_LE_Enhanced_Connection_Complete event is
+ * sent when a new connection has been created.
+ */
err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CREATE_CONN,
- sizeof(cp), &cp, HCI_EV_LE_CONN_COMPLETE,
- HCI_CMD_TIMEOUT, NULL);
+ sizeof(cp), &cp,
+ use_enhanced_conn_complete(hdev) ?
+ HCI_EV_LE_ENHANCED_CONN_COMPLETE :
+ HCI_EV_LE_CONN_COMPLETE,
+ conn->conn_timeout, NULL);
done:
/* Re-enable advertising after the connection attempt is finished. */
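
The specification note above is why the awaited event is now chosen at runtime: when the enhanced variant is unmasked the controller sends only HCI_LE_Enhanced_Connection_Complete, so waiting on the legacy event would simply time out. A condensed sketch of the selection, reusing the names from this hunk.

	/* Sketch only: wait for whichever completion event will actually arrive */
	err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CREATE_CONN,
				       sizeof(cp), &cp,
				       use_enhanced_conn_complete(hdev) ?
				       HCI_EV_LE_ENHANCED_CONN_COMPLETE :
				       HCI_EV_LE_CONN_COMPLETE,
				       conn->conn_timeout, NULL);
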
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 80848dfc01db..5940744a8cd8 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -1305,7 +1305,7 @@ static int hidp_session_thread(void *arg)
l2cap_unregister_user(session->conn, &session->user);
hidp_session_put(session);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
return 0;
}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index e817ff0607a0..ae78490ecd3d 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1436,6 +1436,7 @@ static void l2cap_ecred_connect(struct l2cap_chan *chan)
l2cap_ecred_init(chan, 0);
+ memset(&data, 0, sizeof(data));
data.pdu.req.psm = chan->psm;
data.pdu.req.mtu = cpu_to_le16(chan->imtu);
data.pdu.req.mps = cpu_to_le16(chan->mps);
@@ -1443,7 +1444,6 @@ static void l2cap_ecred_connect(struct l2cap_chan *chan)
data.pdu.scid[0] = cpu_to_le16(chan->scid);
chan->ident = l2cap_get_ident(conn);
- data.pid = chan->ops->get_peer_pid(chan);
data.count = 1;
data.chan = chan;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 37087cf7dc5a..ae758ab1b558 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -42,7 +42,7 @@
#include "aosp.h"
#define MGMT_VERSION 1
-#define MGMT_REVISION 21
+#define MGMT_REVISION 22
static const u16 mgmt_commands[] = {
MGMT_OP_READ_INDEX_LIST,
@@ -174,6 +174,8 @@ static const u16 mgmt_events[] = {
MGMT_EV_ADV_MONITOR_REMOVED,
MGMT_EV_CONTROLLER_SUSPEND,
MGMT_EV_CONTROLLER_RESUME,
+ MGMT_EV_ADV_MONITOR_DEVICE_FOUND,
+ MGMT_EV_ADV_MONITOR_DEVICE_LOST,
};
static const u16 mgmt_untrusted_commands[] = {
@@ -1218,7 +1220,13 @@ static int new_settings(struct hci_dev *hdev, struct sock *skip)
static void mgmt_set_powered_complete(struct hci_dev *hdev, void *data, int err)
{
struct mgmt_pending_cmd *cmd = data;
- struct mgmt_mode *cp = cmd->param;
+ struct mgmt_mode *cp;
+
+ /* Make sure cmd still outstanding. */
+ if (cmd != pending_find(MGMT_OP_SET_POWERED, hdev))
+ return;
+
+ cp = cmd->param;
bt_dev_dbg(hdev, "err %d", err);
@@ -1242,7 +1250,7 @@ static void mgmt_set_powered_complete(struct hci_dev *hdev, void *data, int err)
mgmt_status(err));
}
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
}
static int set_powered_sync(struct hci_dev *hdev, void *data)
@@ -1281,7 +1289,7 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_SET_POWERED, hdev, data, len);
+ cmd = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto failed;
@@ -1290,6 +1298,9 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data,
err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd,
mgmt_set_powered_complete);
+ if (err < 0)
+ mgmt_pending_remove(cmd);
+
failed:
hci_dev_unlock(hdev);
return err;
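
set_powered() above and the set_discoverable()/set_connectable() handlers that follow all adopt the same pending-command lifecycle: queue with mgmt_pending_add(), have the completion callback bail out unless pending_find() still returns that same entry, and let mgmt_pending_remove() unlink and free it on both the queueing-error path and completion. A condensed sketch of the request side, mirroring the set_powered() hunk above.

	cmd = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, hdev, data, len);
	if (!cmd) {
		err = -ENOMEM;
		goto failed;
	}

	err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd,
				 mgmt_set_powered_complete);
	if (err < 0)
		mgmt_pending_remove(cmd);	/* never queued, drop it now */
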
@@ -1383,6 +1394,10 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "err %d", err);
+ /* Make sure cmd still outstanding. */
+ if (cmd != pending_find(MGMT_OP_SET_DISCOVERABLE, hdev))
+ return;
+
hci_dev_lock(hdev);
if (err) {
@@ -1402,7 +1417,7 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data,
new_settings(hdev, cmd->sk);
done:
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
hci_dev_unlock(hdev);
}
@@ -1511,7 +1526,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_SET_DISCOVERABLE, hdev, data, len);
+ cmd = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto failed;
@@ -1538,6 +1553,9 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
err = hci_cmd_sync_queue(hdev, set_discoverable_sync, cmd,
mgmt_set_discoverable_complete);
+ if (err < 0)
+ mgmt_pending_remove(cmd);
+
failed:
hci_dev_unlock(hdev);
return err;
@@ -1550,6 +1568,10 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "err %d", err);
+ /* Make sure cmd still outstanding. */
+ if (cmd != pending_find(MGMT_OP_SET_CONNECTABLE, hdev))
+ return;
+
hci_dev_lock(hdev);
if (err) {
@@ -1562,7 +1584,9 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data,
new_settings(hdev, cmd->sk);
done:
- mgmt_pending_free(cmd);
+ if (cmd)
+ mgmt_pending_remove(cmd);
+
hci_dev_unlock(hdev);
}
@@ -1634,7 +1658,7 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_SET_CONNECTABLE, hdev, data, len);
+ cmd = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto failed;
@@ -1654,6 +1678,9 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
err = hci_cmd_sync_queue(hdev, set_connectable_sync, cmd,
mgmt_set_connectable_complete);
+ if (err < 0)
+ mgmt_pending_remove(cmd);
+
failed:
hci_dev_unlock(hdev);
return err;
@@ -1774,6 +1801,10 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err)
u8 enable = cp->val;
bool changed;
+ /* Make sure cmd still outstanding. */
+ if (cmd != pending_find(MGMT_OP_SET_SSP, hdev))
+ return;
+
if (err) {
u8 mgmt_err = mgmt_status(err);
@@ -2267,7 +2298,9 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,
struct mgmt_cp_remove_uuid *cp = data;
struct mgmt_pending_cmd *cmd;
struct bt_uuid *match, *tmp;
- u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ static const u8 bt_uuid_any[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
int err, found;
bt_dev_dbg(hdev, "sock %p", sk);
@@ -3321,6 +3354,9 @@ static void set_name_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
+ if (cmd != pending_find(MGMT_OP_SET_LOCAL_NAME, hdev))
+ return;
+
if (status) {
mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME,
status);
@@ -3493,6 +3529,9 @@ static void set_default_phy_complete(struct hci_dev *hdev, void *data, int err)
struct sk_buff *skb = cmd->skb;
u8 status = mgmt_status(err);
+ if (cmd != pending_find(MGMT_OP_SET_PHY_CONFIGURATION, hdev))
+ return;
+
if (!status) {
if (!skb)
status = MGMT_STATUS_FAILED;
@@ -3759,13 +3798,6 @@ static int set_wideband_speech(struct sock *sk, struct hci_dev *hdev,
hci_dev_lock(hdev);
- if (pending_find(MGMT_OP_SET_WIDEBAND_SPEECH, hdev)) {
- err = mgmt_cmd_status(sk, hdev->id,
- MGMT_OP_SET_WIDEBAND_SPEECH,
- MGMT_STATUS_BUSY);
- goto unlock;
- }
-
if (hdev_is_powered(hdev) &&
!!cp->val != hci_dev_test_flag(hdev,
HCI_WIDEBAND_SPEECH_ENABLED)) {
@@ -3981,10 +4013,11 @@ static int exp_ll_privacy_feature_changed(bool enabled, struct hci_dev *hdev,
memcpy(ev.uuid, rpa_resolution_uuid, 16);
ev.flags = cpu_to_le32((enabled ? BIT(0) : 0) | BIT(1));
+ /* TODO: does updating conn_flags here need to be atomic? */
if (enabled && privacy_mode_capable(hdev))
- set_bit(HCI_CONN_FLAG_DEVICE_PRIVACY, hdev->conn_flags);
+ hdev->conn_flags |= HCI_CONN_FLAG_DEVICE_PRIVACY;
else
- clear_bit(HCI_CONN_FLAG_DEVICE_PRIVACY, hdev->conn_flags);
+ hdev->conn_flags &= ~HCI_CONN_FLAG_DEVICE_PRIVACY;
return mgmt_limited_event(MGMT_EV_EXP_FEATURE_CHANGED, hdev,
&ev, sizeof(ev),
@@ -4403,8 +4436,7 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
- bitmap_to_arr32(&supported_flags, hdev->conn_flags,
- __HCI_CONN_NUM_FLAGS);
+ supported_flags = hdev->conn_flags;
memset(&rp, 0, sizeof(rp));
@@ -4415,8 +4447,7 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
if (!br_params)
goto done;
- bitmap_to_arr32(&current_flags, br_params->flags,
- __HCI_CONN_NUM_FLAGS);
+ current_flags = br_params->flags;
} else {
params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
le_addr_type(cp->addr.type));
@@ -4424,8 +4455,7 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
if (!params)
goto done;
- bitmap_to_arr32(&current_flags, params->flags,
- __HCI_CONN_NUM_FLAGS);
+ current_flags = params->flags;
}
bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr);
@@ -4470,8 +4500,8 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
&cp->addr.bdaddr, cp->addr.type,
__le32_to_cpu(current_flags));
- bitmap_to_arr32(&supported_flags, hdev->conn_flags,
- __HCI_CONN_NUM_FLAGS);
+ // We should probably take hci_dev_lock() earlier, since conn_flags can change
+ supported_flags = hdev->conn_flags;
if ((supported_flags | current_flags) != supported_flags) {
bt_dev_warn(hdev, "Bad flag given (0x%x) vs supported (0x%0x)",
@@ -4487,7 +4517,7 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
cp->addr.type);
if (br_params) {
- bitmap_from_u64(br_params->flags, current_flags);
+ br_params->flags = current_flags;
status = MGMT_STATUS_SUCCESS;
} else {
bt_dev_warn(hdev, "No such BR/EDR device %pMR (0x%x)",
@@ -4497,14 +4527,26 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
le_addr_type(cp->addr.type));
if (params) {
- bitmap_from_u64(params->flags, current_flags);
+ /* Devices using RPAs can only be programmed in the
+ * acceptlist if LL Privacy has been enabled, otherwise they
+ * cannot mark HCI_CONN_FLAG_REMOTE_WAKEUP.
+ */
+ if ((current_flags & HCI_CONN_FLAG_REMOTE_WAKEUP) &&
+ !use_ll_privacy(hdev) &&
+ hci_find_irk_by_addr(hdev, &params->addr,
+ params->addr_type)) {
+ bt_dev_warn(hdev,
+ "Cannot set wakeable for RPA");
+ goto unlock;
+ }
+
+ params->flags = current_flags;
status = MGMT_STATUS_SUCCESS;
/* Update passive scan if HCI_CONN_FLAG_DEVICE_PRIVACY
* has been set.
*/
- if (test_bit(HCI_CONN_FLAG_DEVICE_PRIVACY,
- params->flags))
+ if (params->flags & HCI_CONN_FLAG_DEVICE_PRIVACY)
hci_update_passive_scan(hdev);
} else {
bt_dev_warn(hdev, "No such LE device %pMR (0x%x)",
@@ -4513,9 +4555,10 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
}
}
-done:
+unlock:
hci_dev_unlock(hdev);
+done:
if (status == MGMT_STATUS_SUCCESS)
device_flags_changed(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
supported_flags, current_flags);
@@ -5036,12 +5079,6 @@ static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- if (pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev)) {
- err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA,
- MGMT_STATUS_BUSY);
- goto unlock;
- }
-
cmd = mgmt_pending_new(sk, MGMT_OP_READ_LOCAL_OOB_DATA, hdev, NULL, 0);
if (!cmd)
err = -ENOMEM;
@@ -5261,11 +5298,16 @@ static void start_discovery_complete(struct hci_dev *hdev, void *data, int err)
{
struct mgmt_pending_cmd *cmd = data;
+ if (cmd != pending_find(MGMT_OP_START_DISCOVERY, hdev) &&
+ cmd != pending_find(MGMT_OP_START_LIMITED_DISCOVERY, hdev) &&
+ cmd != pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev))
+ return;
+
bt_dev_dbg(hdev, "err %d", err);
mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err),
cmd->param, 1);
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
hci_discovery_set_state(hdev, err ? DISCOVERY_STOPPED:
DISCOVERY_FINDING);
@@ -5327,7 +5369,7 @@ static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev,
else
hdev->discovery.limited = false;
- cmd = mgmt_pending_new(sk, op, hdev, data, len);
+ cmd = mgmt_pending_add(sk, op, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto failed;
@@ -5336,7 +5378,7 @@ static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev,
err = hci_cmd_sync_queue(hdev, start_discovery_sync, cmd,
start_discovery_complete);
if (err < 0) {
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
goto failed;
}
@@ -5430,7 +5472,7 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
goto failed;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_START_SERVICE_DISCOVERY,
+ cmd = mgmt_pending_add(sk, MGMT_OP_START_SERVICE_DISCOVERY,
hdev, data, len);
if (!cmd) {
err = -ENOMEM;
@@ -5463,7 +5505,7 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
err = hci_cmd_sync_queue(hdev, start_discovery_sync, cmd,
start_discovery_complete);
if (err < 0) {
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
goto failed;
}
@@ -5495,11 +5537,14 @@ static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err)
{
struct mgmt_pending_cmd *cmd = data;
+ if (cmd != pending_find(MGMT_OP_STOP_DISCOVERY, hdev))
+ return;
+
bt_dev_dbg(hdev, "err %d", err);
mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err),
cmd->param, 1);
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
if (!err)
hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
@@ -5535,7 +5580,7 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
goto unlock;
}
- cmd = mgmt_pending_new(sk, MGMT_OP_STOP_DISCOVERY, hdev, data, len);
+ cmd = mgmt_pending_add(sk, MGMT_OP_STOP_DISCOVERY, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto unlock;
@@ -5544,7 +5589,7 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
err = hci_cmd_sync_queue(hdev, stop_discovery_sync, cmd,
stop_discovery_complete);
if (err < 0) {
- mgmt_pending_free(cmd);
+ mgmt_pending_remove(cmd);
goto unlock;
}
@@ -7102,8 +7147,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
addr_type);
if (params)
- bitmap_to_arr32(&current_flags, params->flags,
- __HCI_CONN_NUM_FLAGS);
+ current_flags = params->flags;
}
err = hci_cmd_sync_queue(hdev, add_device_sync, NULL, NULL);
@@ -7112,8 +7156,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
added:
device_added(sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action);
- bitmap_to_arr32(&supported_flags, hdev->conn_flags,
- __HCI_CONN_NUM_FLAGS);
+ supported_flags = hdev->conn_flags;
device_flags_changed(NULL, hdev, &cp->addr.bdaddr, cp->addr.type,
supported_flags, current_flags);
@@ -7474,6 +7517,9 @@ static void read_local_oob_ext_data_complete(struct hci_dev *hdev, void *data,
u8 status = mgmt_status(err);
u16 eir_len;
+ if (cmd != pending_find(MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev))
+ return;
+
if (!status) {
if (!skb)
status = MGMT_STATUS_FAILED;
@@ -7918,7 +7964,7 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data,
return false;
/* Make sure that the data is correctly formatted. */
- for (i = 0, cur_len = 0; i < len; i += (cur_len + 1)) {
+ for (i = 0; i < len; i += (cur_len + 1)) {
cur_len = data[i];
if (!cur_len)
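The loop above walks EIR/advertising data as length-prefixed TLV fields, where each field starts with a length byte covering the type byte plus its payload. A small standalone sketch of the same validation walk (field layout assumed for illustration, not tied to the mgmt code):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Walk [len][type][payload...] fields and reject truncated or empty ones. */
static bool tlv_data_is_well_formed(const uint8_t *data, size_t len)
{
	size_t i = 0;

	while (i < len) {
		uint8_t cur_len = data[i];

		if (!cur_len)              /* zero-length field */
			return false;
		if (i + cur_len >= len)    /* field runs past the buffer */
			return false;

		i += cur_len + 1;          /* skip length byte plus field */
	}
	return true;
}

int main(void)
{
	const uint8_t ok[]  = { 0x02, 0x01, 0x06, 0x03, 0x03, 0x0d, 0x18 };
	const uint8_t bad[] = { 0x05, 0x01, 0x06 };  /* claims more than present */

	printf("%d %d\n", tlv_data_is_well_formed(ok, sizeof(ok)),
	       tlv_data_is_well_formed(bad, sizeof(bad)));
	return 0;
}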
@@ -7969,11 +8015,7 @@ static bool requested_adv_flags_are_valid(struct hci_dev *hdev, u32 adv_flags)
static bool adv_busy(struct hci_dev *hdev)
{
- return (pending_find(MGMT_OP_ADD_ADVERTISING, hdev) ||
- pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev) ||
- pending_find(MGMT_OP_SET_LE, hdev) ||
- pending_find(MGMT_OP_ADD_EXT_ADV_PARAMS, hdev) ||
- pending_find(MGMT_OP_ADD_EXT_ADV_DATA, hdev));
+ return pending_find(MGMT_OP_SET_LE, hdev);
}
static void add_adv_complete(struct hci_dev *hdev, struct sock *sk, u8 instance,
@@ -8046,7 +8088,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
u32 flags;
u8 status;
u16 timeout, duration;
- unsigned int prev_instance_cnt = hdev->adv_instance_cnt;
+ unsigned int prev_instance_cnt;
u8 schedule_instance = 0;
struct adv_info *next_instance;
int err;
@@ -8097,6 +8139,8 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
+ prev_instance_cnt = hdev->adv_instance_cnt;
+
err = hci_add_adv_instance(hdev, cp->instance, flags,
cp->adv_data_len, cp->data,
cp->scan_rsp_len,
@@ -8563,9 +8607,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- if (pending_find(MGMT_OP_ADD_ADVERTISING, hdev) ||
- pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev) ||
- pending_find(MGMT_OP_SET_LE, hdev)) {
+ if (pending_find(MGMT_OP_SET_LE, hdev)) {
err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING,
MGMT_STATUS_BUSY);
goto unlock;
@@ -8601,7 +8643,6 @@ static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev,
struct mgmt_cp_get_adv_size_info *cp = data;
struct mgmt_rp_get_adv_size_info rp;
u32 flags, supported_flags;
- int err;
bt_dev_dbg(hdev, "sock %p", sk);
@@ -8628,10 +8669,8 @@ static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev,
rp.max_adv_data_len = tlv_data_max_len(hdev, flags, true);
rp.max_scan_rsp_len = tlv_data_max_len(hdev, flags, false);
- err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO,
- MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
-
- return err;
+ return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO,
+ MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
}
static const struct hci_mgmt_handler mgmt_handlers[] = {
@@ -9059,12 +9098,14 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn,
u16 eir_len = 0;
u32 flags = 0;
+ /* allocate buff for LE or BR/EDR adv */
if (conn->le_adv_data_len > 0)
skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_CONNECTED,
- conn->le_adv_data_len);
+ sizeof(*ev) + conn->le_adv_data_len);
else
skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_CONNECTED,
- 2 + name_len + 5);
+ sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0) +
+ eir_precalc_len(sizeof(conn->dev_class)));
ev = skb_put(skb, sizeof(*ev));
bacpy(&ev->addr.bdaddr, &conn->dst);
@@ -9083,18 +9124,12 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn,
skb_put_data(skb, conn->le_adv_data, conn->le_adv_data_len);
eir_len = conn->le_adv_data_len;
} else {
- if (name_len > 0) {
- eir_len = eir_append_data(ev->eir, 0, EIR_NAME_COMPLETE,
- name, name_len);
- skb_put(skb, eir_len);
- }
+ if (name)
+ eir_len += eir_skb_put_data(skb, EIR_NAME_COMPLETE, name, name_len);
- if (memcmp(conn->dev_class, "\0\0\0", 3) != 0) {
- eir_len = eir_append_data(ev->eir, eir_len,
- EIR_CLASS_OF_DEV,
- conn->dev_class, 3);
- skb_put(skb, 5);
- }
+ if (memcmp(conn->dev_class, "\0\0\0", sizeof(conn->dev_class)))
+ eir_len += eir_skb_put_data(skb, EIR_CLASS_OF_DEV,
+ conn->dev_class, sizeof(conn->dev_class));
}
ev->eir_len = cpu_to_le16(eir_len);
@@ -9589,12 +9624,120 @@ static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir,
return true;
}
+void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle,
+ bdaddr_t *bdaddr, u8 addr_type)
+{
+ struct mgmt_ev_adv_monitor_device_lost ev;
+
+ ev.monitor_handle = cpu_to_le16(handle);
+ bacpy(&ev.addr.bdaddr, bdaddr);
+ ev.addr.type = addr_type;
+
+ mgmt_event(MGMT_EV_ADV_MONITOR_DEVICE_LOST, hdev, &ev, sizeof(ev),
+ NULL);
+}
+
+static void mgmt_send_adv_monitor_device_found(struct hci_dev *hdev,
+ struct sk_buff *skb,
+ struct sock *skip_sk,
+ u16 handle)
+{
+ struct sk_buff *advmon_skb;
+ size_t advmon_skb_len;
+ __le16 *monitor_handle;
+
+ if (!skb)
+ return;
+
+ advmon_skb_len = (sizeof(struct mgmt_ev_adv_monitor_device_found) -
+ sizeof(struct mgmt_ev_device_found)) + skb->len;
+ advmon_skb = mgmt_alloc_skb(hdev, MGMT_EV_ADV_MONITOR_DEVICE_FOUND,
+ advmon_skb_len);
+ if (!advmon_skb)
+ return;
+
+ /* ADV_MONITOR_DEVICE_FOUND is similar to DEVICE_FOUND event except
+ * that it also has 'monitor_handle'. Make a copy of DEVICE_FOUND and
+ * store monitor_handle of the matched monitor.
+ */
+ monitor_handle = skb_put(advmon_skb, sizeof(*monitor_handle));
+ *monitor_handle = cpu_to_le16(handle);
+ skb_put_data(advmon_skb, skb->data, skb->len);
+
+ mgmt_event_skb(advmon_skb, skip_sk);
+}
+
+static void mgmt_adv_monitor_device_found(struct hci_dev *hdev,
+ bdaddr_t *bdaddr, bool report_device,
+ struct sk_buff *skb,
+ struct sock *skip_sk)
+{
+ struct monitored_device *dev, *tmp;
+ bool matched = false;
+ bool notified = false;
+
+ /* We have received the Advertisement Report because:
+ * 1. the kernel has initiated active discovery
+ * 2. if not, we have pend_le_reports > 0 in which case we are doing
+ * passive scanning
+ * 3. if none of the above is true, we have one or more active
+ * Advertisement Monitor
+ *
+ * For case 1 and 2, report all advertisements via MGMT_EV_DEVICE_FOUND
+ * and report ONLY one advertisement per device for the matched Monitor
+ * via MGMT_EV_ADV_MONITOR_DEVICE_FOUND event.
+ *
+ * For case 3, since we are not active scanning and all advertisements
+ * received are due to a matched Advertisement Monitor, report all
+ * advertisements ONLY via MGMT_EV_ADV_MONITOR_DEVICE_FOUND event.
+ */
+ if (report_device && !hdev->advmon_pend_notify) {
+ mgmt_event_skb(skb, skip_sk);
+ return;
+ }
+
+ hdev->advmon_pend_notify = false;
+
+ list_for_each_entry_safe(dev, tmp, &hdev->monitored_devices, list) {
+ if (!bacmp(&dev->bdaddr, bdaddr)) {
+ matched = true;
+
+ if (!dev->notified) {
+ mgmt_send_adv_monitor_device_found(hdev, skb,
+ skip_sk,
+ dev->handle);
+ notified = true;
+ dev->notified = true;
+ }
+ }
+
+ if (!dev->notified)
+ hdev->advmon_pend_notify = true;
+ }
+
+ if (!report_device &&
+ ((matched && !notified) || !msft_monitor_supported(hdev))) {
+ /* Handle 0 indicates that we are not active scanning and this
+ * is a subsequent advertisement report for an already matched
+ * Advertisement Monitor or the controller offloading support
+ * is not available.
+ */
+ mgmt_send_adv_monitor_device_found(hdev, skb, skip_sk, 0);
+ }
+
+ if (report_device)
+ mgmt_event_skb(skb, skip_sk);
+ else
+ kfree_skb(skb);
+}
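/* Illustrative aside (not part of the patch): the loop above notifies each
 * monitored device at most once per match and keeps advmon_pend_notify set
 * while any device is still waiting. A minimal userspace model of that
 * bookkeeping over a plain array, with hypothetical types:
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct mon_dev {
	char addr[18];    /* textual address, for the sketch only */
	bool notified;
};

/* Returns true if any device still needs a notification afterwards. */
static bool notify_matches(struct mon_dev *devs, int n, const char *addr)
{
	bool pending = false;
	int i;

	for (i = 0; i < n; i++) {
		if (!strcmp(devs[i].addr, addr) && !devs[i].notified) {
			printf("notify %s\n", devs[i].addr);
			devs[i].notified = true;
		}
		if (!devs[i].notified)
			pending = true;
	}
	return pending;
}

int main(void)
{
	struct mon_dev devs[] = {
		{ "AA:BB:CC:DD:EE:FF", false },
		{ "11:22:33:44:55:66", false },
	};

	printf("pending: %d\n", notify_matches(devs, 2, "AA:BB:CC:DD:EE:FF"));
	printf("pending: %d\n", notify_matches(devs, 2, "11:22:33:44:55:66"));
	return 0;
}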
+
void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
u8 addr_type, u8 *dev_class, s8 rssi, u32 flags,
u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len)
{
struct sk_buff *skb;
struct mgmt_ev_device_found *ev;
+ bool report_device = hci_discovery_active(hdev);
/* Don't send events for a non-kernel initiated discovery. With
* LE one exception is if we have pend_le_reports > 0 in which
@@ -9603,11 +9746,10 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
if (!hci_discovery_active(hdev)) {
if (link_type == ACL_LINK)
return;
- if (link_type == LE_LINK &&
- list_empty(&hdev->pend_le_reports) &&
- !hci_is_adv_monitoring(hdev)) {
+ if (link_type == LE_LINK && !list_empty(&hdev->pend_le_reports))
+ report_device = true;
+ else if (!hci_is_adv_monitoring(hdev))
return;
- }
}
if (hdev->discovery.result_filtering) {
@@ -9672,7 +9814,7 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len);
- mgmt_event_skb(skb, NULL);
+ mgmt_adv_monitor_device_found(hdev, bdaddr, report_device, skb, NULL);
}
void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
@@ -9680,28 +9822,21 @@ void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
{
struct sk_buff *skb;
struct mgmt_ev_device_found *ev;
- u16 eir_len;
- u32 flags;
+ u16 eir_len = 0;
+ u32 flags = 0;
- if (name_len)
- skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, 2 + name_len);
- else
- skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, 0);
+ skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND,
+ sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0));
ev = skb_put(skb, sizeof(*ev));
bacpy(&ev->addr.bdaddr, bdaddr);
ev->addr.type = link_to_bdaddr(link_type, addr_type);
ev->rssi = rssi;
- if (name) {
- eir_len = eir_append_data(ev->eir, 0, EIR_NAME_COMPLETE, name,
- name_len);
- flags = 0;
- skb_put(skb, eir_len);
- } else {
- eir_len = 0;
+ if (name)
+ eir_len += eir_skb_put_data(skb, EIR_NAME_COMPLETE, name, name_len);
+ else
flags = MGMT_DEV_FOUND_NAME_REQUEST_FAILED;
- }
ev->eir_len = cpu_to_le16(eir_len);
ev->flags = cpu_to_le32(flags);
diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c
index edee60bbc7b4..b69cfed62088 100644
--- a/net/bluetooth/mgmt_util.c
+++ b/net/bluetooth/mgmt_util.c
@@ -77,11 +77,12 @@ int mgmt_send_event_skb(unsigned short channel, struct sk_buff *skb, int flag,
{
struct hci_dev *hdev;
struct mgmt_hdr *hdr;
- int len = skb->len;
+ int len;
if (!skb)
return -EINVAL;
+ len = skb->len;
hdev = bt_cb(skb)->mgmt.hdev;
/* Time stamp */
@@ -296,7 +297,7 @@ struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode,
if (!cmd)
return NULL;
- list_add(&cmd->list, &hdev->mgmt_pending);
+ list_add_tail(&cmd->list, &hdev->mgmt_pending);
return cmd;
}
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index 6a943634b31a..f43994523b1f 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -80,6 +80,14 @@ struct msft_rp_le_set_advertisement_filter_enable {
__u8 sub_opcode;
} __packed;
+#define MSFT_EV_LE_MONITOR_DEVICE 0x02
+struct msft_ev_le_monitor_device {
+ __u8 addr_type;
+ bdaddr_t bdaddr;
+ __u8 monitor_handle;
+ __u8 monitor_state;
+} __packed;
+
struct msft_monitor_advertisement_handle_data {
__u8 msft_handle;
__u16 mgmt_handle;
@@ -204,6 +212,37 @@ static struct msft_monitor_advertisement_handle_data *msft_find_handle_data
return NULL;
}
+/* This function requires the caller holds hdev->lock */
+static int msft_monitor_device_del(struct hci_dev *hdev, __u16 mgmt_handle,
+ bdaddr_t *bdaddr, __u8 addr_type,
+ bool notify)
+{
+ struct monitored_device *dev, *tmp;
+ int count = 0;
+
+ list_for_each_entry_safe(dev, tmp, &hdev->monitored_devices, list) {
+ /* mgmt_handle == 0 indicates remove all devices, whereas
+ * bdaddr == NULL indicates remove all devices matching the
+ * mgmt_handle.
+ */
+ if ((!mgmt_handle || dev->handle == mgmt_handle) &&
+ (!bdaddr || (!bacmp(bdaddr, &dev->bdaddr) &&
+ addr_type == dev->addr_type))) {
+ if (notify && dev->notified) {
+ mgmt_adv_monitor_device_lost(hdev, dev->handle,
+ &dev->bdaddr,
+ dev->addr_type);
+ }
+
+ list_del(&dev->list);
+ kfree(dev);
+ count++;
+ }
+ }
+
+ return count;
+}
+
static void msft_le_monitor_advertisement_cb(struct hci_dev *hdev,
u8 status, u16 opcode,
struct sk_buff *skb)
@@ -291,9 +330,14 @@ static void msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev,
/* Do not free the monitor if it is being removed due to
* suspend. It will be re-monitored on resume.
*/
- if (monitor && !msft->suspending)
+ if (monitor && !msft->suspending) {
hci_free_adv_monitor(hdev, monitor);
+ /* Clear any monitored devices by this Adv Monitor */
+ msft_monitor_device_del(hdev, handle_data->mgmt_handle,
+ NULL, 0, false);
+ }
+
list_del(&handle_data->list);
kfree(handle_data);
}
@@ -479,6 +523,16 @@ int msft_resume_sync(struct hci_dev *hdev)
if (!msft || !msft_monitor_supported(hdev))
return 0;
+ hci_dev_lock(hdev);
+
+ /* Clear already tracked devices on resume. Once the monitors are
+ * reregistered, devices in range will be found again after resume.
+ */
+ hdev->advmon_pend_notify = false;
+ msft_monitor_device_del(hdev, 0, NULL, 0, true);
+
+ hci_dev_unlock(hdev);
+
msft->resuming = true;
while (1) {
@@ -557,6 +611,14 @@ void msft_do_close(struct hci_dev *hdev)
list_del(&handle_data->list);
kfree(handle_data);
}
+
+ hci_dev_lock(hdev);
+
+ /* Clear any devices that are being monitored and notify device lost */
+ hdev->advmon_pend_notify = false;
+ msft_monitor_device_del(hdev, 0, NULL, 0, true);
+
+ hci_dev_unlock(hdev);
}
void msft_register(struct hci_dev *hdev)
@@ -590,10 +652,101 @@ void msft_unregister(struct hci_dev *hdev)
kfree(msft);
}
+/* This function requires the caller holds hdev->lock */
+static void msft_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr,
+ __u8 addr_type, __u16 mgmt_handle)
+{
+ struct monitored_device *dev;
+
+ dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev) {
+ bt_dev_err(hdev, "MSFT vendor event %u: no memory",
+ MSFT_EV_LE_MONITOR_DEVICE);
+ return;
+ }
+
+ bacpy(&dev->bdaddr, bdaddr);
+ dev->addr_type = addr_type;
+ dev->handle = mgmt_handle;
+ dev->notified = false;
+
+ INIT_LIST_HEAD(&dev->list);
+ list_add(&dev->list, &hdev->monitored_devices);
+ hdev->advmon_pend_notify = true;
+}
+
+/* This function requires the caller holds hdev->lock */
+static void msft_device_lost(struct hci_dev *hdev, bdaddr_t *bdaddr,
+ __u8 addr_type, __u16 mgmt_handle)
+{
+ if (!msft_monitor_device_del(hdev, mgmt_handle, bdaddr, addr_type,
+ true)) {
+ bt_dev_err(hdev, "MSFT vendor event %u: dev %pMR not in list",
+ MSFT_EV_LE_MONITOR_DEVICE, bdaddr);
+ }
+}
+
+static void *msft_skb_pull(struct hci_dev *hdev, struct sk_buff *skb,
+ u8 ev, size_t len)
+{
+ void *data;
+
+ data = skb_pull_data(skb, len);
+ if (!data)
+ bt_dev_err(hdev, "Malformed MSFT vendor event: 0x%02x", ev);
+
+ return data;
+}
+
+/* This function requires the caller holds hdev->lock */
+static void msft_monitor_device_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ struct msft_ev_le_monitor_device *ev;
+ struct msft_monitor_advertisement_handle_data *handle_data;
+ u8 addr_type;
+
+ ev = msft_skb_pull(hdev, skb, MSFT_EV_LE_MONITOR_DEVICE, sizeof(*ev));
+ if (!ev)
+ return;
+
+ bt_dev_dbg(hdev,
+ "MSFT vendor event 0x%02x: handle 0x%04x state %d addr %pMR",
+ MSFT_EV_LE_MONITOR_DEVICE, ev->monitor_handle,
+ ev->monitor_state, &ev->bdaddr);
+
+ handle_data = msft_find_handle_data(hdev, ev->monitor_handle, false);
+ if (!handle_data)
+ return;
+
+ switch (ev->addr_type) {
+ case ADDR_LE_DEV_PUBLIC:
+ addr_type = BDADDR_LE_PUBLIC;
+ break;
+
+ case ADDR_LE_DEV_RANDOM:
+ addr_type = BDADDR_LE_RANDOM;
+ break;
+
+ default:
+ bt_dev_err(hdev,
+ "MSFT vendor event 0x%02x: unknown addr type 0x%02x",
+ MSFT_EV_LE_MONITOR_DEVICE, ev->addr_type);
+ return;
+ }
+
+ if (ev->monitor_state)
+ msft_device_found(hdev, &ev->bdaddr, addr_type,
+ handle_data->mgmt_handle);
+ else
+ msft_device_lost(hdev, &ev->bdaddr, addr_type,
+ handle_data->mgmt_handle);
+}
+
void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb)
{
struct msft_data *msft = hdev->msft_data;
- u8 event;
+ u8 *evt_prefix;
+ u8 *evt;
if (!msft)
return;
@@ -602,13 +755,12 @@ void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb)
* matches, and otherwise just return.
*/
if (msft->evt_prefix_len > 0) {
- if (skb->len < msft->evt_prefix_len)
+ evt_prefix = msft_skb_pull(hdev, skb, 0, msft->evt_prefix_len);
+ if (!evt_prefix)
return;
- if (memcmp(skb->data, msft->evt_prefix, msft->evt_prefix_len))
+ if (memcmp(evt_prefix, msft->evt_prefix, msft->evt_prefix_len))
return;
-
- skb_pull(skb, msft->evt_prefix_len);
}
/* Every event starts at least with an event code and the rest of
@@ -617,10 +769,23 @@ void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb)
if (skb->len < 1)
return;
- event = *skb->data;
- skb_pull(skb, 1);
+ evt = msft_skb_pull(hdev, skb, 0, sizeof(*evt));
+ if (!evt)
+ return;
+
+ hci_dev_lock(hdev);
- bt_dev_dbg(hdev, "MSFT vendor event %u", event);
+ switch (*evt) {
+ case MSFT_EV_LE_MONITOR_DEVICE:
+ msft_monitor_device_evt(hdev, skb);
+ break;
+
+ default:
+ bt_dev_dbg(hdev, "MSFT vendor event 0x%02x", *evt);
+ break;
+ }
+
+ hci_dev_unlock(hdev);
}
__u64 msft_get_features(struct hci_dev *hdev)
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 8eabf41b2993..1111da4e2f2b 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -574,19 +574,24 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
- if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND)
- return -EBADFD;
+ lock_sock(sk);
+ if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) {
+ err = -EBADFD;
+ goto done;
+ }
- if (sk->sk_type != SOCK_SEQPACKET)
- return -EINVAL;
+ if (sk->sk_type != SOCK_SEQPACKET) {
+ err = -EINVAL;
+ goto done;
+ }
hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR);
- if (!hdev)
- return -EHOSTUNREACH;
+ if (!hdev) {
+ err = -EHOSTUNREACH;
+ goto done;
+ }
hci_dev_lock(hdev);
- lock_sock(sk);
-
/* Set destination address and psm */
bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr);
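The reordering above takes lock_sock() before inspecting sk_state, so the socket state cannot change between the check and the rest of the connect path. The same check-under-lock pattern in a small pthreads sketch (a generic state machine, not the SCO code):

#include <pthread.h>
#include <stdio.h>

enum state { ST_OPEN, ST_CONNECTED };

struct conn {
	pthread_mutex_t lock;
	enum state state;
};

/* Hold the lock across both the state check and the transition, so no
 * other thread can slip in between them.
 */
static int do_connect(struct conn *c)
{
	int err = 0;

	pthread_mutex_lock(&c->lock);
	if (c->state != ST_OPEN) {
		err = -1;          /* e.g. -EBADFD in the kernel code */
		goto out;
	}
	c->state = ST_CONNECTED;   /* check and transition are atomic now */
out:
	pthread_mutex_unlock(&c->lock);
	return err;
}

int main(void)
{
	struct conn c = { .lock = PTHREAD_MUTEX_INITIALIZER, .state = ST_OPEN };

	printf("%d\n", do_connect(&c));   /* 0: connected */
	printf("%d\n", do_connect(&c));   /* -1: already connected */
	return 0;
}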
@@ -885,7 +890,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
err = -EBADFD;
break;
}
- if (enhanced_sco_capable(hdev) &&
+ if (enhanced_sync_conn_capable(hdev) &&
voice.setting == BT_VOICE_TRANSPARENT)
sco_pi(sk)->codec.id = BT_CODEC_TRANSPARENT;
hci_dev_put(hdev);
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index fbc896323bec..e78dadfc5829 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -72,13 +72,16 @@ static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args)
args->args[3], args->args[4]);
}
+extern const struct bpf_link_ops bpf_struct_ops_link_lops;
+
int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
const struct bpf_struct_ops *st_ops = &bpf_bpf_dummy_ops;
const struct btf_type *func_proto;
struct bpf_dummy_ops_test_args *args;
- struct bpf_tramp_progs *tprogs;
+ struct bpf_tramp_links *tlinks;
+ struct bpf_tramp_link *link = NULL;
void *image = NULL;
unsigned int op_idx;
int prog_ret;
@@ -92,8 +95,8 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
if (IS_ERR(args))
return PTR_ERR(args);
- tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
- if (!tprogs) {
+ tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
+ if (!tlinks) {
err = -ENOMEM;
goto out;
}
@@ -105,8 +108,17 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
}
set_vm_flush_reset_perms(image);
+ link = kzalloc(sizeof(*link), GFP_USER);
+ if (!link) {
+ err = -ENOMEM;
+ goto out;
+ }
+ /* prog doesn't take ownership of the reference from the caller */
+ bpf_prog_inc(prog);
+ bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_link_lops, prog);
+
op_idx = prog->expected_attach_type;
- err = bpf_struct_ops_prepare_trampoline(tprogs, prog,
+ err = bpf_struct_ops_prepare_trampoline(tlinks, link,
&st_ops->func_models[op_idx],
image, image + PAGE_SIZE);
if (err < 0)
@@ -124,7 +136,9 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
out:
kfree(args);
bpf_jit_free_exec(image);
- kfree(tprogs);
+ if (link)
+ bpf_link_put(&link->link);
+ kfree(tlinks);
return err;
}
@@ -145,7 +159,8 @@ static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log,
const struct btf *btf,
const struct btf_type *t, int off,
int size, enum bpf_access_type atype,
- u32 *next_btf_id)
+ u32 *next_btf_id,
+ enum bpf_type_flag *flag)
{
const struct btf_type *state;
s32 type_id;
@@ -162,7 +177,8 @@ static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log,
return -EACCES;
}
- err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id);
+ err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
+ flag);
if (err < 0)
return err;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 46dd95755967..56f059b3c242 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -5,6 +5,7 @@
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/slab.h>
+#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
@@ -14,6 +15,7 @@
#include <net/sock.h>
#include <net/tcp.h>
#include <net/net_namespace.h>
+#include <net/page_pool.h>
#include <linux/error-injection.h>
#include <linux/smp.h>
#include <linux/sock_diag.h>
@@ -52,10 +54,11 @@ static void bpf_test_timer_leave(struct bpf_test_timer *t)
rcu_read_unlock();
}
-static bool bpf_test_timer_continue(struct bpf_test_timer *t, u32 repeat, int *err, u32 *duration)
+static bool bpf_test_timer_continue(struct bpf_test_timer *t, int iterations,
+ u32 repeat, int *err, u32 *duration)
__must_hold(rcu)
{
- t->i++;
+ t->i += iterations;
if (t->i >= repeat) {
/* We're done. */
t->time_spent += ktime_get_ns() - t->time_start;
@@ -87,6 +90,285 @@ reset:
return false;
}
+/* We put this struct at the head of each page with a context and frame
+ * initialised when the page is allocated, so we don't have to do this on each
+ * repetition of the test run.
+ */
+struct xdp_page_head {
+ struct xdp_buff orig_ctx;
+ struct xdp_buff ctx;
+ struct xdp_frame frm;
+ u8 data[];
+};
+
+struct xdp_test_data {
+ struct xdp_buff *orig_ctx;
+ struct xdp_rxq_info rxq;
+ struct net_device *dev;
+ struct page_pool *pp;
+ struct xdp_frame **frames;
+ struct sk_buff **skbs;
+ struct xdp_mem_info mem;
+ u32 batch_size;
+ u32 frame_cnt;
+};
+
+#define TEST_XDP_FRAME_SIZE (PAGE_SIZE - sizeof(struct xdp_page_head))
+#define TEST_XDP_MAX_BATCH 256
+
+static void xdp_test_run_init_page(struct page *page, void *arg)
+{
+ struct xdp_page_head *head = phys_to_virt(page_to_phys(page));
+ struct xdp_buff *new_ctx, *orig_ctx;
+ u32 headroom = XDP_PACKET_HEADROOM;
+ struct xdp_test_data *xdp = arg;
+ size_t frm_len, meta_len;
+ struct xdp_frame *frm;
+ void *data;
+
+ orig_ctx = xdp->orig_ctx;
+ frm_len = orig_ctx->data_end - orig_ctx->data_meta;
+ meta_len = orig_ctx->data - orig_ctx->data_meta;
+ headroom -= meta_len;
+
+ new_ctx = &head->ctx;
+ frm = &head->frm;
+ data = &head->data;
+ memcpy(data + headroom, orig_ctx->data_meta, frm_len);
+
+ xdp_init_buff(new_ctx, TEST_XDP_FRAME_SIZE, &xdp->rxq);
+ xdp_prepare_buff(new_ctx, data, headroom, frm_len, true);
+ new_ctx->data = new_ctx->data_meta + meta_len;
+
+ xdp_update_frame_from_buff(new_ctx, frm);
+ frm->mem = new_ctx->rxq->mem;
+
+ memcpy(&head->orig_ctx, new_ctx, sizeof(head->orig_ctx));
+}
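/* Illustrative aside (not part of the patch): the struct above is laid out at
 * the start of each pool page, with a flexible array member for the packet
 * data, so per-page state is initialised once at allocation instead of on
 * every test-run repetition. A standalone sketch of that layout, with
 * invented field names:
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BUF_SIZE 4096

struct page_head {
	uint32_t saved_len;     /* pristine state captured at init time */
	uint8_t data[];         /* packet bytes follow the header */
};

static struct page_head *page_init(const uint8_t *pkt, uint32_t len)
{
	struct page_head *head = malloc(BUF_SIZE);

	if (!head)
		return NULL;
	head->saved_len = len;
	memcpy(head->data, pkt, len);   /* done once, reused on every run */
	return head;
}

int main(void)
{
	const uint8_t pkt[] = { 0xde, 0xad, 0xbe, 0xef };
	struct page_head *head = page_init(pkt, sizeof(pkt));

	if (!head)
		return 1;
	printf("len %u first byte 0x%02x\n", head->saved_len, head->data[0]);
	free(head);
	return 0;
}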
+
+static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_ctx)
+{
+ struct page_pool *pp;
+ int err = -ENOMEM;
+ struct page_pool_params pp_params = {
+ .order = 0,
+ .flags = 0,
+ .pool_size = xdp->batch_size,
+ .nid = NUMA_NO_NODE,
+ .init_callback = xdp_test_run_init_page,
+ .init_arg = xdp,
+ };
+
+ xdp->frames = kvmalloc_array(xdp->batch_size, sizeof(void *), GFP_KERNEL);
+ if (!xdp->frames)
+ return -ENOMEM;
+
+ xdp->skbs = kvmalloc_array(xdp->batch_size, sizeof(void *), GFP_KERNEL);
+ if (!xdp->skbs)
+ goto err_skbs;
+
+ pp = page_pool_create(&pp_params);
+ if (IS_ERR(pp)) {
+ err = PTR_ERR(pp);
+ goto err_pp;
+ }
+
+ /* will copy 'mem.id' into pp->xdp_mem_id */
+ err = xdp_reg_mem_model(&xdp->mem, MEM_TYPE_PAGE_POOL, pp);
+ if (err)
+ goto err_mmodel;
+
+ xdp->pp = pp;
+
+ /* We create a 'fake' RXQ referencing the original dev, but with an
+ * xdp_mem_info pointing to our page_pool
+ */
+ xdp_rxq_info_reg(&xdp->rxq, orig_ctx->rxq->dev, 0, 0);
+ xdp->rxq.mem.type = MEM_TYPE_PAGE_POOL;
+ xdp->rxq.mem.id = pp->xdp_mem_id;
+ xdp->dev = orig_ctx->rxq->dev;
+ xdp->orig_ctx = orig_ctx;
+
+ return 0;
+
+err_mmodel:
+ page_pool_destroy(pp);
+err_pp:
+ kvfree(xdp->skbs);
+err_skbs:
+ kvfree(xdp->frames);
+ return err;
+}
+
+static void xdp_test_run_teardown(struct xdp_test_data *xdp)
+{
+ xdp_unreg_mem_model(&xdp->mem);
+ page_pool_destroy(xdp->pp);
+ kfree(xdp->frames);
+ kfree(xdp->skbs);
+}
+
+static bool ctx_was_changed(struct xdp_page_head *head)
+{
+ return head->orig_ctx.data != head->ctx.data ||
+ head->orig_ctx.data_meta != head->ctx.data_meta ||
+ head->orig_ctx.data_end != head->ctx.data_end;
+}
+
+static void reset_ctx(struct xdp_page_head *head)
+{
+ if (likely(!ctx_was_changed(head)))
+ return;
+
+ head->ctx.data = head->orig_ctx.data;
+ head->ctx.data_meta = head->orig_ctx.data_meta;
+ head->ctx.data_end = head->orig_ctx.data_end;
+ xdp_update_frame_from_buff(&head->ctx, &head->frm);
+}
+
+static int xdp_recv_frames(struct xdp_frame **frames, int nframes,
+ struct sk_buff **skbs,
+ struct net_device *dev)
+{
+ gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
+ int i, n;
+ LIST_HEAD(list);
+
+ n = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, (void **)skbs);
+ if (unlikely(n == 0)) {
+ for (i = 0; i < nframes; i++)
+ xdp_return_frame(frames[i]);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < nframes; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ struct sk_buff *skb = skbs[i];
+
+ skb = __xdp_build_skb_from_frame(xdpf, skb, dev);
+ if (!skb) {
+ xdp_return_frame(xdpf);
+ continue;
+ }
+
+ list_add_tail(&skb->list, &list);
+ }
+ netif_receive_skb_list(&list);
+
+ return 0;
+}
+
+static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog,
+ u32 repeat)
+{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ int err = 0, act, ret, i, nframes = 0, batch_sz;
+ struct xdp_frame **frames = xdp->frames;
+ struct xdp_page_head *head;
+ struct xdp_frame *frm;
+ bool redirect = false;
+ struct xdp_buff *ctx;
+ struct page *page;
+
+ batch_sz = min_t(u32, repeat, xdp->batch_size);
+
+ local_bh_disable();
+ xdp_set_return_frame_no_direct();
+
+ for (i = 0; i < batch_sz; i++) {
+ page = page_pool_dev_alloc_pages(xdp->pp);
+ if (!page) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ head = phys_to_virt(page_to_phys(page));
+ reset_ctx(head);
+ ctx = &head->ctx;
+ frm = &head->frm;
+ xdp->frame_cnt++;
+
+ act = bpf_prog_run_xdp(prog, ctx);
+
+ /* if program changed pkt bounds we need to update the xdp_frame */
+ if (unlikely(ctx_was_changed(head))) {
+ ret = xdp_update_frame_from_buff(ctx, frm);
+ if (ret) {
+ xdp_return_buff(ctx);
+ continue;
+ }
+ }
+
+ switch (act) {
+ case XDP_TX:
+ /* we can't do a real XDP_TX since we're not in the
+ * driver, so turn it into a REDIRECT back to the same
+ * index
+ */
+ ri->tgt_index = xdp->dev->ifindex;
+ ri->map_id = INT_MAX;
+ ri->map_type = BPF_MAP_TYPE_UNSPEC;
+ fallthrough;
+ case XDP_REDIRECT:
+ redirect = true;
+ ret = xdp_do_redirect_frame(xdp->dev, ctx, frm, prog);
+ if (ret)
+ xdp_return_buff(ctx);
+ break;
+ case XDP_PASS:
+ frames[nframes++] = frm;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(NULL, prog, act);
+ fallthrough;
+ case XDP_DROP:
+ xdp_return_buff(ctx);
+ break;
+ }
+ }
+
+out:
+ if (redirect)
+ xdp_do_flush();
+ if (nframes) {
+ ret = xdp_recv_frames(frames, nframes, xdp->skbs, xdp->dev);
+ if (ret)
+ err = ret;
+ }
+
+ xdp_clear_return_frame_no_direct();
+ local_bh_enable();
+ return err;
+}
+
+static int bpf_test_run_xdp_live(struct bpf_prog *prog, struct xdp_buff *ctx,
+ u32 repeat, u32 batch_size, u32 *time)
+
+{
+ struct xdp_test_data xdp = { .batch_size = batch_size };
+ struct bpf_test_timer t = { .mode = NO_MIGRATE };
+ int ret;
+
+ if (!repeat)
+ repeat = 1;
+
+ ret = xdp_test_run_setup(&xdp, ctx);
+ if (ret)
+ return ret;
+
+ bpf_test_timer_enter(&t);
+ do {
+ xdp.frame_cnt = 0;
+ ret = xdp_test_run_batch(&xdp, prog, repeat - t.i);
+ if (unlikely(ret < 0))
+ break;
+ } while (bpf_test_timer_continue(&t, xdp.frame_cnt, repeat, &ret, time));
+ bpf_test_timer_leave(&t);
+
+ xdp_test_run_teardown(&xdp);
+ return ret;
+}
+
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
u32 *retval, u32 *time, bool xdp)
{
@@ -118,7 +400,7 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
*retval = bpf_prog_run_xdp(prog, ctx);
else
*retval = bpf_prog_run(prog, ctx);
- } while (bpf_test_timer_continue(&t, repeat, &ret, time));
+ } while (bpf_test_timer_continue(&t, 1, repeat, &ret, time));
bpf_reset_run_ctx(old_ctx);
bpf_test_timer_leave(&t);
@@ -130,7 +412,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
static int bpf_test_finish(const union bpf_attr *kattr,
union bpf_attr __user *uattr, const void *data,
- u32 size, u32 retval, u32 duration)
+ struct skb_shared_info *sinfo, u32 size,
+ u32 retval, u32 duration)
{
void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
int err = -EFAULT;
@@ -145,8 +428,42 @@ static int bpf_test_finish(const union bpf_attr *kattr,
err = -ENOSPC;
}
- if (data_out && copy_to_user(data_out, data, copy_size))
- goto out;
+ if (data_out) {
+ int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size;
+
+ if (len < 0) {
+ err = -ENOSPC;
+ goto out;
+ }
+
+ if (copy_to_user(data_out, data, len))
+ goto out;
+
+ if (sinfo) {
+ int i, offset = len;
+ u32 data_len;
+
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+
+ if (offset >= copy_size) {
+ err = -ENOSPC;
+ break;
+ }
+
+ data_len = min_t(u32, copy_size - offset,
+ skb_frag_size(frag));
+
+ if (copy_to_user(data_out + offset,
+ skb_frag_address(frag),
+ data_len))
+ goto out;
+
+ offset += data_len;
+ }
+ }
+ }
+
if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
goto out;
if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
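The copy logic added above writes the linear part first and then each fragment at an increasing offset, clamping every chunk to the space that remains. The same bounded scatter-copy, sketched over plain memory buffers instead of skb frags:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct frag {
	const uint8_t *data;
	size_t size;
};

/* Copy 'linear' followed by each frag into 'out', never past out_size.
 * Returns the number of bytes actually written.
 */
static size_t gather_copy(uint8_t *out, size_t out_size,
			  const uint8_t *linear, size_t linear_len,
			  const struct frag *frags, int nr_frags)
{
	size_t offset, chunk;
	int i;

	offset = linear_len < out_size ? linear_len : out_size;
	memcpy(out, linear, offset);

	for (i = 0; i < nr_frags && offset < out_size; i++) {
		chunk = frags[i].size;
		if (chunk > out_size - offset)
			chunk = out_size - offset;
		memcpy(out + offset, frags[i].data, chunk);
		offset += chunk;
	}
	return offset;
}

int main(void)
{
	const uint8_t lin[] = "head";
	const uint8_t f0[] = "-frag0", f1[] = "-frag1";
	const struct frag frags[] = {
		{ f0, sizeof(f0) - 1 }, { f1, sizeof(f1) - 1 },
	};
	uint8_t out[32] = { 0 };
	size_t n;

	n = gather_copy(out, sizeof(out), lin, sizeof(lin) - 1, frags, 2);
	printf("%zu bytes: %.*s\n", n, (int)n, out);
	return 0;
}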
@@ -165,12 +482,14 @@ out:
* future.
*/
__diag_push();
-__diag_ignore(GCC, 8, "-Wmissing-prototypes",
- "Global functions as their definitions will be in vmlinux BTF");
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in vmlinux BTF");
int noinline bpf_fentry_test1(int a)
{
return a + 1;
}
+EXPORT_SYMBOL_GPL(bpf_fentry_test1);
+ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO);
int noinline bpf_fentry_test2(int a, u64 b)
{
@@ -232,28 +551,197 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
return sk;
}
+struct prog_test_member1 {
+ int a;
+};
+
+struct prog_test_member {
+ struct prog_test_member1 m;
+ int c;
+};
+
+struct prog_test_ref_kfunc {
+ int a;
+ int b;
+ struct prog_test_member memb;
+ struct prog_test_ref_kfunc *next;
+ refcount_t cnt;
+};
+
+static struct prog_test_ref_kfunc prog_test_struct = {
+ .a = 42,
+ .b = 108,
+ .next = &prog_test_struct,
+ .cnt = REFCOUNT_INIT(1),
+};
+
+noinline struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
+{
+ refcount_inc(&prog_test_struct.cnt);
+ return &prog_test_struct;
+}
+
+noinline struct prog_test_member *
+bpf_kfunc_call_memb_acquire(void)
+{
+ WARN_ON_ONCE(1);
+ return NULL;
+}
+
+noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
+{
+ if (!p)
+ return;
+
+ refcount_dec(&p->cnt);
+}
+
+noinline void bpf_kfunc_call_memb_release(struct prog_test_member *p)
+{
+}
+
+noinline void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p)
+{
+ WARN_ON_ONCE(1);
+}
+
+noinline struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **pp, int a, int b)
+{
+ struct prog_test_ref_kfunc *p = READ_ONCE(*pp);
+
+ if (!p)
+ return NULL;
+ refcount_inc(&p->cnt);
+ return p;
+}
+
+struct prog_test_pass1 {
+ int x0;
+ struct {
+ int x1;
+ struct {
+ int x2;
+ struct {
+ int x3;
+ };
+ };
+ };
+};
+
+struct prog_test_pass2 {
+ int len;
+ short arr1[4];
+ struct {
+ char arr2[4];
+ unsigned long arr3[8];
+ } x;
+};
+
+struct prog_test_fail1 {
+ void *p;
+ int x;
+};
+
+struct prog_test_fail2 {
+ int x8;
+ struct prog_test_pass1 x;
+};
+
+struct prog_test_fail3 {
+ int len;
+ char arr1[2];
+ char arr2[];
+};
+
+noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
+{
+}
+
+noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
+{
+}
+
__diag_pop();
ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
-BTF_SET_START(test_sk_kfunc_ids)
+BTF_SET_START(test_sk_check_kfunc_ids)
BTF_ID(func, bpf_kfunc_call_test1)
BTF_ID(func, bpf_kfunc_call_test2)
BTF_ID(func, bpf_kfunc_call_test3)
-BTF_SET_END(test_sk_kfunc_ids)
-
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner)
-{
- if (btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id))
- return true;
- return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner);
-}
-
-static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
- u32 headroom, u32 tailroom)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_ID(func, bpf_kfunc_call_memb_acquire)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_ID(func, bpf_kfunc_call_memb_release)
+BTF_ID(func, bpf_kfunc_call_memb1_release)
+BTF_ID(func, bpf_kfunc_call_test_kptr_get)
+BTF_ID(func, bpf_kfunc_call_test_pass_ctx)
+BTF_ID(func, bpf_kfunc_call_test_pass1)
+BTF_ID(func, bpf_kfunc_call_test_pass2)
+BTF_ID(func, bpf_kfunc_call_test_fail1)
+BTF_ID(func, bpf_kfunc_call_test_fail2)
+BTF_ID(func, bpf_kfunc_call_test_fail3)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_pass1)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_fail1)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_fail2)
+BTF_SET_END(test_sk_check_kfunc_ids)
+
+BTF_SET_START(test_sk_acquire_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_ID(func, bpf_kfunc_call_memb_acquire)
+BTF_ID(func, bpf_kfunc_call_test_kptr_get)
+BTF_SET_END(test_sk_acquire_kfunc_ids)
+
+BTF_SET_START(test_sk_release_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_ID(func, bpf_kfunc_call_memb_release)
+BTF_ID(func, bpf_kfunc_call_memb1_release)
+BTF_SET_END(test_sk_release_kfunc_ids)
+
+BTF_SET_START(test_sk_ret_null_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_ID(func, bpf_kfunc_call_memb_acquire)
+BTF_ID(func, bpf_kfunc_call_test_kptr_get)
+BTF_SET_END(test_sk_ret_null_kfunc_ids)
+
+BTF_SET_START(test_sk_kptr_acquire_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_kptr_get)
+BTF_SET_END(test_sk_kptr_acquire_kfunc_ids)
+
+static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
+ u32 size, u32 headroom, u32 tailroom)
{
void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
- u32 user_size = kattr->test.data_size_in;
void *data;
if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
@@ -283,7 +771,7 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
int b = 2, err = -EFAULT;
u32 retval = 0;
- if (kattr->test.flags || kattr->test.cpu)
+ if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size)
return -EINVAL;
switch (prog->expected_attach_type) {
@@ -347,7 +835,7 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
/* doesn't support data_in/out, ctx_out, duration, or repeat */
if (kattr->test.data_in || kattr->test.data_out ||
kattr->test.ctx_out || kattr->test.duration ||
- kattr->test.repeat)
+ kattr->test.repeat || kattr->test.batch_size)
return -EINVAL;
if (ctx_size_in < prog->aux->max_ctx_offset ||
@@ -524,7 +1012,7 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
cb->pkt_len = skb->len;
} else {
if (__skb->wire_len < skb->len ||
- __skb->wire_len > GSO_MAX_SIZE)
+ __skb->wire_len > GSO_LEGACY_MAX_SIZE)
return -EINVAL;
cb->pkt_len = __skb->wire_len;
}
@@ -578,10 +1066,11 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
void *data;
int ret;
- if (kattr->test.flags || kattr->test.cpu)
+ if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size)
return -EINVAL;
- data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
+ data = bpf_test_init(kattr, kattr->test.data_size_in,
+ size, NET_SKB_PAD + NET_IP_ALIGN,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
if (IS_ERR(data))
return PTR_ERR(data);
@@ -683,7 +1172,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
/* bpf program can never convert linear skb to non-linear */
if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
size = skb_headlen(skb);
- ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration);
+ ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval,
+ duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, ctx,
sizeof(struct __sk_buff));
@@ -757,22 +1247,38 @@ static void xdp_convert_buff_to_md(struct xdp_buff *xdp, struct xdp_md *xdp_md)
int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
+ bool do_live = (kattr->test.flags & BPF_F_TEST_XDP_LIVE_FRAMES);
u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- u32 headroom = XDP_PACKET_HEADROOM;
+ u32 batch_size = kattr->test.batch_size;
+ u32 retval = 0, duration, max_data_sz;
u32 size = kattr->test.data_size_in;
+ u32 headroom = XDP_PACKET_HEADROOM;
u32 repeat = kattr->test.repeat;
struct netdev_rx_queue *rxqueue;
+ struct skb_shared_info *sinfo;
struct xdp_buff xdp = {};
- u32 retval, duration;
+ int i, ret = -EINVAL;
struct xdp_md *ctx;
- u32 max_data_sz;
void *data;
- int ret = -EINVAL;
if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
prog->expected_attach_type == BPF_XDP_CPUMAP)
return -EINVAL;
+ if (kattr->test.flags & ~BPF_F_TEST_XDP_LIVE_FRAMES)
+ return -EINVAL;
+
+ if (do_live) {
+ if (!batch_size)
+ batch_size = NAPI_POLL_WEIGHT;
+ else if (batch_size > TEST_XDP_MAX_BATCH)
+ return -E2BIG;
+
+ headroom += sizeof(struct xdp_page_head);
+ } else if (batch_size) {
+ return -EINVAL;
+ }
+
ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md));
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -781,33 +1287,81 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
/* There can't be user provided data before the meta data */
if (ctx->data_meta || ctx->data_end != size ||
ctx->data > ctx->data_end ||
- unlikely(xdp_metalen_invalid(ctx->data)))
+ unlikely(xdp_metalen_invalid(ctx->data)) ||
+ (do_live && (kattr->test.data_out || kattr->test.ctx_out)))
goto free_ctx;
/* Meta data is allocated from the headroom */
headroom -= ctx->data;
}
- /* XDP have extra tailroom as (most) drivers use full page */
max_data_sz = 4096 - headroom - tailroom;
+ if (size > max_data_sz) {
+ /* disallow live data mode for jumbo frames */
+ if (do_live)
+ goto free_ctx;
+ size = max_data_sz;
+ }
- data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
+ data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom);
if (IS_ERR(data)) {
ret = PTR_ERR(data);
goto free_ctx;
}
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
- xdp_init_buff(&xdp, headroom + max_data_sz + tailroom,
- &rxqueue->xdp_rxq);
+ rxqueue->xdp_rxq.frag_size = headroom + max_data_sz + tailroom;
+ xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq);
xdp_prepare_buff(&xdp, data, headroom, size, true);
+ sinfo = xdp_get_shared_info_from_buff(&xdp);
ret = xdp_convert_md_to_buff(ctx, &xdp);
if (ret)
goto free_data;
+ if (unlikely(kattr->test.data_size_in > size)) {
+ void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
+
+ while (size < kattr->test.data_size_in) {
+ struct page *page;
+ skb_frag_t *frag;
+ u32 data_len;
+
+ if (sinfo->nr_frags == MAX_SKB_FRAGS) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ frag = &sinfo->frags[sinfo->nr_frags++];
+ __skb_frag_set_page(frag, page);
+
+ data_len = min_t(u32, kattr->test.data_size_in - size,
+ PAGE_SIZE);
+ skb_frag_size_set(frag, data_len);
+
+ if (copy_from_user(page_address(page), data_in + size,
+ data_len)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ sinfo->xdp_frags_size += data_len;
+ size += data_len;
+ }
+ xdp_buff_set_frags_flag(&xdp);
+ }
+
if (repeat > 1)
bpf_prog_change_xdp(NULL, prog);
- ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
+
+ if (do_live)
+ ret = bpf_test_run_xdp_live(prog, &xdp, repeat, batch_size, &duration);
+ else
+ ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
/* We convert the xdp_buff back to an xdp_md before checking the return
* code so the reference count of any held netdevice will be decremented
* even if the test run failed.
@@ -816,12 +1370,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
if (ret)
goto out;
- if (xdp.data_meta != data + headroom ||
- xdp.data_end != xdp.data_meta + size)
- size = xdp.data_end - xdp.data_meta;
-
- ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval,
- duration);
+ size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size;
+ ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size,
+ retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, ctx,
sizeof(struct xdp_md));
@@ -830,6 +1381,8 @@ out:
if (repeat > 1)
bpf_prog_change_xdp(prog, NULL);
free_data:
+ for (i = 0; i < sinfo->nr_frags; i++)
+ __free_page(skb_frag_page(&sinfo->frags[i]));
kfree(data);
free_ctx:
kfree(ctx);
@@ -870,13 +1423,13 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
return -EINVAL;
- if (kattr->test.flags || kattr->test.cpu)
+ if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size)
return -EINVAL;
if (size < ETH_HLEN)
return -EINVAL;
- data = bpf_test_init(kattr, size, 0, 0);
+ data = bpf_test_init(kattr, kattr->test.data_size_in, size, 0, 0);
if (IS_ERR(data))
return PTR_ERR(data);
@@ -905,14 +1458,14 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
do {
retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
size, flags);
- } while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
+ } while (bpf_test_timer_continue(&t, 1, repeat, &ret, &duration));
bpf_test_timer_leave(&t);
if (ret < 0)
goto out;
- ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
- retval, duration);
+ ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL,
+ sizeof(flow_keys), retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, user_ctx,
sizeof(struct bpf_flow_keys));
@@ -937,7 +1490,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
if (prog->type != BPF_PROG_TYPE_SK_LOOKUP)
return -EINVAL;
- if (kattr->test.flags || kattr->test.cpu)
+ if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size)
return -EINVAL;
if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out ||
@@ -960,7 +1513,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
goto out;
- if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) {
+ if (user_ctx->local_port > U16_MAX) {
ret = -ERANGE;
goto out;
}
@@ -968,7 +1521,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
ctx.family = (u16)user_ctx->family;
ctx.protocol = (u16)user_ctx->protocol;
ctx.dport = (u16)user_ctx->local_port;
- ctx.sport = (__force __be16)user_ctx->remote_port;
+ ctx.sport = user_ctx->remote_port;
switch (ctx.family) {
case AF_INET:
@@ -1000,7 +1553,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
do {
ctx.selected_sk = NULL;
retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, bpf_prog_run);
- } while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
+ } while (bpf_test_timer_continue(&t, 1, repeat, &ret, &duration));
bpf_test_timer_leave(&t);
if (ret < 0)
@@ -1016,7 +1569,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
}
- ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
+ ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
if (!ret)
ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
@@ -1039,7 +1592,8 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
/* doesn't support data_in/out, ctx_out, duration, or repeat or flags */
if (kattr->test.data_in || kattr->test.data_out ||
kattr->test.ctx_out || kattr->test.duration ||
- kattr->test.repeat || kattr->test.flags)
+ kattr->test.repeat || kattr->test.flags ||
+ kattr->test.batch_size)
return -EINVAL;
if (ctx_size_in < prog->aux->max_ctx_offset ||
@@ -1067,3 +1621,39 @@ out:
kfree(ctx);
return err;
}
+
+static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
+ .owner = THIS_MODULE,
+ .check_set = &test_sk_check_kfunc_ids,
+ .acquire_set = &test_sk_acquire_kfunc_ids,
+ .release_set = &test_sk_release_kfunc_ids,
+ .ret_null_set = &test_sk_ret_null_kfunc_ids,
+ .kptr_acquire_set = &test_sk_kptr_acquire_kfunc_ids
+};
+
+BTF_ID_LIST(bpf_prog_test_dtor_kfunc_ids)
+BTF_ID(struct, prog_test_ref_kfunc)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_ID(struct, prog_test_member)
+BTF_ID(func, bpf_kfunc_call_memb_release)
+
+static int __init bpf_prog_test_run_init(void)
+{
+ const struct btf_id_dtor_kfunc bpf_prog_test_dtor_kfunc[] = {
+ {
+ .btf_id = bpf_prog_test_dtor_kfunc_ids[0],
+ .kfunc_btf_id = bpf_prog_test_dtor_kfunc_ids[1]
+ },
+ {
+ .btf_id = bpf_prog_test_dtor_kfunc_ids[2],
+ .kfunc_btf_id = bpf_prog_test_dtor_kfunc_ids[3],
+ },
+ };
+ int ret;
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
+ return ret ?: register_btf_id_dtor_kfuncs(bpf_prog_test_dtor_kfunc,
+ ARRAY_SIZE(bpf_prog_test_dtor_kfunc),
+ THIS_MODULE);
+}
+late_initcall(bpf_prog_test_run_init);
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index 51a941b56ec3..422ec6e7ccff 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -70,7 +70,7 @@ static int bpfilter_process_sockopt(struct sock *sk, int optname,
.addr = (uintptr_t)optval.user,
.len = optlen,
};
- if (uaccess_kernel() || sockptr_is_kernel(optval)) {
+ if (sockptr_is_kernel(optval)) {
pr_err("kernel access not supported\n");
return -EFAULT;
}
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index 7fb9a021873b..24bd1c0a9a5a 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -20,7 +20,7 @@ obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o br_multicast_eht.o
-bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o br_vlan_tunnel.o br_vlan_options.o
+bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o br_vlan_tunnel.o br_vlan_options.o br_mst.o
bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 1fac72cc617f..96e91d69a9a8 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -265,6 +265,9 @@ int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on,
case BR_BOOLOPT_MCAST_VLAN_SNOOPING:
err = br_multicast_toggle_vlan_snooping(br, on, extack);
break;
+ case BR_BOOLOPT_MST_ENABLE:
+ err = br_mst_set_enabled(br, on, extack);
+ break;
default:
/* shouldn't be called with unsupported options */
WARN_ON(1);
@@ -281,6 +284,8 @@ int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt)
return br_opt_get(br, BROPT_NO_LL_LEARN);
case BR_BOOLOPT_MCAST_VLAN_SNOOPING:
return br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED);
+ case BR_BOOLOPT_MST_ENABLE:
+ return br_opt_get(br, BROPT_MST_ENABLED);
default:
/* shouldn't be called with unsupported options */
WARN_ON(1);
@@ -342,23 +347,26 @@ void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on)
clear_bit(opt, &br->options);
}
-static void __net_exit br_net_exit(struct net *net)
+static void __net_exit br_net_exit_batch(struct list_head *net_list)
{
struct net_device *dev;
+ struct net *net;
LIST_HEAD(list);
rtnl_lock();
- for_each_netdev(net, dev)
- if (netif_is_bridge_master(dev))
- br_dev_delete(dev, &list);
+
+ list_for_each_entry(net, net_list, exit_list)
+ for_each_netdev(net, dev)
+ if (netif_is_bridge_master(dev))
+ br_dev_delete(dev, &list);
unregister_netdevice_many(&list);
- rtnl_unlock();
+ rtnl_unlock();
}
static struct pernet_operations br_net_ops = {
- .exit = br_net_exit,
+ .exit_batch = br_net_exit_batch,
};
static const struct stp_proto br_stp_proto = {
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
index 3db1def4437b..e5e48c6e35d7 100644
--- a/net/bridge/br_arp_nd_proxy.c
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -84,7 +84,7 @@ static void br_arp_send(struct net_bridge *br, struct net_bridge_port *p,
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->pkt_type = PACKET_HOST;
- netif_rx_ni(skb);
+ netif_rx(skb);
}
}
@@ -364,7 +364,7 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p,
reply->ip_summed = CHECKSUM_UNNECESSARY;
reply->pkt_type = PACKET_HOST;
- netif_rx_ni(reply);
+ netif_rx(reply);
}
}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 8d6bab244c4a..58a4f70e01e3 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -465,6 +465,7 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_fix_features = br_fix_features,
.ndo_fdb_add = br_fdb_add,
.ndo_fdb_del = br_fdb_delete,
+ .ndo_fdb_del_bulk = br_fdb_delete_bulk,
.ndo_fdb_dump = br_fdb_dump,
.ndo_fdb_get = br_fdb_get,
.ndo_bridge_getlink = br_getlink,
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 6ccda68bd473..e7f4fccb6adb 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -558,18 +558,161 @@ void br_fdb_cleanup(struct work_struct *work)
mod_delayed_work(system_long_wq, &br->gc_work, work_delay);
}
-/* Completely flush all dynamic entries in forwarding database.*/
-void br_fdb_flush(struct net_bridge *br)
+static bool __fdb_flush_matches(const struct net_bridge *br,
+ const struct net_bridge_fdb_entry *f,
+ const struct net_bridge_fdb_flush_desc *desc)
+{
+ const struct net_bridge_port *dst = READ_ONCE(f->dst);
+ int port_ifidx = dst ? dst->dev->ifindex : br->dev->ifindex;
+
+ if (desc->vlan_id && desc->vlan_id != f->key.vlan_id)
+ return false;
+ if (desc->port_ifindex && desc->port_ifindex != port_ifidx)
+ return false;
+ if (desc->flags_mask && (f->flags & desc->flags_mask) != desc->flags)
+ return false;
+
+ return true;
+}
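/* Illustrative aside (not part of the patch): each descriptor field above is
 * treated as "don't care" when zero, and the flag comparison masks the entry
 * flags before matching. A compact userspace model of that matching rule,
 * with invented entry/descriptor types:
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct entry {
	uint16_t vlan_id;
	unsigned long flags;
};

struct flush_desc {
	uint16_t vlan_id;          /* 0 means any VLAN */
	unsigned long flags;       /* required values under flags_mask */
	unsigned long flags_mask;  /* 0 means ignore flags entirely */
};

static bool flush_matches(const struct entry *e, const struct flush_desc *d)
{
	if (d->vlan_id && d->vlan_id != e->vlan_id)
		return false;
	if (d->flags_mask && (e->flags & d->flags_mask) != d->flags)
		return false;
	return true;
}

int main(void)
{
	struct entry dynamic = { .vlan_id = 10, .flags = 0x0 };
	struct entry permanent = { .vlan_id = 10, .flags = 0x1 };
	struct flush_desc non_permanent = { .flags_mask = 0x1, .flags = 0x0 };

	printf("%d %d\n", flush_matches(&dynamic, &non_permanent),
	       flush_matches(&permanent, &non_permanent));
	return 0;
}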
+
+/* Flush forwarding database entries matching the description */
+void br_fdb_flush(struct net_bridge *br,
+ const struct net_bridge_fdb_flush_desc *desc)
{
struct net_bridge_fdb_entry *f;
- struct hlist_node *tmp;
- spin_lock_bh(&br->hash_lock);
- hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) {
- if (!test_bit(BR_FDB_STATIC, &f->flags))
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
+ if (!__fdb_flush_matches(br, f, desc))
+ continue;
+
+ spin_lock_bh(&br->hash_lock);
+ if (!hlist_unhashed(&f->fdb_node))
fdb_delete(br, f, true);
+ spin_unlock_bh(&br->hash_lock);
}
- spin_unlock_bh(&br->hash_lock);
+ rcu_read_unlock();
+}
+
+static unsigned long __ndm_state_to_fdb_flags(u16 ndm_state)
+{
+ unsigned long flags = 0;
+
+ if (ndm_state & NUD_PERMANENT)
+ __set_bit(BR_FDB_LOCAL, &flags);
+ if (ndm_state & NUD_NOARP)
+ __set_bit(BR_FDB_STATIC, &flags);
+
+ return flags;
+}
+
+static unsigned long __ndm_flags_to_fdb_flags(u8 ndm_flags)
+{
+ unsigned long flags = 0;
+
+ if (ndm_flags & NTF_USE)
+ __set_bit(BR_FDB_ADDED_BY_USER, &flags);
+ if (ndm_flags & NTF_EXT_LEARNED)
+ __set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &flags);
+ if (ndm_flags & NTF_OFFLOADED)
+ __set_bit(BR_FDB_OFFLOADED, &flags);
+ if (ndm_flags & NTF_STICKY)
+ __set_bit(BR_FDB_STICKY, &flags);
+
+ return flags;
+}
+
+static int __fdb_flush_validate_ifindex(const struct net_bridge *br,
+ int ifindex,
+ struct netlink_ext_ack *extack)
+{
+ const struct net_device *dev;
+
+ dev = __dev_get_by_index(dev_net(br->dev), ifindex);
+ if (!dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Unknown flush device ifindex");
+ return -ENODEV;
+ }
+ if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Flush device is not a bridge or bridge port");
+ return -EINVAL;
+ }
+ if (netif_is_bridge_master(dev) && dev != br->dev) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Flush bridge device does not match target bridge device");
+ return -EINVAL;
+ }
+ if (netif_is_bridge_port(dev)) {
+ struct net_bridge_port *p = br_port_get_rtnl(dev);
+
+ if (p->br != br) {
+ NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev, u16 vid,
+ struct netlink_ext_ack *extack)
+{
+ u8 ndm_flags = ndm->ndm_flags & ~FDB_FLUSH_IGNORED_NDM_FLAGS;
+ struct net_bridge_fdb_flush_desc desc = { .vlan_id = vid };
+ struct net_bridge_port *p = NULL;
+ struct net_bridge *br;
+
+ if (netif_is_bridge_master(dev)) {
+ br = netdev_priv(dev);
+ } else {
+ p = br_port_get_rtnl(dev);
+ if (!p) {
+ NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge port");
+ return -EINVAL;
+ }
+ br = p->br;
+ }
+
+ if (ndm_flags & ~FDB_FLUSH_ALLOWED_NDM_FLAGS) {
+ NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm flag bits set");
+ return -EINVAL;
+ }
+ if (ndm->ndm_state & ~FDB_FLUSH_ALLOWED_NDM_STATES) {
+ NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm state bits set");
+ return -EINVAL;
+ }
+
+ desc.flags |= __ndm_state_to_fdb_flags(ndm->ndm_state);
+ desc.flags |= __ndm_flags_to_fdb_flags(ndm_flags);
+ if (tb[NDA_NDM_STATE_MASK]) {
+ u16 ndm_state_mask = nla_get_u16(tb[NDA_NDM_STATE_MASK]);
+
+ desc.flags_mask |= __ndm_state_to_fdb_flags(ndm_state_mask);
+ }
+ if (tb[NDA_NDM_FLAGS_MASK]) {
+ u8 ndm_flags_mask = nla_get_u8(tb[NDA_NDM_FLAGS_MASK]);
+
+ desc.flags_mask |= __ndm_flags_to_fdb_flags(ndm_flags_mask);
+ }
+ if (tb[NDA_IFINDEX]) {
+ int err, ifidx = nla_get_s32(tb[NDA_IFINDEX]);
+
+ err = __fdb_flush_validate_ifindex(br, ifidx, extack);
+ if (err)
+ return err;
+ desc.port_ifindex = ifidx;
+ } else if (p) {
+ /* flush was invoked with port device and NTF_MASTER */
+ desc.port_ifindex = p->dev->ifindex;
+ }
+
+ br_debug(br, "flushing port ifindex: %d vlan id: %u flags: 0x%lx flags mask: 0x%lx\n",
+ desc.port_ifindex, desc.vlan_id, desc.flags, desc.flags_mask);
+
+ br_fdb_flush(br, &desc);
+
+ return 0;
}
/* Flush all entries referring to a specific port.
@@ -1110,7 +1253,8 @@ static int __br_fdb_delete(struct net_bridge *br,
/* Remove neighbor entry with RTM_DELNEIGH */
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
- const unsigned char *addr, u16 vid)
+ const unsigned char *addr, u16 vid,
+ struct netlink_ext_ack *extack)
{
struct net_bridge_vlan_group *vg;
struct net_bridge_port *p = NULL;
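The br_fdb.c rework above replaces the unconditional "flush all dynamic entries" with a match descriptor (vlan, port ifindex, flags and flags mask), so the same helper can serve full flushes and the new bulk delete. A stand-alone sketch of the matching rule follows; the mock_* types and flag values are simplified stand-ins, not the kernel structures.

/* An entry is flushed only if every populated field of the descriptor
 * matches; a zero vlan_id/port_ifindex or flags_mask means "any".
 */
#include <stdbool.h>
#include <stdio.h>

#define MOCK_FDB_STATIC		(1UL << 0)
#define MOCK_FDB_LOCAL		(1UL << 1)

struct mock_fdb_entry {
	unsigned long flags;
	int port_ifindex;
	unsigned short vlan_id;
};

struct mock_flush_desc {
	unsigned long flags;		/* required flag values ... */
	unsigned long flags_mask;	/* ... for these bits only */
	int port_ifindex;		/* 0 = any port */
	unsigned short vlan_id;		/* 0 = any VLAN */
};

static bool mock_flush_matches(const struct mock_fdb_entry *f,
			       const struct mock_flush_desc *desc)
{
	if (desc->vlan_id && desc->vlan_id != f->vlan_id)
		return false;
	if (desc->port_ifindex && desc->port_ifindex != f->port_ifindex)
		return false;
	if (desc->flags_mask && (f->flags & desc->flags_mask) != desc->flags)
		return false;
	return true;
}

int main(void)
{
	struct mock_fdb_entry dynamic = { .flags = 0, .port_ifindex = 2, .vlan_id = 10 };
	struct mock_fdb_entry local = { .flags = MOCK_FDB_LOCAL, .port_ifindex = 2, .vlan_id = 10 };
	/* "flush dynamic entries only": no static/local bits may be set */
	struct mock_flush_desc desc = { .flags = 0,
					.flags_mask = MOCK_FDB_LOCAL | MOCK_FDB_STATIC };

	printf("dynamic flushed: %d, local flushed: %d\n",
	       mock_flush_matches(&dynamic, &desc),
	       mock_flush_matches(&local, &desc));
	return 0;
}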
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index ec646656dbf1..02bb620d3b8d 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -62,7 +62,7 @@ EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit);
int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING,
net, sk, skb, NULL, skb->dev,
br_dev_queue_push_xmit);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index a52ad81596b7..47fcbade7389 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -517,16 +517,16 @@ void br_mtu_auto_adjust(struct net_bridge *br)
static void br_set_gso_limits(struct net_bridge *br)
{
- unsigned int gso_max_size = GSO_MAX_SIZE;
- u16 gso_max_segs = GSO_MAX_SEGS;
+ unsigned int tso_max_size = TSO_MAX_SIZE;
const struct net_bridge_port *p;
+ u16 tso_max_segs = TSO_MAX_SEGS;
list_for_each_entry(p, &br->port_list, list) {
- gso_max_size = min(gso_max_size, p->dev->gso_max_size);
- gso_max_segs = min(gso_max_segs, p->dev->gso_max_segs);
+ tso_max_size = min(tso_max_size, p->dev->tso_max_size);
+ tso_max_segs = min(tso_max_segs, p->dev->tso_max_segs);
}
- netif_set_gso_max_size(br->dev, gso_max_size);
- netif_set_gso_max_segs(br->dev, gso_max_segs);
+ netif_set_tso_max_size(br->dev, tso_max_size);
+ netif_set_tso_max_segs(br->dev, tso_max_segs);
}
/*
@@ -615,6 +615,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
err = dev_set_allmulti(dev, 1);
if (err) {
br_multicast_del_port(p);
+ dev_put_track(dev, &p->dev_tracker);
kfree(p); /* kobject not yet init'd, manually free */
goto err1;
}
@@ -724,10 +725,10 @@ err3:
sysfs_remove_link(br->ifobj, p->dev->name);
err2:
br_multicast_del_port(p);
+ dev_put_track(dev, &p->dev_tracker);
kobject_put(&p->kobj);
dev_set_allmulti(dev, -1);
err1:
- dev_put(dev);
return err;
}
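br_set_gso_limits() above now clamps the bridge device to the most constrained port by taking the minimum TSO size/segment count across all ports, starting from the TSO maxima. A trivial stand-alone sketch of that aggregation, with made-up port values:

#include <stdio.h>

#define MOCK_TSO_MAX_SIZE	(64 * 1024)
#define MOCK_TSO_MAX_SEGS	64

struct mock_port { unsigned int tso_max_size; unsigned short tso_max_segs; };

int main(void)
{
	struct mock_port ports[] = {
		{ 65536, 64 },
		{ 32768, 32 },	/* the most constrained port wins */
		{ 65536, 64 },
	};
	unsigned int size = MOCK_TSO_MAX_SIZE;
	unsigned short segs = MOCK_TSO_MAX_SEGS;
	size_t i;

	for (i = 0; i < sizeof(ports) / sizeof(ports[0]); i++) {
		if (ports[i].tso_max_size < size)
			size = ports[i].tso_max_size;
		if (ports[i].tso_max_segs < segs)
			segs = ports[i].tso_max_segs;
	}
	printf("bridge tso_max_size=%u tso_max_segs=%u\n", size, segs);
	return 0;
}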
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index b50382f957c1..68b3e850bcb9 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -39,6 +39,13 @@ static int br_pass_frame_up(struct sk_buff *skb)
dev_sw_netstats_rx_add(brdev, skb->len);
vg = br_vlan_group_rcu(br);
+
+ /* Reset the offload_fwd_mark because there could be a stacked
+ * bridge above, and it should not think this bridge is doing
+ * that bridge's work forwarding out its ports.
+ */
+ br_switchdev_frame_unmark(skb);
+
/* Bridge is just like any other port. Make sure the
* packet is allowed except in promisc mode when someone
* may be running packet capture.
@@ -78,20 +85,38 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
u16 vid = 0;
u8 state;
- if (!p || p->state == BR_STATE_DISABLED)
+ if (!p)
goto drop;
+ br = p->br;
+
+ if (br_mst_is_enabled(br)) {
+ state = BR_STATE_FORWARDING;
+ } else {
+ if (p->state == BR_STATE_DISABLED)
+ goto drop;
+
+ state = p->state;
+ }
+
brmctx = &p->br->multicast_ctx;
pmctx = &p->multicast_ctx;
- state = p->state;
if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid,
&state, &vlan))
goto out;
+ if (p->flags & BR_PORT_LOCKED) {
+ struct net_bridge_fdb_entry *fdb_src =
+ br_fdb_find_rcu(br, eth_hdr(skb)->h_source, vid);
+
+ if (!fdb_src || READ_ONCE(fdb_src->dst) != p ||
+ test_bit(BR_FDB_LOCAL, &fdb_src->flags))
+ goto drop;
+ }
+
nbp_switchdev_frame_mark(p, skb);
/* insert into forwarding database after filtering to avoid spoofing */
- br = p->br;
if (p->flags & BR_LEARNING)
br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0);
@@ -361,9 +386,13 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
return RX_HANDLER_PASS;
forward:
+ if (br_mst_is_enabled(p->br))
+ goto defer_stp_filtering;
+
switch (p->state) {
case BR_STATE_FORWARDING:
case BR_STATE_LEARNING:
+defer_stp_filtering:
if (ether_addr_equal(p->br->dev->dev_addr, dest))
skb->pkt_type = PACKET_HOST;
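The br_input.c changes above add the BR_PORT_LOCKED ingress check: on a locked port a frame is accepted only if its source MAC already has an FDB entry pointing at that same port (and the entry is not a local one). A simplified user-space model of that decision follows; the table is a plain array instead of the kernel's rhashtable and all names are invented.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct mock_fdb_entry {
	unsigned char mac[6];
	int port_no;		/* port the MAC was learned on */
	bool is_local;		/* bridge's own address */
};

static const struct mock_fdb_entry fdb[] = {
	{ { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }, 1, false },
	{ { 0x02, 0x00, 0x00, 0x00, 0x00, 0xfe }, 0, true },
};

static bool locked_port_allows(const unsigned char *src, int ingress_port)
{
	size_t i;

	for (i = 0; i < sizeof(fdb) / sizeof(fdb[0]); i++) {
		if (memcmp(fdb[i].mac, src, 6) != 0)
			continue;
		return fdb[i].port_no == ingress_port && !fdb[i].is_local;
	}
	return false;	/* unknown source MAC: drop on a locked port */
}

int main(void)
{
	const unsigned char known[6] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
	const unsigned char unknown[6] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x99 };

	printf("known MAC on port 1: %d\n", locked_port_allows(known, 1));
	printf("known MAC on port 2: %d\n", locked_port_allows(known, 2));
	printf("unknown MAC on port 1: %d\n", locked_port_allows(unknown, 1));
	return 0;
}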
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 4556d913955b..fdcc641fc89a 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -251,14 +251,16 @@ static int __mdb_fill_info(struct sk_buff *skb,
__mdb_entry_fill_flags(&e, flags);
e.ifindex = ifindex;
e.vid = mp->addr.vid;
- if (mp->addr.proto == htons(ETH_P_IP))
+ if (mp->addr.proto == htons(ETH_P_IP)) {
e.addr.u.ip4 = mp->addr.dst.ip4;
#if IS_ENABLED(CONFIG_IPV6)
- else if (mp->addr.proto == htons(ETH_P_IPV6))
+ } else if (mp->addr.proto == htons(ETH_P_IPV6)) {
e.addr.u.ip6 = mp->addr.dst.ip6;
#endif
- else
+ } else {
ether_addr_copy(e.addr.u.mac_addr, mp->addr.dst.mac_addr);
+ e.state = MDB_PG_FLAGS_PERMANENT;
+ }
e.addr.proto = mp->addr.proto;
nest_ent = nla_nest_start_noflag(skb,
MDBA_MDB_ENTRY_INFO);
@@ -873,8 +875,8 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
return -EINVAL;
/* host join errors which can happen before creating the group */
- if (!port) {
- /* don't allow any flags for host-joined groups */
+ if (!port && !br_group_is_l2(&group)) {
+ /* don't allow any flags for host-joined IP groups */
if (entry->state) {
NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups");
return -EINVAL;
diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c
new file mode 100644
index 000000000000..ee680adcee17
--- /dev/null
+++ b/net/bridge/br_mst.c
@@ -0,0 +1,357 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Bridge Multiple Spanning Tree Support
+ *
+ * Authors:
+ * Tobias Waldekranz <tobias@waldekranz.com>
+ */
+
+#include <linux/kernel.h>
+#include <net/switchdev.h>
+
+#include "br_private.h"
+
+DEFINE_STATIC_KEY_FALSE(br_mst_used);
+
+bool br_mst_enabled(const struct net_device *dev)
+{
+ if (!netif_is_bridge_master(dev))
+ return false;
+
+ return br_opt_get(netdev_priv(dev), BROPT_MST_ENABLED);
+}
+EXPORT_SYMBOL_GPL(br_mst_enabled);
+
+int br_mst_get_info(const struct net_device *dev, u16 msti, unsigned long *vids)
+{
+ const struct net_bridge_vlan_group *vg;
+ const struct net_bridge_vlan *v;
+ const struct net_bridge *br;
+
+ ASSERT_RTNL();
+
+ if (!netif_is_bridge_master(dev))
+ return -EINVAL;
+
+ br = netdev_priv(dev);
+ if (!br_opt_get(br, BROPT_MST_ENABLED))
+ return -EINVAL;
+
+ vg = br_vlan_group(br);
+
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ if (v->msti == msti)
+ __set_bit(v->vid, vids);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(br_mst_get_info);
+
+int br_mst_get_state(const struct net_device *dev, u16 msti, u8 *state)
+{
+ const struct net_bridge_port *p = NULL;
+ const struct net_bridge_vlan_group *vg;
+ const struct net_bridge_vlan *v;
+
+ ASSERT_RTNL();
+
+ p = br_port_get_check_rtnl(dev);
+ if (!p || !br_opt_get(p->br, BROPT_MST_ENABLED))
+ return -EINVAL;
+
+ vg = nbp_vlan_group(p);
+
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ if (v->brvlan->msti == msti) {
+ *state = v->state;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+EXPORT_SYMBOL_GPL(br_mst_get_state);
+
+static void br_mst_vlan_set_state(struct net_bridge_port *p, struct net_bridge_vlan *v,
+ u8 state)
+{
+ struct net_bridge_vlan_group *vg = nbp_vlan_group(p);
+
+ if (v->state == state)
+ return;
+
+ br_vlan_set_state(v, state);
+
+ if (v->vid == vg->pvid)
+ br_vlan_set_pvid_state(vg, state);
+}
+
+int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state,
+ struct netlink_ext_ack *extack)
+{
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_ID_PORT_MST_STATE,
+ .orig_dev = p->dev,
+ .u.mst_state = {
+ .msti = msti,
+ .state = state,
+ },
+ };
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *v;
+ int err;
+
+ vg = nbp_vlan_group(p);
+ if (!vg)
+ return 0;
+
+ /* MSTI 0 (CST) state changes are notified via the regular
+ * SWITCHDEV_ATTR_ID_PORT_STP_STATE.
+ */
+ if (msti) {
+ err = switchdev_port_attr_set(p->dev, &attr, extack);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+ }
+
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ if (v->brvlan->msti != msti)
+ continue;
+
+ br_mst_vlan_set_state(p, v, state);
+ }
+
+ return 0;
+}
+
+static void br_mst_vlan_sync_state(struct net_bridge_vlan *pv, u16 msti)
+{
+ struct net_bridge_vlan_group *vg = nbp_vlan_group(pv->port);
+ struct net_bridge_vlan *v;
+
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ /* If this port already has a defined state in this
+ * MSTI (through some other VLAN membership), inherit
+ * it.
+ */
+ if (v != pv && v->brvlan->msti == msti) {
+ br_mst_vlan_set_state(pv->port, pv, v->state);
+ return;
+ }
+ }
+
+ /* Otherwise, start out in a new MSTI with all ports disabled. */
+ return br_mst_vlan_set_state(pv->port, pv, BR_STATE_DISABLED);
+}
+
+int br_mst_vlan_set_msti(struct net_bridge_vlan *mv, u16 msti)
+{
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_ID_VLAN_MSTI,
+ .orig_dev = mv->br->dev,
+ .u.vlan_msti = {
+ .vid = mv->vid,
+ .msti = msti,
+ },
+ };
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *pv;
+ struct net_bridge_port *p;
+ int err;
+
+ if (mv->msti == msti)
+ return 0;
+
+ err = switchdev_port_attr_set(mv->br->dev, &attr, NULL);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ mv->msti = msti;
+
+ list_for_each_entry(p, &mv->br->port_list, list) {
+ vg = nbp_vlan_group(p);
+
+ pv = br_vlan_find(vg, mv->vid);
+ if (pv)
+ br_mst_vlan_sync_state(pv, msti);
+ }
+
+ return 0;
+}
+
+void br_mst_vlan_init_state(struct net_bridge_vlan *v)
+{
+ /* VLANs always start out in MSTI 0 (CST) */
+ v->msti = 0;
+
+ if (br_vlan_is_master(v))
+ v->state = BR_STATE_FORWARDING;
+ else
+ v->state = v->port->state;
+}
+
+int br_mst_set_enabled(struct net_bridge *br, bool on,
+ struct netlink_ext_ack *extack)
+{
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_ID_BRIDGE_MST,
+ .orig_dev = br->dev,
+ .u.mst = on,
+ };
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_port *p;
+ int err;
+
+ list_for_each_entry(p, &br->port_list, list) {
+ vg = nbp_vlan_group(p);
+
+ if (!vg->num_vlans)
+ continue;
+
+ NL_SET_ERR_MSG(extack,
+ "MST mode can't be changed while VLANs exist");
+ return -EBUSY;
+ }
+
+ if (br_opt_get(br, BROPT_MST_ENABLED) == on)
+ return 0;
+
+ err = switchdev_port_attr_set(br->dev, &attr, extack);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ if (on)
+ static_branch_enable(&br_mst_used);
+ else
+ static_branch_disable(&br_mst_used);
+
+ br_opt_toggle(br, BROPT_MST_ENABLED, on);
+ return 0;
+}
+
+size_t br_mst_info_size(const struct net_bridge_vlan_group *vg)
+{
+ DECLARE_BITMAP(seen, VLAN_N_VID) = { 0 };
+ const struct net_bridge_vlan *v;
+ size_t sz;
+
+ /* IFLA_BRIDGE_MST */
+ sz = nla_total_size(0);
+
+ list_for_each_entry_rcu(v, &vg->vlan_list, vlist) {
+ if (test_bit(v->brvlan->msti, seen))
+ continue;
+
+ /* IFLA_BRIDGE_MST_ENTRY */
+ sz += nla_total_size(0) +
+ /* IFLA_BRIDGE_MST_ENTRY_MSTI */
+ nla_total_size(sizeof(u16)) +
+ /* IFLA_BRIDGE_MST_ENTRY_STATE */
+ nla_total_size(sizeof(u8));
+
+ __set_bit(v->brvlan->msti, seen);
+ }
+
+ return sz;
+}
+
+int br_mst_fill_info(struct sk_buff *skb,
+ const struct net_bridge_vlan_group *vg)
+{
+ DECLARE_BITMAP(seen, VLAN_N_VID) = { 0 };
+ const struct net_bridge_vlan *v;
+ struct nlattr *nest;
+ int err = 0;
+
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ if (test_bit(v->brvlan->msti, seen))
+ continue;
+
+ nest = nla_nest_start_noflag(skb, IFLA_BRIDGE_MST_ENTRY);
+ if (!nest ||
+ nla_put_u16(skb, IFLA_BRIDGE_MST_ENTRY_MSTI, v->brvlan->msti) ||
+ nla_put_u8(skb, IFLA_BRIDGE_MST_ENTRY_STATE, v->state)) {
+ err = -EMSGSIZE;
+ break;
+ }
+ nla_nest_end(skb, nest);
+
+ __set_bit(v->brvlan->msti, seen);
+ }
+
+ return err;
+}
+
+static const struct nla_policy br_mst_nl_policy[IFLA_BRIDGE_MST_ENTRY_MAX + 1] = {
+ [IFLA_BRIDGE_MST_ENTRY_MSTI] = NLA_POLICY_RANGE(NLA_U16,
+ 1, /* 0 reserved for CST */
+ VLAN_N_VID - 1),
+ [IFLA_BRIDGE_MST_ENTRY_STATE] = NLA_POLICY_RANGE(NLA_U8,
+ BR_STATE_DISABLED,
+ BR_STATE_BLOCKING),
+};
+
+static int br_mst_process_one(struct net_bridge_port *p,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_BRIDGE_MST_ENTRY_MAX + 1];
+ u16 msti;
+ u8 state;
+ int err;
+
+ err = nla_parse_nested(tb, IFLA_BRIDGE_MST_ENTRY_MAX, attr,
+ br_mst_nl_policy, extack);
+ if (err)
+ return err;
+
+ if (!tb[IFLA_BRIDGE_MST_ENTRY_MSTI]) {
+ NL_SET_ERR_MSG_MOD(extack, "MSTI not specified");
+ return -EINVAL;
+ }
+
+ if (!tb[IFLA_BRIDGE_MST_ENTRY_STATE]) {
+ NL_SET_ERR_MSG_MOD(extack, "State not specified");
+ return -EINVAL;
+ }
+
+ msti = nla_get_u16(tb[IFLA_BRIDGE_MST_ENTRY_MSTI]);
+ state = nla_get_u8(tb[IFLA_BRIDGE_MST_ENTRY_STATE]);
+
+ return br_mst_set_state(p, msti, state, extack);
+}
+
+int br_mst_process(struct net_bridge_port *p, const struct nlattr *mst_attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *attr;
+ int err, msts = 0;
+ int rem;
+
+ if (!br_opt_get(p->br, BROPT_MST_ENABLED)) {
+ NL_SET_ERR_MSG_MOD(extack, "Can't modify MST state when MST is disabled");
+ return -EBUSY;
+ }
+
+ nla_for_each_nested(attr, mst_attr, rem) {
+ switch (nla_type(attr)) {
+ case IFLA_BRIDGE_MST_ENTRY:
+ err = br_mst_process_one(p, attr, extack);
+ break;
+ default:
+ continue;
+ }
+
+ msts++;
+ if (err)
+ break;
+ }
+
+ if (!msts) {
+ NL_SET_ERR_MSG_MOD(extack, "Found no MST entries to process");
+ err = -EINVAL;
+ }
+
+ return err;
+}
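The new br_mst.c above keeps per-VLAN STP state per MSTI; br_mst_vlan_sync_state() makes a VLAN that joins an MSTI inherit the port's existing state for that MSTI, and otherwise start out disabled. A user-space sketch of that inheritance rule, with everything simplified and the mock_* names invented:

#include <stdio.h>

enum { MOCK_STATE_DISABLED = 0, MOCK_STATE_FORWARDING = 3 };

struct mock_port_vlan {
	unsigned short vid;
	unsigned short msti;
	int state;
};

static void mock_vlan_join_msti(struct mock_port_vlan *vlans, int nvlans,
				struct mock_port_vlan *pv, unsigned short msti)
{
	int i;

	pv->msti = msti;
	for (i = 0; i < nvlans; i++) {
		if (&vlans[i] != pv && vlans[i].msti == msti) {
			pv->state = vlans[i].state;	/* inherit existing MSTI state */
			return;
		}
	}
	pv->state = MOCK_STATE_DISABLED;	/* first VLAN in a new MSTI */
}

int main(void)
{
	struct mock_port_vlan vlans[] = {
		{ 10, 7, MOCK_STATE_FORWARDING },
		{ 20, 0, MOCK_STATE_FORWARDING },
		{ 30, 0, MOCK_STATE_FORWARDING },
	};

	mock_vlan_join_msti(vlans, 3, &vlans[1], 7);	/* inherits forwarding */
	mock_vlan_join_msti(vlans, 3, &vlans[2], 8);	/* new MSTI: disabled */
	printf("vid 20 state=%d, vid 30 state=%d\n", vlans[1].state, vlans[2].state);
	return 0;
}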
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index de2409889489..db4f2641d1cd 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -82,6 +82,9 @@ static void br_multicast_find_del_pg(struct net_bridge *br,
struct net_bridge_port_group *pg);
static void __br_multicast_stop(struct net_bridge_mcast *brmctx);
+static int br_mc_disabled_update(struct net_device *dev, bool value,
+ struct netlink_ext_ack *extack);
+
static struct net_bridge_port_group *
br_sg_port_find(struct net_bridge *br,
struct net_bridge_port_group_sg_key *sg_p)
@@ -1156,6 +1159,7 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
return mp;
if (atomic_read(&br->mdb_hash_tbl.nelems) >= br->hash_max) {
+ br_mc_disabled_update(br->dev, false, NULL);
br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
return ERR_PTR(-E2BIG);
}
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 4fd882686b04..ff4779036649 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -1012,9 +1012,24 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
return okfn(net, sk, skb);
ops = nf_hook_entries_get_hook_ops(e);
- for (i = 0; i < e->num_hook_entries &&
- ops[i]->priority <= NF_BR_PRI_BRNF; i++)
- ;
+ for (i = 0; i < e->num_hook_entries; i++) {
+ /* These hooks have already been called */
+ if (ops[i]->priority < NF_BR_PRI_BRNF)
+ continue;
+
+ /* These hooks have not been called yet, run them. */
+ if (ops[i]->priority > NF_BR_PRI_BRNF)
+ break;
+
+ /* take a closer look at NF_BR_PRI_BRNF. */
+ if (ops[i]->hook == br_nf_pre_routing) {
+ /* This hook diverted the skb to this function,
+ * hooks after this have not been run yet.
+ */
+ i++;
+ break;
+ }
+ }
nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
sk, net, okfn);
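The br_nf_hook_thresh() loop rewritten above searches for the index at which hook execution should resume: lower-priority hooks have already run, higher-priority hooks have not, and among equal-priority hooks execution continues right after the one that diverted the skb. A stand-alone model of that index search; the priority value and the is_brnf_pre_routing marker are invented for the example.

#include <stdio.h>

#define MOCK_PRI_BRNF	0

struct mock_hook { int priority; int is_brnf_pre_routing; };

static int resume_index(const struct mock_hook *hooks, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		if (hooks[i].priority < MOCK_PRI_BRNF)
			continue;		/* already called */
		if (hooks[i].priority > MOCK_PRI_BRNF)
			break;			/* not called yet, start here */
		if (hooks[i].is_brnf_pre_routing) {
			i++;			/* resume after the diverting hook */
			break;
		}
	}
	return i;
}

int main(void)
{
	const struct mock_hook hooks[] = {
		{ -100, 0 },	/* ran before the br_netfilter hook */
		{ 0, 0 },	/* same priority, registered earlier: already ran */
		{ 0, 1 },	/* the diverting hook itself */
		{ 0, 0 },	/* same priority, must still run */
		{ 100, 0 },
	};

	printf("resume at index %d\n", resume_index(hooks, 5));
	return 0;
}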
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 2ff83d84230d..bb01776d2d88 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -119,6 +119,9 @@ static size_t br_get_link_af_size_filtered(const struct net_device *dev,
/* Each VLAN is returned in bridge_vlan_info along with flags */
vinfo_sz += num_vlan_infos * nla_total_size(sizeof(struct bridge_vlan_info));
+ if (p && vg && (filter_mask & RTEXT_FILTER_MST))
+ vinfo_sz += br_mst_info_size(vg);
+
if (!(filter_mask & RTEXT_FILTER_CFM_STATUS))
return vinfo_sz;
@@ -184,6 +187,7 @@ static inline size_t br_port_info_size(void)
+ nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */
+ nla_total_size(1) /* IFLA_BRPORT_NEIGH_SUPPRESS */
+ nla_total_size(1) /* IFLA_BRPORT_ISOLATED */
+ + nla_total_size(1) /* IFLA_BRPORT_LOCKED */
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */
+ nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */
@@ -269,7 +273,8 @@ static int br_port_fill_attrs(struct sk_buff *skb,
BR_MRP_LOST_CONT)) ||
nla_put_u8(skb, IFLA_BRPORT_MRP_IN_OPEN,
!!(p->flags & BR_MRP_LOST_IN_CONT)) ||
- nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)))
+ nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)) ||
+ nla_put_u8(skb, IFLA_BRPORT_LOCKED, !!(p->flags & BR_PORT_LOCKED)))
return -EMSGSIZE;
timerval = br_timer_value(&p->message_age_timer);
@@ -483,7 +488,8 @@ static int br_fill_ifinfo(struct sk_buff *skb,
RTEXT_FILTER_BRVLAN_COMPRESSED |
RTEXT_FILTER_MRP |
RTEXT_FILTER_CFM_CONFIG |
- RTEXT_FILTER_CFM_STATUS)) {
+ RTEXT_FILTER_CFM_STATUS |
+ RTEXT_FILTER_MST)) {
af = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
if (!af)
goto nla_put_failure;
@@ -562,7 +568,28 @@ static int br_fill_ifinfo(struct sk_buff *skb,
nla_nest_end(skb, cfm_nest);
}
+ if ((filter_mask & RTEXT_FILTER_MST) &&
+ br_opt_get(br, BROPT_MST_ENABLED) && port) {
+ const struct net_bridge_vlan_group *vg = nbp_vlan_group(port);
+ struct nlattr *mst_nest;
+ int err;
+
+ if (!vg || !vg->num_vlans)
+ goto done;
+
+ mst_nest = nla_nest_start(skb, IFLA_BRIDGE_MST);
+ if (!mst_nest)
+ goto nla_put_failure;
+
+ err = br_mst_fill_info(skb, vg);
+ if (err)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, mst_nest);
+ }
+
done:
+
if (af)
nla_nest_end(skb, af);
nlmsg_end(skb, nlh);
@@ -801,6 +828,23 @@ static int br_afspec(struct net_bridge *br,
if (err)
return err;
break;
+ case IFLA_BRIDGE_MST:
+ if (!p) {
+ NL_SET_ERR_MSG(extack,
+ "MST states can only be set on bridge ports");
+ return -EINVAL;
+ }
+
+ if (cmd != RTM_SETLINK) {
+ NL_SET_ERR_MSG(extack,
+ "MST states can only be set through RTM_SETLINK");
+ return -EINVAL;
+ }
+
+ err = br_mst_process(p, attr, extack);
+ if (err)
+ return err;
+ break;
}
}
@@ -827,6 +871,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 },
[IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 },
[IFLA_BRPORT_ISOLATED] = { .type = NLA_U8 },
+ [IFLA_BRPORT_LOCKED] = { .type = NLA_U8 },
[IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 },
[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = { .type = NLA_U32 },
};
@@ -893,6 +938,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
br_set_port_flag(p, tb, IFLA_BRPORT_VLAN_TUNNEL, BR_VLAN_TUNNEL);
br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS, BR_NEIGH_SUPPRESS);
br_set_port_flag(p, tb, IFLA_BRPORT_ISOLATED, BR_ISOLATED);
+ br_set_port_flag(p, tb, IFLA_BRPORT_LOCKED, BR_PORT_LOCKED);
changed_mask = old_flags ^ p->flags;
@@ -1280,8 +1326,13 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
br_recalculate_fwd_mask(br);
}
- if (data[IFLA_BR_FDB_FLUSH])
- br_fdb_flush(br);
+ if (data[IFLA_BR_FDB_FLUSH]) {
+ struct net_bridge_fdb_flush_desc desc = {
+ .flags_mask = BR_FDB_STATIC
+ };
+
+ br_fdb_flush(br, &desc);
+ }
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
if (data[IFLA_BR_MCAST_ROUTER]) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 2661dda1a92b..06e5f6faa431 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -178,6 +178,7 @@ enum {
* @br_mcast_ctx: if MASTER flag set, this is the global vlan multicast context
* @port_mcast_ctx: if MASTER flag unset, this is the per-port/vlan multicast
* context
+ * @msti: if MASTER flag set, this holds the VLAN's MST instance
* @vlist: sorted list of VLAN entries
* @rcu: used for entry destruction
*
@@ -210,6 +211,8 @@ struct net_bridge_vlan {
struct net_bridge_mcast_port port_mcast_ctx;
};
+ u16 msti;
+
struct list_head vlist;
struct rcu_head rcu;
@@ -271,6 +274,13 @@ struct net_bridge_fdb_entry {
struct rcu_head rcu;
};
+struct net_bridge_fdb_flush_desc {
+ unsigned long flags;
+ unsigned long flags_mask;
+ int port_ifindex;
+ u16 vlan_id;
+};
+
#define MDB_PG_FLAGS_PERMANENT BIT(0)
#define MDB_PG_FLAGS_OFFLOAD BIT(1)
#define MDB_PG_FLAGS_FAST_LEAVE BIT(2)
@@ -445,6 +455,7 @@ enum net_bridge_opts {
BROPT_NO_LL_LEARN,
BROPT_VLAN_BRIDGE_BINDING,
BROPT_MCAST_VLAN_SNOOPING_ENABLED,
+ BROPT_MST_ENABLED,
};
struct net_bridge {
@@ -751,11 +762,17 @@ static inline void br_netpoll_disable(struct net_bridge_port *p)
#endif
/* br_fdb.c */
+#define FDB_FLUSH_IGNORED_NDM_FLAGS (NTF_MASTER | NTF_SELF)
+#define FDB_FLUSH_ALLOWED_NDM_STATES (NUD_PERMANENT | NUD_NOARP)
+#define FDB_FLUSH_ALLOWED_NDM_FLAGS (NTF_USE | NTF_EXT_LEARNED | \
+ NTF_STICKY | NTF_OFFLOADED)
+
int br_fdb_init(void);
void br_fdb_fini(void);
int br_fdb_hash_init(struct net_bridge *br);
void br_fdb_hash_fini(struct net_bridge *br);
-void br_fdb_flush(struct net_bridge *br);
+void br_fdb_flush(struct net_bridge *br,
+ const struct net_bridge_fdb_flush_desc *desc);
void br_fdb_find_delete_local(struct net_bridge *br,
const struct net_bridge_port *p,
const unsigned char *addr, u16 vid);
@@ -776,7 +793,11 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid, unsigned long flags);
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev, const unsigned char *addr, u16 vid);
+ struct net_device *dev, const unsigned char *addr, u16 vid,
+ struct netlink_ext_ack *extack);
+int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev, u16 vid,
+ struct netlink_ext_ack *extack);
int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
const unsigned char *addr, u16 vid, u16 nlh_flags,
struct netlink_ext_ack *extack);
@@ -1765,6 +1786,63 @@ static inline bool br_vlan_state_allowed(u8 state, bool learn_allow)
}
#endif
+/* br_mst.c */
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+DECLARE_STATIC_KEY_FALSE(br_mst_used);
+static inline bool br_mst_is_enabled(struct net_bridge *br)
+{
+ return static_branch_unlikely(&br_mst_used) &&
+ br_opt_get(br, BROPT_MST_ENABLED);
+}
+
+int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state,
+ struct netlink_ext_ack *extack);
+int br_mst_vlan_set_msti(struct net_bridge_vlan *v, u16 msti);
+void br_mst_vlan_init_state(struct net_bridge_vlan *v);
+int br_mst_set_enabled(struct net_bridge *br, bool on,
+ struct netlink_ext_ack *extack);
+size_t br_mst_info_size(const struct net_bridge_vlan_group *vg);
+int br_mst_fill_info(struct sk_buff *skb,
+ const struct net_bridge_vlan_group *vg);
+int br_mst_process(struct net_bridge_port *p, const struct nlattr *mst_attr,
+ struct netlink_ext_ack *extack);
+#else
+static inline bool br_mst_is_enabled(struct net_bridge *br)
+{
+ return false;
+}
+
+static inline int br_mst_set_state(struct net_bridge_port *p, u16 msti,
+ u8 state, struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int br_mst_set_enabled(struct net_bridge *br, bool on,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline size_t br_mst_info_size(const struct net_bridge_vlan_group *vg)
+{
+ return 0;
+}
+
+static inline int br_mst_fill_info(struct sk_buff *skb,
+ const struct net_bridge_vlan_group *vg)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int br_mst_process(struct net_bridge_port *p,
+ const struct nlattr *mst_attr,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
struct nf_br_ops {
int (*br_dev_xmit_hook)(struct sk_buff *skb);
};
@@ -1985,7 +2063,7 @@ void br_switchdev_mdb_notify(struct net_device *dev,
struct net_bridge_port_group *pg,
int type);
int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
- struct netlink_ext_ack *extack);
+ bool changed, struct netlink_ext_ack *extack);
int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid);
void br_switchdev_init(struct net_bridge *br);
@@ -2052,8 +2130,8 @@ static inline int br_switchdev_set_port_flag(struct net_bridge_port *p,
return 0;
}
-static inline int br_switchdev_port_vlan_add(struct net_device *dev,
- u16 vid, u16 flags,
+static inline int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid,
+ u16 flags, bool changed,
struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 1d80f34a139c..7d27b2e6038f 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -43,6 +43,12 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
return;
p->state = state;
+ if (br_opt_get(p->br, BROPT_MST_ENABLED)) {
+ err = br_mst_set_state(p, 0, state, NULL);
+ if (err)
+ br_warn(p->br, "error setting MST state on port %u(%s)\n",
+ p->port_no, netdev_name(p->dev));
+ }
err = switchdev_port_attr_set(p->dev, &attr, NULL);
if (err && err != -EOPNOTSUPP)
br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index f8fbaaa7c501..8f3d76c751dd 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -72,7 +72,8 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
/* Flags that can be offloaded to hardware */
#define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \
- BR_MCAST_FLOOD | BR_BCAST_FLOOD)
+ BR_MCAST_FLOOD | BR_BCAST_FLOOD | BR_PORT_LOCKED | \
+ BR_HAIRPIN_MODE | BR_ISOLATED | BR_MULTICAST_TO_UNICAST)
int br_switchdev_set_port_flag(struct net_bridge_port *p,
unsigned long flags,
@@ -160,13 +161,14 @@ br_switchdev_fdb_notify(struct net_bridge *br,
}
int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
- struct netlink_ext_ack *extack)
+ bool changed, struct netlink_ext_ack *extack)
{
struct switchdev_obj_port_vlan v = {
.obj.orig_dev = dev,
.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
.flags = flags,
.vid = vid,
+ .changed = changed,
};
return switchdev_port_obj_add(dev, &v.obj, extack);
@@ -330,6 +332,48 @@ br_switchdev_fdb_replay(const struct net_device *br_dev, const void *ctx,
return err;
}
+static int br_switchdev_vlan_attr_replay(struct net_device *br_dev,
+ const void *ctx,
+ struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
+{
+ struct switchdev_notifier_port_attr_info attr_info = {
+ .info = {
+ .dev = br_dev,
+ .extack = extack,
+ .ctx = ctx,
+ },
+ };
+ struct net_bridge *br = netdev_priv(br_dev);
+ struct net_bridge_vlan_group *vg;
+ struct switchdev_attr attr;
+ struct net_bridge_vlan *v;
+ int err;
+
+ attr_info.attr = &attr;
+ attr.orig_dev = br_dev;
+
+ vg = br_vlan_group(br);
+ if (!vg)
+ return 0;
+
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ if (v->msti) {
+ attr.id = SWITCHDEV_ATTR_ID_VLAN_MSTI;
+ attr.u.vlan_msti.vid = v->vid;
+ attr.u.vlan_msti.msti = v->msti;
+
+ err = nb->notifier_call(nb, SWITCHDEV_PORT_ATTR_SET,
+ &attr_info);
+ err = notifier_to_errno(err);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
static int
br_switchdev_vlan_replay_one(struct notifier_block *nb,
struct net_device *dev,
@@ -351,19 +395,50 @@ br_switchdev_vlan_replay_one(struct notifier_block *nb,
return notifier_to_errno(err);
}
+static int br_switchdev_vlan_replay_group(struct notifier_block *nb,
+ struct net_device *dev,
+ struct net_bridge_vlan_group *vg,
+ const void *ctx, unsigned long action,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_vlan *v;
+ int err = 0;
+ u16 pvid;
+
+ if (!vg)
+ return 0;
+
+ pvid = br_get_pvid(vg);
+
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ struct switchdev_obj_port_vlan vlan = {
+ .obj.orig_dev = dev,
+ .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+ .flags = br_vlan_flags(v, pvid),
+ .vid = v->vid,
+ };
+
+ if (!br_vlan_should_use(v))
+ continue;
+
+ err = br_switchdev_vlan_replay_one(nb, dev, &vlan, ctx,
+ action, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int br_switchdev_vlan_replay(struct net_device *br_dev,
- struct net_device *dev,
const void *ctx, bool adding,
struct notifier_block *nb,
struct netlink_ext_ack *extack)
{
- struct net_bridge_vlan_group *vg;
- struct net_bridge_vlan *v;
+ struct net_bridge *br = netdev_priv(br_dev);
struct net_bridge_port *p;
- struct net_bridge *br;
unsigned long action;
- int err = 0;
- u16 pvid;
+ int err;
ASSERT_RTNL();
@@ -373,49 +448,33 @@ static int br_switchdev_vlan_replay(struct net_device *br_dev,
if (!netif_is_bridge_master(br_dev))
return -EINVAL;
- if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev))
- return -EINVAL;
-
- if (netif_is_bridge_master(dev)) {
- br = netdev_priv(dev);
- vg = br_vlan_group(br);
- p = NULL;
- } else {
- p = br_port_get_rtnl(dev);
- if (WARN_ON(!p))
- return -EINVAL;
- vg = nbp_vlan_group(p);
- br = p->br;
- }
-
- if (!vg)
- return 0;
-
if (adding)
action = SWITCHDEV_PORT_OBJ_ADD;
else
action = SWITCHDEV_PORT_OBJ_DEL;
- pvid = br_get_pvid(vg);
+ err = br_switchdev_vlan_replay_group(nb, br_dev, br_vlan_group(br),
+ ctx, action, extack);
+ if (err)
+ return err;
- list_for_each_entry(v, &vg->vlan_list, vlist) {
- struct switchdev_obj_port_vlan vlan = {
- .obj.orig_dev = dev,
- .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
- .flags = br_vlan_flags(v, pvid),
- .vid = v->vid,
- };
+ list_for_each_entry(p, &br->port_list, list) {
+ struct net_device *dev = p->dev;
- if (!br_vlan_should_use(v))
- continue;
+ err = br_switchdev_vlan_replay_group(nb, dev,
+ nbp_vlan_group(p),
+ ctx, action, extack);
+ if (err)
+ return err;
+ }
- err = br_switchdev_vlan_replay_one(nb, dev, &vlan, ctx,
- action, extack);
+ if (adding) {
+ err = br_switchdev_vlan_attr_replay(br_dev, ctx, nb, extack);
if (err)
return err;
}
- return err;
+ return 0;
}
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
@@ -681,8 +740,7 @@ static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
struct net_device *dev = p->dev;
int err;
- err = br_switchdev_vlan_replay(br_dev, dev, ctx, true, blocking_nb,
- extack);
+ err = br_switchdev_vlan_replay(br_dev, ctx, true, blocking_nb, extack);
if (err && err != -EOPNOTSUPP)
return err;
@@ -706,11 +764,11 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
struct net_device *br_dev = p->br->dev;
struct net_device *dev = p->dev;
- br_switchdev_vlan_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
+ br_switchdev_fdb_replay(br_dev, ctx, false, atomic_nb);
br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
- br_switchdev_fdb_replay(br_dev, ctx, false, atomic_nb);
+ br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL);
}
/* Let the bridge know that this port is offloaded, so that it can assign a
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 3f7ca88c2aa3..612e367fff20 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -344,7 +344,11 @@ static DEVICE_ATTR_RW(group_addr);
static int set_flush(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br_fdb_flush(br);
+ struct net_bridge_fdb_flush_desc desc = {
+ .flags_mask = BR_FDB_STATIC
+ };
+
+ br_fdb_flush(br, &desc);
return 0;
}
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 84ba456a78cc..0f5e75ccac79 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -34,53 +34,70 @@ static struct net_bridge_vlan *br_vlan_lookup(struct rhashtable *tbl, u16 vid)
return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params);
}
-static bool __vlan_add_pvid(struct net_bridge_vlan_group *vg,
+static void __vlan_add_pvid(struct net_bridge_vlan_group *vg,
const struct net_bridge_vlan *v)
{
if (vg->pvid == v->vid)
- return false;
+ return;
smp_wmb();
br_vlan_set_pvid_state(vg, v->state);
vg->pvid = v->vid;
-
- return true;
}
-static bool __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid)
+static void __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid)
{
if (vg->pvid != vid)
- return false;
+ return;
smp_wmb();
vg->pvid = 0;
-
- return true;
}
-/* return true if anything changed, false otherwise */
-static bool __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
+/* Update the BRIDGE_VLAN_INFO_PVID and BRIDGE_VLAN_INFO_UNTAGGED flags of @v.
+ * If @commit is false, return just whether the BRIDGE_VLAN_INFO_PVID and
+ * BRIDGE_VLAN_INFO_UNTAGGED bits of @flags would produce any change onto @v.
+ */
+static bool __vlan_flags_update(struct net_bridge_vlan *v, u16 flags,
+ bool commit)
{
struct net_bridge_vlan_group *vg;
- u16 old_flags = v->flags;
- bool ret;
+ bool change;
if (br_vlan_is_master(v))
vg = br_vlan_group(v->br);
else
vg = nbp_vlan_group(v->port);
+ /* check if anything would be changed on commit */
+ change = !!(flags & BRIDGE_VLAN_INFO_PVID) == !!(vg->pvid != v->vid) ||
+ ((flags ^ v->flags) & BRIDGE_VLAN_INFO_UNTAGGED);
+
+ if (!commit)
+ goto out;
+
if (flags & BRIDGE_VLAN_INFO_PVID)
- ret = __vlan_add_pvid(vg, v);
+ __vlan_add_pvid(vg, v);
else
- ret = __vlan_delete_pvid(vg, v->vid);
+ __vlan_delete_pvid(vg, v->vid);
if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
v->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
else
v->flags &= ~BRIDGE_VLAN_INFO_UNTAGGED;
- return ret || !!(old_flags ^ v->flags);
+out:
+ return change;
+}
+
+static bool __vlan_flags_would_change(struct net_bridge_vlan *v, u16 flags)
+{
+ return __vlan_flags_update(v, flags, false);
+}
+
+static void __vlan_flags_commit(struct net_bridge_vlan *v, u16 flags)
+{
+ __vlan_flags_update(v, flags, true);
}
static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
@@ -92,7 +109,7 @@ static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
/* Try switchdev op first. In case it is not supported, fallback to
* 8021q add.
*/
- err = br_switchdev_port_vlan_add(dev, v->vid, flags, extack);
+ err = br_switchdev_port_vlan_add(dev, v->vid, flags, false, extack);
if (err == -EOPNOTSUPP)
return vlan_vid_add(dev, br->vlan_proto, v->vid);
v->priv_flags |= BR_VLFLAG_ADDED_BY_SWITCHDEV;
@@ -209,6 +226,24 @@ static void nbp_vlan_rcu_free(struct rcu_head *rcu)
kfree(v);
}
+static void br_vlan_init_state(struct net_bridge_vlan *v)
+{
+ struct net_bridge *br;
+
+ if (br_vlan_is_master(v))
+ br = v->br;
+ else
+ br = v->port->br;
+
+ if (br_opt_get(br, BROPT_MST_ENABLED)) {
+ br_mst_vlan_init_state(v);
+ return;
+ }
+
+ v->state = BR_STATE_FORWARDING;
+ v->msti = 0;
+}
+
/* This is the shared VLAN add function which works for both ports and bridge
* devices. There are four possible calls to this function in terms of the
* vlan entry type:
@@ -284,9 +319,12 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
}
br_multicast_port_ctx_init(p, v, &v->port_mcast_ctx);
} else {
- err = br_switchdev_port_vlan_add(dev, v->vid, flags, extack);
- if (err && err != -EOPNOTSUPP)
- goto out;
+ if (br_vlan_should_use(v)) {
+ err = br_switchdev_port_vlan_add(dev, v->vid, flags,
+ false, extack);
+ if (err && err != -EOPNOTSUPP)
+ goto out;
+ }
br_multicast_ctx_init(br, v, &v->br_mcast_ctx);
v->priv_flags |= BR_VLFLAG_GLOBAL_MCAST_ENABLED;
}
@@ -302,7 +340,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
}
/* set the state before publishing */
- v->state = BR_STATE_FORWARDING;
+ br_vlan_init_state(v);
err = rhashtable_lookup_insert_fast(&vg->vlan_hash, &v->vnode,
br_vlan_rht_params);
@@ -310,7 +348,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
goto out_fdb_insert;
__vlan_add_list(v);
- __vlan_add_flags(v, flags);
+ __vlan_flags_commit(v, flags);
br_multicast_toggle_one_vlan(v, true);
if (p)
@@ -404,6 +442,7 @@ static void __vlan_flush(const struct net_bridge *br,
{
struct net_bridge_vlan *vlan, *tmp;
u16 v_start = 0, v_end = 0;
+ int err;
__vlan_delete_pvid(vg, vg->pvid);
list_for_each_entry_safe(vlan, tmp, &vg->vlan_list, vlist) {
@@ -417,7 +456,13 @@ static void __vlan_flush(const struct net_bridge *br,
}
v_end = vlan->vid;
- __vlan_del(vlan);
+ err = __vlan_del(vlan);
+ if (err) {
+ br_err(br,
+ "port %u(%s) failed to delete vlan %d: %pe\n",
+ (unsigned int) p->port_no, p->dev->name,
+ vlan->vid, ERR_PTR(err));
+ }
}
/* notify about the last/whole vlan range */
@@ -560,10 +605,10 @@ static bool __allowed_ingress(const struct net_bridge *br,
!br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
if (*state == BR_STATE_FORWARDING) {
*state = br_vlan_get_pvid_state(vg);
- return br_vlan_state_allowed(*state, true);
- } else {
- return true;
+ if (!br_vlan_state_allowed(*state, true))
+ goto drop;
}
+ return true;
}
}
v = br_vlan_find(vg, *vid);
@@ -670,18 +715,29 @@ static int br_vlan_add_existing(struct net_bridge *br,
u16 flags, bool *changed,
struct netlink_ext_ack *extack)
{
+ bool would_change = __vlan_flags_would_change(vlan, flags);
+ bool becomes_brentry = false;
int err;
- err = br_switchdev_port_vlan_add(br->dev, vlan->vid, flags, extack);
- if (err && err != -EOPNOTSUPP)
- return err;
-
if (!br_vlan_is_brentry(vlan)) {
/* Trying to change flags of non-existent bridge vlan */
- if (!(flags & BRIDGE_VLAN_INFO_BRENTRY)) {
- err = -EINVAL;
- goto err_flags;
- }
+ if (!(flags & BRIDGE_VLAN_INFO_BRENTRY))
+ return -EINVAL;
+
+ becomes_brentry = true;
+ }
+
+ /* Master VLANs that aren't brentries weren't notified before,
+ * time to notify them now.
+ */
+ if (becomes_brentry || would_change) {
+ err = br_switchdev_port_vlan_add(br->dev, vlan->vid, flags,
+ would_change, extack);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+ }
+
+ if (becomes_brentry) {
/* It was only kept for port vlans, now make it real */
err = br_fdb_add_local(br, NULL, br->dev->dev_addr, vlan->vid);
if (err) {
@@ -696,13 +752,13 @@ static int br_vlan_add_existing(struct net_bridge *br,
br_multicast_toggle_one_vlan(vlan, true);
}
- if (__vlan_add_flags(vlan, flags))
+ __vlan_flags_commit(vlan, flags);
+ if (would_change)
*changed = true;
return 0;
err_fdb_insert:
-err_flags:
br_switchdev_port_vlan_del(br->dev, vlan->vid);
return err;
}
@@ -1247,11 +1303,18 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
*changed = false;
vlan = br_vlan_find(nbp_vlan_group(port), vid);
if (vlan) {
- /* Pass the flags to the hardware bridge */
- ret = br_switchdev_port_vlan_add(port->dev, vid, flags, extack);
- if (ret && ret != -EOPNOTSUPP)
- return ret;
- *changed = __vlan_add_flags(vlan, flags);
+ bool would_change = __vlan_flags_would_change(vlan, flags);
+
+ if (would_change) {
+ /* Pass the flags to the hardware bridge */
+ ret = br_switchdev_port_vlan_add(port->dev, vid, flags,
+ true, extack);
+ if (ret && ret != -EOPNOTSUPP)
+ return ret;
+ }
+
+ __vlan_flags_commit(vlan, flags);
+ *changed = would_change;
return 0;
}
@@ -2020,7 +2083,8 @@ static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb)
goto out_err;
}
err = br_vlan_dump_dev(dev, skb, cb, dump_flags);
- if (err && err != -EMSGSIZE)
+ /* if the dump completed without an error we return 0 here */
+ if (err != -EMSGSIZE)
goto out_err;
} else {
for_each_netdev_rcu(net, dev) {
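The __vlan_flags_update() split above lets callers ask "would these flags change anything?" before sending a switchdev notification, and commit the flags afterwards. The predicate is: a PVID request changes state only when it disagrees with whether the VLAN is already the PVID, and the untagged bit changes only when it differs from the current flags. A minimal sketch of that test, with simplified stand-in constants and types:

#include <stdbool.h>
#include <stdio.h>

#define MOCK_VLAN_INFO_PVID		(1 << 1)
#define MOCK_VLAN_INFO_UNTAGGED		(1 << 2)

struct mock_vlan { unsigned short vid; unsigned short flags; };
struct mock_vlan_group { unsigned short pvid; };

static bool mock_vlan_flags_would_change(const struct mock_vlan_group *vg,
					 const struct mock_vlan *v,
					 unsigned short flags)
{
	return !!(flags & MOCK_VLAN_INFO_PVID) == !!(vg->pvid != v->vid) ||
	       ((flags ^ v->flags) & MOCK_VLAN_INFO_UNTAGGED);
}

int main(void)
{
	struct mock_vlan_group vg = { .pvid = 10 };
	struct mock_vlan v = { .vid = 10, .flags = MOCK_VLAN_INFO_UNTAGGED };

	/* already PVID + untagged: re-applying the same flags is a no-op */
	printf("same flags change: %d\n",
	       mock_vlan_flags_would_change(&vg, &v,
					    MOCK_VLAN_INFO_PVID | MOCK_VLAN_INFO_UNTAGGED));
	/* dropping the PVID bit would change the group's pvid */
	printf("drop pvid change: %d\n",
	       mock_vlan_flags_would_change(&vg, &v, MOCK_VLAN_INFO_UNTAGGED));
	return 0;
}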
diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c
index a6382973b3e7..a2724d03278c 100644
--- a/net/bridge/br_vlan_options.c
+++ b/net/bridge/br_vlan_options.c
@@ -99,6 +99,11 @@ static int br_vlan_modify_state(struct net_bridge_vlan_group *vg,
return -EBUSY;
}
+ if (br_opt_get(br, BROPT_MST_ENABLED)) {
+ NL_SET_ERR_MSG_MOD(extack, "Can't modify vlan state directly when MST is enabled");
+ return -EBUSY;
+ }
+
if (v->state == state)
return 0;
@@ -291,6 +296,7 @@ bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr,
const struct net_bridge_vlan *r_end)
{
return v_curr->vid - r_end->vid == 1 &&
+ v_curr->msti == r_end->msti &&
((v_curr->priv_flags ^ r_end->priv_flags) &
BR_VLFLAG_GLOBAL_MCAST_ENABLED) == 0 &&
br_multicast_ctx_options_equal(&v_curr->br_mcast_ctx,
@@ -379,6 +385,9 @@ bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range,
#endif
#endif
+ if (nla_put_u16(skb, BRIDGE_VLANDB_GOPTS_MSTI, v_opts->msti))
+ goto out_err;
+
nla_nest_end(skb, nest);
return true;
@@ -410,6 +419,7 @@ static size_t rtnl_vlan_global_opts_nlmsg_size(const struct net_bridge_vlan *v)
+ nla_total_size(0) /* BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS */
+ br_rports_size(&v->br_mcast_ctx) /* BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS */
#endif
+ + nla_total_size(sizeof(u16)) /* BRIDGE_VLANDB_GOPTS_MSTI */
+ nla_total_size(sizeof(u16)); /* BRIDGE_VLANDB_GOPTS_RANGE */
}
@@ -559,6 +569,15 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br,
}
#endif
#endif
+ if (tb[BRIDGE_VLANDB_GOPTS_MSTI]) {
+ u16 msti;
+
+ msti = nla_get_u16(tb[BRIDGE_VLANDB_GOPTS_MSTI]);
+ err = br_mst_vlan_set_msti(v, msti);
+ if (err)
+ return err;
+ *changed = true;
+ }
return 0;
}
@@ -578,6 +597,7 @@ static const struct nla_policy br_vlan_db_gpol[BRIDGE_VLANDB_GOPTS_MAX + 1] = {
[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL] = { .type = NLA_U64 },
[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL] = { .type = NLA_U64 },
[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL] = { .type = NLA_U64 },
+ [BRIDGE_VLANDB_GOPTS_MSTI] = NLA_POLICY_MAX(NLA_U16, VLAN_N_VID - 1),
};
int br_vlan_rtm_process_global_options(struct net_device *dev,
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index fdbed3158555..73242962be5d 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -32,6 +32,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
struct sk_buff *))
{
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
+ bool mono_delivery_time = skb->mono_delivery_time;
unsigned int hlen, ll_rs, mtu;
ktime_t tstamp = skb->tstamp;
struct ip_frag_state state;
@@ -81,7 +82,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
if (iter.frag)
ip_fraglist_prepare(skb, &iter);
- skb->tstamp = tstamp;
+ skb_set_delivery_time(skb, tstamp, mono_delivery_time);
err = output(net, sk, data, skb);
if (err || !iter.frag)
break;
@@ -112,7 +113,7 @@ slow_path:
goto blackhole;
}
- skb2->tstamp = tstamp;
+ skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
err = output(net, sk, data, skb2);
if (err)
goto blackhole;
@@ -380,7 +381,7 @@ static unsigned int nf_ct_bridge_confirm(struct sk_buff *skb)
protoff = skb_network_offset(skb) + ip_hdrlen(skb);
break;
case htons(ETH_P_IPV6): {
- unsigned char pnum = ipv6_hdr(skb)->nexthdr;
+ unsigned char pnum = ipv6_hdr(skb)->nexthdr;
__be16 frag_off;
protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index c1ef9cc89b78..8c3eaba87ad2 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -87,6 +87,7 @@ static int nft_meta_bridge_get_init(const struct nft_ctx *ctx,
return nft_meta_get_init(ctx, expr, tb);
}
+ priv->len = len;
return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg,
NULL, NFT_DATA_VALUE, len);
}
@@ -98,6 +99,7 @@ static const struct nft_expr_ops nft_meta_bridge_get_ops = {
.eval = nft_meta_bridge_get_eval,
.init = nft_meta_bridge_get_init,
.dump = nft_meta_get_dump,
+ .reduce = nft_meta_get_reduce,
};
static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track,
@@ -112,8 +114,7 @@ static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track,
if (track->regs[i].selector->ops != &nft_meta_bridge_get_ops)
continue;
- track->regs[i].selector = NULL;
- track->regs[i].bitwise = NULL;
+ __nft_reg_track_cancel(track, i);
}
return false;
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index eba0efe64d05..71b54fed7263 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -49,7 +49,7 @@ static void nft_reject_br_send_v4_tcp_reset(struct net *net,
{
struct sk_buff *nskb;
- nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, dev, hook);
+ nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, NULL, hook);
if (!nskb)
return;
@@ -65,7 +65,7 @@ static void nft_reject_br_send_v4_unreach(struct net *net,
{
struct sk_buff *nskb;
- nskb = nf_reject_skb_v4_unreach(net, oldskb, dev, hook, code);
+ nskb = nf_reject_skb_v4_unreach(net, oldskb, NULL, hook, code);
if (!nskb)
return;
@@ -81,7 +81,7 @@ static void nft_reject_br_send_v6_tcp_reset(struct net *net,
{
struct sk_buff *nskb;
- nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, dev, hook);
+ nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, NULL, hook);
if (!nskb)
return;
@@ -98,7 +98,7 @@ static void nft_reject_br_send_v6_unreach(struct net *net,
{
struct sk_buff *nskb;
- nskb = nf_reject_skb_v6_unreach(net, oldskb, dev, hook, code);
+ nskb = nf_reject_skb_v6_unreach(net, oldskb, NULL, hook, code);
if (!nskb)
return;
@@ -185,6 +185,7 @@ static const struct nft_expr_ops nft_reject_bridge_ops = {
.init = nft_reject_init,
.dump = nft_reject_dump,
.validate = nft_reject_bridge_validate,
+ .reduce = NFT_REDUCE_READONLY,
};
static struct nft_expr_type nft_reject_bridge_type __read_mostly = {
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 440139706130..52dd0b6835bc 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -268,7 +268,7 @@ static int receive(struct sk_buff *skb, struct net_device *dev,
err = caifd->layer.up->receive(caifd->layer.up, pkt);
- /* For -EILSEQ the packet is not freed so so it now */
+ /* For -EILSEQ the packet is not freed so free it now */
if (err == -EILSEQ)
cfpkt_destroy(pkt);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 2b8892d502f7..251e666ba9a2 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -282,7 +282,7 @@ static int caif_seqpkt_recvmsg(struct socket *sock, struct msghdr *m,
if (flags & MSG_OOB)
goto read_error;
- skb = skb_recv_datagram(sk, flags, 0 , &ret);
+ skb = skb_recv_datagram(sk, flags, &ret);
if (!skb)
goto read_error;
copylen = skb->len;
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 414dc5671c45..4d63ef13a1fd 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -99,7 +99,7 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
else
skb->ip_summed = CHECKSUM_NONE;
- netif_rx_any_context(skb);
+ netif_rx(skb);
/* Update statistics. */
priv->netdev->stats.rx_packets++;
diff --git a/net/can/af_can.c b/net/can/af_can.c
index cce2af10eb3e..1fb49d51b25d 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -284,7 +284,7 @@ int can_send(struct sk_buff *skb, int loop)
}
if (newskb)
- netif_rx_ni(newskb);
+ netif_rx(newskb);
/* update statistics */
pkg_stats->tx_frames++;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index bc88d901a1c0..e60161bec850 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -100,6 +100,7 @@ static inline u64 get_u64(const struct canfd_frame *cp, int offset)
struct bcm_op {
struct list_head list;
+ struct rcu_head rcu;
int ifindex;
canid_t can_id;
u32 flags;
@@ -193,7 +194,7 @@ static int bcm_proc_show(struct seq_file *m, void *v)
{
char ifname[IFNAMSIZ];
struct net *net = m->private;
- struct sock *sk = (struct sock *)PDE_DATA(m->file->f_inode);
+ struct sock *sk = (struct sock *)pde_data(m->file->f_inode);
struct bcm_sock *bo = bcm_sk(sk);
struct bcm_op *op;
@@ -718,10 +719,9 @@ static struct bcm_op *bcm_find_op(struct list_head *ops,
return NULL;
}
-static void bcm_remove_op(struct bcm_op *op)
+static void bcm_free_op_rcu(struct rcu_head *rcu_head)
{
- hrtimer_cancel(&op->timer);
- hrtimer_cancel(&op->thrtimer);
+ struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu);
if ((op->frames) && (op->frames != &op->sframe))
kfree(op->frames);
@@ -732,6 +732,14 @@ static void bcm_remove_op(struct bcm_op *op)
kfree(op);
}
+static void bcm_remove_op(struct bcm_op *op)
+{
+ hrtimer_cancel(&op->timer);
+ hrtimer_cancel(&op->thrtimer);
+
+ call_rcu(&op->rcu, bcm_free_op_rcu);
+}
+
static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op)
{
if (op->rx_reg_dev == dev) {
@@ -757,6 +765,9 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) &&
(op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) {
+ /* disable automatic timer on frame reception */
+ op->flags |= RX_NO_AUTOTIMER;
+
/*
* Don't care if we're bound or not (due to netdev
* problems) can_rx_unregister() is always a save
@@ -785,7 +796,6 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
bcm_rx_handler, op);
list_del(&op->list);
- synchronize_rcu();
bcm_remove_op(op);
return 1; /* done */
}
@@ -1632,12 +1642,9 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
struct sock *sk = sock->sk;
struct sk_buff *skb;
int error = 0;
- int noblock;
int err;
- noblock = flags & MSG_DONTWAIT;
- flags &= ~MSG_DONTWAIT;
- skb = skb_recv_datagram(sk, flags, noblock, &error);
+ skb = skb_recv_datagram(sk, flags, &error);
if (!skb)
return error;
@@ -1650,7 +1657,7 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
return err;
}
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (msg->msg_name) {
__sockaddr_check_size(BCM_MIN_NAMELEN);
diff --git a/net/can/gw.c b/net/can/gw.c
index d8861e862f15..1ea4cc527db3 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -577,6 +577,13 @@ static inline void cgw_unregister_filter(struct net *net, struct cgw_job *gwj)
gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj);
}
+static void cgw_job_free_rcu(struct rcu_head *rcu_head)
+{
+ struct cgw_job *gwj = container_of(rcu_head, struct cgw_job, rcu);
+
+ kmem_cache_free(cgw_cache, gwj);
+}
+
static int cgw_notifier(struct notifier_block *nb,
unsigned long msg, void *ptr)
{
@@ -596,8 +603,7 @@ static int cgw_notifier(struct notifier_block *nb,
if (gwj->src.dev == dev || gwj->dst.dev == dev) {
hlist_del(&gwj->list);
cgw_unregister_filter(net, gwj);
- synchronize_rcu();
- kmem_cache_free(cgw_cache, gwj);
+ call_rcu(&gwj->rcu, cgw_job_free_rcu);
}
}
}
@@ -1155,8 +1161,7 @@ static void cgw_remove_all_jobs(struct net *net)
hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
hlist_del(&gwj->list);
cgw_unregister_filter(net, gwj);
- synchronize_rcu();
- kmem_cache_free(cgw_cache, gwj);
+ call_rcu(&gwj->rcu, cgw_job_free_rcu);
}
}
@@ -1224,8 +1229,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh,
hlist_del(&gwj->list);
cgw_unregister_filter(net, gwj);
- synchronize_rcu();
- kmem_cache_free(cgw_cache, gwj);
+ call_rcu(&gwj->rcu, cgw_job_free_rcu);
err = 0;
break;
}
@@ -1239,16 +1243,19 @@ static int __net_init cangw_pernet_init(struct net *net)
return 0;
}
-static void __net_exit cangw_pernet_exit(struct net *net)
+static void __net_exit cangw_pernet_exit_batch(struct list_head *net_list)
{
+ struct net *net;
+
rtnl_lock();
- cgw_remove_all_jobs(net);
+ list_for_each_entry(net, net_list, exit_list)
+ cgw_remove_all_jobs(net);
rtnl_unlock();
}
static struct pernet_operations cangw_pernet_ops = {
.init = cangw_pernet_init,
- .exit = cangw_pernet_exit,
+ .exit_batch = cangw_pernet_exit_batch,
};
static __init int cgw_module_init(void)
diff --git a/net/can/isotp.c b/net/can/isotp.c
index 02cbcb2ecf0d..43a27d19cdac 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -14,7 +14,6 @@
* - use CAN_ISOTP_WAIT_TX_DONE flag to block the caller until the PDU is sent
* - as we have static buffers the check whether the PDU fits into the buffer
* is done at FF reception time (no support for sending 'wait frames')
- * - take care of the tx-queue-len as traffic shaping is still on the TODO list
*
* Copyright (c) 2020 Volkswagen Group Electronic Research
* All rights reserved.
@@ -56,6 +55,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/interrupt.h>
+#include <linux/spinlock.h>
#include <linux/hrtimer.h>
#include <linux/wait.h>
#include <linux/uio.h>
@@ -86,9 +86,9 @@ MODULE_ALIAS("can-proto-6");
/* ISO 15765-2:2016 supports more than 4095 byte per ISO PDU as the FF_DL can
* take full 32 bit values (4 Gbyte). We would need some good concept to handle
* this between user space and kernel space. For now increase the static buffer
- * to something about 8 kbyte to be able to test this new functionality.
+ * to something about 64 kbyte to be able to test this new functionality.
*/
-#define MAX_MSG_LENGTH 8200
+#define MAX_MSG_LENGTH 66000
/* N_PCI type values in bits 7-4 of N_PCI bytes */
#define N_PCI_SF 0x00 /* single frame */
@@ -104,6 +104,7 @@ MODULE_ALIAS("can-proto-6");
#define FC_CONTENT_SZ 3 /* flow control content size in byte (FS/BS/STmin) */
#define ISOTP_CHECK_PADDING (CAN_ISOTP_CHK_PAD_LEN | CAN_ISOTP_CHK_PAD_DATA)
+#define ISOTP_ALL_BC_FLAGS (CAN_ISOTP_SF_BROADCAST | CAN_ISOTP_CF_BROADCAST)
/* Flow Status given in FC frame */
#define ISOTP_FC_CTS 0 /* clear to send */
@@ -140,11 +141,14 @@ struct isotp_sock {
struct can_isotp_options opt;
struct can_isotp_fc_options rxfc, txfc;
struct can_isotp_ll_options ll;
+ u32 frame_txtime;
u32 force_tx_stmin;
u32 force_rx_stmin;
+ u32 cfecho; /* consecutive frame echo tag */
struct tpcon rx, tx;
struct list_head notifier;
wait_queue_head_t wait;
+ spinlock_t rx_lock; /* protect single thread state machine */
};
static LIST_HEAD(isotp_notifier_list);
@@ -156,6 +160,23 @@ static inline struct isotp_sock *isotp_sk(const struct sock *sk)
return (struct isotp_sock *)sk;
}
+static u32 isotp_bc_flags(struct isotp_sock *so)
+{
+ return so->opt.flags & ISOTP_ALL_BC_FLAGS;
+}
+
+static bool isotp_register_rxid(struct isotp_sock *so)
+{
+ /* no broadcast modes => register rx_id for FC frame reception */
+ return (isotp_bc_flags(so) == 0);
+}
+
+static bool isotp_register_txecho(struct isotp_sock *so)
+{
+ /* all modes but SF_BROADCAST register for tx echo skbs */
+ return (isotp_bc_flags(so) != CAN_ISOTP_SF_BROADCAST);
+}
+
static enum hrtimer_restart isotp_rx_timer_handler(struct hrtimer *hrtimer)
{
struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
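
For context on the new helpers: isotp_bc_flags() extracts the two functional-addressing flags, isotp_register_rxid() decides whether an rx_id filter (for FC reception) is needed, and isotp_register_txecho() decides whether the socket subscribes to its own tx echo frames (every mode except SF_BROADCAST). A hedged userspace sketch of selecting one of these modes; the constants come from linux/can/isotp.h and the socket setup is the usual ISO-TP pattern, not part of this patch:

	#include <string.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <linux/can.h>
	#include <linux/can/isotp.h>

	int open_functional_tx(const char *ifname, canid_t tx_id)
	{
		struct can_isotp_options opts;
		struct sockaddr_can addr;
		int s = socket(PF_CAN, SOCK_DGRAM, CAN_ISOTP);

		if (s < 0)
			return -1;

		memset(&opts, 0, sizeof(opts));
		/* CAN_ISOTP_CF_BROADCAST: segmented tx without flow control (new mode);
		 * CAN_ISOTP_SF_BROADCAST would restrict the socket to single frames. */
		opts.flags = CAN_ISOTP_CF_BROADCAST;
		if (setsockopt(s, SOL_CAN_ISOTP, CAN_ISOTP_OPTS, &opts, sizeof(opts)) < 0)
			return -1;

		memset(&addr, 0, sizeof(addr));
		addr.can_family = AF_CAN;
		addr.can_ifindex = if_nametoindex(ifname);
		addr.can_addr.tp.tx_id = tx_id;	/* rx_id is not registered in broadcast modes */
		if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0)
			return -1;
		return s;
	}
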
@@ -358,7 +379,7 @@ static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae)
so->tx_gap = ktime_set(0, 0);
/* add transmission time for CAN frame N_As */
- so->tx_gap = ktime_add_ns(so->tx_gap, so->opt.frame_txtime);
+ so->tx_gap = ktime_add_ns(so->tx_gap, so->frame_txtime);
/* add waiting time for consecutive frames N_Cs */
if (so->opt.flags & CAN_ISOTP_FORCE_TXSTMIN)
so->tx_gap = ktime_add_ns(so->tx_gap,
@@ -615,11 +636,17 @@ static void isotp_rcv(struct sk_buff *skb, void *data)
n_pci_type = cf->data[ae] & 0xF0;
+ /* Make sure the state changes and data structures stay consistent at
+ * CAN frame reception time. This locking is not needed in real world
+ * use cases but the inconsistency can be triggered with syzkaller.
+ */
+ spin_lock(&so->rx_lock);
+
if (so->opt.flags & CAN_ISOTP_HALF_DUPLEX) {
/* check rx/tx path half duplex expectations */
if ((so->tx.state != ISOTP_IDLE && n_pci_type != N_PCI_FC) ||
(so->rx.state != ISOTP_IDLE && n_pci_type == N_PCI_FC))
- return;
+ goto out_unlock;
}
switch (n_pci_type) {
@@ -668,6 +695,9 @@ static void isotp_rcv(struct sk_buff *skb, void *data)
isotp_rcv_cf(sk, cf, ae, skb);
break;
}
+
+out_unlock:
+ spin_unlock(&so->rx_lock);
}
static void isotp_fill_dataframe(struct canfd_frame *cf, struct isotp_sock *so,
@@ -701,6 +731,63 @@ static void isotp_fill_dataframe(struct canfd_frame *cf, struct isotp_sock *so,
cf->data[0] = so->opt.ext_address;
}
+static void isotp_send_cframe(struct isotp_sock *so)
+{
+ struct sock *sk = &so->sk;
+ struct sk_buff *skb;
+ struct net_device *dev;
+ struct canfd_frame *cf;
+ int can_send_ret;
+ int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0;
+
+ dev = dev_get_by_index(sock_net(sk), so->ifindex);
+ if (!dev)
+ return;
+
+ skb = alloc_skb(so->ll.mtu + sizeof(struct can_skb_priv), GFP_ATOMIC);
+ if (!skb) {
+ dev_put(dev);
+ return;
+ }
+
+ can_skb_reserve(skb);
+ can_skb_prv(skb)->ifindex = dev->ifindex;
+ can_skb_prv(skb)->skbcnt = 0;
+
+ cf = (struct canfd_frame *)skb->data;
+ skb_put_zero(skb, so->ll.mtu);
+
+ /* create consecutive frame */
+ isotp_fill_dataframe(cf, so, ae, 0);
+
+ /* place consecutive frame N_PCI in appropriate index */
+ cf->data[ae] = N_PCI_CF | so->tx.sn++;
+ so->tx.sn %= 16;
+ so->tx.bs++;
+
+ cf->flags = so->ll.tx_flags;
+
+ skb->dev = dev;
+ can_skb_set_owner(skb, sk);
+
+ /* cfecho should have been zero'ed by init/isotp_rcv_echo() */
+ if (so->cfecho)
+ pr_notice_once("can-isotp: cfecho is %08X != 0\n", so->cfecho);
+
+ /* set consecutive frame echo tag */
+ so->cfecho = *(u32 *)cf->data;
+
+ /* send frame with local echo enabled */
+ can_send_ret = can_send(skb, 1);
+ if (can_send_ret) {
+ pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
+ __func__, ERR_PTR(can_send_ret));
+ if (can_send_ret == -ENOBUFS)
+ pr_notice_once("can-isotp: tx queue is full\n");
+ }
+ dev_put(dev);
+}
+
static void isotp_create_fframe(struct canfd_frame *cf, struct isotp_sock *so,
int ae)
{
@@ -734,7 +821,47 @@ static void isotp_create_fframe(struct canfd_frame *cf, struct isotp_sock *so,
cf->data[i] = so->tx.buf[so->tx.idx++];
so->tx.sn = 1;
- so->tx.state = ISOTP_WAIT_FIRST_FC;
+}
+
+static void isotp_rcv_echo(struct sk_buff *skb, void *data)
+{
+ struct sock *sk = (struct sock *)data;
+ struct isotp_sock *so = isotp_sk(sk);
+ struct canfd_frame *cf = (struct canfd_frame *)skb->data;
+
+ /* only handle my own local echo skb's */
+ if (skb->sk != sk || so->cfecho != *(u32 *)cf->data)
+ return;
+
+ /* cancel local echo timeout */
+ hrtimer_cancel(&so->txtimer);
+
+ /* local echo skb with consecutive frame has been consumed */
+ so->cfecho = 0;
+
+ if (so->tx.idx >= so->tx.len) {
+ /* we are done */
+ so->tx.state = ISOTP_IDLE;
+ wake_up_interruptible(&so->wait);
+ return;
+ }
+
+ if (so->txfc.bs && so->tx.bs >= so->txfc.bs) {
+ /* stop and wait for FC with timeout */
+ so->tx.state = ISOTP_WAIT_FC;
+ hrtimer_start(&so->txtimer, ktime_set(1, 0),
+ HRTIMER_MODE_REL_SOFT);
+ return;
+ }
+
+ /* no gap between data frames needed => use burst mode */
+ if (!so->tx_gap) {
+ isotp_send_cframe(so);
+ return;
+ }
+
+ /* start timer to send next consecutive frame with correct delay */
+ hrtimer_start(&so->txtimer, so->tx_gap, HRTIMER_MODE_REL_SOFT);
}
static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
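
Taken together, these pieces turn the transmit path from purely timer-driven into echo-driven: isotp_send_cframe() tags each consecutive frame (cfecho) and sends it with local echo enabled, isotp_rcv_echo() runs once the frame has actually left the interface and decides the next step, and the ISOTP_SENDING case of the tx timer (next hunk) only survives as a 2 s watchdog for a lost echo. A compressed sketch of the per-echo decision, mirroring the code above rather than adding a separate implementation:

	/* on local echo of the previous consecutive frame: */
	if (so->tx.idx >= so->tx.len)
		so->tx.state = ISOTP_IDLE;		/* PDU complete, wake the sender */
	else if (so->txfc.bs && so->tx.bs >= so->txfc.bs)
		so->tx.state = ISOTP_WAIT_FC;		/* block done, wait for FC (1 s timeout) */
	else if (!so->tx_gap)
		isotp_send_cframe(so);			/* STmin == 0: send back-to-back */
	else
		hrtimer_start(&so->txtimer, so->tx_gap, HRTIMER_MODE_REL_SOFT);
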
@@ -742,14 +869,28 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
txtimer);
struct sock *sk = &so->sk;
- struct sk_buff *skb;
- struct net_device *dev;
- struct canfd_frame *cf;
enum hrtimer_restart restart = HRTIMER_NORESTART;
- int can_send_ret;
- int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0;
switch (so->tx.state) {
+ case ISOTP_SENDING:
+
+ /* cfecho should be consumed by isotp_rcv_echo() here */
+ if (!so->cfecho) {
+ /* start timeout for unlikely lost echo skb */
+ hrtimer_set_expires(&so->txtimer,
+ ktime_add(ktime_get(),
+ ktime_set(2, 0)));
+ restart = HRTIMER_RESTART;
+
+ /* push out the next consecutive frame */
+ isotp_send_cframe(so);
+ break;
+ }
+
+ /* cfecho has not been cleared in isotp_rcv_echo() */
+ pr_notice_once("can-isotp: cfecho %08X timeout\n", so->cfecho);
+ fallthrough;
+
case ISOTP_WAIT_FC:
case ISOTP_WAIT_FIRST_FC:
@@ -765,78 +906,6 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
wake_up_interruptible(&so->wait);
break;
- case ISOTP_SENDING:
-
- /* push out the next segmented pdu */
- dev = dev_get_by_index(sock_net(sk), so->ifindex);
- if (!dev)
- break;
-
-isotp_tx_burst:
- skb = alloc_skb(so->ll.mtu + sizeof(struct can_skb_priv),
- GFP_ATOMIC);
- if (!skb) {
- dev_put(dev);
- break;
- }
-
- can_skb_reserve(skb);
- can_skb_prv(skb)->ifindex = dev->ifindex;
- can_skb_prv(skb)->skbcnt = 0;
-
- cf = (struct canfd_frame *)skb->data;
- skb_put_zero(skb, so->ll.mtu);
-
- /* create consecutive frame */
- isotp_fill_dataframe(cf, so, ae, 0);
-
- /* place consecutive frame N_PCI in appropriate index */
- cf->data[ae] = N_PCI_CF | so->tx.sn++;
- so->tx.sn %= 16;
- so->tx.bs++;
-
- cf->flags = so->ll.tx_flags;
-
- skb->dev = dev;
- can_skb_set_owner(skb, sk);
-
- can_send_ret = can_send(skb, 1);
- if (can_send_ret) {
- pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
- __func__, ERR_PTR(can_send_ret));
- if (can_send_ret == -ENOBUFS)
- pr_notice_once("can-isotp: tx queue is full, increasing txqueuelen may prevent this error\n");
- }
- if (so->tx.idx >= so->tx.len) {
- /* we are done */
- so->tx.state = ISOTP_IDLE;
- dev_put(dev);
- wake_up_interruptible(&so->wait);
- break;
- }
-
- if (so->txfc.bs && so->tx.bs >= so->txfc.bs) {
- /* stop and wait for FC */
- so->tx.state = ISOTP_WAIT_FC;
- dev_put(dev);
- hrtimer_set_expires(&so->txtimer,
- ktime_add(ktime_get(),
- ktime_set(1, 0)));
- restart = HRTIMER_RESTART;
- break;
- }
-
- /* no gap between data frames needed => use burst mode */
- if (!so->tx_gap)
- goto isotp_tx_burst;
-
- /* start timer to send next data frame with correct delay */
- dev_put(dev);
- hrtimer_set_expires(&so->txtimer,
- ktime_add(ktime_get(), so->tx_gap));
- restart = HRTIMER_RESTART;
- break;
-
default:
WARN_ON_ONCE(1);
}
@@ -854,6 +923,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
struct canfd_frame *cf;
int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0;
int wait_tx_done = (so->opt.flags & CAN_ISOTP_WAIT_TX_DONE) ? 1 : 0;
+ s64 hrtimer_sec = 0;
int off;
int err;
@@ -876,34 +946,34 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (!size || size > MAX_MSG_LENGTH) {
err = -EINVAL;
- goto err_out;
+ goto err_out_drop;
}
/* take care of a potential SF_DL ESC offset for TX_DL > 8 */
off = (so->tx.ll_dl > CAN_MAX_DLEN) ? 1 : 0;
/* does the given data fit into a single frame for SF_BROADCAST? */
- if ((so->opt.flags & CAN_ISOTP_SF_BROADCAST) &&
+ if ((isotp_bc_flags(so) == CAN_ISOTP_SF_BROADCAST) &&
(size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off)) {
err = -EINVAL;
- goto err_out;
+ goto err_out_drop;
}
err = memcpy_from_msg(so->tx.buf, msg, size);
if (err < 0)
- goto err_out;
+ goto err_out_drop;
dev = dev_get_by_index(sock_net(sk), so->ifindex);
if (!dev) {
err = -ENXIO;
- goto err_out;
+ goto err_out_drop;
}
skb = sock_alloc_send_skb(sk, so->ll.mtu + sizeof(struct can_skb_priv),
msg->msg_flags & MSG_DONTWAIT, &err);
if (!skb) {
dev_put(dev);
- goto err_out;
+ goto err_out_drop;
}
can_skb_reserve(skb);
@@ -947,12 +1017,43 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
/* don't enable wait queue for a single frame transmission */
wait_tx_done = 0;
} else {
- /* send first frame and wait for FC */
+ /* send first frame */
isotp_create_fframe(cf, so, ae);
- /* start timeout for FC */
- hrtimer_start(&so->txtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
+ if (isotp_bc_flags(so) == CAN_ISOTP_CF_BROADCAST) {
+ /* set timer for FC-less operation (STmin = 0) */
+ if (so->opt.flags & CAN_ISOTP_FORCE_TXSTMIN)
+ so->tx_gap = ktime_set(0, so->force_tx_stmin);
+ else
+ so->tx_gap = ktime_set(0, so->frame_txtime);
+
+ /* disable wait for FCs due to activated block size */
+ so->txfc.bs = 0;
+
+ /* cfecho should have been zero'ed by init */
+ if (so->cfecho)
+ pr_notice_once("can-isotp: no fc cfecho %08X\n",
+ so->cfecho);
+
+ /* set consecutive frame echo tag */
+ so->cfecho = *(u32 *)cf->data;
+
+ /* switch directly to ISOTP_SENDING state */
+ so->tx.state = ISOTP_SENDING;
+
+ /* start timeout for unlikely lost echo skb */
+ hrtimer_sec = 2;
+ } else {
+ /* standard flow control check */
+ so->tx.state = ISOTP_WAIT_FIRST_FC;
+
+ /* start timeout for FC */
+ hrtimer_sec = 1;
+ }
+
+ hrtimer_start(&so->txtimer, ktime_set(hrtimer_sec, 0),
+ HRTIMER_MODE_REL_SOFT);
}
/* send the first or only CAN frame */
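
In CF_BROADCAST mode there are no FC frames to pace the consecutive frames, so the gap falls back to frame_txtime unless CAN_ISOTP_FORCE_TXSTMIN is set, in which case force_tx_stmin is used. A hedged userspace sketch of forcing a fixed gap in this mode, reusing the headers from the earlier sketch; the assumption here is that CAN_ISOTP_TX_STMIN takes the forced gap as a u32 in nanoseconds:

	static int set_fc_less_pacing(int s)
	{
		struct can_isotp_options opts = {
			.flags = CAN_ISOTP_CF_BROADCAST | CAN_ISOTP_FORCE_TXSTMIN,
		};
		__u32 gap_ns = 2 * 1000 * 1000;		/* 2 ms between consecutive frames */

		if (setsockopt(s, SOL_CAN_ISOTP, CAN_ISOTP_OPTS, &opts, sizeof(opts)) < 0)
			return -1;
		return setsockopt(s, SOL_CAN_ISOTP, CAN_ISOTP_TX_STMIN,
				  &gap_ns, sizeof(gap_ns));
	}
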
@@ -965,7 +1066,15 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (err) {
pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
__func__, ERR_PTR(err));
- goto err_out;
+
+ /* no transmission -> no timeout monitoring */
+ if (hrtimer_sec)
+ hrtimer_cancel(&so->txtimer);
+
+ /* reset consecutive frame echo tag */
+ so->cfecho = 0;
+
+ goto err_out_drop;
}
if (wait_tx_done) {
@@ -978,6 +1087,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
return size;
+err_out_drop:
+ /* drop this PDU and unlock a potential wait queue */
+ old_state = ISOTP_IDLE;
err_out:
so->tx.state = old_state;
if (so->tx.state == ISOTP_IDLE)
@@ -991,26 +1103,27 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
{
struct sock *sk = sock->sk;
struct sk_buff *skb;
- int err = 0;
- int noblock;
+ struct isotp_sock *so = isotp_sk(sk);
+ int ret = 0;
+
+ if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK))
+ return -EINVAL;
- noblock = flags & MSG_DONTWAIT;
- flags &= ~MSG_DONTWAIT;
+ if (!so->bound)
+ return -EADDRNOTAVAIL;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &ret);
if (!skb)
- return err;
+ return ret;
if (size < skb->len)
msg->msg_flags |= MSG_TRUNC;
else
size = skb->len;
- err = memcpy_to_msg(msg, skb->data, size);
- if (err < 0) {
- skb_free_datagram(sk, skb);
- return err;
- }
+ ret = memcpy_to_msg(msg, skb->data, size);
+ if (ret < 0)
+ goto out_err;
sock_recv_timestamp(msg, sk, skb);
@@ -1020,9 +1133,13 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
}
+ /* set length of return value */
+ ret = (flags & MSG_TRUNC) ? skb->len : size;
+
+out_err:
skb_free_datagram(sk, skb);
- return size;
+ return ret;
}
static int isotp_release(struct socket *sock)
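
The reworked isotp_recvmsg() rejects unknown flags, requires a bound socket, honours MSG_PEEK, and returns the full PDU length when MSG_TRUNC is passed even if the user buffer was smaller. A hedged userspace sketch that uses this to size a buffer before the real read (handle_pdu() is a hypothetical consumer; needs <stdlib.h> and <sys/socket.h>):

	static int read_one_pdu(int s)
	{
		char probe;
		ssize_t pdu_len = recv(s, &probe, 1, MSG_PEEK | MSG_TRUNC);
		char *buf;

		if (pdu_len <= 0)
			return -1;
		buf = malloc(pdu_len);			/* pdu_len is the real PDU size */
		if (!buf)
			return -1;
		if (recv(s, buf, pdu_len, 0) == pdu_len)
			handle_pdu(buf, pdu_len);	/* hypothetical consumer */
		free(buf);
		return 0;
	}
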
@@ -1052,15 +1169,20 @@ static int isotp_release(struct socket *sock)
lock_sock(sk);
/* remove current filters & unregister */
- if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST))) {
+ if (so->bound && isotp_register_txecho(so)) {
if (so->ifindex) {
struct net_device *dev;
dev = dev_get_by_index(net, so->ifindex);
if (dev) {
- can_rx_unregister(net, dev, so->rxid,
- SINGLE_MASK(so->rxid),
- isotp_rcv, sk);
+ if (isotp_register_rxid(so))
+ can_rx_unregister(net, dev, so->rxid,
+ SINGLE_MASK(so->rxid),
+ isotp_rcv, sk);
+
+ can_rx_unregister(net, dev, so->txid,
+ SINGLE_MASK(so->txid),
+ isotp_rcv_echo, sk);
dev_put(dev);
synchronize_rcu();
}
@@ -1090,42 +1212,51 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
struct net *net = sock_net(sk);
int ifindex;
struct net_device *dev;
+ canid_t tx_id = addr->can_addr.tp.tx_id;
+ canid_t rx_id = addr->can_addr.tp.rx_id;
int err = 0;
int notify_enetdown = 0;
- int do_rx_reg = 1;
if (len < ISOTP_MIN_NAMELEN)
return -EINVAL;
- if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG))
- return -EADDRNOTAVAIL;
+ /* sanitize tx CAN identifier */
+ if (tx_id & CAN_EFF_FLAG)
+ tx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK);
+ else
+ tx_id &= CAN_SFF_MASK;
+
+ /* give feedback on wrong CAN-ID value */
+ if (tx_id != addr->can_addr.tp.tx_id)
+ return -EINVAL;
+
+ /* sanitize rx CAN identifier (if needed) */
+ if (isotp_register_rxid(so)) {
+ if (rx_id & CAN_EFF_FLAG)
+ rx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK);
+ else
+ rx_id &= CAN_SFF_MASK;
+
+ /* give feedback on wrong CAN-ID value */
+ if (rx_id != addr->can_addr.tp.rx_id)
+ return -EINVAL;
+ }
if (!addr->can_ifindex)
return -ENODEV;
lock_sock(sk);
- /* do not register frame reception for functional addressing */
- if (so->opt.flags & CAN_ISOTP_SF_BROADCAST)
- do_rx_reg = 0;
-
- /* do not validate rx address for functional addressing */
- if (do_rx_reg) {
- if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id) {
- err = -EADDRNOTAVAIL;
- goto out;
- }
-
- if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) {
- err = -EADDRNOTAVAIL;
- goto out;
- }
+ if (so->bound) {
+ err = -EINVAL;
+ goto out;
}
- if (so->bound && addr->can_ifindex == so->ifindex &&
- addr->can_addr.tp.rx_id == so->rxid &&
- addr->can_addr.tp.tx_id == so->txid)
+ /* ensure different CAN IDs when the rx_id is to be registered */
+ if (isotp_register_rxid(so) && rx_id == tx_id) {
+ err = -EADDRNOTAVAIL;
goto out;
+ }
dev = dev_get_by_index(net, addr->can_ifindex);
if (!dev) {
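
The new bind() sanitization masks the given CAN IDs with CAN_EFF_MASK or CAN_SFF_MASK (depending on CAN_EFF_FLAG) and returns -EINVAL if any stray bits were set, instead of silently truncating them. A small illustration of what now passes and what is rejected (the ID values are arbitrary examples):

	canid_t ok_sff  = 0x7E0;			/* 11-bit ID, no flags: accepted */
	canid_t ok_eff  = 0x18DA10F1 | CAN_EFF_FLAG;	/* 29-bit ID with CAN_EFF_FLAG: accepted */
	canid_t bad_eff = 0x18DA10F1;			/* 29-bit value without CAN_EFF_FLAG:
							 * masked to 0x0F1 != original -> -EINVAL */
	canid_t bad_rtr = 0x7E0 | CAN_RTR_FLAG;		/* RTR/ERR bits count as stray bits -> -EINVAL */
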
@@ -1147,30 +1278,25 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
ifindex = dev->ifindex;
- if (do_rx_reg)
- can_rx_register(net, dev, addr->can_addr.tp.rx_id,
- SINGLE_MASK(addr->can_addr.tp.rx_id),
+ if (isotp_register_rxid(so))
+ can_rx_register(net, dev, rx_id, SINGLE_MASK(rx_id),
isotp_rcv, sk, "isotp", sk);
- dev_put(dev);
+ if (isotp_register_txecho(so)) {
+ /* no consecutive frame echo skb in flight */
+ so->cfecho = 0;
- if (so->bound && do_rx_reg) {
- /* unregister old filter */
- if (so->ifindex) {
- dev = dev_get_by_index(net, so->ifindex);
- if (dev) {
- can_rx_unregister(net, dev, so->rxid,
- SINGLE_MASK(so->rxid),
- isotp_rcv, sk);
- dev_put(dev);
- }
- }
+ /* register for echo skb's */
+ can_rx_register(net, dev, tx_id, SINGLE_MASK(tx_id),
+ isotp_rcv_echo, sk, "isotpe", sk);
}
+ dev_put(dev);
+
/* switch to new settings */
so->ifindex = ifindex;
- so->rxid = addr->can_addr.tp.rx_id;
- so->txid = addr->can_addr.tp.tx_id;
+ so->rxid = rx_id;
+ so->txid = tx_id;
so->bound = 1;
out:
@@ -1224,6 +1350,23 @@ static int isotp_setsockopt_locked(struct socket *sock, int level, int optname,
/* no separate rx_ext_address is given => use ext_address */
if (!(so->opt.flags & CAN_ISOTP_RX_EXT_ADDR))
so->opt.rx_ext_address = so->opt.ext_address;
+
+ /* these broadcast flags are not allowed together */
+ if (isotp_bc_flags(so) == ISOTP_ALL_BC_FLAGS) {
+ /* CAN_ISOTP_SF_BROADCAST is prioritized */
+ so->opt.flags &= ~CAN_ISOTP_CF_BROADCAST;
+
+ /* give user feedback on wrong config attempt */
+ ret = -EINVAL;
+ }
+
+ /* check for frame_txtime changes (0 => no changes) */
+ if (so->opt.frame_txtime) {
+ if (so->opt.frame_txtime == CAN_ISOTP_FRAME_TXTIME_ZERO)
+ so->frame_txtime = 0;
+ else
+ so->frame_txtime = so->opt.frame_txtime;
+ }
break;
case CAN_ISOTP_RECV_FC:
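
frame_txtime is now cached separately from the user-supplied options, so a later CAN_ISOTP_OPTS call only changes it when a non-zero value is passed, and the reserved CAN_ISOTP_FRAME_TXTIME_ZERO value requests an actual 0 ns N_As. A hedged userspace sketch of both cases (same header assumptions as the earlier sketches):

	struct can_isotp_options opts = { 0 };

	/* frame_txtime == 0 now means "leave the current N_As unchanged" */
	opts.flags = CAN_ISOTP_WAIT_TX_DONE;
	setsockopt(s, SOL_CAN_ISOTP, CAN_ISOTP_OPTS, &opts, sizeof(opts));

	/* explicitly request a zero N_As via the reserved value */
	opts.frame_txtime = CAN_ISOTP_FRAME_TXTIME_ZERO;
	setsockopt(s, SOL_CAN_ISOTP, CAN_ISOTP_OPTS, &opts, sizeof(opts));
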
@@ -1367,10 +1510,16 @@ static void isotp_notify(struct isotp_sock *so, unsigned long msg,
case NETDEV_UNREGISTER:
lock_sock(sk);
/* remove current filters & unregister */
- if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST)))
- can_rx_unregister(dev_net(dev), dev, so->rxid,
- SINGLE_MASK(so->rxid),
- isotp_rcv, sk);
+ if (so->bound && isotp_register_txecho(so)) {
+ if (isotp_register_rxid(so))
+ can_rx_unregister(dev_net(dev), dev, so->rxid,
+ SINGLE_MASK(so->rxid),
+ isotp_rcv, sk);
+
+ can_rx_unregister(dev_net(dev), dev, so->txid,
+ SINGLE_MASK(so->txid),
+ isotp_rcv_echo, sk);
+ }
so->ifindex = 0;
so->bound = 0;
@@ -1425,6 +1574,7 @@ static int isotp_init(struct sock *sk)
so->opt.rxpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT;
so->opt.txpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT;
so->opt.frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME;
+ so->frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME;
so->rxfc.bs = CAN_ISOTP_DEFAULT_RECV_BS;
so->rxfc.stmin = CAN_ISOTP_DEFAULT_RECV_STMIN;
so->rxfc.wftmax = CAN_ISOTP_DEFAULT_RECV_WFTMAX;
@@ -1444,6 +1594,7 @@ static int isotp_init(struct sock *sk)
so->txtimer.function = isotp_tx_timer_handler;
init_waitqueue_head(&so->wait);
+ spin_lock_init(&so->rx_lock);
spin_lock(&isotp_notifier_lock);
list_add_tail(&so->notifier, &isotp_notifier_list);
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 6dff4510687a..f5ecfdcf57b2 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -802,7 +802,7 @@ static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg,
return sock_recv_errqueue(sock->sk, msg, size, SOL_CAN_J1939,
SCM_J1939_ERRQUEUE);
- skb = skb_recv_datagram(sk, flags, 0, &ret);
+ skb = skb_recv_datagram(sk, flags, &ret);
if (!skb)
return ret;
@@ -841,7 +841,7 @@ static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg,
paddr->can_addr.j1939.pgn = skcb->addr.pgn;
}
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
msg->msg_flags |= skcb->msg_flags;
skb_free_datagram(sk, skb);
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index a271688780a2..307ee1174a6e 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -2006,7 +2006,7 @@ struct j1939_session *j1939_tp_send(struct j1939_priv *priv,
/* set the end-packet for broadcast */
session->pkt.last = session->pkt.total;
- skcb->tskey = session->sk->sk_tskey++;
+ skcb->tskey = atomic_inc_return(&session->sk->sk_tskey) - 1;
session->tskey = skcb->tskey;
return session;
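
The j1939 hunk follows the core-socket conversion of sk_tskey to an atomic_t, so two concurrent senders can no longer hand out the same timestamp key. The idiom keeps the old post-increment semantics; a tiny generic sketch:

	static atomic_t key_counter = ATOMIC_INIT(0);

	/* returns the pre-increment value, like "key_counter++", but race-free */
	static u32 next_key(void)
	{
		return atomic_inc_return(&key_counter) - 1;
	}
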
diff --git a/net/can/proc.c b/net/can/proc.c
index b3099f0a3cb8..bbce97825f13 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -305,7 +305,7 @@ static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx,
static int can_rcvlist_proc_show(struct seq_file *m, void *v)
{
/* double cast to prevent GCC warning */
- int idx = (int)(long)PDE_DATA(m->file->f_inode);
+ int idx = (int)(long)pde_data(m->file->f_inode);
struct net_device *dev;
struct can_dev_rcv_lists *dev_rcv_lists;
struct net *net = m->private;
diff --git a/net/can/raw.c b/net/can/raw.c
index 7105fa4824e4..d1bd9cc51ebe 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -772,6 +772,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
struct raw_sock *ro = raw_sk(sk);
+ struct sockcm_cookie sockc;
struct sk_buff *skb;
struct net_device *dev;
int ifindex;
@@ -817,11 +818,18 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (err < 0)
goto free_skb;
- skb_setup_tx_timestamp(skb, sk->sk_tsflags);
+ sockcm_init(&sockc, sk);
+ if (msg->msg_controllen) {
+ err = sock_cmsg_send(sk, msg, &sockc);
+ if (unlikely(err))
+ goto free_skb;
+ }
skb->dev = dev;
- skb->sk = sk;
skb->priority = sk->sk_priority;
+ skb->tstamp = sockc.transmit_time;
+
+ skb_setup_tx_timestamp(skb, sockc.tsflags);
err = can_send(skb, ro->loopback);
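
raw_sendmsg() now runs the generic sock_cmsg_send() path, which copies sockc.transmit_time into skb->tstamp, so CAN_RAW frames can be scheduled by a time-aware qdisc such as ETF. A hedged userspace sketch of passing a transmit time with the frame; this assumes SO_TXTIME has already been enabled on the socket and an ETF qdisc is installed on the interface (generic socket API, nothing CAN specific):

	#include <string.h>
	#include <sys/socket.h>
	#include <linux/can.h>

	static ssize_t send_at(int s, const struct can_frame *cf, __u64 txtime_ns)
	{
		char cbuf[CMSG_SPACE(sizeof(__u64))] = { 0 };
		struct iovec iov = { .iov_base = (void *)cf, .iov_len = sizeof(*cf) };
		struct msghdr msg = {
			.msg_iov = &iov, .msg_iovlen = 1,
			.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
		};
		struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);

		cm->cmsg_level = SOL_SOCKET;
		cm->cmsg_type = SCM_TXTIME;		/* consumed by sock_cmsg_send() */
		cm->cmsg_len = CMSG_LEN(sizeof(__u64));
		memcpy(CMSG_DATA(cm), &txtime_ns, sizeof(txtime_ns));

		return sendmsg(s, &msg, 0);
	}
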
@@ -846,16 +854,12 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
struct sock *sk = sock->sk;
struct sk_buff *skb;
int err = 0;
- int noblock;
-
- noblock = flags & MSG_DONTWAIT;
- flags &= ~MSG_DONTWAIT;
if (flags & MSG_ERRQUEUE)
return sock_recv_errqueue(sk, msg, size,
SOL_CAN_RAW, SCM_CAN_RAW_ERRQUEUE);
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
return err;
@@ -870,7 +874,7 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
return err;
}
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (msg->msg_name) {
__sockaddr_check_size(RAW_MIN_NAMELEN);
diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c
index 5622763ad402..7e51f128045d 100644
--- a/net/ceph/buffer.c
+++ b/net/ceph/buffer.c
@@ -7,7 +7,7 @@
#include <linux/ceph/buffer.h>
#include <linux/ceph/decode.h>
-#include <linux/ceph/libceph.h> /* for ceph_kvmalloc */
+#include <linux/ceph/libceph.h> /* for kvmalloc */
struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
{
@@ -17,7 +17,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
if (!b)
return NULL;
- b->vec.iov_base = ceph_kvmalloc(len, gfp);
+ b->vec.iov_base = kvmalloc(len, gfp);
if (!b->vec.iov_base) {
kfree(b);
return NULL;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 97d6ea763e32..4c6441536d55 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -190,41 +190,14 @@ int ceph_compare_options(struct ceph_options *new_opt,
}
EXPORT_SYMBOL(ceph_compare_options);
-/*
- * kvmalloc() doesn't fall back to the vmalloc allocator unless flags are
- * compatible with (a superset of) GFP_KERNEL. This is because while the
- * actual pages are allocated with the specified flags, the page table pages
- * are always allocated with GFP_KERNEL.
- *
- * ceph_kvmalloc() may be called with GFP_KERNEL, GFP_NOFS or GFP_NOIO.
- */
-void *ceph_kvmalloc(size_t size, gfp_t flags)
-{
- void *p;
-
- if ((flags & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) {
- p = kvmalloc(size, flags);
- } else if ((flags & (__GFP_IO | __GFP_FS)) == __GFP_IO) {
- unsigned int nofs_flag = memalloc_nofs_save();
- p = kvmalloc(size, GFP_KERNEL);
- memalloc_nofs_restore(nofs_flag);
- } else {
- unsigned int noio_flag = memalloc_noio_save();
- p = kvmalloc(size, GFP_KERNEL);
- memalloc_noio_restore(noio_flag);
- }
-
- return p;
-}
-
-static int parse_fsid(const char *str, struct ceph_fsid *fsid)
+int ceph_parse_fsid(const char *str, struct ceph_fsid *fsid)
{
int i = 0;
char tmp[3];
int err = -EINVAL;
int d;
- dout("parse_fsid '%s'\n", str);
+ dout("%s '%s'\n", __func__, str);
tmp[2] = 0;
while (*str && i < 16) {
if (ispunct(*str)) {
@@ -244,9 +217,10 @@ static int parse_fsid(const char *str, struct ceph_fsid *fsid)
if (i == 16)
err = 0;
- dout("parse_fsid ret %d got fsid %pU\n", err, fsid);
+ dout("%s ret %d got fsid %pU\n", __func__, err, fsid);
return err;
}
+EXPORT_SYMBOL(ceph_parse_fsid);
/*
* ceph options
@@ -272,6 +246,7 @@ enum {
Opt_cephx_sign_messages,
Opt_tcp_nodelay,
Opt_abort_on_full,
+ Opt_rxbounce,
};
enum {
@@ -321,6 +296,7 @@ static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_u32 ("osdkeepalive", Opt_osdkeepalivetimeout),
fsparam_enum ("read_from_replica", Opt_read_from_replica,
ceph_param_read_from_replica),
+ fsparam_flag ("rxbounce", Opt_rxbounce),
fsparam_enum ("ms_mode", Opt_ms_mode,
ceph_param_ms_mode),
fsparam_string ("secret", Opt_secret),
@@ -422,14 +398,14 @@ out:
}
int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt,
- struct fc_log *l)
+ struct fc_log *l, char delim)
{
struct p_log log = {.prefix = "libceph", .log = l};
int ret;
- /* ip1[:port1][,ip2[:port2]...] */
+ /* ip1[:port1][<delim>ip2[:port2]...] */
ret = ceph_parse_ips(buf, buf + len, opt->mon_addr, CEPH_MAX_MON,
- &opt->num_mon);
+ &opt->num_mon, delim);
if (ret) {
error_plog(&log, "Failed to parse monitor IPs: %d", ret);
return ret;
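
ceph_parse_mon_ips() and ceph_parse_ips() now take an explicit delimiter, so callers whose own syntax reserves ',' can separate monitor addresses differently, while bracketed IPv6 literals are still terminated by ']' per address (cur_delim). A minimal call sketch with hypothetical caller variables:

	/* comma-separated list, as before */
	err = ceph_parse_mon_ips(buf, len, opt, l, ',');

	/* a caller whose syntax reserves ',' can choose another separator,
	 * e.g. "1.2.3.4:6789/5.6.7.8:6789" */
	err = ceph_parse_mon_ips(buf, len, opt, l, '/');
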
@@ -455,8 +431,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
case Opt_ip:
err = ceph_parse_ips(param->string,
param->string + param->size,
- &opt->my_addr,
- 1, NULL);
+ &opt->my_addr, 1, NULL, ',');
if (err) {
error_plog(&log, "Failed to parse ip: %d", err);
return err;
@@ -465,7 +440,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
break;
case Opt_fsid:
- err = parse_fsid(param->string, &opt->fsid);
+ err = ceph_parse_fsid(param->string, &opt->fsid);
if (err) {
error_plog(&log, "Failed to parse fsid: %d", err);
return err;
@@ -611,6 +586,9 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
case Opt_abort_on_full:
opt->flags |= CEPH_OPT_ABORT_ON_FULL;
break;
+ case Opt_rxbounce:
+ opt->flags |= CEPH_OPT_RXBOUNCE;
+ break;
default:
BUG();
@@ -687,6 +665,8 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
seq_puts(m, "notcp_nodelay,");
if (show_all && (opt->flags & CEPH_OPT_ABORT_ON_FULL))
seq_puts(m, "abort_on_full,");
+ if (opt->flags & CEPH_OPT_RXBOUNCE)
+ seq_puts(m, "rxbounce,");
if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
seq_printf(m, "mount_timeout=%d,",
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 7057f8db4f99..1daf95e17d67 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -906,7 +906,6 @@ int crush_do_rule(const struct crush_map *map,
int recurse_to_leaf;
int wsize = 0;
int osize;
- int *tmp;
const struct crush_rule *rule;
__u32 step;
int i, j;
@@ -1073,9 +1072,7 @@ int crush_do_rule(const struct crush_map *map,
memcpy(o, c, osize*sizeof(*o));
/* swap o and w arrays */
- tmp = o;
- o = w;
- w = tmp;
+ swap(o, w);
wsize = osize;
break;
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 92d89b331645..051d22c0e4ad 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -147,7 +147,7 @@ void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
/*
- * Should be used for buffers allocated with ceph_kvmalloc().
+ * Should be used for buffers allocated with kvmalloc().
* Currently these are encrypt out-buffer (ceph_buffer) and decrypt
* in-buffer (msg front).
*
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 57d043b382ed..d3bb656308b4 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -515,6 +515,10 @@ static void ceph_con_reset_protocol(struct ceph_connection *con)
ceph_msg_put(con->out_msg);
con->out_msg = NULL;
}
+ if (con->bounce_page) {
+ __free_page(con->bounce_page);
+ con->bounce_page = NULL;
+ }
if (ceph_msgr2(from_msgr(con->msgr)))
ceph_con_v2_reset_protocol(con);
@@ -1267,30 +1271,31 @@ static int ceph_parse_server_name(const char *name, size_t namelen,
*/
int ceph_parse_ips(const char *c, const char *end,
struct ceph_entity_addr *addr,
- int max_count, int *count)
+ int max_count, int *count, char delim)
{
int i, ret = -EINVAL;
const char *p = c;
dout("parse_ips on '%.*s'\n", (int)(end-c), c);
for (i = 0; i < max_count; i++) {
+ char cur_delim = delim;
const char *ipend;
int port;
- char delim = ',';
if (*p == '[') {
- delim = ']';
+ cur_delim = ']';
p++;
}
- ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend);
+ ret = ceph_parse_server_name(p, end - p, &addr[i], cur_delim,
+ &ipend);
if (ret)
goto bad;
ret = -EINVAL;
p = ipend;
- if (delim == ']') {
+ if (cur_delim == ']') {
if (*p != ']') {
dout("missing matching ']'\n");
goto bad;
@@ -1326,11 +1331,11 @@ int ceph_parse_ips(const char *c, const char *end,
addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY;
addr[i].nonce = 0;
- dout("parse_ips got %s\n", ceph_pr_addr(&addr[i]));
+ dout("%s got %s\n", __func__, ceph_pr_addr(&addr[i]));
if (p == end)
break;
- if (*p != ',')
+ if (*p != delim)
goto bad;
p++;
}
@@ -1920,7 +1925,7 @@ struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
/* front */
if (front_len) {
- m->front.iov_base = ceph_kvmalloc(front_len, flags);
+ m->front.iov_base = kvmalloc(front_len, flags);
if (m->front.iov_base == NULL) {
dout("ceph_msg_new can't allocate %d bytes\n",
front_len);
diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c
index 2cb5ffdf071a..6b014eca3a13 100644
--- a/net/ceph/messenger_v1.c
+++ b/net/ceph/messenger_v1.c
@@ -992,8 +992,7 @@ static int read_partial_message_section(struct ceph_connection *con,
static int read_partial_msg_data(struct ceph_connection *con)
{
- struct ceph_msg *msg = con->in_msg;
- struct ceph_msg_data_cursor *cursor = &msg->cursor;
+ struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
struct page *page;
size_t page_offset;
@@ -1001,9 +1000,6 @@ static int read_partial_msg_data(struct ceph_connection *con)
u32 crc = 0;
int ret;
- if (!msg->num_data_items)
- return -EIO;
-
if (do_datacrc)
crc = con->in_data_crc;
while (cursor->total_resid) {
@@ -1031,6 +1027,46 @@ static int read_partial_msg_data(struct ceph_connection *con)
return 1; /* must return > 0 to indicate success */
}
+static int read_partial_msg_data_bounce(struct ceph_connection *con)
+{
+ struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
+ struct page *page;
+ size_t off, len;
+ u32 crc;
+ int ret;
+
+ if (unlikely(!con->bounce_page)) {
+ con->bounce_page = alloc_page(GFP_NOIO);
+ if (!con->bounce_page) {
+ pr_err("failed to allocate bounce page\n");
+ return -ENOMEM;
+ }
+ }
+
+ crc = con->in_data_crc;
+ while (cursor->total_resid) {
+ if (!cursor->resid) {
+ ceph_msg_data_advance(cursor, 0);
+ continue;
+ }
+
+ page = ceph_msg_data_next(cursor, &off, &len, NULL);
+ ret = ceph_tcp_recvpage(con->sock, con->bounce_page, 0, len);
+ if (ret <= 0) {
+ con->in_data_crc = crc;
+ return ret;
+ }
+
+ crc = crc32c(crc, page_address(con->bounce_page), ret);
+ memcpy_to_page(page, off, page_address(con->bounce_page), ret);
+
+ ceph_msg_data_advance(cursor, ret);
+ }
+ con->in_data_crc = crc;
+
+ return 1; /* must return > 0 to indicate success */
+}
+
/*
* read (part of) a message.
*/
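
read_partial_msg_data_bounce() exists because the destination pages of an incoming reply may be visible to, and modified by, others while the read is in flight (e.g. pages backing a mapped rbd image); computing the CRC directly over such a page can then disagree with the bytes that were actually received and copied, producing spurious integrity errors. Receiving into a private bounce page and doing both the crc32c and the copy from that page removes the race at the cost of one extra memcpy per chunk; the new rxbounce option (see the ceph_common.c hunks above) opts a client into this path. The invariant, condensed from the loop above rather than reimplemented:

	/* CRC and copy both read the stable, kernel-owned bounce page,
	 * never the shared destination page. */
	ret = ceph_tcp_recvpage(con->sock, con->bounce_page, 0, len);
	if (ret > 0) {
		crc = crc32c(crc, page_address(con->bounce_page), ret);
		memcpy_to_page(page, off, page_address(con->bounce_page), ret);
	}
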
@@ -1141,7 +1177,13 @@ static int read_partial_message(struct ceph_connection *con)
/* (page) data */
if (data_len) {
- ret = read_partial_msg_data(con);
+ if (!m->num_data_items)
+ return -EIO;
+
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
+ ret = read_partial_msg_data_bounce(con);
+ else
+ ret = read_partial_msg_data(con);
if (ret <= 0)
return ret;
}
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index cc40ce4e02fb..c6e5bfc717d5 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -57,8 +57,9 @@
#define IN_S_HANDLE_CONTROL_REMAINDER 3
#define IN_S_PREPARE_READ_DATA 4
#define IN_S_PREPARE_READ_DATA_CONT 5
-#define IN_S_HANDLE_EPILOGUE 6
-#define IN_S_FINISH_SKIP 7
+#define IN_S_PREPARE_READ_ENC_PAGE 6
+#define IN_S_HANDLE_EPILOGUE 7
+#define IN_S_FINISH_SKIP 8
#define OUT_S_QUEUE_DATA 1
#define OUT_S_QUEUE_DATA_CONT 2
@@ -308,7 +309,7 @@ static void *alloc_conn_buf(struct ceph_connection *con, int len)
if (WARN_ON(con->v2.conn_buf_cnt >= ARRAY_SIZE(con->v2.conn_bufs)))
return NULL;
- buf = ceph_kvmalloc(len, GFP_NOIO);
+ buf = kvmalloc(len, GFP_NOIO);
if (!buf)
return NULL;
@@ -1032,22 +1033,41 @@ static int decrypt_control_remainder(struct ceph_connection *con)
padded_len(rem_len) + CEPH_GCM_TAG_LEN);
}
-static int decrypt_message(struct ceph_connection *con)
+static int decrypt_tail(struct ceph_connection *con)
{
+ struct sg_table enc_sgt = {};
struct sg_table sgt = {};
+ int tail_len;
int ret;
+ tail_len = tail_onwire_len(con->in_msg, true);
+ ret = sg_alloc_table_from_pages(&enc_sgt, con->v2.in_enc_pages,
+ con->v2.in_enc_page_cnt, 0, tail_len,
+ GFP_NOIO);
+ if (ret)
+ goto out;
+
ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf),
MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
con->v2.in_buf, true);
if (ret)
goto out;
- ret = gcm_crypt(con, false, sgt.sgl, sgt.sgl,
- tail_onwire_len(con->in_msg, true));
+ dout("%s con %p msg %p enc_page_cnt %d sg_cnt %d\n", __func__, con,
+ con->in_msg, con->v2.in_enc_page_cnt, sgt.orig_nents);
+ ret = gcm_crypt(con, false, enc_sgt.sgl, sgt.sgl, tail_len);
+ if (ret)
+ goto out;
+
+ WARN_ON(!con->v2.in_enc_page_cnt);
+ ceph_release_page_vector(con->v2.in_enc_pages,
+ con->v2.in_enc_page_cnt);
+ con->v2.in_enc_pages = NULL;
+ con->v2.in_enc_page_cnt = 0;
out:
sg_free_table(&sgt);
+ sg_free_table(&enc_sgt);
return ret;
}
@@ -1733,54 +1753,153 @@ static int prepare_read_control_remainder(struct ceph_connection *con)
return 0;
}
-static void prepare_read_data(struct ceph_connection *con)
+static int prepare_read_data(struct ceph_connection *con)
{
struct bio_vec bv;
- if (!con_secure(con))
- con->in_data_crc = -1;
+ con->in_data_crc = -1;
ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg,
data_len(con->in_msg));
get_bvec_at(&con->v2.in_cursor, &bv);
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ if (unlikely(!con->bounce_page)) {
+ con->bounce_page = alloc_page(GFP_NOIO);
+ if (!con->bounce_page) {
+ pr_err("failed to allocate bounce page\n");
+ return -ENOMEM;
+ }
+ }
+
+ bv.bv_page = con->bounce_page;
+ bv.bv_offset = 0;
+ }
set_in_bvec(con, &bv);
con->v2.in_state = IN_S_PREPARE_READ_DATA_CONT;
+ return 0;
}
static void prepare_read_data_cont(struct ceph_connection *con)
{
struct bio_vec bv;
- if (!con_secure(con))
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ con->in_data_crc = crc32c(con->in_data_crc,
+ page_address(con->bounce_page),
+ con->v2.in_bvec.bv_len);
+
+ get_bvec_at(&con->v2.in_cursor, &bv);
+ memcpy_to_page(bv.bv_page, bv.bv_offset,
+ page_address(con->bounce_page),
+ con->v2.in_bvec.bv_len);
+ } else {
con->in_data_crc = ceph_crc32c_page(con->in_data_crc,
con->v2.in_bvec.bv_page,
con->v2.in_bvec.bv_offset,
con->v2.in_bvec.bv_len);
+ }
ceph_msg_data_advance(&con->v2.in_cursor, con->v2.in_bvec.bv_len);
if (con->v2.in_cursor.total_resid) {
get_bvec_at(&con->v2.in_cursor, &bv);
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ bv.bv_page = con->bounce_page;
+ bv.bv_offset = 0;
+ }
set_in_bvec(con, &bv);
WARN_ON(con->v2.in_state != IN_S_PREPARE_READ_DATA_CONT);
return;
}
/*
- * We've read all data. Prepare to read data padding (if any)
- * and epilogue.
+ * We've read all data. Prepare to read epilogue.
*/
reset_in_kvecs(con);
- if (con_secure(con)) {
- if (need_padding(data_len(con->in_msg)))
- add_in_kvec(con, DATA_PAD(con->v2.in_buf),
- padding_len(data_len(con->in_msg)));
- add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_SECURE_LEN);
+ add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
+ con->v2.in_state = IN_S_HANDLE_EPILOGUE;
+}
+
+static int prepare_read_tail_plain(struct ceph_connection *con)
+{
+ struct ceph_msg *msg = con->in_msg;
+
+ if (!front_len(msg) && !middle_len(msg)) {
+ WARN_ON(!data_len(msg));
+ return prepare_read_data(con);
+ }
+
+ reset_in_kvecs(con);
+ if (front_len(msg)) {
+ add_in_kvec(con, msg->front.iov_base, front_len(msg));
+ WARN_ON(msg->front.iov_len != front_len(msg));
+ }
+ if (middle_len(msg)) {
+ add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
+ WARN_ON(msg->middle->vec.iov_len != middle_len(msg));
+ }
+
+ if (data_len(msg)) {
+ con->v2.in_state = IN_S_PREPARE_READ_DATA;
} else {
add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
+ con->v2.in_state = IN_S_HANDLE_EPILOGUE;
}
+ return 0;
+}
+
+static void prepare_read_enc_page(struct ceph_connection *con)
+{
+ struct bio_vec bv;
+
+ dout("%s con %p i %d resid %d\n", __func__, con, con->v2.in_enc_i,
+ con->v2.in_enc_resid);
+ WARN_ON(!con->v2.in_enc_resid);
+
+ bv.bv_page = con->v2.in_enc_pages[con->v2.in_enc_i];
+ bv.bv_offset = 0;
+ bv.bv_len = min(con->v2.in_enc_resid, (int)PAGE_SIZE);
+
+ set_in_bvec(con, &bv);
+ con->v2.in_enc_i++;
+ con->v2.in_enc_resid -= bv.bv_len;
+
+ if (con->v2.in_enc_resid) {
+ con->v2.in_state = IN_S_PREPARE_READ_ENC_PAGE;
+ return;
+ }
+
+ /*
+ * We are set to read the last piece of ciphertext (ending
+ * with epilogue) + auth tag.
+ */
+ WARN_ON(con->v2.in_enc_i != con->v2.in_enc_page_cnt);
con->v2.in_state = IN_S_HANDLE_EPILOGUE;
}
+static int prepare_read_tail_secure(struct ceph_connection *con)
+{
+ struct page **enc_pages;
+ int enc_page_cnt;
+ int tail_len;
+
+ tail_len = tail_onwire_len(con->in_msg, true);
+ WARN_ON(!tail_len);
+
+ enc_page_cnt = calc_pages_for(0, tail_len);
+ enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO);
+ if (IS_ERR(enc_pages))
+ return PTR_ERR(enc_pages);
+
+ WARN_ON(con->v2.in_enc_pages || con->v2.in_enc_page_cnt);
+ con->v2.in_enc_pages = enc_pages;
+ con->v2.in_enc_page_cnt = enc_page_cnt;
+ con->v2.in_enc_resid = tail_len;
+ con->v2.in_enc_i = 0;
+
+ prepare_read_enc_page(con);
+ return 0;
+}
+
static void __finish_skip(struct ceph_connection *con)
{
con->in_seq++;
@@ -2589,47 +2708,26 @@ static int __handle_control(struct ceph_connection *con, void *p)
}
msg = con->in_msg; /* set in process_message_header() */
- if (!front_len(msg) && !middle_len(msg)) {
- if (!data_len(msg))
- return process_message(con);
-
- prepare_read_data(con);
- return 0;
- }
-
- reset_in_kvecs(con);
if (front_len(msg)) {
WARN_ON(front_len(msg) > msg->front_alloc_len);
- add_in_kvec(con, msg->front.iov_base, front_len(msg));
msg->front.iov_len = front_len(msg);
-
- if (con_secure(con) && need_padding(front_len(msg)))
- add_in_kvec(con, FRONT_PAD(con->v2.in_buf),
- padding_len(front_len(msg)));
} else {
msg->front.iov_len = 0;
}
if (middle_len(msg)) {
WARN_ON(middle_len(msg) > msg->middle->alloc_len);
- add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
msg->middle->vec.iov_len = middle_len(msg);
-
- if (con_secure(con) && need_padding(middle_len(msg)))
- add_in_kvec(con, MIDDLE_PAD(con->v2.in_buf),
- padding_len(middle_len(msg)));
} else if (msg->middle) {
msg->middle->vec.iov_len = 0;
}
- if (data_len(msg)) {
- con->v2.in_state = IN_S_PREPARE_READ_DATA;
- } else {
- add_in_kvec(con, con->v2.in_buf,
- con_secure(con) ? CEPH_EPILOGUE_SECURE_LEN :
- CEPH_EPILOGUE_PLAIN_LEN);
- con->v2.in_state = IN_S_HANDLE_EPILOGUE;
- }
- return 0;
+ if (!front_len(msg) && !middle_len(msg) && !data_len(msg))
+ return process_message(con);
+
+ if (con_secure(con))
+ return prepare_read_tail_secure(con);
+
+ return prepare_read_tail_plain(con);
}
static int handle_preamble(struct ceph_connection *con)
@@ -2717,7 +2815,7 @@ static int handle_epilogue(struct ceph_connection *con)
int ret;
if (con_secure(con)) {
- ret = decrypt_message(con);
+ ret = decrypt_tail(con);
if (ret) {
if (ret == -EBADMSG)
con->error_msg = "integrity error, bad epilogue auth tag";
@@ -2785,13 +2883,16 @@ static int populate_in_iter(struct ceph_connection *con)
ret = handle_control_remainder(con);
break;
case IN_S_PREPARE_READ_DATA:
- prepare_read_data(con);
- ret = 0;
+ ret = prepare_read_data(con);
break;
case IN_S_PREPARE_READ_DATA_CONT:
prepare_read_data_cont(con);
ret = 0;
break;
+ case IN_S_PREPARE_READ_ENC_PAGE:
+ prepare_read_enc_page(con);
+ ret = 0;
+ break;
case IN_S_HANDLE_EPILOGUE:
ret = handle_epilogue(con);
break;
@@ -3326,20 +3427,16 @@ void ceph_con_v2_revoke(struct ceph_connection *con)
static void revoke_at_prepare_read_data(struct ceph_connection *con)
{
- int remaining; /* data + [data padding] + epilogue */
+ int remaining;
int resid;
+ WARN_ON(con_secure(con));
WARN_ON(!data_len(con->in_msg));
WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
resid = iov_iter_count(&con->v2.in_iter);
WARN_ON(!resid);
- if (con_secure(con))
- remaining = padded_len(data_len(con->in_msg)) +
- CEPH_EPILOGUE_SECURE_LEN;
- else
- remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN;
-
+ remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN;
dout("%s con %p resid %d remaining %d\n", __func__, con, resid,
remaining);
con->v2.in_iter.count -= resid;
@@ -3350,8 +3447,9 @@ static void revoke_at_prepare_read_data(struct ceph_connection *con)
static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
{
int recved, resid; /* current piece of data */
- int remaining; /* [data padding] + epilogue */
+ int remaining;
+ WARN_ON(con_secure(con));
WARN_ON(!data_len(con->in_msg));
WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
resid = iov_iter_count(&con->v2.in_iter);
@@ -3363,12 +3461,7 @@ static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
ceph_msg_data_advance(&con->v2.in_cursor, recved);
WARN_ON(resid > con->v2.in_cursor.total_resid);
- if (con_secure(con))
- remaining = padding_len(data_len(con->in_msg)) +
- CEPH_EPILOGUE_SECURE_LEN;
- else
- remaining = CEPH_EPILOGUE_PLAIN_LEN;
-
+ remaining = CEPH_EPILOGUE_PLAIN_LEN;
dout("%s con %p total_resid %zu remaining %d\n", __func__, con,
con->v2.in_cursor.total_resid, remaining);
con->v2.in_iter.count -= resid;
@@ -3376,11 +3469,26 @@ static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
con->v2.in_state = IN_S_FINISH_SKIP;
}
+static void revoke_at_prepare_read_enc_page(struct ceph_connection *con)
+{
+ int resid; /* current enc page (not necessarily data) */
+
+ WARN_ON(!con_secure(con));
+ WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
+ resid = iov_iter_count(&con->v2.in_iter);
+ WARN_ON(!resid || resid > con->v2.in_bvec.bv_len);
+
+ dout("%s con %p resid %d enc_resid %d\n", __func__, con, resid,
+ con->v2.in_enc_resid);
+ con->v2.in_iter.count -= resid;
+ set_in_skip(con, resid + con->v2.in_enc_resid);
+ con->v2.in_state = IN_S_FINISH_SKIP;
+}
+
static void revoke_at_handle_epilogue(struct ceph_connection *con)
{
int resid;
- WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
resid = iov_iter_count(&con->v2.in_iter);
WARN_ON(!resid);
@@ -3399,6 +3507,9 @@ void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
case IN_S_PREPARE_READ_DATA_CONT:
revoke_at_prepare_read_data_cont(con);
break;
+ case IN_S_PREPARE_READ_ENC_PAGE:
+ revoke_at_prepare_read_enc_page(con);
+ break;
case IN_S_HANDLE_EPILOGUE:
revoke_at_handle_epilogue(con);
break;
@@ -3432,6 +3543,13 @@ void ceph_con_v2_reset_protocol(struct ceph_connection *con)
clear_out_sign_kvecs(con);
free_conn_bufs(con);
+ if (con->v2.in_enc_pages) {
+ WARN_ON(!con->v2.in_enc_page_cnt);
+ ceph_release_page_vector(con->v2.in_enc_pages,
+ con->v2.in_enc_page_cnt);
+ con->v2.in_enc_pages = NULL;
+ con->v2.in_enc_page_cnt = 0;
+ }
if (con->v2.out_enc_pages) {
WARN_ON(!con->v2.out_enc_page_cnt);
ceph_release_page_vector(con->v2.out_enc_pages,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 1c5815530e0d..9d82bb42e958 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -537,43 +537,6 @@ static void request_init(struct ceph_osd_request *req)
target_init(&req->r_t);
}
-/*
- * This is ugly, but it allows us to reuse linger registration and ping
- * requests, keeping the structure of the code around send_linger{_ping}()
- * reasonable. Setting up a min_nr=2 mempool for each linger request
- * and dealing with copying ops (this blasts req only, watch op remains
- * intact) isn't any better.
- */
-static void request_reinit(struct ceph_osd_request *req)
-{
- struct ceph_osd_client *osdc = req->r_osdc;
- bool mempool = req->r_mempool;
- unsigned int num_ops = req->r_num_ops;
- u64 snapid = req->r_snapid;
- struct ceph_snap_context *snapc = req->r_snapc;
- bool linger = req->r_linger;
- struct ceph_msg *request_msg = req->r_request;
- struct ceph_msg *reply_msg = req->r_reply;
-
- dout("%s req %p\n", __func__, req);
- WARN_ON(kref_read(&req->r_kref) != 1);
- request_release_checks(req);
-
- WARN_ON(kref_read(&request_msg->kref) != 1);
- WARN_ON(kref_read(&reply_msg->kref) != 1);
- target_destroy(&req->r_t);
-
- request_init(req);
- req->r_osdc = osdc;
- req->r_mempool = mempool;
- req->r_num_ops = num_ops;
- req->r_snapid = snapid;
- req->r_snapc = snapc;
- req->r_linger = linger;
- req->r_request = request_msg;
- req->r_reply = reply_msg;
-}
-
struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
struct ceph_snap_context *snapc,
unsigned int num_ops,
@@ -918,14 +881,30 @@ EXPORT_SYMBOL(osd_req_op_xattr_init);
* @watch_opcode: CEPH_OSD_WATCH_OP_*
*/
static void osd_req_op_watch_init(struct ceph_osd_request *req, int which,
- u64 cookie, u8 watch_opcode)
+ u8 watch_opcode, u64 cookie, u32 gen)
{
struct ceph_osd_req_op *op;
op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0);
op->watch.cookie = cookie;
op->watch.op = watch_opcode;
- op->watch.gen = 0;
+ op->watch.gen = gen;
+}
+
+/*
+ * prot_ver, timeout and notify payload (may be empty) should already be
+ * encoded in @request_pl
+ */
+static void osd_req_op_notify_init(struct ceph_osd_request *req, int which,
+ u64 cookie, struct ceph_pagelist *request_pl)
+{
+ struct ceph_osd_req_op *op;
+
+ op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
+ op->notify.cookie = cookie;
+
+ ceph_osd_data_pagelist_init(&op->notify.request_data, request_pl);
+ op->indata_len = request_pl->length;
}
/*
@@ -2385,7 +2364,11 @@ again:
if (ceph_test_opt(osdc->client, ABORT_ON_FULL)) {
err = -ENOSPC;
} else {
- pr_warn_ratelimited("FULL or reached pool quota\n");
+ if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL))
+ pr_warn_ratelimited("cluster is full (osdmap FULL)\n");
+ else
+ pr_warn_ratelimited("pool %lld is full or reached quota\n",
+ req->r_t.base_oloc.pool);
req->r_t.paused = true;
maybe_request_map(osdc);
}
@@ -2727,10 +2710,13 @@ static void linger_release(struct kref *kref)
WARN_ON(!list_empty(&lreq->pending_lworks));
WARN_ON(lreq->osd);
- if (lreq->reg_req)
- ceph_osdc_put_request(lreq->reg_req);
- if (lreq->ping_req)
- ceph_osdc_put_request(lreq->ping_req);
+ if (lreq->request_pl)
+ ceph_pagelist_release(lreq->request_pl);
+ if (lreq->notify_id_pages)
+ ceph_release_page_vector(lreq->notify_id_pages, 1);
+
+ ceph_osdc_put_request(lreq->reg_req);
+ ceph_osdc_put_request(lreq->ping_req);
target_destroy(&lreq->t);
kfree(lreq);
}
@@ -2999,6 +2985,12 @@ static void linger_commit_cb(struct ceph_osd_request *req)
struct ceph_osd_linger_request *lreq = req->r_priv;
mutex_lock(&lreq->lock);
+ if (req != lreq->reg_req) {
+ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+ __func__, lreq, lreq->linger_id, req, lreq->reg_req);
+ goto out;
+ }
+
dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq,
lreq->linger_id, req->r_result);
linger_reg_commit_complete(lreq, req->r_result);
@@ -3022,6 +3014,7 @@ static void linger_commit_cb(struct ceph_osd_request *req)
}
}
+out:
mutex_unlock(&lreq->lock);
linger_put(lreq);
}
@@ -3044,6 +3037,12 @@ static void linger_reconnect_cb(struct ceph_osd_request *req)
struct ceph_osd_linger_request *lreq = req->r_priv;
mutex_lock(&lreq->lock);
+ if (req != lreq->reg_req) {
+ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+ __func__, lreq, lreq->linger_id, req, lreq->reg_req);
+ goto out;
+ }
+
dout("%s lreq %p linger_id %llu result %d last_error %d\n", __func__,
lreq, lreq->linger_id, req->r_result, lreq->last_error);
if (req->r_result < 0) {
@@ -3053,46 +3052,64 @@ static void linger_reconnect_cb(struct ceph_osd_request *req)
}
}
+out:
mutex_unlock(&lreq->lock);
linger_put(lreq);
}
static void send_linger(struct ceph_osd_linger_request *lreq)
{
- struct ceph_osd_request *req = lreq->reg_req;
- struct ceph_osd_req_op *op = &req->r_ops[0];
+ struct ceph_osd_client *osdc = lreq->osdc;
+ struct ceph_osd_request *req;
+ int ret;
- verify_osdc_wrlocked(req->r_osdc);
+ verify_osdc_wrlocked(osdc);
+ mutex_lock(&lreq->lock);
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
- if (req->r_osd)
- cancel_linger_request(req);
+ if (lreq->reg_req) {
+ if (lreq->reg_req->r_osd)
+ cancel_linger_request(lreq->reg_req);
+ ceph_osdc_put_request(lreq->reg_req);
+ }
+
+ req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
+ BUG_ON(!req);
- request_reinit(req);
target_copy(&req->r_t, &lreq->t);
req->r_mtime = lreq->mtime;
- mutex_lock(&lreq->lock);
if (lreq->is_watch && lreq->committed) {
- WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
- op->watch.cookie != lreq->linger_id);
- op->watch.op = CEPH_OSD_WATCH_OP_RECONNECT;
- op->watch.gen = ++lreq->register_gen;
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_RECONNECT,
+ lreq->linger_id, ++lreq->register_gen);
dout("lreq %p reconnect register_gen %u\n", lreq,
- op->watch.gen);
+ req->r_ops[0].watch.gen);
req->r_callback = linger_reconnect_cb;
} else {
- if (!lreq->is_watch)
+ if (lreq->is_watch) {
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_WATCH,
+ lreq->linger_id, 0);
+ } else {
lreq->notify_id = 0;
- else
- WARN_ON(op->watch.op != CEPH_OSD_WATCH_OP_WATCH);
+
+ refcount_inc(&lreq->request_pl->refcnt);
+ osd_req_op_notify_init(req, 0, lreq->linger_id,
+ lreq->request_pl);
+ ceph_osd_data_pages_init(
+ osd_req_op_data(req, 0, notify, response_data),
+ lreq->notify_id_pages, PAGE_SIZE, 0, false, false);
+ }
dout("lreq %p register\n", lreq);
req->r_callback = linger_commit_cb;
}
- mutex_unlock(&lreq->lock);
+
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ BUG_ON(ret);
req->r_priv = linger_get(lreq);
req->r_linger = true;
+ lreq->reg_req = req;
+ mutex_unlock(&lreq->lock);
submit_request(req, true);
}
@@ -3102,6 +3119,12 @@ static void linger_ping_cb(struct ceph_osd_request *req)
struct ceph_osd_linger_request *lreq = req->r_priv;
mutex_lock(&lreq->lock);
+ if (req != lreq->ping_req) {
+ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+ __func__, lreq, lreq->linger_id, req, lreq->ping_req);
+ goto out;
+ }
+
dout("%s lreq %p linger_id %llu result %d ping_sent %lu last_error %d\n",
__func__, lreq, lreq->linger_id, req->r_result, lreq->ping_sent,
lreq->last_error);
@@ -3117,6 +3140,7 @@ static void linger_ping_cb(struct ceph_osd_request *req)
lreq->register_gen, req->r_ops[0].watch.gen);
}
+out:
mutex_unlock(&lreq->lock);
linger_put(lreq);
}
@@ -3124,8 +3148,8 @@ static void linger_ping_cb(struct ceph_osd_request *req)
static void send_linger_ping(struct ceph_osd_linger_request *lreq)
{
struct ceph_osd_client *osdc = lreq->osdc;
- struct ceph_osd_request *req = lreq->ping_req;
- struct ceph_osd_req_op *op = &req->r_ops[0];
+ struct ceph_osd_request *req;
+ int ret;
if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) {
dout("%s PAUSERD\n", __func__);
@@ -3137,19 +3161,26 @@ static void send_linger_ping(struct ceph_osd_linger_request *lreq)
__func__, lreq, lreq->linger_id, lreq->ping_sent,
lreq->register_gen);
- if (req->r_osd)
- cancel_linger_request(req);
+ if (lreq->ping_req) {
+ if (lreq->ping_req->r_osd)
+ cancel_linger_request(lreq->ping_req);
+ ceph_osdc_put_request(lreq->ping_req);
+ }
- request_reinit(req);
- target_copy(&req->r_t, &lreq->t);
+ req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
+ BUG_ON(!req);
- WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
- op->watch.cookie != lreq->linger_id ||
- op->watch.op != CEPH_OSD_WATCH_OP_PING);
- op->watch.gen = lreq->register_gen;
+ target_copy(&req->r_t, &lreq->t);
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_PING, lreq->linger_id,
+ lreq->register_gen);
req->r_callback = linger_ping_cb;
+
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ BUG_ON(ret);
+
req->r_priv = linger_get(lreq);
req->r_linger = true;
+ lreq->ping_req = req;
ceph_osdc_get_request(req);
account_request(req);
@@ -3165,12 +3196,6 @@ static void linger_submit(struct ceph_osd_linger_request *lreq)
down_write(&osdc->lock);
linger_register(lreq);
- if (lreq->is_watch) {
- lreq->reg_req->r_ops[0].watch.cookie = lreq->linger_id;
- lreq->ping_req->r_ops[0].watch.cookie = lreq->linger_id;
- } else {
- lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id;
- }
calc_target(osdc, &lreq->t, false);
osd = lookup_create_osd(osdc, lreq->t.osd, true);
@@ -3202,9 +3227,9 @@ static void cancel_linger_map_check(struct ceph_osd_linger_request *lreq)
*/
static void __linger_cancel(struct ceph_osd_linger_request *lreq)
{
- if (lreq->is_watch && lreq->ping_req->r_osd)
+ if (lreq->ping_req && lreq->ping_req->r_osd)
cancel_linger_request(lreq->ping_req);
- if (lreq->reg_req->r_osd)
+ if (lreq->reg_req && lreq->reg_req->r_osd)
cancel_linger_request(lreq->reg_req);
cancel_linger_map_check(lreq);
unlink_linger(lreq->osd, lreq);
@@ -4566,8 +4591,13 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
EXPORT_SYMBOL(ceph_osdc_start_request);
/*
- * Unregister a registered request. The request is not completed:
- * ->r_result isn't set and __complete_request() isn't called.
+ * Unregister request. If @req was registered, it isn't completed:
+ * r_result isn't set and __complete_request() isn't invoked.
+ *
+ * If @req wasn't registered, this call may have raced with
+ * handle_reply(), in which case r_result would already be set and
+ * __complete_request() would be getting invoked, possibly even
+ * concurrently with this call.
*/
void ceph_osdc_cancel_request(struct ceph_osd_request *req)
{
@@ -4653,43 +4683,6 @@ again:
}
EXPORT_SYMBOL(ceph_osdc_sync);
-static struct ceph_osd_request *
-alloc_linger_request(struct ceph_osd_linger_request *lreq)
-{
- struct ceph_osd_request *req;
-
- req = ceph_osdc_alloc_request(lreq->osdc, NULL, 1, false, GFP_NOIO);
- if (!req)
- return NULL;
-
- ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
- ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
- return req;
-}
-
-static struct ceph_osd_request *
-alloc_watch_request(struct ceph_osd_linger_request *lreq, u8 watch_opcode)
-{
- struct ceph_osd_request *req;
-
- req = alloc_linger_request(lreq);
- if (!req)
- return NULL;
-
- /*
- * Pass 0 for cookie because we don't know it yet, it will be
- * filled in by linger_submit().
- */
- osd_req_op_watch_init(req, 0, 0, watch_opcode);
-
- if (ceph_osdc_alloc_messages(req, GFP_NOIO)) {
- ceph_osdc_put_request(req);
- return NULL;
- }
-
- return req;
-}
-
/*
* Returns a handle, caller owns a ref.
*/
@@ -4719,18 +4712,6 @@ ceph_osdc_watch(struct ceph_osd_client *osdc,
lreq->t.flags = CEPH_OSD_FLAG_WRITE;
ktime_get_real_ts64(&lreq->mtime);
- lreq->reg_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_WATCH);
- if (!lreq->reg_req) {
- ret = -ENOMEM;
- goto err_put_lreq;
- }
-
- lreq->ping_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_PING);
- if (!lreq->ping_req) {
- ret = -ENOMEM;
- goto err_put_lreq;
- }
-
linger_submit(lreq);
ret = linger_reg_commit_wait(lreq);
if (ret) {
@@ -4768,8 +4749,8 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
req->r_flags = CEPH_OSD_FLAG_WRITE;
ktime_get_real_ts64(&req->r_mtime);
- osd_req_op_watch_init(req, 0, lreq->linger_id,
- CEPH_OSD_WATCH_OP_UNWATCH);
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_UNWATCH,
+ lreq->linger_id, 0);
ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
if (ret)
@@ -4855,35 +4836,6 @@ out_put_req:
}
EXPORT_SYMBOL(ceph_osdc_notify_ack);
-static int osd_req_op_notify_init(struct ceph_osd_request *req, int which,
- u64 cookie, u32 prot_ver, u32 timeout,
- void *payload, u32 payload_len)
-{
- struct ceph_osd_req_op *op;
- struct ceph_pagelist *pl;
- int ret;
-
- op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
- op->notify.cookie = cookie;
-
- pl = ceph_pagelist_alloc(GFP_NOIO);
- if (!pl)
- return -ENOMEM;
-
- ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */
- ret |= ceph_pagelist_encode_32(pl, timeout);
- ret |= ceph_pagelist_encode_32(pl, payload_len);
- ret |= ceph_pagelist_append(pl, payload, payload_len);
- if (ret) {
- ceph_pagelist_release(pl);
- return -ENOMEM;
- }
-
- ceph_osd_data_pagelist_init(&op->notify.request_data, pl);
- op->indata_len = pl->length;
- return 0;
-}
-
/*
* @timeout: in seconds
*
@@ -4902,7 +4854,6 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
size_t *preply_len)
{
struct ceph_osd_linger_request *lreq;
- struct page **pages;
int ret;
WARN_ON(!timeout);
@@ -4915,41 +4866,35 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
if (!lreq)
return -ENOMEM;
- lreq->preply_pages = preply_pages;
- lreq->preply_len = preply_len;
-
- ceph_oid_copy(&lreq->t.base_oid, oid);
- ceph_oloc_copy(&lreq->t.base_oloc, oloc);
- lreq->t.flags = CEPH_OSD_FLAG_READ;
-
- lreq->reg_req = alloc_linger_request(lreq);
- if (!lreq->reg_req) {
+ lreq->request_pl = ceph_pagelist_alloc(GFP_NOIO);
+ if (!lreq->request_pl) {
ret = -ENOMEM;
goto out_put_lreq;
}
- /*
- * Pass 0 for cookie because we don't know it yet, it will be
- * filled in by linger_submit().
- */
- ret = osd_req_op_notify_init(lreq->reg_req, 0, 0, 1, timeout,
- payload, payload_len);
- if (ret)
+ ret = ceph_pagelist_encode_32(lreq->request_pl, 1); /* prot_ver */
+ ret |= ceph_pagelist_encode_32(lreq->request_pl, timeout);
+ ret |= ceph_pagelist_encode_32(lreq->request_pl, payload_len);
+ ret |= ceph_pagelist_append(lreq->request_pl, payload, payload_len);
+ if (ret) {
+ ret = -ENOMEM;
goto out_put_lreq;
+ }
/* for notify_id */
- pages = ceph_alloc_page_vector(1, GFP_NOIO);
- if (IS_ERR(pages)) {
- ret = PTR_ERR(pages);
+ lreq->notify_id_pages = ceph_alloc_page_vector(1, GFP_NOIO);
+ if (IS_ERR(lreq->notify_id_pages)) {
+ ret = PTR_ERR(lreq->notify_id_pages);
+ lreq->notify_id_pages = NULL;
goto out_put_lreq;
}
- ceph_osd_data_pages_init(osd_req_op_data(lreq->reg_req, 0, notify,
- response_data),
- pages, PAGE_SIZE, 0, false, true);
- ret = ceph_osdc_alloc_messages(lreq->reg_req, GFP_NOIO);
- if (ret)
- goto out_put_lreq;
+ lreq->preply_pages = preply_pages;
+ lreq->preply_len = preply_len;
+
+ ceph_oid_copy(&lreq->t.base_oid, oid);
+ ceph_oloc_copy(&lreq->t.base_oloc, oloc);
+ lreq->t.flags = CEPH_OSD_FLAG_READ;
linger_submit(lreq);
ret = linger_reg_commit_wait(lreq);
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 75b738083523..2823bb3cff55 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -980,7 +980,7 @@ static struct crush_work *alloc_workspace(const struct crush_map *c)
work_size = crush_work_size(c, CEPH_PG_MAX_SIZE);
dout("%s work_size %zu bytes\n", __func__, work_size);
- work = ceph_kvmalloc(work_size, GFP_NOIO);
+ work = kvmalloc(work_size, GFP_NOIO);
if (!work)
return NULL;
@@ -1190,9 +1190,9 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max)
if (max == map->max_osd)
return 0;
- state = ceph_kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS);
- weight = ceph_kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS);
- addr = ceph_kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS);
+ state = kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS);
+ weight = kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS);
+ addr = kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS);
if (!state || !weight || !addr) {
kvfree(state);
kvfree(weight);
@@ -1222,7 +1222,7 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max)
if (map->osd_primary_affinity) {
u32 *affinity;
- affinity = ceph_kvmalloc(array_size(max, sizeof(*affinity)),
+ affinity = kvmalloc(array_size(max, sizeof(*affinity)),
GFP_NOFS);
if (!affinity)
return -ENOMEM;
@@ -1503,7 +1503,7 @@ static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff)
if (!map->osd_primary_affinity) {
int i;
- map->osd_primary_affinity = ceph_kvmalloc(
+ map->osd_primary_affinity = kvmalloc(
array_size(map->max_osd, sizeof(*map->osd_primary_affinity)),
GFP_NOFS);
if (!map->osd_primary_affinity)
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index d9c37fd10809..a25ec93729b9 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -40,7 +40,7 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
if (!sdata)
return -ENOENT;
- bpf_selem_unlink(SELEM(sdata));
+ bpf_selem_unlink(SELEM(sdata), true);
return 0;
}
@@ -75,8 +75,8 @@ void bpf_sk_storage_free(struct sock *sk)
* sk_storage.
*/
bpf_selem_unlink_map(selem);
- free_sk_storage = bpf_selem_unlink_storage_nolock(sk_storage,
- selem, true);
+ free_sk_storage = bpf_selem_unlink_storage_nolock(
+ sk_storage, selem, true, false);
}
raw_spin_unlock_bh(&sk_storage->lock);
rcu_read_unlock();
@@ -141,7 +141,7 @@ static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
if (sock) {
sdata = bpf_local_storage_update(
sock->sk, (struct bpf_local_storage_map *)map, value,
- map_flags);
+ map_flags, GFP_ATOMIC);
sockfd_put(sock);
return PTR_ERR_OR_ZERO(sdata);
}
@@ -172,7 +172,7 @@ bpf_sk_storage_clone_elem(struct sock *newsk,
{
struct bpf_local_storage_elem *copy_selem;
- copy_selem = bpf_selem_alloc(smap, newsk, NULL, true);
+ copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, GFP_ATOMIC);
if (!copy_selem)
return NULL;
@@ -230,7 +230,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
bpf_selem_link_map(smap, copy_selem);
bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
} else {
- ret = bpf_local_storage_alloc(newsk, smap, copy_selem);
+ ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
if (ret) {
kfree(copy_selem);
atomic_sub(smap->elem_size,
@@ -255,8 +255,9 @@ out:
return ret;
}
-BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
- void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
+ void *, value, u64, flags, gfp_t, gfp_flags)
{
struct bpf_local_storage_data *sdata;
@@ -277,7 +278,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
refcount_inc_not_zero(&sk->sk_refcnt)) {
sdata = bpf_local_storage_update(
sk, (struct bpf_local_storage_map *)map, value,
- BPF_NOEXIST);
+ BPF_NOEXIST, gfp_flags);
/* sk must be a fullsock (guaranteed by verifier),
* so sock_gen_put() is unnecessary.
*/
@@ -337,7 +338,7 @@ bpf_sk_storage_ptr(void *owner)
return &sk->sk_bpf_storage;
}
-static int sk_storage_map_btf_id;
+BTF_ID_LIST_SINGLE(sk_storage_map_btf_ids, struct, bpf_local_storage_map)
const struct bpf_map_ops sk_storage_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = bpf_local_storage_map_alloc_check,
@@ -348,8 +349,7 @@ const struct bpf_map_ops sk_storage_map_ops = {
.map_update_elem = bpf_fd_sk_storage_update_elem,
.map_delete_elem = bpf_fd_sk_storage_delete_elem,
.map_check_btf = bpf_local_storage_map_check_btf,
- .map_btf_name = "bpf_local_storage_map",
- .map_btf_id = &sk_storage_map_btf_id,
+ .map_btf_id = &sk_storage_map_btf_ids[0],
.map_local_storage_charge = bpf_sk_storage_charge,
.map_local_storage_uncharge = bpf_sk_storage_uncharge,
.map_owner_storage_ptr = bpf_sk_storage_ptr,
@@ -405,6 +405,8 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
btf_vmlinux = bpf_get_btf_vmlinux();
+ if (IS_ERR_OR_NULL(btf_vmlinux))
+ return false;
btf_id = prog->aux->attach_btf_id;
t = btf_type_by_id(btf_vmlinux, btf_id);
tname = btf_name_by_offset(btf_vmlinux, t->name_off);
@@ -417,14 +419,16 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
return false;
}
-BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
- void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
+ void *, value, u64, flags, gfp_t, gfp_flags)
{
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (in_hardirq() || in_nmi())
return (unsigned long)NULL;
- return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags);
+ return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags,
+ gfp_flags);
}
BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index ee290776c661..50f4faeea76c 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -62,8 +62,6 @@
#include <trace/events/skb.h>
#include <net/busy_poll.h>
-#include "datagram.h"
-
/*
* Is a socket 'connection oriented' ?
*/
@@ -310,12 +308,11 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk,
EXPORT_SYMBOL(__skb_recv_datagram);
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
- int noblock, int *err)
+ int *err)
{
int off = 0;
- return __skb_recv_datagram(sk, &sk->sk_receive_queue,
- flags | (noblock ? MSG_DONTWAIT : 0),
+ return __skb_recv_datagram(sk, &sk->sk_receive_queue, flags,
&off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);
diff --git a/net/core/datagram.h b/net/core/datagram.h
deleted file mode 100644
index bcfb75bfa3b2..000000000000
--- a/net/core/datagram.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef _NET_CORE_DATAGRAM_H_
-#define _NET_CORE_DATAGRAM_H_
-
-#include <linux/types.h>
-
-struct sock;
-struct sk_buff;
-struct iov_iter;
-
-int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
- struct iov_iter *from, size_t length);
-
-#endif /* _NET_CORE_DATAGRAM_H_ */
diff --git a/net/core/dev.c b/net/core/dev.c
index 84a0d9542fe9..30a1603a7225 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -151,6 +151,7 @@
#include <linux/prandom.h>
#include <linux/once_lite.h>
+#include "dev.h"
#include "net-sysfs.h"
@@ -216,18 +217,38 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}
-static inline void rps_lock(struct softnet_data *sd)
+static inline void rps_lock_irqsave(struct softnet_data *sd,
+ unsigned long *flags)
{
-#ifdef CONFIG_RPS
- spin_lock(&sd->input_pkt_queue.lock);
-#endif
+ if (IS_ENABLED(CONFIG_RPS))
+ spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags);
+ else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_save(*flags);
}
-static inline void rps_unlock(struct softnet_data *sd)
+static inline void rps_lock_irq_disable(struct softnet_data *sd)
{
-#ifdef CONFIG_RPS
- spin_unlock(&sd->input_pkt_queue.lock);
-#endif
+ if (IS_ENABLED(CONFIG_RPS))
+ spin_lock_irq(&sd->input_pkt_queue.lock);
+ else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable();
+}
+
+static inline void rps_unlock_irq_restore(struct softnet_data *sd,
+ unsigned long *flags)
+{
+ if (IS_ENABLED(CONFIG_RPS))
+ spin_unlock_irqrestore(&sd->input_pkt_queue.lock, *flags);
+ else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_restore(*flags);
+}
+
+static inline void rps_unlock_irq_enable(struct softnet_data *sd)
+{
+ if (IS_ENABLED(CONFIG_RPS))
+ spin_unlock_irq(&sd->input_pkt_queue.lock);
+ else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_enable();
}
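For orientation, a consumer of the new combined helpers looks roughly like the sketch below; the real call sites (enqueue_to_backlog(), flush_backlog(), process_backlog()) are converted in later hunks of this patch, and example_drain() here is purely illustrative.

static void example_drain(struct softnet_data *sd)
{
	unsigned long flags;

	rps_lock_irqsave(sd, &flags);
	/* ... operate on sd->input_pkt_queue under the RPS lock ... */
	rps_unlock_irq_restore(sd, &flags);
}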
static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
@@ -320,7 +341,6 @@ int netdev_name_node_alt_create(struct net_device *dev, const char *name)
return 0;
}
-EXPORT_SYMBOL(netdev_name_node_alt_create);
static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
{
@@ -348,7 +368,6 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
return 0;
}
-EXPORT_SYMBOL(netdev_name_node_alt_destroy);
static void netdev_name_node_alt_flush(struct net_device *dev)
{
@@ -378,16 +397,18 @@ static void list_netdevice(struct net_device *dev)
/* Device list removal
* caller must respect a RCU grace period before freeing/reusing dev
*/
-static void unlist_netdevice(struct net_device *dev)
+static void unlist_netdevice(struct net_device *dev, bool lock)
{
ASSERT_RTNL();
/* Unlink dev from the device chain */
- write_lock(&dev_base_lock);
+ if (lock)
+ write_lock(&dev_base_lock);
list_del_rcu(&dev->dev_list);
netdev_name_node_del(dev->name_node);
hlist_del_rcu(&dev->index_hlist);
- write_unlock(&dev_base_lock);
+ if (lock)
+ write_unlock(&dev_base_lock);
dev_base_seq_inc(dev_net(dev));
}
@@ -663,11 +684,11 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
const struct net_device *last_dev;
struct net_device_path_ctx ctx = {
.dev = dev,
- .daddr = daddr,
};
struct net_device_path *path;
int ret = 0;
+ memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
stack->num_paths = 0;
while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
last_dev = ctx.dev;
@@ -683,6 +704,10 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
if (WARN_ON_ONCE(last_dev == ctx.dev))
return -1;
}
+
+ if (!ctx.dev)
+ return ret;
+
path = dev_fwd_path(stack);
if (!path)
return -1;
@@ -1037,7 +1062,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
/* avoid cases where sscanf is not exact inverse of printf */
snprintf(buf, IFNAMSIZ, name, i);
if (!strncmp(buf, name_node->name, IFNAMSIZ))
- set_bit(i, inuse);
+ __set_bit(i, inuse);
}
if (!sscanf(d->name, name, &i))
continue;
@@ -1047,7 +1072,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
/* avoid cases where sscanf is not exact inverse of printf */
snprintf(buf, IFNAMSIZ, name, i);
if (!strncmp(buf, d->name, IFNAMSIZ))
- set_bit(i, inuse);
+ __set_bit(i, inuse);
}
i = find_first_zero_bit(inuse, max_netdevices);
@@ -1602,7 +1627,8 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
- N(PRE_CHANGEADDR)
+ N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE)
+ N(OFFLOAD_XSTATS_REPORT_USED) N(OFFLOAD_XSTATS_REPORT_DELTA)
}
#undef N
return "UNKNOWN_NETDEV_EVENT";
@@ -1919,6 +1945,32 @@ static int call_netdevice_notifiers_info(unsigned long val,
return raw_notifier_call_chain(&netdev_chain, val, info);
}
+/**
+ * call_netdevice_notifiers_info_robust - call per-netns notifier blocks
+ * and roll back on error
+ * @val_up: value passed unmodified to notifier function
+ * @val_down: value passed unmodified to the notifier function when
+ * recovering from an error on @val_up
+ * @info: notifier information data
+ *
+ * Call all per-netns network notifier blocks, but not notifier blocks on
+ * the global notifier chain. Parameters and return value are as for
+ * raw_notifier_call_chain_robust().
+ */
+
+static int
+call_netdevice_notifiers_info_robust(unsigned long val_up,
+ unsigned long val_down,
+ struct netdev_notifier_info *info)
+{
+ struct net *net = dev_net(info->dev);
+
+ ASSERT_RTNL();
+
+ return raw_notifier_call_chain_robust(&net->netdev_chain,
+ val_up, val_down, info);
+}
+
static int call_netdevice_notifiers_extack(unsigned long val,
struct net_device *dev,
struct netlink_ext_ack *extack)
@@ -2000,7 +2052,8 @@ void net_dec_egress_queue(void)
EXPORT_SYMBOL_GPL(net_dec_egress_queue);
#endif
-static DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
+DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
+EXPORT_SYMBOL(netstamp_needed_key);
#ifdef CONFIG_JUMP_LABEL
static atomic_t netstamp_needed_deferred;
static atomic_t netstamp_wanted;
@@ -2061,14 +2114,15 @@ EXPORT_SYMBOL(net_disable_timestamp);
static inline void net_timestamp_set(struct sk_buff *skb)
{
skb->tstamp = 0;
+ skb->mono_delivery_time = 0;
if (static_branch_unlikely(&netstamp_needed_key))
- __net_timestamp(skb);
+ skb->tstamp = ktime_get_real();
}
#define net_timestamp_check(COND, SKB) \
if (static_branch_unlikely(&netstamp_needed_key)) { \
if ((COND) && !(SKB)->tstamp) \
- __net_timestamp(SKB); \
+ (SKB)->tstamp = ktime_get_real(); \
} \
bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
@@ -2941,15 +2995,73 @@ undo_rx:
EXPORT_SYMBOL(netif_set_real_num_queues);
/**
+ * netif_set_tso_max_size() - set the max size of TSO frames supported
+ * @dev: netdev to update
+ * @size: max skb->len of a TSO frame
+ *
+ * Set the limit on the size of TSO super-frames the device can handle.
+ * Unless explicitly set, the stack will assume the value of
+ * %GSO_LEGACY_MAX_SIZE.
+ */
+void netif_set_tso_max_size(struct net_device *dev, unsigned int size)
+{
+ dev->tso_max_size = min(GSO_MAX_SIZE, size);
+ if (size < READ_ONCE(dev->gso_max_size))
+ netif_set_gso_max_size(dev, size);
+}
+EXPORT_SYMBOL(netif_set_tso_max_size);
+
+/**
+ * netif_set_tso_max_segs() - set the max number of segs supported for TSO
+ * @dev: netdev to update
+ * @segs: max number of TCP segments
+ *
+ * Set the limit on the number of TCP segments the device can generate from
+ * a single TSO super-frame.
+ * Unless explicitly set, the stack will assume the value of %GSO_MAX_SEGS.
+ */
+void netif_set_tso_max_segs(struct net_device *dev, unsigned int segs)
+{
+ dev->tso_max_segs = segs;
+ if (segs < READ_ONCE(dev->gso_max_segs))
+ netif_set_gso_max_segs(dev, segs);
+}
+EXPORT_SYMBOL(netif_set_tso_max_segs);
+
+/**
+ * netif_inherit_tso_max() - copy all TSO limits from a lower device to an upper
+ * @to: netdev to update
+ * @from: netdev from which to copy the limits
+ */
+void netif_inherit_tso_max(struct net_device *to, const struct net_device *from)
+{
+ netif_set_tso_max_size(to, from->tso_max_size);
+ netif_set_tso_max_segs(to, from->tso_max_segs);
+}
+EXPORT_SYMBOL(netif_inherit_tso_max);
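A hedged sketch of how a driver might consume the new setters during probe; the foo_ prefix and the numeric limits are made up for illustration and are not part of this patch.

/* Hypothetical driver helper: advertise hardware TSO limits through the new
 * API instead of poking gso_max_size/gso_max_segs directly. */
static void foo_set_tso_limits(struct net_device *netdev)
{
	netif_set_tso_max_size(netdev, SZ_256K);	/* assumed HW frame limit */
	netif_set_tso_max_segs(netdev, 128);		/* assumed HW segment limit */
}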
+
+/**
* netif_get_num_default_rss_queues - default number of RSS queues
*
- * This routine should set an upper limit on the number of RSS queues
- * used by default by multiqueue devices.
+ * Default value is the number of physical cores if there are only 1 or 2, or
+ * half the number of physical cores if there are more.
*/
int netif_get_num_default_rss_queues(void)
{
- return is_kdump_kernel() ?
- 1 : min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
+ cpumask_var_t cpus;
+ int cpu, count = 0;
+
+ if (unlikely(is_kdump_kernel() || !zalloc_cpumask_var(&cpus, GFP_KERNEL)))
+ return 1;
+
+ cpumask_copy(cpus, cpu_online_mask);
+ for_each_cpu(cpu, cpus) {
+ ++count;
+ cpumask_andnot(cpus, cpus, topology_sibling_cpumask(cpu));
+ }
+ free_cpumask_var(cpus);
+
+ return count > 2 ? DIV_ROUND_UP(count, 2) : count;
}
EXPORT_SYMBOL(netif_get_num_default_rss_queues);
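For illustration, a driver would typically clamp its hardware queue count with this helper; foo_pick_num_rx_queues() is a hypothetical name, not part of this patch.

static unsigned int foo_pick_num_rx_queues(unsigned int hw_max_queues)
{
	/* Use at most the default RSS queue count (now roughly the number of
	 * physical cores), bounded by what the hardware supports. */
	return min_t(unsigned int, hw_max_queues,
		     netif_get_num_default_rss_queues());
}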
@@ -3156,12 +3268,18 @@ int skb_checksum_help(struct sk_buff *skb)
}
offset = skb_checksum_start_offset(skb);
- BUG_ON(offset >= skb_headlen(skb));
+ ret = -EINVAL;
+ if (WARN_ON_ONCE(offset >= skb_headlen(skb))) {
+ DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
+ goto out;
+ }
csum = skb_checksum(skb, offset, skb->len - offset, 0);
offset += skb->csum_offset;
- BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
-
+ if (WARN_ON_ONCE(offset + sizeof(__sum16) > skb_headlen(skb))) {
+ DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
+ goto out;
+ }
ret = skb_ensure_writable(skb, offset + sizeof(__sum16));
if (ret)
goto out;
@@ -3468,7 +3586,6 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
dev_queue_xmit_nit(skb, dev);
len = skb->len;
- PRANDOM_ADD_NOISE(skb, dev, txq, len + jiffies);
trace_net_dev_start_xmit(skb, dev);
rc = netdev_start_xmit(skb, dev, txq, more);
trace_net_dev_xmit(skb, rc, dev, len);
@@ -3586,7 +3703,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
out_kfree_skb:
kfree_skb(skb);
out_null:
- atomic_long_inc(&dev->tx_dropped);
+ dev_core_stats_tx_dropped_inc(dev);
return NULL;
}
@@ -3710,7 +3827,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
no_lock_out:
if (unlikely(to_free))
- kfree_skb_list(to_free);
+ kfree_skb_list_reason(to_free,
+ SKB_DROP_REASON_QDISC_DROP);
return rc;
}
@@ -3765,7 +3883,7 @@ no_lock_out:
}
spin_unlock(root_lock);
if (unlikely(to_free))
- kfree_skb_list(to_free);
+ kfree_skb_list_reason(to_free, SKB_DROP_REASON_QDISC_DROP);
if (unlikely(contended))
spin_unlock(&q->busylock);
return rc;
@@ -3811,7 +3929,7 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
skb->ip_summed = CHECKSUM_UNNECESSARY;
WARN_ON(!skb_dst(skb));
skb_dst_force(skb);
- netif_rx_ni(skb);
+ netif_rx(skb);
return 0;
}
EXPORT_SYMBOL(dev_loopback_xmit);
@@ -3840,7 +3958,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
case TC_ACT_SHOT:
mini_qdisc_qstats_cpu_drop(miniq);
*ret = NET_XMIT_DROP;
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
return NULL;
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
@@ -3860,6 +3978,25 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
return skb;
}
+
+static struct netdev_queue *
+netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
+{
+ int qm = skb_get_queue_mapping(skb);
+
+ return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm));
+}
+
+static bool netdev_xmit_txqueue_skipped(void)
+{
+ return __this_cpu_read(softnet_data.xmit.skip_txqueue);
+}
+
+void netdev_xmit_skip_txqueue(bool skip)
+{
+ __this_cpu_write(softnet_data.xmit.skip_txqueue, skip);
+}
+EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
#endif /* CONFIG_NET_EGRESS */
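A minimal sketch of how an egress hook could use the new export so that __dev_queue_xmit() honours the queue chosen by the classifier; foo_pick_queue() is hypothetical.

static void foo_pick_queue(struct sk_buff *skb, u16 queue)
{
	skb_set_queue_mapping(skb, queue);
	/* Tell the core not to override the mapping in netdev_core_pick_tx(). */
	netdev_xmit_skip_txqueue(true);
}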
#ifdef CONFIG_XPS
@@ -4002,35 +4139,30 @@ struct netdev_queue *netdev_core_pick_tx(struct net_device *dev,
}
/**
- * __dev_queue_xmit - transmit a buffer
- * @skb: buffer to transmit
- * @sb_dev: suboordinate device used for L2 forwarding offload
+ * __dev_queue_xmit() - transmit a buffer
+ * @skb: buffer to transmit
+ * @sb_dev: subordinate device used for L2 forwarding offload
*
- * Queue a buffer for transmission to a network device. The caller must
- * have set the device and priority and built the buffer before calling
- * this function. The function can be called from an interrupt.
+ * Queue a buffer for transmission to a network device. The caller must
+ * have set the device and priority and built the buffer before calling
+ * this function. The function can be called from an interrupt.
*
- * A negative errno code is returned on a failure. A success does not
- * guarantee the frame will be transmitted as it may be dropped due
- * to congestion or traffic shaping.
+ * When calling this method, interrupts MUST be enabled. This is because
+ * the BH enable code must have IRQs enabled so that it will not deadlock.
*
- * -----------------------------------------------------------------------------------
- * I notice this method can also return errors from the queue disciplines,
- * including NET_XMIT_DROP, which is a positive value. So, errors can also
- * be positive.
+ * Regardless of the return value, the skb is consumed, so it is currently
+ * difficult to retry a send to this method. (You can bump the ref count
+ * before sending to hold a reference for retry if you are careful.)
*
- * Regardless of the return value, the skb is consumed, so it is currently
- * difficult to retry a send to this method. (You can bump the ref count
- * before sending to hold a reference for retry if you are careful.)
- *
- * When calling this method, interrupts MUST be enabled. This is because
- * the BH enable code must have IRQs enabled so that it will not deadlock.
- * --BLG
+ * Return:
+ * * 0 - buffer successfully transmitted
+ * * positive qdisc return code - NET_XMIT_DROP etc.
+ * * negative errno - other errors
*/
-static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
+int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
{
struct net_device *dev = skb->dev;
- struct netdev_queue *txq;
+ struct netdev_queue *txq = NULL;
struct Qdisc *q;
int rc = -ENOMEM;
bool again = false;
@@ -4058,11 +4190,17 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
if (!skb)
goto out;
}
+
+ netdev_xmit_skip_txqueue(false);
+
nf_skip_egress(skb, true);
skb = sch_handle_egress(skb, &rc, dev);
if (!skb)
goto out;
nf_skip_egress(skb, false);
+
+ if (netdev_xmit_txqueue_skipped())
+ txq = netdev_tx_queue_mapping(dev, skb);
}
#endif
/* If device/qdisc don't need skb->dst, release it right now while
@@ -4073,7 +4211,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
else
skb_dst_force(skb);
- txq = netdev_core_pick_tx(dev, skb, sb_dev);
+ if (!txq)
+ txq = netdev_core_pick_tx(dev, skb, sb_dev);
+
q = rcu_dereference_bh(txq->qdisc);
trace_net_dev_queue(skb);
@@ -4108,7 +4248,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
if (!skb)
goto out;
- PRANDOM_ADD_NOISE(skb, dev, txq, jiffies);
HARD_TX_LOCK(dev, txq, cpu);
if (!netif_xmit_stopped(txq)) {
@@ -4136,25 +4275,14 @@ recursion_alert:
rc = -ENETDOWN;
rcu_read_unlock_bh();
- atomic_long_inc(&dev->tx_dropped);
+ dev_core_stats_tx_dropped_inc(dev);
kfree_skb_list(skb);
return rc;
out:
rcu_read_unlock_bh();
return rc;
}
-
-int dev_queue_xmit(struct sk_buff *skb)
-{
- return __dev_queue_xmit(skb, NULL);
-}
-EXPORT_SYMBOL(dev_queue_xmit);
-
-int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev)
-{
- return __dev_queue_xmit(skb, sb_dev);
-}
-EXPORT_SYMBOL(dev_queue_xmit_accel);
+EXPORT_SYMBOL(__dev_queue_xmit);
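With __dev_queue_xmit() exported, the removed wrappers presumably become static inlines in <linux/netdevice.h>; roughly as below (an assumption - the header change is outside this diff).

static inline int dev_queue_xmit(struct sk_buff *skb)
{
	return __dev_queue_xmit(skb, NULL);
}

static inline int dev_queue_xmit_accel(struct sk_buff *skb,
				       struct net_device *sb_dev)
{
	return __dev_queue_xmit(skb, sb_dev);
}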
int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
{
@@ -4174,7 +4302,6 @@ int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
skb_set_queue_mapping(skb, queue_id);
txq = skb_get_tx_queue(dev, skb);
- PRANDOM_ADD_NOISE(skb, dev, txq, jiffies);
local_bh_disable();
@@ -4188,7 +4315,7 @@ int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
local_bh_enable();
return ret;
drop:
- atomic_long_inc(&dev->tx_dropped);
+ dev_core_stats_tx_dropped_inc(dev);
kfree_skb_list(skb);
return NET_XMIT_DROP;
}
@@ -4202,6 +4329,7 @@ int netdev_max_backlog __read_mostly = 1000;
EXPORT_SYMBOL(netdev_max_backlog);
int netdev_tstamp_prequeue __read_mostly = 1;
+unsigned int sysctl_skb_defer_max __read_mostly = 64;
int netdev_budget __read_mostly = 300;
/* Must be at least 2 jiffes to guarantee 1 jiffy timeout */
unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ;
@@ -4217,6 +4345,8 @@ static inline void ____napi_schedule(struct softnet_data *sd,
{
struct task_struct *thread;
+ lockdep_assert_irqs_disabled();
+
if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
/* Paired with smp_mb__before_atomic() in
* napi_enable()/dev_set_threaded().
@@ -4451,16 +4581,25 @@ static void rps_trigger_softirq(void *data)
#endif /* CONFIG_RPS */
+/* Called from hardirq (IPI) context */
+static void trigger_rx_softirq(void *data)
+{
+ struct softnet_data *sd = data;
+
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ smp_store_release(&sd->defer_ipi_scheduled, 0);
+}
+
/*
* Check if this softnet_data structure is another cpu one
* If yes, queue it to our IPI list and return 1
* If no, return 0
*/
-static int rps_ipi_queued(struct softnet_data *sd)
+static int napi_schedule_rps(struct softnet_data *sd)
{
-#ifdef CONFIG_RPS
struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
+#ifdef CONFIG_RPS
if (sd != mysd) {
sd->rps_ipi_next = mysd->rps_ipi_list;
mysd->rps_ipi_list = sd;
@@ -4469,6 +4608,7 @@ static int rps_ipi_queued(struct softnet_data *sd)
return 1;
}
#endif /* CONFIG_RPS */
+ __napi_schedule_irqoff(&mysd->backlog);
return 0;
}
@@ -4519,15 +4659,15 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
unsigned int *qtail)
{
+ enum skb_drop_reason reason;
struct softnet_data *sd;
unsigned long flags;
unsigned int qlen;
+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
sd = &per_cpu(softnet_data, cpu);
- local_irq_save(flags);
-
- rps_lock(sd);
+ rps_lock_irqsave(sd, &flags);
if (!netif_running(skb->dev))
goto drop;
qlen = skb_queue_len(&sd->input_pkt_queue);
@@ -4536,29 +4676,25 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
enqueue:
__skb_queue_tail(&sd->input_pkt_queue, skb);
input_queue_tail_incr_save(sd, qtail);
- rps_unlock(sd);
- local_irq_restore(flags);
+ rps_unlock_irq_restore(sd, &flags);
return NET_RX_SUCCESS;
}
/* Schedule NAPI for backlog device
* We can use non atomic operation since we own the queue lock
*/
- if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
- if (!rps_ipi_queued(sd))
- ____napi_schedule(sd, &sd->backlog);
- }
+ if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state))
+ napi_schedule_rps(sd);
goto enqueue;
}
+ reason = SKB_DROP_REASON_CPU_BACKLOG;
drop:
sd->dropped++;
- rps_unlock(sd);
-
- local_irq_restore(flags);
+ rps_unlock_irq_restore(sd, &flags);
- atomic_long_inc(&skb->dev->rx_dropped);
- kfree_skb(skb);
+ dev_core_stats_rx_dropped_inc(skb->dev);
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
}
@@ -4727,7 +4863,10 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
}
/* When doing generic XDP we have to bypass the qdisc layer and the
- * network taps in order to match in-driver-XDP behavior.
+ * network taps in order to match in-driver-XDP behavior. This also means
+ * that XDP packets are able to starve other packets going through a qdisc,
+ * and DDOS attacks will be more effective. In-driver-XDP use dedicated TX
+ * queues, so they do not have this starvation issue.
*/
void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
{
@@ -4739,7 +4878,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
txq = netdev_core_pick_tx(dev, skb, NULL);
cpu = smp_processor_id();
HARD_TX_LOCK(dev, txq, cpu);
- if (!netif_xmit_stopped(txq)) {
+ if (!netif_xmit_frozen_or_drv_stopped(txq)) {
rc = netdev_start_xmit(skb, dev, txq, 0);
if (dev_xmit_complete(rc))
free_skb = false;
@@ -4747,6 +4886,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
HARD_TX_UNLOCK(dev, txq);
if (free_skb) {
trace_xdp_exception(dev, xdp_prog, XDP_TX);
+ dev_core_stats_tx_dropped_inc(dev);
kfree_skb(skb);
}
}
@@ -4778,7 +4918,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
}
return XDP_PASS;
out_redir:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_XDP);
return XDP_DROP;
}
EXPORT_SYMBOL_GPL(do_xdp_generic);
@@ -4796,7 +4936,6 @@ static int netif_rx_internal(struct sk_buff *skb)
struct rps_dev_flow voidflow, *rflow = &voidflow;
int cpu;
- preempt_disable();
rcu_read_lock();
cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -4806,78 +4945,72 @@ static int netif_rx_internal(struct sk_buff *skb)
ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
rcu_read_unlock();
- preempt_enable();
} else
#endif
{
unsigned int qtail;
- ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
- put_cpu();
+ ret = enqueue_to_backlog(skb, smp_processor_id(), &qtail);
}
return ret;
}
/**
+ * __netif_rx - Slightly optimized version of netif_rx
+ * @skb: buffer to post
+ *
+ * This behaves as netif_rx except that it does not disable bottom halves.
+ * As a result, this function may only be invoked from interrupt context
+ * (either hard or soft interrupt).
+ */
+int __netif_rx(struct sk_buff *skb)
+{
+ int ret;
+
+ lockdep_assert_once(hardirq_count() | softirq_count());
+
+ trace_netif_rx_entry(skb);
+ ret = netif_rx_internal(skb);
+ trace_netif_rx_exit(ret);
+ return ret;
+}
+EXPORT_SYMBOL(__netif_rx);
+
+/**
* netif_rx - post buffer to the network code
* @skb: buffer to post
*
* This function receives a packet from a device driver and queues it for
- * the upper (protocol) levels to process. It always succeeds. The buffer
- * may be dropped during processing for congestion control or by the
- * protocol layers.
+ * the upper (protocol) levels to process via the backlog NAPI device. It
+ * always succeeds. The buffer may be dropped during processing for
+ * congestion control or by the protocol layers.
+ * The network buffer is passed via the backlog NAPI device. Modern NIC
+ * drivers should use NAPI and GRO.
+ * This function can be used from interrupt and from process context. The
+ * caller from process context must not disable interrupts before invoking
+ * this function.
*
* return values:
* NET_RX_SUCCESS (no congestion)
* NET_RX_DROP (packet was dropped)
*
*/
-
int netif_rx(struct sk_buff *skb)
{
+ bool need_bh_off = !(hardirq_count() | softirq_count());
int ret;
+ if (need_bh_off)
+ local_bh_disable();
trace_netif_rx_entry(skb);
-
ret = netif_rx_internal(skb);
trace_netif_rx_exit(ret);
-
+ if (need_bh_off)
+ local_bh_enable();
return ret;
}
EXPORT_SYMBOL(netif_rx);
-int netif_rx_ni(struct sk_buff *skb)
-{
- int err;
-
- trace_netif_rx_ni_entry(skb);
-
- preempt_disable();
- err = netif_rx_internal(skb);
- if (local_softirq_pending())
- do_softirq();
- preempt_enable();
- trace_netif_rx_ni_exit(err);
-
- return err;
-}
-EXPORT_SYMBOL(netif_rx_ni);
-
-int netif_rx_any_context(struct sk_buff *skb)
-{
- /*
- * If invoked from contexts which do not invoke bottom half
- * processing either at return from interrupt or when softrqs are
- * reenabled, use netif_rx_ni() which invokes bottomhalf processing
- * directly.
- */
- if (in_interrupt())
- return netif_rx(skb);
- else
- return netif_rx_ni(skb);
-}
-EXPORT_SYMBOL(netif_rx_any_context);
-
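With netif_rx_ni() and netif_rx_any_context() gone, process-context callers simply call netif_rx(), which now disables bottom halves itself when needed; an illustrative (hypothetical) caller update, mirroring the dev_cpu_dead() conversion later in this patch:

static void foo_deliver(struct sk_buff *skb)
{
	/* Previously netif_rx_ni(skb) from process context; a single
	 * netif_rx() call now works from both interrupt and process context. */
	netif_rx(skb);
}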
static __latent_entropy void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -5001,7 +5134,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
break;
case TC_ACT_SHOT:
mini_qdisc_qstats_cpu_drop(miniq);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
return NULL;
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
@@ -5319,10 +5452,10 @@ check_vlan_id:
} else {
drop:
if (!deliver_exact)
- atomic_long_inc(&skb->dev->rx_dropped);
+ dev_core_stats_rx_dropped_inc(skb->dev);
else
- atomic_long_inc(&skb->dev->rx_nohandler);
- kfree_skb(skb);
+ dev_core_stats_rx_nohandler_inc(skb->dev);
+ kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
*/
@@ -5650,8 +5783,7 @@ static void flush_backlog(struct work_struct *work)
local_bh_disable();
sd = this_cpu_ptr(&softnet_data);
- local_irq_disable();
- rps_lock(sd);
+ rps_lock_irq_disable(sd);
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
if (skb->dev->reg_state == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->input_pkt_queue);
@@ -5659,8 +5791,7 @@ static void flush_backlog(struct work_struct *work)
input_queue_head_incr(sd);
}
}
- rps_unlock(sd);
- local_irq_enable();
+ rps_unlock_irq_enable(sd);
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
if (skb->dev->reg_state == NETREG_UNREGISTERING) {
@@ -5678,16 +5809,14 @@ static bool flush_required(int cpu)
struct softnet_data *sd = &per_cpu(softnet_data, cpu);
bool do_flush;
- local_irq_disable();
- rps_lock(sd);
+ rps_lock_irq_disable(sd);
/* as insertion into process_queue happens with the rps lock held,
* process_queue access may race only with dequeue
*/
do_flush = !skb_queue_empty(&sd->input_pkt_queue) ||
!skb_queue_empty_lockless(&sd->process_queue);
- rps_unlock(sd);
- local_irq_enable();
+ rps_unlock_irq_enable(sd);
return do_flush;
#endif
@@ -5802,8 +5931,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
}
- local_irq_disable();
- rps_lock(sd);
+ rps_lock_irq_disable(sd);
if (skb_queue_empty(&sd->input_pkt_queue)) {
/*
* Inline a custom version of __napi_complete().
@@ -5819,8 +5947,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
skb_queue_splice_tail_init(&sd->input_pkt_queue,
&sd->process_queue);
}
- rps_unlock(sd);
- local_irq_enable();
+ rps_unlock_irq_enable(sd);
}
return work;
@@ -6230,8 +6357,8 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
}
EXPORT_SYMBOL(dev_set_threaded);
-void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
- int (*poll)(struct napi_struct *, int), int weight)
+void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
+ int (*poll)(struct napi_struct *, int), int weight)
{
if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state)))
return;
@@ -6264,7 +6391,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
if (dev->threaded && napi_kthread_create(napi))
dev->threaded = 0;
}
-EXPORT_SYMBOL(netif_napi_add);
+EXPORT_SYMBOL(netif_napi_add_weight);
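netif_napi_add() itself presumably survives as a thin wrapper around the renamed function in <linux/netdevice.h>; a sketch under that assumption (the header side is not shown in this diff).

static inline void netif_napi_add(struct net_device *dev,
				  struct napi_struct *napi,
				  int (*poll)(struct napi_struct *, int),
				  int weight)
{
	netif_napi_add_weight(dev, napi, poll, weight);
}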
void napi_disable(struct napi_struct *n)
{
@@ -6493,6 +6620,28 @@ static int napi_threaded_poll(void *data)
return 0;
}
+static void skb_defer_free_flush(struct softnet_data *sd)
+{
+ struct sk_buff *skb, *next;
+ unsigned long flags;
+
+ /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
+ if (!READ_ONCE(sd->defer_list))
+ return;
+
+ spin_lock_irqsave(&sd->defer_lock, flags);
+ skb = sd->defer_list;
+ sd->defer_list = NULL;
+ sd->defer_count = 0;
+ spin_unlock_irqrestore(&sd->defer_lock, flags);
+
+ while (skb != NULL) {
+ next = skb->next;
+ napi_consume_skb(skb, 1);
+ skb = next;
+ }
+}
+
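The producer side, skb_attempt_defer_free(), lives in net/core/skbuff.c and is not part of this hunk; under that assumption it queues skbs on the owning CPU's defer_list and kicks that CPU through defer_csd, roughly along the lines below. Field and helper usage mirrors the consumer above; the exact logic is a sketch, not the series' implementation.

void skb_attempt_defer_free_sketch(struct sk_buff *skb, int cpu)
{
	struct softnet_data *sd = &per_cpu(softnet_data, cpu);
	unsigned long flags;

	spin_lock_irqsave(&sd->defer_lock, flags);
	skb->next = sd->defer_list;
	/* Paired with the READ_ONCE() in skb_defer_free_flush(). */
	WRITE_ONCE(sd->defer_list, skb);
	sd->defer_count++;
	spin_unlock_irqrestore(&sd->defer_lock, flags);

	/* Ask the remote CPU to run net_rx_action() and flush its list. */
	if (!cmpxchg(&sd->defer_ipi_scheduled, 0, 1))
		smp_call_function_single_async(cpu, &sd->defer_csd);
}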
static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -6509,9 +6658,11 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
for (;;) {
struct napi_struct *n;
+ skb_defer_free_flush(sd);
+
if (list_empty(&list)) {
if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
- return;
+ goto end;
break;
}
@@ -6538,6 +6689,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
net_rps_action_and_irq_enable(sd);
+end:;
}
struct netdev_adjacent {
@@ -7145,6 +7297,16 @@ static int __netdev_update_upper_level(struct net_device *dev,
return 0;
}
+#ifdef CONFIG_LOCKDEP
+static LIST_HEAD(net_unlink_list);
+
+static void net_unlink_todo(struct net_device *dev)
+{
+ if (list_empty(&dev->unlink_list))
+ list_add_tail(&dev->unlink_list, &net_unlink_list);
+}
+#endif
+
static int __netdev_update_lower_level(struct net_device *dev,
struct netdev_nested_priv *priv)
{
@@ -7727,6 +7889,242 @@ void netdev_bonding_info_change(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_bonding_info_change);
+static int netdev_offload_xstats_enable_l3(struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_offload_xstats_info info = {
+ .info.dev = dev,
+ .info.extack = extack,
+ .type = NETDEV_OFFLOAD_XSTATS_TYPE_L3,
+ };
+ int err;
+ int rc;
+
+ dev->offload_xstats_l3 = kzalloc(sizeof(*dev->offload_xstats_l3),
+ GFP_KERNEL);
+ if (!dev->offload_xstats_l3)
+ return -ENOMEM;
+
+ rc = call_netdevice_notifiers_info_robust(NETDEV_OFFLOAD_XSTATS_ENABLE,
+ NETDEV_OFFLOAD_XSTATS_DISABLE,
+ &info.info);
+ err = notifier_to_errno(rc);
+ if (err)
+ goto free_stats;
+
+ return 0;
+
+free_stats:
+ kfree(dev->offload_xstats_l3);
+ dev->offload_xstats_l3 = NULL;
+ return err;
+}
+
+int netdev_offload_xstats_enable(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ struct netlink_ext_ack *extack)
+{
+ ASSERT_RTNL();
+
+ if (netdev_offload_xstats_enabled(dev, type))
+ return -EALREADY;
+
+ switch (type) {
+ case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
+ return netdev_offload_xstats_enable_l3(dev, extack);
+ }
+
+ WARN_ON(1);
+ return -EINVAL;
+}
+EXPORT_SYMBOL(netdev_offload_xstats_enable);
+
+static void netdev_offload_xstats_disable_l3(struct net_device *dev)
+{
+ struct netdev_notifier_offload_xstats_info info = {
+ .info.dev = dev,
+ .type = NETDEV_OFFLOAD_XSTATS_TYPE_L3,
+ };
+
+ call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_DISABLE,
+ &info.info);
+ kfree(dev->offload_xstats_l3);
+ dev->offload_xstats_l3 = NULL;
+}
+
+int netdev_offload_xstats_disable(struct net_device *dev,
+ enum netdev_offload_xstats_type type)
+{
+ ASSERT_RTNL();
+
+ if (!netdev_offload_xstats_enabled(dev, type))
+ return -EALREADY;
+
+ switch (type) {
+ case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
+ netdev_offload_xstats_disable_l3(dev);
+ return 0;
+ }
+
+ WARN_ON(1);
+ return -EINVAL;
+}
+EXPORT_SYMBOL(netdev_offload_xstats_disable);
+
+static void netdev_offload_xstats_disable_all(struct net_device *dev)
+{
+ netdev_offload_xstats_disable(dev, NETDEV_OFFLOAD_XSTATS_TYPE_L3);
+}
+
+static struct rtnl_hw_stats64 *
+netdev_offload_xstats_get_ptr(const struct net_device *dev,
+ enum netdev_offload_xstats_type type)
+{
+ switch (type) {
+ case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
+ return dev->offload_xstats_l3;
+ }
+
+ WARN_ON(1);
+ return NULL;
+}
+
+bool netdev_offload_xstats_enabled(const struct net_device *dev,
+ enum netdev_offload_xstats_type type)
+{
+ ASSERT_RTNL();
+
+ return netdev_offload_xstats_get_ptr(dev, type);
+}
+EXPORT_SYMBOL(netdev_offload_xstats_enabled);
+
+struct netdev_notifier_offload_xstats_ru {
+ bool used;
+};
+
+struct netdev_notifier_offload_xstats_rd {
+ struct rtnl_hw_stats64 stats;
+ bool used;
+};
+
+static void netdev_hw_stats64_add(struct rtnl_hw_stats64 *dest,
+ const struct rtnl_hw_stats64 *src)
+{
+ dest->rx_packets += src->rx_packets;
+ dest->tx_packets += src->tx_packets;
+ dest->rx_bytes += src->rx_bytes;
+ dest->tx_bytes += src->tx_bytes;
+ dest->rx_errors += src->rx_errors;
+ dest->tx_errors += src->tx_errors;
+ dest->rx_dropped += src->rx_dropped;
+ dest->tx_dropped += src->tx_dropped;
+ dest->multicast += src->multicast;
+}
+
+static int netdev_offload_xstats_get_used(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ bool *p_used,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_offload_xstats_ru report_used = {};
+ struct netdev_notifier_offload_xstats_info info = {
+ .info.dev = dev,
+ .info.extack = extack,
+ .type = type,
+ .report_used = &report_used,
+ };
+ int rc;
+
+ WARN_ON(!netdev_offload_xstats_enabled(dev, type));
+ rc = call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_REPORT_USED,
+ &info.info);
+ *p_used = report_used.used;
+ return notifier_to_errno(rc);
+}
+
+static int netdev_offload_xstats_get_stats(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ struct rtnl_hw_stats64 *p_stats,
+ bool *p_used,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_offload_xstats_rd report_delta = {};
+ struct netdev_notifier_offload_xstats_info info = {
+ .info.dev = dev,
+ .info.extack = extack,
+ .type = type,
+ .report_delta = &report_delta,
+ };
+ struct rtnl_hw_stats64 *stats;
+ int rc;
+
+ stats = netdev_offload_xstats_get_ptr(dev, type);
+ if (WARN_ON(!stats))
+ return -EINVAL;
+
+ rc = call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_REPORT_DELTA,
+ &info.info);
+
+ /* Cache whatever we got, even if there was an error, otherwise the
+ * successful stats retrievals would get lost.
+ */
+ netdev_hw_stats64_add(stats, &report_delta.stats);
+
+ if (p_stats)
+ *p_stats = *stats;
+ *p_used = report_delta.used;
+
+ return notifier_to_errno(rc);
+}
+
+int netdev_offload_xstats_get(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ struct rtnl_hw_stats64 *p_stats, bool *p_used,
+ struct netlink_ext_ack *extack)
+{
+ ASSERT_RTNL();
+
+ if (p_stats)
+ return netdev_offload_xstats_get_stats(dev, type, p_stats,
+ p_used, extack);
+ else
+ return netdev_offload_xstats_get_used(dev, type, p_used,
+ extack);
+}
+EXPORT_SYMBOL(netdev_offload_xstats_get);
+
+void
+netdev_offload_xstats_report_delta(struct netdev_notifier_offload_xstats_rd *report_delta,
+ const struct rtnl_hw_stats64 *stats)
+{
+ report_delta->used = true;
+ netdev_hw_stats64_add(&report_delta->stats, stats);
+}
+EXPORT_SYMBOL(netdev_offload_xstats_report_delta);
+
+void
+netdev_offload_xstats_report_used(struct netdev_notifier_offload_xstats_ru *report_used)
+{
+ report_used->used = true;
+}
+EXPORT_SYMBOL(netdev_offload_xstats_report_used);
+
+void netdev_offload_xstats_push_delta(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ const struct rtnl_hw_stats64 *p_stats)
+{
+ struct rtnl_hw_stats64 *stats;
+
+ ASSERT_RTNL();
+
+ stats = netdev_offload_xstats_get_ptr(dev, type);
+ if (WARN_ON(!stats))
+ return;
+
+ netdev_hw_stats64_add(stats, p_stats);
+}
+EXPORT_SYMBOL(netdev_offload_xstats_push_delta);
+
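For illustration, a switchdev driver's netdevice notifier could respond to the two new report events as sketched below; foo_read_hw_counters() and the foo_ prefix are hypothetical, only the netdev_offload_xstats_* helpers and the notifier info fields come from this patch.

static int foo_netdevice_event(struct notifier_block *nb,
			       unsigned long event, void *ptr)
{
	struct netdev_notifier_offload_xstats_info *info = ptr;
	struct rtnl_hw_stats64 stats = {};

	if (info->type != NETDEV_OFFLOAD_XSTATS_TYPE_L3)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
		netdev_offload_xstats_report_used(info->report_used);
		break;
	case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
		foo_read_hw_counters(info->info.dev, &stats);	/* hypothetical */
		netdev_offload_xstats_report_delta(info->report_delta, &stats);
		break;
	}
	return NOTIFY_OK;
}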
/**
* netdev_get_xmit_slave - Get the xmit slave of master device
* @dev: device
@@ -8347,7 +8745,6 @@ void dev_set_group(struct net_device *dev, int new_group)
{
dev->group = new_group;
}
-EXPORT_SYMBOL(dev_set_group);
/**
* dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR.
@@ -8462,7 +8859,6 @@ int dev_change_carrier(struct net_device *dev, bool new_carrier)
return -ENODEV;
return ops->ndo_change_carrier(dev, new_carrier);
}
-EXPORT_SYMBOL(dev_change_carrier);
/**
* dev_get_phys_port_id - Get device physical port ID
@@ -8480,7 +8876,6 @@ int dev_get_phys_port_id(struct net_device *dev,
return -EOPNOTSUPP;
return ops->ndo_get_phys_port_id(dev, ppid);
}
-EXPORT_SYMBOL(dev_get_phys_port_id);
/**
* dev_get_phys_port_name - Get device physical port name
@@ -8503,7 +8898,6 @@ int dev_get_phys_port_name(struct net_device *dev,
}
return devlink_compat_phys_port_name_get(dev, name, len);
}
-EXPORT_SYMBOL(dev_get_phys_port_name);
/**
* dev_get_port_parent_id - Get the device's port parent identifier
@@ -8585,7 +8979,6 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
dev->proto_down = proto_down;
return 0;
}
-EXPORT_SYMBOL(dev_change_proto_down);
/**
* dev_change_proto_down_reason - proto down reason
@@ -8610,7 +9003,6 @@ void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
}
}
}
-EXPORT_SYMBOL(dev_change_proto_down_reason);
struct bpf_xdp_link {
struct bpf_link link;
@@ -8981,6 +9373,12 @@ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
goto out_unlock;
}
old_prog = link->prog;
+ if (old_prog->type != new_prog->type ||
+ old_prog->expected_attach_type != new_prog->expected_attach_type) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
if (old_prog == new_prog) {
/* no-op, don't disturb drivers */
bpf_prog_put(new_prog);
@@ -9131,13 +9529,13 @@ static int dev_new_index(struct net *net)
}
/* Delayed registration/unregisteration */
-static LIST_HEAD(net_todo_list);
+LIST_HEAD(net_todo_list);
DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
static void net_set_todo(struct net_device *dev)
{
list_add_tail(&dev->todo_list, &net_todo_list);
- dev_net(dev)->dev_unreg_count++;
+ atomic_inc(&dev_net(dev)->dev_unreg_count);
}
static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
@@ -9538,22 +9936,14 @@ void netif_tx_stop_all_queues(struct net_device *dev)
EXPORT_SYMBOL(netif_tx_stop_all_queues);
/**
- * register_netdevice - register a network device
- * @dev: device to register
+ * register_netdevice() - register a network device
+ * @dev: device to register
*
- * Take a completed network device structure and add it to the kernel
- * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
- * chain. 0 is returned on success. A negative errno code is returned
- * on a failure to set up the device, or if the name is a duplicate.
- *
- * Callers must hold the rtnl semaphore. You may want
- * register_netdev() instead of this.
- *
- * BUGS:
- * The locking appears insufficient to guarantee two parallel registers
- * will not get the same name.
+ * Take a prepared network device structure and make it externally accessible.
+ * A %NETDEV_REGISTER message is sent to the netdev notifier chain.
+ * Callers must hold the rtnl lock - you may want register_netdev()
+ * instead of this.
*/
-
int register_netdevice(struct net_device *dev)
{
int ret;
@@ -9659,11 +10049,11 @@ int register_netdevice(struct net_device *dev)
goto err_uninit;
ret = netdev_register_kobject(dev);
- if (ret) {
- dev->reg_state = NETREG_UNREGISTERED;
+ write_lock(&dev_base_lock);
+ dev->reg_state = ret ? NETREG_UNREGISTERED : NETREG_REGISTERED;
+ write_unlock(&dev_base_lock);
+ if (ret)
goto err_uninit;
- }
- dev->reg_state = NETREG_REGISTERED;
__netdev_update_features(dev);
@@ -9677,8 +10067,10 @@ int register_netdevice(struct net_device *dev)
linkwatch_init_dev(dev);
dev_init_scheduler(dev);
- dev_hold(dev);
+
+ dev_hold_track(dev, &dev->dev_registered_tracker, GFP_KERNEL);
list_netdevice(dev);
+
add_device_randomness(dev->dev_addr, dev->addr_len);
/* If the device has permanent device address, driver should
@@ -9807,8 +10199,8 @@ int netdev_unregister_timeout_secs __read_mostly = 10;
#define WAIT_REFS_MIN_MSECS 1
#define WAIT_REFS_MAX_MSECS 250
/**
- * netdev_wait_allrefs - wait until all references are gone.
- * @dev: target net_device
+ * netdev_wait_allrefs_any - wait until all references are gone.
+ * @list: list of net_devices to wait on
*
* This is called when unregistering network devices.
*
@@ -9818,37 +10210,42 @@ int netdev_unregister_timeout_secs __read_mostly = 10;
* We can get stuck here if buggy protocols don't correctly
* call dev_put.
*/
-static void netdev_wait_allrefs(struct net_device *dev)
+static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
{
unsigned long rebroadcast_time, warning_time;
- int wait = 0, refcnt;
-
- linkwatch_forget_dev(dev);
+ struct net_device *dev;
+ int wait = 0;
rebroadcast_time = warning_time = jiffies;
- refcnt = netdev_refcnt_read(dev);
- while (refcnt != 1) {
+ list_for_each_entry(dev, list, todo_list)
+ if (netdev_refcnt_read(dev) == 1)
+ return dev;
+
+ while (true) {
if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
rtnl_lock();
/* Rebroadcast unregister notification */
- call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+ list_for_each_entry(dev, list, todo_list)
+ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
__rtnl_unlock();
rcu_barrier();
rtnl_lock();
- if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
- &dev->state)) {
- /* We must not have linkwatch events
- * pending on unregister. If this
- * happens, we simply run the queue
- * unscheduled, resulting in a noop
- * for this device.
- */
- linkwatch_run_queue();
- }
+ list_for_each_entry(dev, list, todo_list)
+ if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
+ &dev->state)) {
+ /* We must not have linkwatch events
+ * pending on unregister. If this
+ * happens, we simply run the queue
+ * unscheduled, resulting in a noop
+ * for this device.
+ */
+ linkwatch_run_queue();
+ break;
+ }
__rtnl_unlock();
@@ -9863,14 +10260,18 @@ static void netdev_wait_allrefs(struct net_device *dev)
wait = min(wait << 1, WAIT_REFS_MAX_MSECS);
}
- refcnt = netdev_refcnt_read(dev);
+ list_for_each_entry(dev, list, todo_list)
+ if (netdev_refcnt_read(dev) == 1)
+ return dev;
- if (refcnt != 1 &&
- time_after(jiffies, warning_time +
+ if (time_after(jiffies, warning_time +
netdev_unregister_timeout_secs * HZ)) {
- pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
- dev->name, refcnt);
- ref_tracker_dir_print(&dev->refcnt_tracker, 10);
+ list_for_each_entry(dev, list, todo_list) {
+ pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
+ dev->name, netdev_refcnt_read(dev));
+ ref_tracker_dir_print(&dev->refcnt_tracker, 10);
+ }
+
warning_time = jiffies;
}
}
@@ -9902,6 +10303,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
*/
void netdev_run_todo(void)
{
+ struct net_device *dev, *tmp;
struct list_head list;
#ifdef CONFIG_LOCKDEP
struct list_head unlink_list;
@@ -9922,26 +10324,26 @@ void netdev_run_todo(void)
__rtnl_unlock();
-
/* Wait for rcu callbacks to finish before next phase */
if (!list_empty(&list))
rcu_barrier();
- while (!list_empty(&list)) {
- struct net_device *dev
- = list_first_entry(&list, struct net_device, todo_list);
- list_del(&dev->todo_list);
-
+ list_for_each_entry_safe(dev, tmp, &list, todo_list) {
if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
- pr_err("network todo '%s' but state %d\n",
- dev->name, dev->reg_state);
- dump_stack();
+ netdev_WARN(dev, "run_todo but not unregistering\n");
+ list_del(&dev->todo_list);
continue;
}
+ write_lock(&dev_base_lock);
dev->reg_state = NETREG_UNREGISTERED;
+ write_unlock(&dev_base_lock);
+ linkwatch_forget_dev(dev);
+ }
- netdev_wait_allrefs(dev);
+ while (!list_empty(&list)) {
+ dev = netdev_wait_allrefs_any(&list);
+ list_del(&dev->todo_list);
/* paranoia */
BUG_ON(netdev_refcnt_read(dev) != 1);
@@ -9957,11 +10359,8 @@ void netdev_run_todo(void)
if (dev->needs_free_netdev)
free_netdev(dev);
- /* Report a network device has been unregistered */
- rtnl_lock();
- dev_net(dev)->dev_unreg_count--;
- __rtnl_unlock();
- wake_up(&netdev_unregistering_wq);
+ if (atomic_dec_and_test(&dev_net(dev)->dev_unreg_count))
+ wake_up(&netdev_unregistering_wq);
/* Free network device */
kobject_put(&dev->dev.kobj);
@@ -9997,6 +10396,21 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
}
EXPORT_SYMBOL(netdev_stats_to_stats64);
+struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev)
+{
+ struct net_device_core_stats __percpu *p;
+
+ p = alloc_percpu_gfp(struct net_device_core_stats,
+ GFP_ATOMIC | __GFP_NOWARN);
+
+ if (p && cmpxchg(&dev->core_stats, NULL, p))
+ free_percpu(p);
+
+ /* This READ_ONCE() pairs with the cmpxchg() above */
+ return READ_ONCE(dev->core_stats);
+}
+EXPORT_SYMBOL(netdev_core_stats_alloc);
+
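The dev_core_stats_*_inc() callers used throughout this patch presumably resolve to small inline helpers in <linux/netdevice.h> built on top of this allocator; a sketch under that assumption:

static inline void dev_core_stats_rx_dropped_inc_sketch(struct net_device *dev)
{
	struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats);

	if (!p)
		p = netdev_core_stats_alloc(dev);
	if (p)
		this_cpu_inc(p->rx_dropped);
}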
/**
* dev_get_stats - get network device statistics
* @dev: device to get statistics from
@@ -10011,6 +10425,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
struct rtnl_link_stats64 *storage)
{
const struct net_device_ops *ops = dev->netdev_ops;
+ const struct net_device_core_stats __percpu *p;
if (ops->ndo_get_stats64) {
memset(storage, 0, sizeof(*storage));
@@ -10020,9 +10435,21 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
} else {
netdev_stats_to_stats64(storage, &dev->stats);
}
- storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped);
- storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped);
- storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler);
+
+ /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */
+ p = READ_ONCE(dev->core_stats);
+ if (p) {
+ const struct net_device_core_stats *core_stats;
+ int i;
+
+ for_each_possible_cpu(i) {
+ core_stats = per_cpu_ptr(p, i);
+ storage->rx_dropped += READ_ONCE(core_stats->rx_dropped);
+ storage->tx_dropped += READ_ONCE(core_stats->tx_dropped);
+ storage->rx_nohandler += READ_ONCE(core_stats->rx_nohandler);
+ storage->rx_otherhost_dropped += READ_ONCE(core_stats->rx_otherhost_dropped);
+ }
+ }
return storage;
}
EXPORT_SYMBOL(dev_get_stats);
@@ -10166,7 +10593,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->pcpu_refcnt = alloc_percpu(int);
if (!dev->pcpu_refcnt)
goto free_dev;
- dev_hold(dev);
+ __dev_hold(dev);
#else
refcount_set(&dev->dev_refcnt, 1);
#endif
@@ -10179,9 +10606,11 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev_net_set(dev, &init_net);
- dev->gso_max_size = GSO_MAX_SIZE;
+ dev->gso_max_size = GSO_LEGACY_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS;
- dev->gro_max_size = GRO_MAX_SIZE;
+ dev->gro_max_size = GRO_LEGACY_MAX_SIZE;
+ dev->tso_max_size = TSO_LEGACY_MAX_SIZE;
+ dev->tso_max_segs = TSO_MAX_SEGS;
dev->upper_level = 1;
dev->lower_level = 1;
#ifdef CONFIG_LOCKDEP
@@ -10284,6 +10713,8 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->pcpu_refcnt);
dev->pcpu_refcnt = NULL;
#endif
+ free_percpu(dev->core_stats);
+ dev->core_stats = NULL;
free_percpu(dev->xdp_bulkq);
dev->xdp_bulkq = NULL;
@@ -10387,9 +10818,10 @@ void unregister_netdevice_many(struct list_head *head)
list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
- unlist_netdevice(dev);
-
+ write_lock(&dev_base_lock);
+ unlist_netdevice(dev, false);
dev->reg_state = NETREG_UNREGISTERING;
+ write_unlock(&dev_base_lock);
}
flush_all_backlogs();
@@ -10403,6 +10835,8 @@ void unregister_netdevice_many(struct list_head *head)
dev_xdp_uninstall(dev);
+ netdev_offload_xstats_disable_all(dev);
+
/* Notify protocols, that we are about to destroy
* this device. They should clean all the things.
*/
@@ -10443,7 +10877,7 @@ void unregister_netdevice_many(struct list_head *head)
synchronize_net();
list_for_each_entry(dev, head, unreg_list) {
- dev_put(dev);
+ dev_put_track(dev, &dev->dev_registered_tracker);
net_set_todo(dev);
}
@@ -10534,7 +10968,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
dev_close(dev);
/* And unlink it from device chain */
- unlist_netdevice(dev);
+ unlist_netdevice(dev, true);
synchronize_net();
@@ -10668,11 +11102,11 @@ static int dev_cpu_dead(unsigned int oldcpu)
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->process_queue))) {
- netif_rx_ni(skb);
+ netif_rx(skb);
input_queue_head_incr(oldsd);
}
while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
- netif_rx_ni(skb);
+ netif_rx(skb);
input_queue_head_incr(oldsd);
}
@@ -10726,8 +11160,7 @@ static int __net_init netdev_init(struct net *net)
BUILD_BUG_ON(GRO_HASH_BUCKETS >
8 * sizeof_field(struct napi_struct, gro_bitmask));
- if (net != &init_net)
- INIT_LIST_HEAD(&net->dev_base_head);
+ INIT_LIST_HEAD(&net->dev_base_head);
net->dev_name_head = netdev_create_hash();
if (net->dev_name_head == NULL)
@@ -10843,14 +11276,14 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
.exit = netdev_exit,
};
-static void __net_exit default_device_exit(struct net *net)
+static void __net_exit default_device_exit_net(struct net *net)
{
struct net_device *dev, *aux;
/*
* Push all migratable network devices back to the
* initial network namespace
*/
- rtnl_lock();
+ ASSERT_RTNL();
for_each_netdev_safe(net, dev, aux) {
int err;
char fb_name[IFNAMSIZ];
@@ -10874,35 +11307,6 @@ static void __net_exit default_device_exit(struct net *net)
BUG();
}
}
- rtnl_unlock();
-}
-
-static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
-{
- /* Return with the rtnl_lock held when there are no network
- * devices unregistering in any network namespace in net_list.
- */
- struct net *net;
- bool unregistering;
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
-
- add_wait_queue(&netdev_unregistering_wq, &wait);
- for (;;) {
- unregistering = false;
- rtnl_lock();
- list_for_each_entry(net, net_list, exit_list) {
- if (net->dev_unreg_count > 0) {
- unregistering = true;
- break;
- }
- }
- if (!unregistering)
- break;
- __rtnl_unlock();
-
- wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
- }
- remove_wait_queue(&netdev_unregistering_wq, &wait);
}
static void __net_exit default_device_exit_batch(struct list_head *net_list)
@@ -10916,18 +11320,12 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
struct net *net;
LIST_HEAD(dev_kill_list);
- /* To prevent network device cleanup code from dereferencing
- * loopback devices or network devices that have been freed
- * wait here for all pending unregistrations to complete,
- * before unregistring the loopback device and allowing the
- * network namespace be freed.
- *
- * The netdev todo list containing all network devices
- * unregistrations that happen in default_device_exit_batch
- * will run in the rtnl_unlock() at the end of
- * default_device_exit_batch.
- */
- rtnl_lock_unregistering(net_list);
+ rtnl_lock();
+ list_for_each_entry(net, net_list, exit_list) {
+ default_device_exit_net(net);
+ cond_resched();
+ }
+
list_for_each_entry(net, net_list, exit_list) {
for_each_netdev_reverse(net, dev) {
if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
@@ -10941,7 +11339,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
}
static struct pernet_operations __net_initdata default_device_ops = {
- .exit = default_device_exit,
.exit_batch = default_device_exit_batch,
};
@@ -10996,6 +11393,8 @@ static int __init net_dev_init(void)
INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
sd->cpu = i;
#endif
+ INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
+ spin_lock_init(&sd->defer_lock);
init_gro_hash(&sd->backlog);
sd->backlog.poll = process_backlog;
diff --git a/net/core/dev.h b/net/core/dev.h
new file mode 100644
index 000000000000..cbb8a925175a
--- /dev/null
+++ b/net/core/dev.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_CORE_DEV_H
+#define _NET_CORE_DEV_H
+
+#include <linux/types.h>
+
+struct net;
+struct net_device;
+struct netdev_bpf;
+struct netdev_phys_item_id;
+struct netlink_ext_ack;
+
+/* Random bits of netdevice that don't need to be exposed */
+#define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */
+struct sd_flow_limit {
+ u64 count;
+ unsigned int num_buckets;
+ unsigned int history_head;
+ u16 history[FLOW_LIMIT_HISTORY];
+ u8 buckets[];
+};
+
+extern int netdev_flow_limit_table_len;
+
+#ifdef CONFIG_PROC_FS
+int __init dev_proc_init(void);
+#else
+#define dev_proc_init() 0
+#endif
+
+void linkwatch_init_dev(struct net_device *dev);
+void linkwatch_forget_dev(struct net_device *dev);
+void linkwatch_run_queue(void);
+
+void dev_addr_flush(struct net_device *dev);
+int dev_addr_init(struct net_device *dev);
+void dev_addr_check(struct net_device *dev);
+
+/* sysctls not referred to from outside net/core/ */
+extern int netdev_budget;
+extern unsigned int netdev_budget_usecs;
+extern unsigned int sysctl_skb_defer_max;
+extern int netdev_tstamp_prequeue;
+extern int netdev_unregister_timeout_secs;
+extern int weight_p;
+extern int dev_weight_rx_bias;
+extern int dev_weight_tx_bias;
+
+/* rtnl helpers */
+extern struct list_head net_todo_list;
+void netdev_run_todo(void);
+
+/* netdev management, shared between various uAPI entry points */
+struct netdev_name_node {
+ struct hlist_node hlist;
+ struct list_head list;
+ struct net_device *dev;
+ const char *name;
+};
+
+int netdev_get_name(struct net *net, char *name, int ifindex);
+int dev_change_name(struct net_device *dev, const char *newname);
+
+int netdev_name_node_alt_create(struct net_device *dev, const char *name);
+int netdev_name_node_alt_destroy(struct net_device *dev, const char *name);
+
+int dev_validate_mtu(struct net_device *dev, int mtu,
+ struct netlink_ext_ack *extack);
+int dev_set_mtu_ext(struct net_device *dev, int mtu,
+ struct netlink_ext_ack *extack);
+
+int dev_get_phys_port_id(struct net_device *dev,
+ struct netdev_phys_item_id *ppid);
+int dev_get_phys_port_name(struct net_device *dev,
+ char *name, size_t len);
+
+int dev_change_proto_down(struct net_device *dev, bool proto_down);
+void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
+ u32 value);
+
+typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
+int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
+ int fd, int expected_fd, u32 flags);
+
+int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len);
+void dev_set_group(struct net_device *dev, int new_group);
+int dev_change_carrier(struct net_device *dev, bool new_carrier);
+
+void __dev_set_rx_mode(struct net_device *dev);
+
+static inline void netif_set_gso_max_size(struct net_device *dev,
+ unsigned int size)
+{
+ /* dev->gso_max_size is read locklessly from sk_setup_caps() */
+ WRITE_ONCE(dev->gso_max_size, size);
+}
+
+static inline void netif_set_gso_max_segs(struct net_device *dev,
+ unsigned int segs)
+{
+ /* dev->gso_max_segs is read locklessly from sk_setup_caps() */
+ WRITE_ONCE(dev->gso_max_segs, segs);
+}
+
+static inline void netif_set_gro_max_size(struct net_device *dev,
+ unsigned int size)
+{
+ /* This pairs with the READ_ONCE() in skb_gro_receive() */
+ WRITE_ONCE(dev->gro_max_size, size);
+}
+
+#endif
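
The three setters above publish limits with WRITE_ONCE() because readers such as sk_setup_caps() and skb_gro_receive() sample the fields without taking any lock. A minimal sketch of that publish/consume pairing, with hypothetical names and assuming kernel context for the READ_ONCE()/WRITE_ONCE() macros (this is illustration, not code from the patch):

#include <linux/compiler.h>

struct example_dev {
	unsigned int gso_max_size;
};

/* control path (e.g. under RTNL): publish the new limit */
static void example_set_gso_max_size(struct example_dev *d, unsigned int size)
{
	WRITE_ONCE(d->gso_max_size, size);
}

/* data path: take a single, tear-free snapshot without holding a lock */
static unsigned int example_gso_limit(const struct example_dev *d)
{
	return READ_ONCE(d->gso_max_size);
}

Every lockless reader of dev->gso_max_size, dev->gso_max_segs and dev->gro_max_size is expected to use the READ_ONCE() side of this pairing, which is what the comments in the helpers above refer to.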
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index bead38ca50bd..baa63dee2829 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -12,6 +12,8 @@
#include <linux/export.h>
#include <linux/list.h>
+#include "dev.h"
+
/*
* General list handling functions
*/
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 1b807d119da5..4f6be442ae7e 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -10,6 +10,8 @@
#include <net/dsa.h>
#include <net/wext.h>
+#include "dev.h"
+
/*
* Map an interface index to its name (SIOCGIFNAME)
*/
diff --git a/net/core/devlink.c b/net/core/devlink.c
index fcd9f6d85cf1..5cc88490f18f 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -54,6 +54,8 @@ struct devlink {
struct list_head trap_list;
struct list_head trap_group_list;
struct list_head trap_policer_list;
+ struct list_head linecard_list;
+ struct mutex linecards_lock; /* protects linecard_list */
const struct devlink_ops *ops;
u64 features;
struct xarray snapshot_ids;
@@ -70,6 +72,23 @@ struct devlink {
char priv[] __aligned(NETDEV_ALIGN);
};
+struct devlink_linecard_ops;
+struct devlink_linecard_type;
+
+struct devlink_linecard {
+ struct list_head list;
+ struct devlink *devlink;
+ unsigned int index;
+ refcount_t refcount;
+ const struct devlink_linecard_ops *ops;
+ void *priv;
+ enum devlink_linecard_state state;
+ struct mutex state_lock; /* Protects state */
+ const char *type;
+ struct devlink_linecard_type *types;
+ unsigned int types_count;
+};
+
/**
* struct devlink_resource - devlink resource
* @name: name of the resource
@@ -225,6 +244,33 @@ struct devlink *__must_check devlink_try_get(struct devlink *devlink)
return NULL;
}
+void devl_assert_locked(struct devlink *devlink)
+{
+ lockdep_assert_held(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_assert_locked);
+
+#ifdef CONFIG_LOCKDEP
+/* For use in conjunction with LOCKDEP only e.g. rcu_dereference_protected() */
+bool devl_lock_is_held(struct devlink *devlink)
+{
+ return lockdep_is_held(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_lock_is_held);
+#endif
+
+void devl_lock(struct devlink *devlink)
+{
+ mutex_lock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_lock);
+
+void devl_unlock(struct devlink *devlink)
+{
+ mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_unlock);
+
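
devl_lock(), devl_unlock() and devl_assert_locked() expose the per-instance mutex so a driver can hold it across a sequence of devl_* calls, while functions that rely on the caller holding it can document that with an assertion. A hedged sketch of the convention, using a hypothetical driver helper and the <net/devlink.h> declarations added by this series:

#include <net/devlink.h>

/* Hypothetical helper that must run under the devlink instance lock. */
static void example_refresh_objects(struct devlink *devlink)
{
	devl_assert_locked(devlink);	/* caller is expected to hold devl_lock() */
	/* ... walk objects protected by devlink->lock ... */
}

static void example_caller(struct devlink *devlink)
{
	devl_lock(devlink);
	example_refresh_objects(devlink);
	devl_unlock(devlink);
}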
static struct devlink *devlink_get_from_attrs(struct net *net,
struct nlattr **attrs)
{
@@ -370,6 +416,58 @@ devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
return ERR_PTR(-EINVAL);
}
+static struct devlink_linecard *
+devlink_linecard_get_by_index(struct devlink *devlink,
+ unsigned int linecard_index)
+{
+ struct devlink_linecard *devlink_linecard;
+
+ list_for_each_entry(devlink_linecard, &devlink->linecard_list, list) {
+ if (devlink_linecard->index == linecard_index)
+ return devlink_linecard;
+ }
+ return NULL;
+}
+
+static bool devlink_linecard_index_exists(struct devlink *devlink,
+ unsigned int linecard_index)
+{
+ return devlink_linecard_get_by_index(devlink, linecard_index);
+}
+
+static struct devlink_linecard *
+devlink_linecard_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
+{
+ if (attrs[DEVLINK_ATTR_LINECARD_INDEX]) {
+ u32 linecard_index = nla_get_u32(attrs[DEVLINK_ATTR_LINECARD_INDEX]);
+ struct devlink_linecard *linecard;
+
+ mutex_lock(&devlink->linecards_lock);
+ linecard = devlink_linecard_get_by_index(devlink, linecard_index);
+ if (linecard)
+ refcount_inc(&linecard->refcount);
+ mutex_unlock(&devlink->linecards_lock);
+ if (!linecard)
+ return ERR_PTR(-ENODEV);
+ return linecard;
+ }
+ return ERR_PTR(-EINVAL);
+}
+
+static struct devlink_linecard *
+devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+ return devlink_linecard_get_from_attrs(devlink, info->attrs);
+}
+
+static void devlink_linecard_put(struct devlink_linecard *linecard)
+{
+ if (refcount_dec_and_test(&linecard->refcount)) {
+ mutex_destroy(&linecard->state_lock);
+ kfree(linecard);
+ }
+}
+
struct devlink_sb {
struct list_head list;
unsigned int index;
@@ -590,16 +688,18 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
#define DEVLINK_NL_FLAG_NEED_RATE BIT(2)
#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3)
+#define DEVLINK_NL_FLAG_NEED_LINECARD BIT(4)
/* The per devlink instance lock is taken by default in the pre-doit
* operation, yet several commands do not require this. The global
* devlink lock is taken and protects from disruption by user-calls.
*/
-#define DEVLINK_NL_FLAG_NO_LOCK BIT(4)
+#define DEVLINK_NL_FLAG_NO_LOCK BIT(5)
static int devlink_nl_pre_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
{
+ struct devlink_linecard *linecard;
struct devlink_port *devlink_port;
struct devlink *devlink;
int err;
@@ -642,6 +742,13 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
goto unlock;
}
info->user_ptr[1] = rate_node;
+ } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) {
+ linecard = devlink_linecard_get_from_info(devlink, info);
+ if (IS_ERR(linecard)) {
+ err = PTR_ERR(linecard);
+ goto unlock;
+ }
+ info->user_ptr[1] = linecard;
}
return 0;
@@ -656,9 +763,14 @@ unlock:
static void devlink_nl_post_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
{
+ struct devlink_linecard *linecard;
struct devlink *devlink;
devlink = info->user_ptr[0];
+ if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) {
+ linecard = info->user_ptr[1];
+ devlink_linecard_put(linecard);
+ }
if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
mutex_unlock(&devlink->lock);
devlink_put(devlink);
@@ -1131,6 +1243,10 @@ static int devlink_nl_port_fill(struct sk_buff *msg,
goto nla_put_failure;
if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack))
goto nla_put_failure;
+ if (devlink_port->linecard &&
+ nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX,
+ devlink_port->linecard->index))
+ goto nla_put_failure;
genlmsg_end(msg, hdr);
return 0;
@@ -1541,35 +1657,20 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
return 0;
}
-static int devlink_port_split(struct devlink *devlink, u32 port_index,
- u32 count, struct netlink_ext_ack *extack)
-
-{
- if (devlink->ops->port_split)
- return devlink->ops->port_split(devlink, port_index, count,
- extack);
- return -EOPNOTSUPP;
-}
-
static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
struct genl_info *info)
{
+ struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = info->user_ptr[0];
- struct devlink_port *devlink_port;
- u32 port_index;
u32 count;
- if (!info->attrs[DEVLINK_ATTR_PORT_INDEX] ||
- !info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT])
+ if (!info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT])
return -EINVAL;
+ if (!devlink->ops->port_split)
+ return -EOPNOTSUPP;
- devlink_port = devlink_port_get_from_info(devlink, info);
- port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]);
- if (IS_ERR(devlink_port))
- return -EINVAL;
-
if (!devlink_port->attrs.splittable) {
/* Split ports cannot be split. */
if (devlink_port->attrs.split)
@@ -1584,29 +1685,19 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
return -EINVAL;
}
- return devlink_port_split(devlink, port_index, count, info->extack);
-}
-
-static int devlink_port_unsplit(struct devlink *devlink, u32 port_index,
- struct netlink_ext_ack *extack)
-
-{
- if (devlink->ops->port_unsplit)
- return devlink->ops->port_unsplit(devlink, port_index, extack);
- return -EOPNOTSUPP;
+ return devlink->ops->port_split(devlink, devlink_port, count,
+ info->extack);
}
static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
struct genl_info *info)
{
+ struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = info->user_ptr[0];
- u32 port_index;
-
- if (!info->attrs[DEVLINK_ATTR_PORT_INDEX])
- return -EINVAL;
- port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
- return devlink_port_unsplit(devlink, port_index, info->extack);
+ if (!devlink->ops->port_unsplit)
+ return -EOPNOTSUPP;
+ return devlink->ops->port_unsplit(devlink, devlink_port, info->extack);
}
static int devlink_port_new_notifiy(struct devlink *devlink,
@@ -1962,6 +2053,322 @@ static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb,
return err;
}
+struct devlink_linecard_type {
+ const char *type;
+ const void *priv;
+};
+
+static int devlink_nl_linecard_fill(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct devlink_linecard *linecard,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink_linecard_type *linecard_type;
+ struct nlattr *attr;
+ void *hdr;
+ int i;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, linecard->index))
+ goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_LINECARD_STATE, linecard->state))
+ goto nla_put_failure;
+ if (linecard->type &&
+ nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE, linecard->type))
+ goto nla_put_failure;
+
+ if (linecard->types_count) {
+ attr = nla_nest_start(msg,
+ DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES);
+ if (!attr)
+ goto nla_put_failure;
+ for (i = 0; i < linecard->types_count; i++) {
+ linecard_type = &linecard->types[i];
+ if (nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE,
+ linecard_type->type)) {
+ nla_nest_cancel(msg, attr);
+ goto nla_put_failure;
+ }
+ }
+ nla_nest_end(msg, attr);
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void devlink_linecard_notify(struct devlink_linecard *linecard,
+ enum devlink_command cmd)
+{
+ struct devlink *devlink = linecard->devlink;
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_LINECARD_NEW &&
+ cmd != DEVLINK_CMD_LINECARD_DEL);
+
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_linecard_fill(msg, devlink, linecard, cmd, 0, 0, 0,
+ NULL);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static int devlink_nl_cmd_linecard_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_linecard *linecard = info->user_ptr[1];
+ struct devlink *devlink = linecard->devlink;
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ mutex_lock(&linecard->state_lock);
+ err = devlink_nl_linecard_fill(msg, devlink, linecard,
+ DEVLINK_CMD_LINECARD_NEW,
+ info->snd_portid, info->snd_seq, 0,
+ info->extack);
+ mutex_unlock(&linecard->state_lock);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct devlink_linecard *linecard;
+ struct devlink *devlink;
+ int start = cb->args[0];
+ unsigned long index;
+ int idx = 0;
+ int err;
+
+ mutex_lock(&devlink_mutex);
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
+ continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
+ mutex_lock(&devlink->linecards_lock);
+ list_for_each_entry(linecard, &devlink->linecard_list, list) {
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+ mutex_lock(&linecard->state_lock);
+ err = devlink_nl_linecard_fill(msg, devlink, linecard,
+ DEVLINK_CMD_LINECARD_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ cb->extack);
+ mutex_unlock(&linecard->state_lock);
+ if (err) {
+ mutex_unlock(&devlink->linecards_lock);
+ devlink_put(devlink);
+ goto out;
+ }
+ idx++;
+ }
+ mutex_unlock(&devlink->linecards_lock);
+retry:
+ devlink_put(devlink);
+ }
+out:
+ mutex_unlock(&devlink_mutex);
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static struct devlink_linecard_type *
+devlink_linecard_type_lookup(struct devlink_linecard *linecard,
+ const char *type)
+{
+ struct devlink_linecard_type *linecard_type;
+ int i;
+
+ for (i = 0; i < linecard->types_count; i++) {
+ linecard_type = &linecard->types[i];
+ if (!strcmp(type, linecard_type->type))
+ return linecard_type;
+ }
+ return NULL;
+}
+
+static int devlink_linecard_type_set(struct devlink_linecard *linecard,
+ const char *type,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_linecard_ops *ops = linecard->ops;
+ struct devlink_linecard_type *linecard_type;
+ int err;
+
+ mutex_lock(&linecard->state_lock);
+ if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
+ NL_SET_ERR_MSG_MOD(extack, "Line card is currently being provisioned");
+ err = -EBUSY;
+ goto out;
+ }
+ if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
+ NL_SET_ERR_MSG_MOD(extack, "Line card is currently being unprovisioned");
+ err = -EBUSY;
+ goto out;
+ }
+
+ linecard_type = devlink_linecard_type_lookup(linecard, type);
+ if (!linecard_type) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported line card type provided");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (linecard->state != DEVLINK_LINECARD_STATE_UNPROVISIONED &&
+ linecard->state != DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
+ NL_SET_ERR_MSG_MOD(extack, "Line card already provisioned");
+ err = -EBUSY;
+ /* Check whether the line card is already provisioned in
+ * the way the user asks. If so, make the operation
+ * return success.
+ */
+ if (ops->same_provision &&
+ ops->same_provision(linecard, linecard->priv,
+ linecard_type->type,
+ linecard_type->priv))
+ err = 0;
+ goto out;
+ }
+
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING;
+ linecard->type = linecard_type->type;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ err = ops->provision(linecard, linecard->priv, linecard_type->type,
+ linecard_type->priv, extack);
+ if (err) {
+ /* Provisioning failed. Assume the linecard is unprovisioned
+ * for future operations.
+ */
+ mutex_lock(&linecard->state_lock);
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ }
+ return err;
+
+out:
+ mutex_unlock(&linecard->state_lock);
+ return err;
+}
+
+static int devlink_linecard_type_unset(struct devlink_linecard *linecard,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ mutex_lock(&linecard->state_lock);
+ if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
+ NL_SET_ERR_MSG_MOD(extack, "Line card is currently being provisioned");
+ err = -EBUSY;
+ goto out;
+ }
+ if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
+ NL_SET_ERR_MSG_MOD(extack, "Line card is currently being unprovisioned");
+ err = -EBUSY;
+ goto out;
+ }
+ if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ err = 0;
+ goto out;
+ }
+
+ if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONED) {
+ NL_SET_ERR_MSG_MOD(extack, "Line card is not provisioned");
+ err = 0;
+ goto out;
+ }
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONING;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ err = linecard->ops->unprovision(linecard, linecard->priv,
+ extack);
+ if (err) {
+ /* Unprovisioning failed. Assume the linecard is unprovisioned
+ * for future operations.
+ */
+ mutex_lock(&linecard->state_lock);
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ }
+ return err;
+
+out:
+ mutex_unlock(&linecard->state_lock);
+ return err;
+}
+
+static int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_linecard *linecard = info->user_ptr[1];
+ struct netlink_ext_ack *extack = info->extack;
+ int err;
+
+ if (info->attrs[DEVLINK_ATTR_LINECARD_TYPE]) {
+ const char *type;
+
+ type = nla_data(info->attrs[DEVLINK_ATTR_LINECARD_TYPE]);
+ if (strcmp(type, "")) {
+ err = devlink_linecard_type_set(linecard, type, extack);
+ if (err)
+ return err;
+ } else {
+ err = devlink_linecard_type_unset(linecard, extack);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
struct devlink_sb *devlink_sb,
enum devlink_command cmd, u32 portid,
@@ -2866,15 +3273,11 @@ static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
{
struct devlink_rate *devlink_rate;
- /* Take the lock to sync with devlink_rate_nodes_destroy() */
- mutex_lock(&devlink->lock);
list_for_each_entry(devlink_rate, &devlink->rate_list, list)
if (devlink_rate_is_node(devlink_rate)) {
- mutex_unlock(&devlink->lock);
NL_SET_ERR_MSG_MOD(extack, "Rate node(s) exists.");
return -EBUSY;
}
- mutex_unlock(&devlink->lock);
return 0;
}
@@ -8591,6 +8994,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 },
[DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING },
};
static const struct genl_small_ops devlink_nl_ops[] = {
@@ -8645,14 +9050,14 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_port_split_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
},
{
.cmd = DEVLINK_CMD_PORT_UNSPLIT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_port_unsplit_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
},
{
.cmd = DEVLINK_CMD_PORT_NEW,
@@ -8667,6 +9072,19 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
+ .cmd = DEVLINK_CMD_LINECARD_GET,
+ .doit = devlink_nl_cmd_linecard_get_doit,
+ .dumpit = devlink_nl_cmd_linecard_get_dumpit,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_LINECARD_SET,
+ .doit = devlink_nl_cmd_linecard_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD,
+ },
+ {
.cmd = DEVLINK_CMD_SB_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_get_doit,
@@ -8733,14 +9151,12 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_eswitch_get_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_ESWITCH_SET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_eswitch_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
@@ -9047,6 +9463,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
write_pnet(&devlink->_net, net);
INIT_LIST_HEAD(&devlink->port_list);
INIT_LIST_HEAD(&devlink->rate_list);
+ INIT_LIST_HEAD(&devlink->linecard_list);
INIT_LIST_HEAD(&devlink->sb_list);
INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
INIT_LIST_HEAD(&devlink->resource_list);
@@ -9058,6 +9475,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
INIT_LIST_HEAD(&devlink->trap_policer_list);
mutex_init(&devlink->lock);
mutex_init(&devlink->reporters_lock);
+ mutex_init(&devlink->linecards_lock);
refcount_set(&devlink->refcount, 1);
init_completion(&devlink->comp);
@@ -9084,10 +9502,14 @@ static void devlink_notify_register(struct devlink *devlink)
struct devlink_param_item *param_item;
struct devlink_trap_item *trap_item;
struct devlink_port *devlink_port;
+ struct devlink_linecard *linecard;
struct devlink_rate *rate_node;
struct devlink_region *region;
devlink_notify(devlink, DEVLINK_CMD_NEW);
+ list_for_each_entry(linecard, &devlink->linecard_list, list)
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+
list_for_each_entry(devlink_port, &devlink->port_list, list)
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
@@ -9195,6 +9617,7 @@ void devlink_free(struct devlink *devlink)
{
ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+ mutex_destroy(&devlink->linecards_lock);
mutex_destroy(&devlink->reporters_lock);
mutex_destroy(&devlink->lock);
WARN_ON(!list_empty(&devlink->trap_policer_list));
@@ -9207,6 +9630,7 @@ void devlink_free(struct devlink *devlink)
WARN_ON(!list_empty(&devlink->dpipe_table_list));
WARN_ON(!list_empty(&devlink->sb_list));
WARN_ON(!list_empty(&devlink->rate_list));
+ WARN_ON(!list_empty(&devlink->linecard_list));
WARN_ON(!list_empty(&devlink->port_list));
xa_destroy(&devlink->snapshot_ids);
@@ -9249,6 +9673,32 @@ static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port)
cancel_delayed_work_sync(&devlink_port->type_warn_dw);
}
+int devl_port_register(struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ unsigned int port_index)
+{
+ lockdep_assert_held(&devlink->lock);
+
+ if (devlink_port_index_exists(devlink, port_index))
+ return -EEXIST;
+
+ WARN_ON(devlink_port->devlink);
+ devlink_port->devlink = devlink;
+ devlink_port->index = port_index;
+ spin_lock_init(&devlink_port->type_lock);
+ INIT_LIST_HEAD(&devlink_port->reporter_list);
+ mutex_init(&devlink_port->reporters_lock);
+ list_add_tail(&devlink_port->list, &devlink->port_list);
+ INIT_LIST_HEAD(&devlink_port->param_list);
+ INIT_LIST_HEAD(&devlink_port->region_list);
+
+ INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
+ devlink_port_type_warn_schedule(devlink_port);
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_port_register);
+
/**
* devlink_port_register - Register devlink port
*
@@ -9266,29 +9716,28 @@ int devlink_port_register(struct devlink *devlink,
struct devlink_port *devlink_port,
unsigned int port_index)
{
- mutex_lock(&devlink->lock);
- if (devlink_port_index_exists(devlink, port_index)) {
- mutex_unlock(&devlink->lock);
- return -EEXIST;
- }
+ int err;
- WARN_ON(devlink_port->devlink);
- devlink_port->devlink = devlink;
- devlink_port->index = port_index;
- spin_lock_init(&devlink_port->type_lock);
- INIT_LIST_HEAD(&devlink_port->reporter_list);
- mutex_init(&devlink_port->reporters_lock);
- list_add_tail(&devlink_port->list, &devlink->port_list);
- INIT_LIST_HEAD(&devlink_port->param_list);
- INIT_LIST_HEAD(&devlink_port->region_list);
+ mutex_lock(&devlink->lock);
+ err = devl_port_register(devlink, devlink_port, port_index);
mutex_unlock(&devlink->lock);
- INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
- devlink_port_type_warn_schedule(devlink_port);
- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
- return 0;
+ return err;
}
EXPORT_SYMBOL_GPL(devlink_port_register);
+void devl_port_unregister(struct devlink_port *devlink_port)
+{
+ lockdep_assert_held(&devlink_port->devlink->lock);
+
+ devlink_port_type_warn_cancel(devlink_port);
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
+ list_del(&devlink_port->list);
+ WARN_ON(!list_empty(&devlink_port->reporter_list));
+ WARN_ON(!list_empty(&devlink_port->region_list));
+ mutex_destroy(&devlink_port->reporters_lock);
+}
+EXPORT_SYMBOL_GPL(devl_port_unregister);
+
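
devl_port_register() and devl_port_unregister() are the locked variants of the existing helpers: instead of taking devlink->lock internally, they assert that the caller already holds it. A hypothetical driver might pair them with devl_lock()/devl_unlock() roughly as below (a sketch that assumes a driver-allocated port structure, not a verified driver):

#include <net/devlink.h>

static int example_add_port(struct devlink *devlink,
			    struct devlink_port *port,
			    unsigned int index)
{
	int err;

	devl_lock(devlink);
	err = devl_port_register(devlink, port, index);
	devl_unlock(devlink);
	return err;
}

static void example_del_port(struct devlink *devlink,
			     struct devlink_port *port)
{
	devl_lock(devlink);
	devl_port_unregister(port);
	devl_unlock(devlink);
}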
/**
* devlink_port_unregister - Unregister devlink port
*
@@ -9298,14 +9747,9 @@ void devlink_port_unregister(struct devlink_port *devlink_port)
{
struct devlink *devlink = devlink_port->devlink;
- devlink_port_type_warn_cancel(devlink_port);
- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
mutex_lock(&devlink->lock);
- list_del(&devlink_port->list);
+ devl_port_unregister(devlink_port);
mutex_unlock(&devlink->lock);
- WARN_ON(!list_empty(&devlink_port->reporter_list));
- WARN_ON(!list_empty(&devlink_port->region_list));
- mutex_destroy(&devlink_port->reporters_lock);
}
EXPORT_SYMBOL_GPL(devlink_port_unregister);
@@ -9526,30 +9970,26 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro
EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
/**
- * devlink_rate_leaf_create - create devlink rate leaf
- *
+ * devl_rate_leaf_create - create devlink rate leaf
* @devlink_port: devlink port object to create rate object on
* @priv: driver private data
*
* Create devlink rate object of type leaf on provided @devlink_port.
- * Throws call trace if @devlink_port already has a devlink rate object.
- *
- * Context: Takes and release devlink->lock <mutex>.
- *
- * Return: -ENOMEM if failed to allocate rate object, 0 otherwise.
*/
-int
-devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
+int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
{
struct devlink *devlink = devlink_port->devlink;
struct devlink_rate *devlink_rate;
+ devl_assert_locked(devlink_port->devlink);
+
+ if (WARN_ON(devlink_port->devlink_rate))
+ return -EBUSY;
+
devlink_rate = kzalloc(sizeof(*devlink_rate), GFP_KERNEL);
if (!devlink_rate)
return -ENOMEM;
- mutex_lock(&devlink->lock);
- WARN_ON(devlink_port->devlink_rate);
devlink_rate->type = DEVLINK_RATE_TYPE_LEAF;
devlink_rate->devlink = devlink;
devlink_rate->devlink_port = devlink_port;
@@ -9557,12 +9997,42 @@ devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
list_add_tail(&devlink_rate->list, &devlink->rate_list);
devlink_port->devlink_rate = devlink_rate;
devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
- mutex_unlock(&devlink->lock);
return 0;
}
+EXPORT_SYMBOL_GPL(devl_rate_leaf_create);
+
+int
+devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
+{
+ struct devlink *devlink = devlink_port->devlink;
+ int ret;
+
+ mutex_lock(&devlink->lock);
+ ret = devl_rate_leaf_create(devlink_port, priv);
+ mutex_unlock(&devlink->lock);
+
+ return ret;
+}
EXPORT_SYMBOL_GPL(devlink_rate_leaf_create);
+void devl_rate_leaf_destroy(struct devlink_port *devlink_port)
+{
+ struct devlink_rate *devlink_rate = devlink_port->devlink_rate;
+
+ devl_assert_locked(devlink_port->devlink);
+ if (!devlink_rate)
+ return;
+
+ devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
+ if (devlink_rate->parent)
+ refcount_dec(&devlink_rate->parent->refcnt);
+ list_del(&devlink_rate->list);
+ devlink_port->devlink_rate = NULL;
+ kfree(devlink_rate);
+}
+EXPORT_SYMBOL_GPL(devl_rate_leaf_destroy);
+
/**
* devlink_rate_leaf_destroy - destroy devlink rate leaf
*
@@ -9579,32 +10049,25 @@ void devlink_rate_leaf_destroy(struct devlink_port *devlink_port)
return;
mutex_lock(&devlink->lock);
- devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
- if (devlink_rate->parent)
- refcount_dec(&devlink_rate->parent->refcnt);
- list_del(&devlink_rate->list);
- devlink_port->devlink_rate = NULL;
+ devl_rate_leaf_destroy(devlink_port);
mutex_unlock(&devlink->lock);
- kfree(devlink_rate);
}
EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy);
/**
- * devlink_rate_nodes_destroy - destroy all devlink rate nodes on device
- *
+ * devl_rate_nodes_destroy - destroy all devlink rate nodes on device
* @devlink: devlink instance
*
* Unset parent for all rate objects and destroy all rate nodes
* on specified device.
- *
- * Context: Takes and release devlink->lock <mutex>.
*/
-void devlink_rate_nodes_destroy(struct devlink *devlink)
+void devl_rate_nodes_destroy(struct devlink *devlink)
{
static struct devlink_rate *devlink_rate, *tmp;
const struct devlink_ops *ops = devlink->ops;
- mutex_lock(&devlink->lock);
+ devl_assert_locked(devlink);
+
list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
if (!devlink_rate->parent)
continue;
@@ -9625,10 +10088,42 @@ void devlink_rate_nodes_destroy(struct devlink *devlink)
kfree(devlink_rate);
}
}
+}
+EXPORT_SYMBOL_GPL(devl_rate_nodes_destroy);
+
+/**
+ * devlink_rate_nodes_destroy - destroy all devlink rate nodes on device
+ *
+ * @devlink: devlink instance
+ *
+ * Unset parent for all rate objects and destroy all rate nodes
+ * on specified device.
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+void devlink_rate_nodes_destroy(struct devlink *devlink)
+{
+ mutex_lock(&devlink->lock);
+ devl_rate_nodes_destroy(devlink);
mutex_unlock(&devlink->lock);
}
EXPORT_SYMBOL_GPL(devlink_rate_nodes_destroy);
+/**
+ * devlink_port_linecard_set - Link port with a linecard
+ *
+ * @devlink_port: devlink port
+ * @linecard: devlink linecard
+ */
+void devlink_port_linecard_set(struct devlink_port *devlink_port,
+ struct devlink_linecard *linecard)
+{
+ if (WARN_ON(devlink_port->devlink))
+ return;
+ devlink_port->linecard = linecard;
+}
+EXPORT_SYMBOL_GPL(devlink_port_linecard_set);
+
static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
char *name, size_t len)
{
@@ -9640,7 +10135,12 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
switch (attrs->flavour) {
case DEVLINK_PORT_FLAVOUR_PHYSICAL:
- n = snprintf(name, len, "p%u", attrs->phys.port_number);
+ if (devlink_port->linecard)
+ n = snprintf(name, len, "l%u",
+ devlink_port->linecard->index);
+ if (n < len)
+ n += snprintf(name + n, len - n, "p%u",
+ attrs->phys.port_number);
if (n < len && attrs->split)
n += snprintf(name + n, len - n, "s%u",
attrs->phys.split_subport_number);
@@ -9695,6 +10195,207 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
return 0;
}
+static int devlink_linecard_types_init(struct devlink_linecard *linecard)
+{
+ struct devlink_linecard_type *linecard_type;
+ unsigned int count;
+ int i;
+
+ count = linecard->ops->types_count(linecard, linecard->priv);
+ linecard->types = kmalloc_array(count, sizeof(*linecard_type),
+ GFP_KERNEL);
+ if (!linecard->types)
+ return -ENOMEM;
+ linecard->types_count = count;
+
+ for (i = 0; i < count; i++) {
+ linecard_type = &linecard->types[i];
+ linecard->ops->types_get(linecard, linecard->priv, i,
+ &linecard_type->type,
+ &linecard_type->priv);
+ }
+ return 0;
+}
+
+static void devlink_linecard_types_fini(struct devlink_linecard *linecard)
+{
+ kfree(linecard->types);
+}
+
+/**
+ * devlink_linecard_create - Create devlink linecard
+ *
+ * @devlink: devlink
+ * @linecard_index: driver-specific numerical identifier of the linecard
+ * @ops: linecards ops
+ * @priv: user priv pointer
+ *
+ * Create devlink linecard instance with provided linecard index.
+ * Caller can use any indexing, even hw-related one.
+ *
+ * Return: Line card structure or an ERR_PTR() encoded error code.
+ */
+struct devlink_linecard *
+devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index,
+ const struct devlink_linecard_ops *ops, void *priv)
+{
+ struct devlink_linecard *linecard;
+ int err;
+
+ if (WARN_ON(!ops || !ops->provision || !ops->unprovision ||
+ !ops->types_count || !ops->types_get))
+ return ERR_PTR(-EINVAL);
+
+ mutex_lock(&devlink->linecards_lock);
+ if (devlink_linecard_index_exists(devlink, linecard_index)) {
+ mutex_unlock(&devlink->linecards_lock);
+ return ERR_PTR(-EEXIST);
+ }
+
+ linecard = kzalloc(sizeof(*linecard), GFP_KERNEL);
+ if (!linecard) {
+ mutex_unlock(&devlink->linecards_lock);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ linecard->devlink = devlink;
+ linecard->index = linecard_index;
+ linecard->ops = ops;
+ linecard->priv = priv;
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ mutex_init(&linecard->state_lock);
+
+ err = devlink_linecard_types_init(linecard);
+ if (err) {
+ mutex_destroy(&linecard->state_lock);
+ kfree(linecard);
+ mutex_unlock(&devlink->linecards_lock);
+ return ERR_PTR(err);
+ }
+
+ list_add_tail(&linecard->list, &devlink->linecard_list);
+ refcount_set(&linecard->refcount, 1);
+ mutex_unlock(&devlink->linecards_lock);
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ return linecard;
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_create);
+
+/**
+ * devlink_linecard_destroy - Destroy devlink linecard
+ *
+ * @linecard: devlink linecard
+ */
+void devlink_linecard_destroy(struct devlink_linecard *linecard)
+{
+ struct devlink *devlink = linecard->devlink;
+
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
+ mutex_lock(&devlink->linecards_lock);
+ list_del(&linecard->list);
+ devlink_linecard_types_fini(linecard);
+ mutex_unlock(&devlink->linecards_lock);
+ devlink_linecard_put(linecard);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_destroy);
+
+/**
+ * devlink_linecard_provision_set - Set provisioning on linecard
+ *
+ * @linecard: devlink linecard
+ * @type: linecard type
+ *
+ * This is either called directly from the provision() op call or
+ * as a result of the provision() op call asynchronously.
+ */
+void devlink_linecard_provision_set(struct devlink_linecard *linecard,
+ const char *type)
+{
+ mutex_lock(&linecard->state_lock);
+ WARN_ON(linecard->type && strcmp(linecard->type, type));
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED;
+ linecard->type = type;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_provision_set);
+
+/**
+ * devlink_linecard_provision_clear - Clear provisioning on linecard
+ *
+ * @linecard: devlink linecard
+ *
+ * This is either called directly from the unprovision() op call or
+ * as a result of the unprovision() op call asynchronously.
+ */
+void devlink_linecard_provision_clear(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear);
+
+/**
+ * devlink_linecard_provision_fail - Fail provisioning on linecard
+ *
+ * @linecard: devlink linecard
+ *
+ * This is either called directly from the provision() op call or
+ * as a result of the provision() op call asynchronously.
+ */
+void devlink_linecard_provision_fail(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_provision_fail);
+
+/**
+ * devlink_linecard_activate - Set linecard active
+ *
+ * @linecard: devlink linecard
+ */
+void devlink_linecard_activate(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ WARN_ON(linecard->state != DEVLINK_LINECARD_STATE_PROVISIONED);
+ linecard->state = DEVLINK_LINECARD_STATE_ACTIVE;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_activate);
+
+/**
+ * devlink_linecard_deactivate - Set linecard inactive
+ *
+ * @linecard: devlink linecard
+ */
+void devlink_linecard_deactivate(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ switch (linecard->state) {
+ case DEVLINK_LINECARD_STATE_ACTIVE:
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ break;
+ case DEVLINK_LINECARD_STATE_UNPROVISIONING:
+ /* Line card is being deactivated as part
+ * of unprovisioning flow.
+ */
+ break;
+ default:
+ WARN_ON(1);
+ break;
+ }
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_deactivate);
+
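
Taken together, the linecard API expects a driver to describe the supported types through the ops, create the object with devlink_linecard_create(), and then report state transitions back through devlink_linecard_provision_set()/_fail()/_clear() and devlink_linecard_activate()/_deactivate(). A condensed, hypothetical driver skeleton; the "acme" names and the single supported type are made up, and the ops signatures follow the declarations added by this series:

#include <linux/err.h>
#include <net/devlink.h>

static int acme_lc_provision(struct devlink_linecard *linecard, void *priv,
			     const char *type, const void *type_priv,
			     struct netlink_ext_ack *extack)
{
	/* start (possibly asynchronous) provisioning in hardware ... */
	devlink_linecard_provision_set(linecard, type);
	return 0;
}

static int acme_lc_unprovision(struct devlink_linecard *linecard, void *priv,
			       struct netlink_ext_ack *extack)
{
	/* tear the slot down ... */
	devlink_linecard_provision_clear(linecard);
	return 0;
}

static unsigned int acme_lc_types_count(struct devlink_linecard *linecard,
					void *priv)
{
	return 1;
}

static void acme_lc_types_get(struct devlink_linecard *linecard, void *priv,
			      unsigned int index, const char **type,
			      const void **type_priv)
{
	*type = "acme-16x100G";
	*type_priv = NULL;
}

static const struct devlink_linecard_ops acme_lc_ops = {
	.provision	= acme_lc_provision,
	.unprovision	= acme_lc_unprovision,
	.types_count	= acme_lc_types_count,
	.types_get	= acme_lc_types_get,
};

static struct devlink_linecard *acme_lc_register(struct devlink *devlink,
						 unsigned int slot_index,
						 void *driver_priv)
{
	struct devlink_linecard *linecard;

	linecard = devlink_linecard_create(devlink, slot_index,
					   &acme_lc_ops, driver_priv);
	if (IS_ERR(linecard))
		return linecard;

	/* once the hardware reports the card as up:
	 * devlink_linecard_activate(linecard);
	 */
	return linecard;
}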
int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
u32 size, u16 ingress_pools_count,
u16 egress_pools_count, u16 ingress_tc_count,
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 7b288a121a41..41cac0e4834e 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -48,10 +48,22 @@
static int trace_state = TRACE_OFF;
static bool monitor_hw;
+#undef EM
+#undef EMe
+
+#define EM(a, b) [a] = #b,
+#define EMe(a, b) [a] = #b
+
+/* drop_reasons is used to translate 'enum skb_drop_reason' to string,
+ * which is reported to user space.
+ */
+static const char * const drop_reasons[] = {
+ TRACE_SKB_DROP_REASON
+};
+
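
The EM()/EMe() redefinition above expands TRACE_SKB_DROP_REASON (the same list the skb drop tracepoint header uses) into a designated-initializer string table, so every enum skb_drop_reason value maps to its name without a second hand-maintained list. A generic sketch of that X-macro technique, with made-up names rather than the real trace header:

/* one list drives both the enum and the string table */
#define EXAMPLE_REASONS				\
	EM(EX_REASON_NO_SOCKET, NO_SOCKET)	\
	EM(EX_REASON_CSUM, CSUM)		\
	EMe(EX_REASON_MAX, MAX)

#undef EM
#undef EMe
#define EM(a, b)  a,
#define EMe(a, b) a
enum example_reason { EXAMPLE_REASONS };

#undef EM
#undef EMe
#define EM(a, b)  [a] = #b,
#define EMe(a, b) [a] = #b
static const char * const example_reason_names[] = { EXAMPLE_REASONS };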
/* net_dm_mutex
*
* An overall lock guarding every operation coming from userspace.
- * It also guards the global 'hw_stats_list' list.
*/
static DEFINE_MUTEX(net_dm_mutex);
@@ -87,11 +99,9 @@ struct per_cpu_dm_data {
};
struct dm_hw_stat_delta {
- struct net_device *dev;
unsigned long last_rx;
- struct list_head list;
- struct rcu_head rcu;
unsigned long last_drop_val;
+ struct rcu_head rcu;
};
static struct genl_family net_drop_monitor_family;
@@ -102,7 +112,6 @@ static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data);
static int dm_hit_limit = 64;
static int dm_delay = 1;
static unsigned long dm_hw_check_delta = 2*HZ;
-static LIST_HEAD(hw_stats_list);
static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
static u32 net_dm_trunc_len;
@@ -126,6 +135,7 @@ struct net_dm_skb_cb {
struct devlink_trap_metadata *hw_metadata;
void *pc;
};
+ enum skb_drop_reason reason;
};
#define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))
@@ -273,29 +283,27 @@ static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb,
static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
int work, int budget)
{
- struct dm_hw_stat_delta *new_stat;
-
+ struct net_device *dev = napi->dev;
+ struct dm_hw_stat_delta *stat;
/*
* Don't check napi structures with no associated device
*/
- if (!napi->dev)
+ if (!dev)
return;
rcu_read_lock();
- list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
+ stat = rcu_dereference(dev->dm_private);
+ if (stat) {
/*
* only add a note to our monitor buffer if:
- * 1) this is the dev we received on
- * 2) its after the last_rx delta
- * 3) our rx_dropped count has gone up
+ * 1) it's after the last_rx delta
+ * 2) our rx_dropped count has gone up
*/
- if ((new_stat->dev == napi->dev) &&
- (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) &&
- (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
+ if (time_after(jiffies, stat->last_rx + dm_hw_check_delta) &&
+ (dev->stats.rx_dropped != stat->last_drop_val)) {
trace_drop_common(NULL, NULL);
- new_stat->last_drop_val = napi->dev->stats.rx_dropped;
- new_stat->last_rx = jiffies;
- break;
+ stat->last_drop_val = dev->stats.rx_dropped;
+ stat->last_rx = jiffies;
}
}
rcu_read_unlock();
@@ -498,6 +506,7 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
{
ktime_t tstamp = ktime_get_real();
struct per_cpu_dm_data *data;
+ struct net_dm_skb_cb *cb;
struct sk_buff *nskb;
unsigned long flags;
@@ -508,7 +517,11 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
if (!nskb)
return;
- NET_DM_SKB_CB(nskb)->pc = location;
+ if (unlikely(reason >= SKB_DROP_REASON_MAX || reason <= 0))
+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
+ cb = NET_DM_SKB_CB(nskb);
+ cb->reason = reason;
+ cb->pc = location;
/* Override the timestamp because we care about the time when the
* packet was dropped.
*/
@@ -553,7 +566,8 @@ static size_t net_dm_in_port_size(void)
#define NET_DM_MAX_SYMBOL_LEN 40
-static size_t net_dm_packet_report_size(size_t payload_len)
+static size_t net_dm_packet_report_size(size_t payload_len,
+ enum skb_drop_reason reason)
{
size_t size;
@@ -574,6 +588,8 @@ static size_t net_dm_packet_report_size(size_t payload_len)
nla_total_size(sizeof(u32)) +
/* NET_DM_ATTR_PROTO */
nla_total_size(sizeof(u16)) +
+ /* NET_DM_ATTR_REASON */
+ nla_total_size(strlen(drop_reasons[reason]) + 1) +
/* NET_DM_ATTR_PAYLOAD */
nla_total_size(payload_len);
}
@@ -606,7 +622,7 @@ nla_put_failure:
static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
size_t payload_len)
{
- u64 pc = (u64)(uintptr_t) NET_DM_SKB_CB(skb)->pc;
+ struct net_dm_skb_cb *cb = NET_DM_SKB_CB(skb);
char buf[NET_DM_MAX_SYMBOL_LEN];
struct nlattr *attr;
void *hdr;
@@ -620,10 +636,15 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_SW))
goto nla_put_failure;
- if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, pc, NET_DM_ATTR_PAD))
+ if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, (u64)(uintptr_t)cb->pc,
+ NET_DM_ATTR_PAD))
goto nla_put_failure;
- snprintf(buf, sizeof(buf), "%pS", NET_DM_SKB_CB(skb)->pc);
+ if (nla_put_string(msg, NET_DM_ATTR_REASON,
+ drop_reasons[cb->reason]))
+ goto nla_put_failure;
+
+ snprintf(buf, sizeof(buf), "%pS", cb->pc);
if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf))
goto nla_put_failure;
@@ -679,7 +700,9 @@ static void net_dm_packet_report(struct sk_buff *skb)
if (net_dm_trunc_len)
payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
- msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL);
+ msg = nlmsg_new(net_dm_packet_report_size(payload_len,
+ NET_DM_SKB_CB(skb)->reason),
+ GFP_KERNEL);
if (!msg)
goto out;
@@ -1165,7 +1188,6 @@ err_module_put:
static void net_dm_trace_off_set(void)
{
- struct dm_hw_stat_delta *new_stat, *temp;
const struct net_dm_alert_ops *ops;
int cpu;
@@ -1189,13 +1211,6 @@ static void net_dm_trace_off_set(void)
consume_skb(skb);
}
- list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
- if (new_stat->dev == NULL) {
- list_del_rcu(&new_stat->list);
- kfree_rcu(new_stat, rcu);
- }
- }
-
module_put(THIS_MODULE);
}
@@ -1556,38 +1571,28 @@ static int dropmon_net_event(struct notifier_block *ev_block,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct dm_hw_stat_delta *new_stat = NULL;
- struct dm_hw_stat_delta *tmp;
+ struct dm_hw_stat_delta *stat;
switch (event) {
case NETDEV_REGISTER:
- new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);
+ if (WARN_ON_ONCE(rtnl_dereference(dev->dm_private)))
+ break;
+ stat = kzalloc(sizeof(*stat), GFP_KERNEL);
+ if (!stat)
+ break;
- if (!new_stat)
- goto out;
+ stat->last_rx = jiffies;
+ rcu_assign_pointer(dev->dm_private, stat);
- new_stat->dev = dev;
- new_stat->last_rx = jiffies;
- mutex_lock(&net_dm_mutex);
- list_add_rcu(&new_stat->list, &hw_stats_list);
- mutex_unlock(&net_dm_mutex);
break;
case NETDEV_UNREGISTER:
- mutex_lock(&net_dm_mutex);
- list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
- if (new_stat->dev == dev) {
- new_stat->dev = NULL;
- if (trace_state == TRACE_OFF) {
- list_del_rcu(&new_stat->list);
- kfree_rcu(new_stat, rcu);
- break;
- }
- }
+ stat = rtnl_dereference(dev->dm_private);
+ if (stat) {
+ rcu_assign_pointer(dev->dm_private, NULL);
+ kfree_rcu(stat, rcu);
}
- mutex_unlock(&net_dm_mutex);
break;
}
-out:
return NOTIFY_DONE;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 4603b7cd3cd1..2a6a0b0ce43e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -78,6 +78,7 @@
#include <linux/btf_ids.h>
#include <net/tls.h>
#include <net/xdp.h>
+#include <net/mptcp.h>
static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id);
@@ -1687,7 +1688,7 @@ BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
return -EINVAL;
- if (unlikely(offset > 0xffff))
+ if (unlikely(offset > INT_MAX))
return -EFAULT;
if (unlikely(bpf_try_make_writable(skb, offset + len)))
return -EFAULT;
@@ -1722,7 +1723,7 @@ BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
{
void *ptr;
- if (unlikely(offset > 0xffff))
+ if (unlikely(offset > INT_MAX))
goto err_clear;
ptr = skb_header_pointer(skb, offset, len, to);
@@ -2107,7 +2108,7 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
}
skb->dev = dev;
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
dev_xmit_recursion_inc();
ret = dev_queue_xmit(skb);
@@ -2176,7 +2177,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
}
skb->dev = dev;
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
skb = skb_expand_head(skb, hh_len);
@@ -2274,7 +2275,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
}
skb->dev = dev;
- skb->tstamp = 0;
+ skb_clear_tstamp(skb);
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
skb = skb_expand_head(skb, hh_len);
@@ -2603,7 +2604,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
* account for the headroom.
*/
bytes_sg_total = start - offset + bytes;
- if (!test_bit(i, &msg->sg.copy) && bytes_sg_total <= len)
+ if (!test_bit(i, msg->sg.copy) && bytes_sg_total <= len)
goto out;
/* At this point we need to linearize multiple scatterlist
@@ -2710,6 +2711,9 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
if (unlikely(flags))
return -EINVAL;
+ if (unlikely(len == 0))
+ return 0;
+
/* First find the starting scatterlist element */
i = msg->sg.start;
do {
@@ -2809,7 +2813,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
/* Place newly allocated data buffer */
sk_mem_charge(msg->sk, len);
msg->sg.size += len;
- __clear_bit(new, &msg->sg.copy);
+ __clear_bit(new, msg->sg.copy);
sg_set_page(&msg->sg.data[new], page, len + copy, 0);
if (rsge.length) {
get_page(sg_page(&rsge));
@@ -3783,6 +3787,28 @@ static const struct bpf_func_proto sk_skb_change_head_proto = {
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_ANYTHING,
};
+
+BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp)
+{
+ return xdp_get_buff_len(xdp);
+}
+
+static const struct bpf_func_proto bpf_xdp_get_buff_len_proto = {
+ .func = bpf_xdp_get_buff_len,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff)
+
+const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = {
+ .func = bpf_xdp_get_buff_len,
+ .gpl_only = false,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_xdp_get_buff_len_bpf_ids[0],
+};
+
static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
{
return xdp_data_meta_unsupported(xdp) ? 0 :
@@ -3817,11 +3843,208 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
.arg2_type = ARG_ANYTHING,
};
+static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
+ void *buf, unsigned long len, bool flush)
+{
+ unsigned long ptr_len, ptr_off = 0;
+ skb_frag_t *next_frag, *end_frag;
+ struct skb_shared_info *sinfo;
+ void *src, *dst;
+ u8 *ptr_buf;
+
+ if (likely(xdp->data_end - xdp->data >= off + len)) {
+ src = flush ? buf : xdp->data + off;
+ dst = flush ? xdp->data + off : buf;
+ memcpy(dst, src, len);
+ return;
+ }
+
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ end_frag = &sinfo->frags[sinfo->nr_frags];
+ next_frag = &sinfo->frags[0];
+
+ ptr_len = xdp->data_end - xdp->data;
+ ptr_buf = xdp->data;
+
+ while (true) {
+ if (off < ptr_off + ptr_len) {
+ unsigned long copy_off = off - ptr_off;
+ unsigned long copy_len = min(len, ptr_len - copy_off);
+
+ src = flush ? buf : ptr_buf + copy_off;
+ dst = flush ? ptr_buf + copy_off : buf;
+ memcpy(dst, src, copy_len);
+
+ off += copy_len;
+ len -= copy_len;
+ buf += copy_len;
+ }
+
+ if (!len || next_frag == end_frag)
+ break;
+
+ ptr_off += ptr_len;
+ ptr_buf = skb_frag_address(next_frag);
+ ptr_len = skb_frag_size(next_frag);
+ next_frag++;
+ }
+}
+
+static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
+{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ u32 size = xdp->data_end - xdp->data;
+ void *addr = xdp->data;
+ int i;
+
+ if (unlikely(offset > 0xffff || len > 0xffff))
+ return ERR_PTR(-EFAULT);
+
+ if (offset + len > xdp_get_buff_len(xdp))
+ return ERR_PTR(-EINVAL);
+
+ if (offset < size) /* linear area */
+ goto out;
+
+ offset -= size;
+ for (i = 0; i < sinfo->nr_frags; i++) { /* paged area */
+ u32 frag_size = skb_frag_size(&sinfo->frags[i]);
+
+ if (offset < frag_size) {
+ addr = skb_frag_address(&sinfo->frags[i]);
+ size = frag_size;
+ break;
+ }
+ offset -= frag_size;
+ }
+out:
+ return offset + len < size ? addr + offset : NULL;
+}
+
+BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset,
+ void *, buf, u32, len)
+{
+ void *ptr;
+
+ ptr = bpf_xdp_pointer(xdp, offset, len);
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+
+ if (!ptr)
+ bpf_xdp_copy_buf(xdp, offset, buf, len, false);
+ else
+ memcpy(buf, ptr, len);
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_load_bytes_proto = {
+ .func = bpf_xdp_load_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg4_type = ARG_CONST_SIZE,
+};
+
+BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset,
+ void *, buf, u32, len)
+{
+ void *ptr;
+
+ ptr = bpf_xdp_pointer(xdp, offset, len);
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+
+ if (!ptr)
+ bpf_xdp_copy_buf(xdp, offset, buf, len, true);
+ else
+ memcpy(ptr, buf, len);
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_store_bytes_proto = {
+ .func = bpf_xdp_store_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg4_type = ARG_CONST_SIZE,
+};
+
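
bpf_xdp_load_bytes() and bpf_xdp_store_bytes() give programs a frag-aware copy path: when the requested range is not fully contained in one area, bpf_xdp_pointer() returns NULL and the copy falls back to bpf_xdp_copy_buf(), which walks the linear part and the frags. A hypothetical XDP program using the load helper; a sketch that assumes a libbpf/UAPI recent enough to declare the new helper:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_sample_hdr(struct xdp_md *ctx)
{
	__u8 eth[14];

	/* works even when the first 14 bytes are not all in the linear area */
	if (bpf_xdp_load_bytes(ctx, 0, eth, sizeof(eth)) < 0)
		return XDP_ABORTED;

	return XDP_PASS;
}

char LICENSE[] SEC("license") = "GPL";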
+static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
+{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1];
+ struct xdp_rxq_info *rxq = xdp->rxq;
+ unsigned int tailroom;
+
+ if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz)
+ return -EOPNOTSUPP;
+
+ tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);
+ if (unlikely(offset > tailroom))
+ return -EINVAL;
+
+ memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset);
+ skb_frag_size_add(frag, offset);
+ sinfo->xdp_frags_size += offset;
+
+ return 0;
+}
+
+static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
+{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ int i, n_frags_free = 0, len_free = 0;
+
+ if (unlikely(offset > (int)xdp_get_buff_len(xdp) - ETH_HLEN))
+ return -EINVAL;
+
+ for (i = sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ int shrink = min_t(int, offset, skb_frag_size(frag));
+
+ len_free += shrink;
+ offset -= shrink;
+
+ if (skb_frag_size(frag) == shrink) {
+ struct page *page = skb_frag_page(frag);
+
+ __xdp_return(page_address(page), &xdp->rxq->mem,
+ false, NULL);
+ n_frags_free++;
+ } else {
+ skb_frag_size_sub(frag, shrink);
+ break;
+ }
+ }
+ sinfo->nr_frags -= n_frags_free;
+ sinfo->xdp_frags_size -= len_free;
+
+ if (unlikely(!sinfo->nr_frags)) {
+ xdp_buff_clear_frags_flag(xdp);
+ xdp->data_end -= offset;
+ }
+
+ return 0;
+}
+
BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
{
void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
void *data_end = xdp->data_end + offset;
+ if (unlikely(xdp_buff_has_frags(xdp))) { /* non-linear xdp buff */
+ if (offset < 0)
+ return bpf_xdp_frags_shrink_tail(xdp, -offset);
+
+ return bpf_xdp_frags_increase_tail(xdp, offset);
+ }
+
/* Notice that xdp_data_hard_end have reserved some tailroom */
if (unlikely(data_end > data_hard_end))
return -EINVAL;
@@ -4047,6 +4270,14 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
enum bpf_map_type map_type = ri->map_type;
+ /* XDP_REDIRECT is not fully supported yet for xdp frags since
+ * not all XDP capable drivers can map non-linear xdp_frame in
+ * ndo_xdp_xmit.
+ */
+ if (unlikely(xdp_buff_has_frags(xdp) &&
+ map_type != BPF_MAP_TYPE_CPUMAP))
+ return -EOPNOTSUPP;
+
if (map_type == BPF_MAP_TYPE_XSKMAP)
return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
@@ -4268,6 +4499,7 @@ BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
err = -EINVAL;
switch (size) {
+ case offsetof(struct bpf_tunnel_key, local_ipv6[0]):
case offsetof(struct bpf_tunnel_key, tunnel_label):
case offsetof(struct bpf_tunnel_key, tunnel_ext):
goto set_compat;
@@ -4293,10 +4525,14 @@ set_compat:
if (flags & BPF_F_TUNINFO_IPV6) {
memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
sizeof(to->remote_ipv6));
+ memcpy(to->local_ipv6, &info->key.u.ipv6.dst,
+ sizeof(to->local_ipv6));
to->tunnel_label = be32_to_cpu(info->key.label);
} else {
to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
+ to->local_ipv4 = be32_to_cpu(info->key.u.ipv4.dst);
+ memset(&to->local_ipv6[1], 0, sizeof(__u32) * 3);
to->tunnel_label = 0;
}
@@ -4367,6 +4603,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
return -EINVAL;
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
switch (size) {
+ case offsetof(struct bpf_tunnel_key, local_ipv6[0]):
case offsetof(struct bpf_tunnel_key, tunnel_label):
case offsetof(struct bpf_tunnel_key, tunnel_ext):
case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
@@ -4409,10 +4646,13 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
info->mode |= IP_TUNNEL_INFO_IPV6;
memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
sizeof(from->remote_ipv6));
+ memcpy(&info->key.u.ipv6.src, from->local_ipv6,
+ sizeof(from->local_ipv6));
info->key.label = cpu_to_be32(from->tunnel_label) &
IPV6_FLOWLABEL_MASK;
} else {
info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
+ info->key.u.ipv4.src = cpu_to_be32(from->local_ipv4);
}
return 0;
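A hedged TC-BPF sketch (illustrative only; it assumes a UAPI bpf.h that already carries the local_ipv4/local_ipv6 members added here) reading the new local tunnel address alongside the remote one:

/* Hypothetical usage sketch, not from this patch. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int read_tunnel_local(struct __sk_buff *skb)
{
	struct bpf_tunnel_key key = {};

	if (bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0))
		return 0; /* TC_ACT_OK */

	/* local_ipv4/local_ipv6 hold our side of the tunnel, next to the
	 * pre-existing remote_* fields.
	 */
	bpf_printk("tunnel remote %x local %x",
		   key.remote_ipv4, key.local_ipv4);

	return 0; /* TC_ACT_OK */
}

char _license[] SEC("license") = "GPL";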
@@ -4590,10 +4830,12 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
};
#endif
-static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
+static unsigned long bpf_xdp_copy(void *dst, const void *ctx,
unsigned long off, unsigned long len)
{
- memcpy(dst_buff, src_buff + off, len);
+ struct xdp_buff *xdp = (struct xdp_buff *)ctx;
+
+ bpf_xdp_copy_buf(xdp, off, dst, len, false);
return 0;
}
@@ -4604,11 +4846,11 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
return -EINVAL;
- if (unlikely(!xdp ||
- xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+
+ if (unlikely(!xdp || xdp_size > xdp_get_buff_len(xdp)))
return -EFAULT;
- return bpf_event_output(map, flags, meta, meta_size, xdp->data,
+ return bpf_event_output(map, flags, meta, meta_size, xdp,
xdp_size, bpf_xdp_copy);
}
@@ -4862,6 +5104,13 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
case SO_REUSEPORT:
sk->sk_reuseport = valbool;
break;
+ case SO_TXREHASH:
+ if (val < -1 || val > 1) {
+ ret = -EINVAL;
+ break;
+ }
+ sk->sk_txrehash = (u8)val;
+ break;
default:
ret = -EINVAL;
}
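A hedged sockops sketch (illustrative only; the SOL_SOCKET/SO_TXREHASH values are assumed here rather than pulled from system headers) exercising the new SO_TXREHASH case through bpf_setsockopt():

/* Hypothetical usage sketch, not from this patch. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#ifndef SOL_SOCKET
#define SOL_SOCKET	1	/* assumed; matches most architectures */
#endif
#ifndef SO_TXREHASH
#define SO_TXREHASH	74	/* assumed value from asm-generic/socket.h */
#endif

SEC("sockops")
int enable_txrehash(struct bpf_sock_ops *skops)
{
	int one = 1;

	if (skops->op == BPF_SOCK_OPS_TCP_CONNECT_CB)
		/* The handler above only accepts -1, 0 or 1. */
		bpf_setsockopt(skops, SOL_SOCKET, SO_TXREHASH,
			       &one, sizeof(one));

	return 1;
}

char _license[] SEC("license") = "GPL";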
@@ -4934,7 +5183,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
if (val <= 0 || tp->data_segs_out > tp->syn_data)
ret = -EINVAL;
else
- tp->snd_cwnd = val;
+ tcp_snd_cwnd_set(tp, val);
break;
case TCP_BPF_SNDCWND_CLAMP:
if (val <= 0) {
@@ -5040,6 +5289,9 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname,
case SO_REUSEPORT:
*((int *)optval) = sk->sk_reuseport;
break;
+ case SO_TXREHASH:
+ *((int *)optval) = sk->sk_txrehash;
+ break;
default:
goto err_clear;
}
@@ -5906,7 +6158,6 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len
if (err)
return err;
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
return seg6_lookup_nexthop(skb, NULL, 0);
@@ -6264,10 +6515,21 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
ifindex, proto, netns_id, flags);
if (sk) {
- sk = sk_to_full_sk(sk);
- if (!sk_fullsock(sk)) {
+ struct sock *sk2 = sk_to_full_sk(sk);
+
+ /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
+ * sock refcnt is decremented to prevent a request_sock leak.
+ */
+ if (!sk_fullsock(sk2))
+ sk2 = NULL;
+ if (sk2 != sk) {
sock_gen_put(sk);
- return NULL;
+ /* Ensure there is no need to bump sk2 refcnt */
+ if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
+ WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
+ return NULL;
+ }
+ sk = sk2;
}
}
@@ -6301,10 +6563,21 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
flags);
if (sk) {
- sk = sk_to_full_sk(sk);
- if (!sk_fullsock(sk)) {
+ struct sock *sk2 = sk_to_full_sk(sk);
+
+ /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
+ * sock refcnt is decremented to prevent a request_sock leak.
+ */
+ if (!sk_fullsock(sk2))
+ sk2 = NULL;
+ if (sk2 != sk) {
sock_gen_put(sk);
- return NULL;
+ /* Ensure there is no need to bump sk2 refcnt */
+ if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
+ WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
+ return NULL;
+ }
+ sk = sk2;
}
}
@@ -6379,7 +6652,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
.func = bpf_sk_release,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON | OBJ_RELEASE,
};
BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
@@ -6774,24 +7047,33 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
if (!th->ack || th->rst || th->syn)
return -ENOENT;
+ if (unlikely(iph_len < sizeof(struct iphdr)))
+ return -EINVAL;
+
if (tcp_synq_no_recent_overflow(sk))
return -ENOENT;
cookie = ntohl(th->ack_seq) - 1;
- switch (sk->sk_family) {
- case AF_INET:
- if (unlikely(iph_len < sizeof(struct iphdr)))
+ /* Both struct iphdr and struct ipv6hdr have the version field at the
+ * same offset so we can cast to the shorter header (struct iphdr).
+ */
+ switch (((struct iphdr *)iph)->version) {
+ case 4:
+ if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
return -EINVAL;
ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
break;
#if IS_BUILTIN(CONFIG_IPV6)
- case AF_INET6:
+ case 6:
if (unlikely(iph_len < sizeof(struct ipv6hdr)))
return -EINVAL;
+ if (sk->sk_family != AF_INET6)
+ return -EINVAL;
+
ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
break;
#endif /* CONFIG_IPV6 */
@@ -6848,7 +7130,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
*/
switch (((struct iphdr *)iph)->version) {
case 4:
- if (sk->sk_family == AF_INET6 && sk->sk_ipv6only)
+ if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
return -EINVAL;
mss = tcp_v4_get_syncookie(sk, iph, th, &cookie);
@@ -7146,6 +7428,43 @@ static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = {
.arg3_type = ARG_ANYTHING,
};
+BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb,
+ u64, tstamp, u32, tstamp_type)
+{
+ /* skb_clear_delivery_time() is done for inet protocol */
+ if (skb->protocol != htons(ETH_P_IP) &&
+ skb->protocol != htons(ETH_P_IPV6))
+ return -EOPNOTSUPP;
+
+ switch (tstamp_type) {
+ case BPF_SKB_TSTAMP_DELIVERY_MONO:
+ if (!tstamp)
+ return -EINVAL;
+ skb->tstamp = tstamp;
+ skb->mono_delivery_time = 1;
+ break;
+ case BPF_SKB_TSTAMP_UNSPEC:
+ if (tstamp)
+ return -EINVAL;
+ skb->tstamp = 0;
+ skb->mono_delivery_time = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_skb_set_tstamp_proto = {
+ .func = bpf_skb_set_tstamp,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
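A hedged TC egress sketch (illustrative only) that pairs the new __sk_buff->tstamp_type field with bpf_skb_set_tstamp():

/* Hypothetical usage sketch, not from this patch. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int set_edt(struct __sk_buff *skb)
{
	/* Reading tstamp_type marks the prog with tstamp_type_access,
	 * so the ctx rewrites below load/store skb->tstamp verbatim.
	 */
	if (skb->tstamp_type == BPF_SKB_TSTAMP_UNSPEC) {
		__u64 now = bpf_ktime_get_ns();

		/* Stamp a mono (EDT-style) delivery time 1ms out. */
		bpf_skb_set_tstamp(skb, now + 1000000,
				   BPF_SKB_TSTAMP_DELIVERY_MONO);
	}

	return 0; /* TC_ACT_OK */
}

char _license[] SEC("license") = "GPL";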
#endif /* CONFIG_INET */
bool bpf_helper_changes_pkt_data(void *func)
@@ -7507,6 +7826,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_gen_syncookie_proto;
case BPF_FUNC_sk_assign:
return &bpf_sk_assign_proto;
+ case BPF_FUNC_skb_set_tstamp:
+ return &bpf_skb_set_tstamp_proto;
#endif
default:
return bpf_sk_base_func_proto(func_id);
@@ -7533,6 +7854,12 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_xdp_redirect_map_proto;
case BPF_FUNC_xdp_adjust_tail:
return &bpf_xdp_adjust_tail_proto;
+ case BPF_FUNC_xdp_get_buff_len:
+ return &bpf_xdp_get_buff_len_proto;
+ case BPF_FUNC_xdp_load_bytes:
+ return &bpf_xdp_load_bytes_proto;
+ case BPF_FUNC_xdp_store_bytes:
+ return &bpf_xdp_store_bytes_proto;
case BPF_FUNC_fib_lookup:
return &bpf_xdp_fib_lookup_proto;
case BPF_FUNC_check_mtu:
@@ -7840,7 +8167,9 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
return false;
info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
break;
- case offsetofend(struct __sk_buff, gso_size) ... offsetof(struct __sk_buff, hwtstamp) - 1:
+ case offsetof(struct __sk_buff, tstamp_type):
+ return false;
+ case offsetofend(struct __sk_buff, tstamp_type) ... offsetof(struct __sk_buff, hwtstamp) - 1:
/* Explicitly prohibit access to padding in __sk_buff. */
return false;
default:
@@ -8030,6 +8359,7 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
+ int field_size;
if (off < 0 || off >= sizeof(struct bpf_sock))
return false;
@@ -8041,7 +8371,6 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
case offsetof(struct bpf_sock, family):
case offsetof(struct bpf_sock, type):
case offsetof(struct bpf_sock, protocol):
- case offsetof(struct bpf_sock, dst_port):
case offsetof(struct bpf_sock, src_port):
case offsetof(struct bpf_sock, rx_queue_mapping):
case bpf_ctx_range(struct bpf_sock, src_ip4):
@@ -8050,6 +8379,14 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
+ case bpf_ctx_range(struct bpf_sock, dst_port):
+ field_size = size == size_default ?
+ size_default : sizeof_field(struct bpf_sock, dst_port);
+ bpf_ctx_record_field_size(info, field_size);
+ return bpf_ctx_narrow_access_ok(off, size, field_size);
+ case offsetofend(struct bpf_sock, dst_port) ...
+ offsetof(struct bpf_sock, dst_ip4) - 1:
+ return false;
}
return size == size_default;
@@ -8187,6 +8524,15 @@ static bool tc_cls_act_is_valid_access(int off, int size,
break;
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
+ case offsetof(struct __sk_buff, tstamp_type):
+ /* How convert_ctx_access() reads and writes
+ * __sk_buff->tstamp depends on whether the bpf prog
+ * has used __sk_buff->tstamp_type.
+ * Thus, prog->tstamp_type_access must already be set
+ * here, during the earlier is_valid_access() stage.
+ */
+ ((struct bpf_prog *)prog)->tstamp_type_access = 1;
+ return size == sizeof(__u8);
}
return bpf_skb_is_valid_access(off, size, type, prog, info);
@@ -8582,6 +8928,25 @@ static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
+static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
+ struct bpf_insn *insn)
+{
+ __u8 value_reg = si->dst_reg;
+ __u8 skb_reg = si->src_reg;
+ /* AX is needed because src_reg and dst_reg could be the same */
+ __u8 tmp_reg = BPF_REG_AX;
+
+ *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
+ PKT_VLAN_PRESENT_OFFSET);
+ *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
+ SKB_MONO_DELIVERY_TIME_MASK, 2);
+ *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC);
+ *insn++ = BPF_JMP_A(1);
+ *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO);
+
+ return insn;
+}
+
static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si,
struct bpf_insn *insn)
{
@@ -8603,6 +8968,74 @@ static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si,
return insn;
}
+static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn)
+{
+ __u8 value_reg = si->dst_reg;
+ __u8 skb_reg = si->src_reg;
+
+#ifdef CONFIG_NET_CLS_ACT
+ /* If tstamp_type has been read,
+ * the bpf prog is aware that the tstamp could carry the delivery time.
+ * Thus, read skb->tstamp as-is when tstamp_type_access is true.
+ */
+ if (!prog->tstamp_type_access) {
+ /* AX is needed because src_reg and dst_reg could be the same */
+ __u8 tmp_reg = BPF_REG_AX;
+
+ *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, PKT_VLAN_PRESENT_OFFSET);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
+ TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK);
+ *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg,
+ TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2);
+ /* skb->tc_at_ingress && skb->mono_delivery_time,
+ * read 0 as the (rcv) timestamp.
+ */
+ *insn++ = BPF_MOV64_IMM(value_reg, 0);
+ *insn++ = BPF_JMP_A(1);
+ }
+#endif
+
+ *insn++ = BPF_LDX_MEM(BPF_DW, value_reg, skb_reg,
+ offsetof(struct sk_buff, tstamp));
+ return insn;
+}
+
+static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn)
+{
+ __u8 value_reg = si->src_reg;
+ __u8 skb_reg = si->dst_reg;
+
+#ifdef CONFIG_NET_CLS_ACT
+ /* If tstamp_type has been read,
+ * the bpf prog is aware that the tstamp could carry the delivery time.
+ * Thus, write skb->tstamp as-is when tstamp_type_access is true.
+ * Otherwise, a write at ingress must also clear the
+ * mono_delivery_time bit.
+ */
+ if (!prog->tstamp_type_access) {
+ __u8 tmp_reg = BPF_REG_AX;
+
+ *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, PKT_VLAN_PRESENT_OFFSET);
+ /* Writing __sk_buff->tstamp at ingress, goto <clear> */
+ *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1);
+ /* goto <store> */
+ *insn++ = BPF_JMP_A(2);
+ /* <clear>: mono_delivery_time */
+ *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK);
+ *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, PKT_VLAN_PRESENT_OFFSET);
+ }
+#endif
+
+ /* <store>: skb->tstamp = tstamp */
+ *insn++ = BPF_STX_MEM(BPF_DW, skb_reg, value_reg,
+ offsetof(struct sk_buff, tstamp));
+ return insn;
+}
+
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@@ -8911,17 +9344,13 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
BUILD_BUG_ON(sizeof_field(struct sk_buff, tstamp) != 8);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_DW,
- si->dst_reg, si->src_reg,
- bpf_target_off(struct sk_buff,
- tstamp, 8,
- target_size));
+ insn = bpf_convert_tstamp_write(prog, si, insn);
else
- *insn++ = BPF_LDX_MEM(BPF_DW,
- si->dst_reg, si->src_reg,
- bpf_target_off(struct sk_buff,
- tstamp, 8,
- target_size));
+ insn = bpf_convert_tstamp_read(prog, si, insn);
+ break;
+
+ case offsetof(struct __sk_buff, tstamp_type):
+ insn = bpf_convert_tstamp_type_read(si, insn);
break;
case offsetof(struct __sk_buff, gso_segs):
@@ -10062,7 +10491,6 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
.convert_ctx_access = tc_cls_act_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
.gen_ld_abs = bpf_gen_ld_abs,
- .check_kfunc_call = bpf_prog_test_check_kfunc_call,
};
const struct bpf_prog_ops tc_cls_act_prog_ops = {
@@ -10601,12 +11029,24 @@ static bool sk_lookup_is_valid_access(int off, int size,
case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]):
case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
- case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
case bpf_ctx_range(struct bpf_sk_lookup, local_port):
case bpf_ctx_range(struct bpf_sk_lookup, ingress_ifindex):
bpf_ctx_record_field_size(info, sizeof(__u32));
return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));
+ case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
+ /* Allow 4-byte access to 2-byte field for backward compatibility */
+ if (size == sizeof(__u32))
+ return true;
+ bpf_ctx_record_field_size(info, sizeof(__be16));
+ return bpf_ctx_narrow_access_ok(off, size, sizeof(__be16));
+
+ case offsetofend(struct bpf_sk_lookup, remote_port) ...
+ offsetof(struct bpf_sk_lookup, local_ip4) - 1:
+ /* Allow access to zero padding for backward compatibility */
+ bpf_ctx_record_field_size(info, sizeof(__u16));
+ return bpf_ctx_narrow_access_ok(off, size, sizeof(__u16));
+
default:
return false;
}
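A hedged sk_lookup sketch (illustrative only) of the 2-byte remote_port access the widened check above now validates:

/* Hypothetical usage sketch, not from this patch. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("sk_lookup")
int steer(struct bpf_sk_lookup *ctx)
{
	/* remote_port is now a 2-byte, network-byte-order field; 4-byte
	 * loads stay valid for old programs and see zero padding above it.
	 */
	if (ctx->remote_port == bpf_htons(4040))
		return SK_DROP;

	return SK_PASS;
}

char _license[] SEC("license") = "GPL";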
@@ -10688,6 +11128,11 @@ static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
sport, 2, target_size));
break;
+ case offsetofend(struct bpf_sk_lookup, remote_port):
+ *target_size = 2;
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+ break;
+
case offsetof(struct bpf_sk_lookup, local_port):
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
bpf_target_off(struct bpf_sk_lookup_kern,
@@ -10858,6 +11303,20 @@ const struct bpf_func_proto bpf_skc_to_unix_sock_proto = {
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UNIX],
};
+BPF_CALL_1(bpf_skc_to_mptcp_sock, struct sock *, sk)
+{
+ BTF_TYPE_EMIT(struct mptcp_sock);
+ return (unsigned long)bpf_mptcp_sock_from_subflow(sk);
+}
+
+const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
+ .func = bpf_skc_to_mptcp_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_SOCK_COMMON,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
+};
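A hedged sockops sketch (illustrative only; assumes a libbpf new enough to declare bpf_skc_to_mptcp_sock()) showing the intended use of the new cast helper:

/* Hypothetical usage sketch, not from this patch. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sockops")
int observe_mptcp(struct bpf_sock_ops *skops)
{
	struct bpf_sock *sk = skops->sk;
	struct mptcp_sock *msk;

	if (!sk)
		return 1;

	/* NULL unless sk is the socket of an MPTCP subflow. */
	msk = bpf_skc_to_mptcp_sock(sk);
	if (msk)
		bpf_printk("subflow of an mptcp connection");

	return 1;
}

char _license[] SEC("license") = "GPL";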
+
BPF_CALL_1(bpf_sock_from_file, struct file *, file)
{
return (unsigned long)sock_from_file(file);
@@ -10900,6 +11359,9 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_skc_to_unix_sock:
func = &bpf_skc_to_unix_sock_proto;
break;
+ case BPF_FUNC_skc_to_mptcp_sock:
+ func = &bpf_skc_to_mptcp_sock_proto;
+ break;
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 15833e1d6ea1..6aee04f75e3e 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -22,6 +22,7 @@
#include <linux/ppp_defs.h>
#include <linux/stddef.h>
#include <linux/if_ether.h>
+#include <linux/if_hsr.h>
#include <linux/mpls.h>
#include <linux/tcp.h>
#include <linux/ptp_classify.h>
@@ -1031,7 +1032,17 @@ bool __skb_flow_dissect(const struct net *net,
key_eth_addrs = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_ETH_ADDRS,
target_container);
- memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
+ memcpy(key_eth_addrs, eth, sizeof(*key_eth_addrs));
+ }
+
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_NUM_OF_VLANS)) {
+ struct flow_dissector_key_num_of_vlans *key_num_of_vlans;
+
+ key_num_of_vlans = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_NUM_OF_VLANS,
+ target_container);
+ key_num_of_vlans->num_of_vlans = 0;
}
proto_again:
@@ -1157,6 +1168,16 @@ proto_again:
nhoff += sizeof(*vlan);
}
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_NUM_OF_VLANS)) {
+ struct flow_dissector_key_num_of_vlans *key_nvs;
+
+ key_nvs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_NUM_OF_VLANS,
+ target_container);
+ key_nvs->num_of_vlans++;
+ }
+
if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX) {
dissector_vlan = FLOW_DISSECTOR_KEY_VLAN;
} else if (dissector_vlan == FLOW_DISSECTOR_KEY_VLAN) {
@@ -1182,6 +1203,7 @@ proto_again:
VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
}
key_vlan->vlan_tpid = saved_vlan_tpid;
+ key_vlan->vlan_eth_type = proto;
}
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
@@ -1282,6 +1304,23 @@ proto_again:
break;
}
+ case htons(ETH_P_PRP):
+ case htons(ETH_P_HSR): {
+ struct hsr_tag *hdr, _hdr;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen,
+ &_hdr);
+ if (!hdr) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
+
+ proto = hdr->encap_proto;
+ nhoff += HSR_HLEN;
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ break;
+ }
+
default:
fdret = FLOW_DISSECT_RET_OUT_BAD;
break;
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 73f68d4625f3..929f6379a279 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -595,3 +595,9 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
return (bo && list_empty(&bo->cb_list)) ? -EOPNOTSUPP : count;
}
EXPORT_SYMBOL(flow_indr_dev_setup_offload);
+
+bool flow_indr_dev_exists(void)
+{
+ return !list_empty(&flow_block_indr_dev_list);
+}
+EXPORT_SYMBOL(flow_indr_dev_exists);
diff --git a/net/core/gro.c b/net/core/gro.c
index a11b286d1495..b4190eb08467 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -93,6 +93,31 @@ void dev_remove_offload(struct packet_offload *po)
EXPORT_SYMBOL(dev_remove_offload);
/**
+ * skb_eth_gso_segment - segmentation handler for ethernet protocols.
+ * @skb: buffer to segment
+ * @features: features for the output path (see dev->features)
+ * @type: Ethernet Protocol ID
+ */
+struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb,
+ netdev_features_t features, __be16 type)
+{
+ struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+ struct packet_offload *ptype;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, &offload_base, list) {
+ if (ptype->type == type && ptype->callbacks.gso_segment) {
+ segs = ptype->callbacks.gso_segment(skb, features);
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return segs;
+}
+EXPORT_SYMBOL(skb_eth_gso_segment);
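A hedged kernel-side sketch (illustrative only; the header carrying the declaration is assumed) of how a tunnel or encapsulation GSO path might call the new export:

/* Hypothetical caller, not from this patch. */
#include <linux/err.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>

static struct sk_buff *example_segment_inner(struct sk_buff *skb,
					     netdev_features_t features)
{
	/* Dispatch to the registered packet_offload for the inner
	 * protocol; returns ERR_PTR(-EPROTONOSUPPORT) if none matches.
	 */
	return skb_eth_gso_segment(skb, features, htons(ETH_P_IP));
}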
+
+/**
* skb_mac_gso_segment - mac layer segmentation handler.
* @skb: buffer to segment
* @features: features for the output path (see dev->features)
@@ -142,6 +167,14 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush))
return -E2BIG;
+ if (unlikely(p->len + len >= GRO_LEGACY_MAX_SIZE)) {
+ if (p->protocol != htons(ETH_P_IPV6) ||
+ skb_headroom(p) < sizeof(struct hop_jumbo_hdr) ||
+ ipv6_hdr(p)->nexthdr != IPPROTO_TCP ||
+ p->encapsulation)
+ return -E2BIG;
+ }
+
lp = NAPI_GRO_CB(p)->last;
pinfo = skb_shinfo(lp);
@@ -459,29 +492,22 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
skb_set_network_header(skb, skb_gro_offset(skb));
skb_reset_mac_len(skb);
- NAPI_GRO_CB(skb)->same_flow = 0;
+ BUILD_BUG_ON(sizeof_field(struct napi_gro_cb, zeroed) != sizeof(u32));
+ BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed),
+ sizeof(u32))); /* Avoid slow unaligned acc */
+ *(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0;
NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
- NAPI_GRO_CB(skb)->free = 0;
- NAPI_GRO_CB(skb)->encap_mark = 0;
- NAPI_GRO_CB(skb)->recursion_counter = 0;
- NAPI_GRO_CB(skb)->is_fou = 0;
NAPI_GRO_CB(skb)->is_atomic = 1;
- NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
/* Setup for GRO checksum validation */
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
NAPI_GRO_CB(skb)->csum = skb->csum;
NAPI_GRO_CB(skb)->csum_valid = 1;
- NAPI_GRO_CB(skb)->csum_cnt = 0;
break;
case CHECKSUM_UNNECESSARY:
NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
- NAPI_GRO_CB(skb)->csum_valid = 0;
break;
- default:
- NAPI_GRO_CB(skb)->csum_cnt = 0;
- NAPI_GRO_CB(skb)->csum_valid = 0;
}
pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
@@ -634,7 +660,6 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->encapsulation = 0;
skb_shinfo(skb)->gso_type = 0;
- skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
if (unlikely(skb->slow_gro)) {
skb_orphan(skb);
skb_ext_reset(skb);
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index 6eb2e5ec2c50..541c7a72a28a 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -28,7 +28,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) {
drop:
- atomic_long_inc(&dev->rx_dropped);
+ dev_core_stats_rx_dropped_inc(dev);
kfree_skb(skb);
res = NET_RX_DROP;
goto unlock;
@@ -89,8 +89,23 @@ int gro_cells_init(struct gro_cells *gcells, struct net_device *dev)
}
EXPORT_SYMBOL(gro_cells_init);
+struct percpu_free_defer {
+ struct rcu_head rcu;
+ void __percpu *ptr;
+};
+
+static void percpu_free_defer_callback(struct rcu_head *head)
+{
+ struct percpu_free_defer *defer;
+
+ defer = container_of(head, struct percpu_free_defer, rcu);
+ free_percpu(defer->ptr);
+ kfree(defer);
+}
+
void gro_cells_destroy(struct gro_cells *gcells)
{
+ struct percpu_free_defer *defer;
int i;
if (!gcells->cells)
@@ -102,12 +117,23 @@ void gro_cells_destroy(struct gro_cells *gcells)
__netif_napi_del(&cell->napi);
__skb_queue_purge(&cell->napi_skbs);
}
- /* This barrier is needed because netpoll could access dev->napi_list
- * under rcu protection.
+ /* We need to observe an rcu grace period before freeing ->cells,
+ * because netpoll could access dev->napi_list under rcu protection.
+ * Try hard using call_rcu() instead of synchronize_rcu(),
+ * because we might be called from cleanup_net(), and we
+ * definitely do not want to block this critical task.
*/
- synchronize_net();
-
- free_percpu(gcells->cells);
+ defer = kmalloc(sizeof(*defer), GFP_KERNEL | __GFP_NOWARN);
+ if (likely(defer)) {
+ defer->ptr = gcells->cells;
+ call_rcu(&defer->rcu, percpu_free_defer_callback);
+ } else {
+ /* We do not hold RTNL at this point, so synchronize_net()
+ * would not be able to expedite this sync.
+ */
+ synchronize_rcu_expedited();
+ free_percpu(gcells->cells);
+ }
gcells->cells = NULL;
}
EXPORT_SYMBOL(gro_cells_destroy);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index b0f5344d1185..a244d3bade7d 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -18,6 +18,7 @@
#include <linux/bitops.h>
#include <linux/types.h>
+#include "dev.h"
enum lw_bits {
LW_URGENT = 0,
@@ -166,10 +167,10 @@ static void linkwatch_do_dev(struct net_device *dev)
netdev_state_change(dev);
}
- /* Note: our callers are responsible for
- * calling netdev_tracker_free().
+ /* Note: our callers are responsible for calling netdev_tracker_free().
+ * This is the reason we use __dev_put() instead of dev_put().
*/
- dev_put(dev);
+ __dev_put(dev);
}
static void __linkwatch_run_queue(int urgent_only)
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 349480ef68a5..8b6b5e72b217 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -159,10 +159,8 @@ static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
return dst->lwtstate->orig_output(net, sk, skb);
}
-static int xmit_check_hhlen(struct sk_buff *skb)
+static int xmit_check_hhlen(struct sk_buff *skb, int hh_len)
{
- int hh_len = skb_dst(skb)->dev->hard_header_len;
-
if (skb_headroom(skb) < hh_len) {
int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
@@ -274,6 +272,7 @@ static int bpf_xmit(struct sk_buff *skb)
bpf = bpf_lwt_lwtunnel(dst->lwtstate);
if (bpf->xmit.prog) {
+ int hh_len = dst->dev->hard_header_len;
__be16 proto = skb->protocol;
int ret;
@@ -291,7 +290,7 @@ static int bpf_xmit(struct sk_buff *skb)
/* If the header was expanded, headroom might be too
* small for L2 header to come, expand as needed.
*/
- ret = xmit_check_hhlen(skb);
+ ret = xmit_check_hhlen(skb, hh_len);
if (unlikely(ret))
return ret;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 213cb7b26b7a..54625287ee5b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1133,7 +1133,8 @@ out:
neigh_release(neigh);
}
-int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
+ const bool immediate_ok)
{
int rc;
bool immediate_probe = false;
@@ -1154,18 +1155,23 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
atomic_set(&neigh->probes,
NEIGH_VAR(neigh->parms, UCAST_PROBES));
neigh_del_timer(neigh);
- neigh->nud_state = NUD_INCOMPLETE;
+ neigh->nud_state = NUD_INCOMPLETE;
neigh->updated = now;
- next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
- HZ/100);
+ if (!immediate_ok) {
+ next = now + 1;
+ } else {
+ immediate_probe = true;
+ next = now + max(NEIGH_VAR(neigh->parms,
+ RETRANS_TIME),
+ HZ / 100);
+ }
neigh_add_timer(neigh, next);
- immediate_probe = true;
} else {
neigh->nud_state = NUD_FAILED;
neigh->updated = jiffies;
write_unlock_bh(&neigh->lock);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
return 1;
}
} else if (neigh->nud_state & NUD_STALE) {
@@ -1187,7 +1193,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
if (!buff)
break;
neigh->arp_queue_len_bytes -= buff->truesize;
- kfree_skb(buff);
+ kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
}
skb_dst_force(skb);
@@ -1209,7 +1215,7 @@ out_dead:
if (neigh->nud_state & NUD_STALE)
goto out_unlock_bh;
write_unlock_bh(&neigh->lock);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
trace_neigh_event_send_dead(neigh, 1);
return 1;
}
@@ -1571,9 +1577,9 @@ static void neigh_managed_work(struct work_struct *work)
write_lock_bh(&tbl->lock);
list_for_each_entry(neigh, &tbl->managed_list, managed_list)
- neigh_event_send(neigh, NULL);
+ neigh_event_send_probe(neigh, NULL, false);
queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
- NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME));
+ max(NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME), HZ));
write_unlock_bh(&tbl->lock);
}
@@ -3364,7 +3370,7 @@ EXPORT_SYMBOL(neigh_seq_stop);
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
+ struct neigh_table *tbl = pde_data(file_inode(seq->file));
int cpu;
if (*pos == 0)
@@ -3381,7 +3387,7 @@ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
+ struct neigh_table *tbl = pde_data(file_inode(seq->file));
int cpu;
for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
@@ -3401,7 +3407,7 @@ static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
- struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
+ struct neigh_table *tbl = pde_data(file_inode(seq->file));
struct neigh_statistics *st = v;
if (v == SEQ_START_TOKEN) {
@@ -3722,7 +3728,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
char *p_name;
- t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
+ t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
if (!t)
goto err;
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index d8b9dbabd4a4..1ec23bf8b05c 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -4,6 +4,8 @@
#include <linux/seq_file.h>
#include <net/wext.h>
+#include "dev.h"
+
#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
#define get_bucket(x) ((x) >> BUCKET_SPACE)
@@ -190,12 +192,23 @@ static const struct seq_operations softnet_seq_ops = {
.show = softnet_seq_show,
};
-static void *ptype_get_idx(loff_t pos)
+static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
{
+ struct list_head *ptype_list = NULL;
struct packet_type *pt = NULL;
+ struct net_device *dev;
loff_t i = 0;
int t;
+ for_each_netdev_rcu(seq_file_net(seq), dev) {
+ ptype_list = &dev->ptype_all;
+ list_for_each_entry_rcu(pt, ptype_list, list) {
+ if (i == pos)
+ return pt;
+ ++i;
+ }
+ }
+
list_for_each_entry_rcu(pt, &ptype_all, list) {
if (i == pos)
return pt;
@@ -216,22 +229,40 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
rcu_read_lock();
- return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
+ return *pos ? ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net_device *dev;
struct packet_type *pt;
struct list_head *nxt;
int hash;
++*pos;
if (v == SEQ_START_TOKEN)
- return ptype_get_idx(0);
+ return ptype_get_idx(seq, 0);
pt = v;
nxt = pt->list.next;
+ if (pt->dev) {
+ if (nxt != &pt->dev->ptype_all)
+ goto found;
+
+ dev = pt->dev;
+ for_each_netdev_continue_rcu(seq_file_net(seq), dev) {
+ if (!list_empty(&dev->ptype_all)) {
+ nxt = dev->ptype_all.next;
+ goto found;
+ }
+ }
+
+ nxt = ptype_all.next;
+ goto ptype_all;
+ }
+
if (pt->type == htons(ETH_P_ALL)) {
+ptype_all:
if (nxt != &ptype_all)
goto found;
hash = 0;
@@ -260,7 +291,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
if (v == SEQ_START_TOKEN)
seq_puts(seq, "Type Device Function\n");
- else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
+ else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) &&
+ (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) {
if (pt->type == htons(ETH_P_ALL))
seq_puts(seq, "ALL ");
else
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 53ea262ecafd..a3642569fe53 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -24,6 +24,7 @@
#include <linux/of_net.h>
#include <linux/cpu.h>
+#include "dev.h"
#include "net-sysfs.h"
#ifdef CONFIG_SYSFS
@@ -32,6 +33,7 @@ static const char fmt_dec[] = "%d\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";
+/* Caller holds RTNL or dev_base_lock */
static inline int dev_isalive(const struct net_device *dev)
{
return dev->reg_state <= NETREG_REGISTERED;
@@ -213,7 +215,7 @@ static ssize_t speed_show(struct device *dev,
if (!rtnl_trylock())
return restart_syscall();
- if (netif_running(netdev)) {
+ if (netif_running(netdev) && netif_device_present(netdev)) {
struct ethtool_link_ksettings cmd;
if (!__ethtool_get_link_ksettings(netdev, &cmd))
@@ -745,7 +747,6 @@ static const struct attribute_group netstat_group = {
.attrs = netstat_attrs,
};
-#if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211)
static struct attribute *wireless_attrs[] = {
NULL
};
@@ -754,7 +755,19 @@ static const struct attribute_group wireless_group = {
.name = "wireless",
.attrs = wireless_attrs,
};
+
+static bool wireless_group_needed(struct net_device *ndev)
+{
+#if IS_ENABLED(CONFIG_CFG80211)
+ if (ndev->ieee80211_ptr)
+ return true;
+#endif
+#if IS_ENABLED(CONFIG_WIRELESS_EXT)
+ if (ndev->wireless_handlers)
+ return true;
#endif
+ return false;
+}
#else /* CONFIG_SYSFS */
#define net_class_groups NULL
@@ -823,7 +836,7 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
{
struct rps_map *old_map, *map;
cpumask_var_t mask;
- int err, cpu, i, hk_flags;
+ int err, cpu, i;
static DEFINE_MUTEX(rps_map_mutex);
if (!capable(CAP_NET_ADMIN))
@@ -839,8 +852,8 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
}
if (!cpumask_empty(mask)) {
- hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
- cpumask_and(mask, mask, housekeeping_cpumask(hk_flags));
+ cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_DOMAIN));
+ cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_WQ));
if (cpumask_empty(mask)) {
free_cpumask_var(mask);
return -EINVAL;
@@ -1995,14 +2008,8 @@ int netdev_register_kobject(struct net_device *ndev)
*groups++ = &netstat_group;
</