aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_dev.c4
-rw-r--r--net/Kconfig17
-rw-r--r--net/Makefile3
-rw-r--r--net/atm/atm_sysfs.c22
-rw-r--r--net/atm/lec.c78
-rw-r--r--net/atm/mpoa_proc.c17
-rw-r--r--net/atm/proc.c11
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/batman-adv/Kconfig3
-rw-r--r--net/batman-adv/Makefile2
-rw-r--r--net/batman-adv/bat_algo.c2
-rw-r--r--net/batman-adv/bat_algo.h2
-rw-r--r--net/batman-adv/bat_iv_ogm.c6
-rw-r--r--net/batman-adv/bat_iv_ogm.h2
-rw-r--r--net/batman-adv/bat_v.c2
-rw-r--r--net/batman-adv/bat_v.h2
-rw-r--r--net/batman-adv/bat_v_elp.c15
-rw-r--r--net/batman-adv/bat_v_elp.h2
-rw-r--r--net/batman-adv/bat_v_ogm.c2
-rw-r--r--net/batman-adv/bat_v_ogm.h2
-rw-r--r--net/batman-adv/bitarray.c2
-rw-r--r--net/batman-adv/bitarray.h2
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c4
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h2
-rw-r--r--net/batman-adv/debugfs.c2
-rw-r--r--net/batman-adv/debugfs.h2
-rw-r--r--net/batman-adv/distributed-arp-table.c10
-rw-r--r--net/batman-adv/distributed-arp-table.h2
-rw-r--r--net/batman-adv/fragmentation.c2
-rw-r--r--net/batman-adv/fragmentation.h2
-rw-r--r--net/batman-adv/gateway_client.c2
-rw-r--r--net/batman-adv/gateway_client.h2
-rw-r--r--net/batman-adv/gateway_common.c2
-rw-r--r--net/batman-adv/gateway_common.h2
-rw-r--r--net/batman-adv/hard-interface.c2
-rw-r--r--net/batman-adv/hard-interface.h2
-rw-r--r--net/batman-adv/hash.c2
-rw-r--r--net/batman-adv/hash.h2
-rw-r--r--net/batman-adv/icmp_socket.c2
-rw-r--r--net/batman-adv/icmp_socket.h2
-rw-r--r--net/batman-adv/log.c2
-rw-r--r--net/batman-adv/log.h12
-rw-r--r--net/batman-adv/main.c2
-rw-r--r--net/batman-adv/main.h4
-rw-r--r--net/batman-adv/multicast.c2
-rw-r--r--net/batman-adv/multicast.h2
-rw-r--r--net/batman-adv/netlink.c2
-rw-r--r--net/batman-adv/netlink.h2
-rw-r--r--net/batman-adv/network-coding.c2
-rw-r--r--net/batman-adv/network-coding.h2
-rw-r--r--net/batman-adv/originator.c2
-rw-r--r--net/batman-adv/originator.h2
-rw-r--r--net/batman-adv/routing.c2
-rw-r--r--net/batman-adv/routing.h2
-rw-r--r--net/batman-adv/send.c2
-rw-r--r--net/batman-adv/send.h2
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/batman-adv/soft-interface.h2
-rw-r--r--net/batman-adv/sysfs.c2
-rw-r--r--net/batman-adv/sysfs.h2
-rw-r--r--net/batman-adv/tp_meter.c2
-rw-r--r--net/batman-adv/tp_meter.h2
-rw-r--r--net/batman-adv/trace.c2
-rw-r--r--net/batman-adv/trace.h2
-rw-r--r--net/batman-adv/translation-table.c2
-rw-r--r--net/batman-adv/translation-table.h2
-rw-r--r--net/batman-adv/tvlv.c2
-rw-r--r--net/batman-adv/tvlv.h2
-rw-r--r--net/batman-adv/types.h6
-rw-r--r--net/bluetooth/bnep/netdev.c2
-rw-r--r--net/bluetooth/hci_core.c100
-rw-r--r--net/bluetooth/hci_debugfs.c78
-rw-r--r--net/bluetooth/hci_event.c41
-rw-r--r--net/bluetooth/hci_sock.c21
-rw-r--r--net/bluetooth/l2cap_core.c55
-rw-r--r--net/bluetooth/lib.c16
-rw-r--r--net/bluetooth/mgmt.c88
-rw-r--r--net/bluetooth/smp.c111
-rw-r--r--net/bpf/test_run.c54
-rw-r--r--net/bpfilter/Makefile2
-rw-r--r--net/bridge/Makefile2
-rw-r--r--net/bridge/br_device.c9
-rw-r--r--net/bridge/br_forward.c2
-rw-r--r--net/bridge/br_input.c7
-rw-r--r--net/bridge/br_netlink.c74
-rw-r--r--net/bridge/br_private.h151
-rw-r--r--net/bridge/br_stp.c18
-rw-r--r--net/bridge/br_stp_bpdu.c4
-rw-r--r--net/bridge/br_vlan.c555
-rw-r--r--net/bridge/br_vlan_options.c160
-rw-r--r--net/caif/caif_dev.c3
-rw-r--r--net/caif/caif_usb.c2
-rw-r--r--net/ceph/Makefile2
-rw-r--r--net/ceph/ceph_common.c41
-rw-r--r--net/ceph/ceph_fs.c104
-rw-r--r--net/ceph/osd_client.c18
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/bpf_sk_storage.c5
-rw-r--r--net/core/datagram.c27
-rw-r--r--net/core/dev.c286
-rw-r--r--net/core/dev_ioctl.c1
-rw-r--r--net/core/devlink.c255
-rw-r--r--net/core/drop_monitor.c12
-rw-r--r--net/core/fib_rules.c2
-rw-r--r--net/core/filter.c223
-rw-r--r--net/core/flow_dissector.c13
-rw-r--r--net/core/neighbour.c1
-rw-r--r--net/core/net_namespace.c99
-rw-r--r--net/core/netclassid_cgroup.c47
-rw-r--r--net/core/page_pool.c93
-rw-r--r--net/core/pktgen.c44
-rw-r--r--net/core/rtnetlink.c48
-rw-r--r--net/core/skbuff.c149
-rw-r--r--net/core/skmsg.c2
-rw-r--r--net/core/sock.c7
-rw-r--r--net/core/sock_map.c28
-rw-r--r--net/core/sock_reuseport.c1
-rw-r--r--net/core/timestamping.c20
-rw-r--r--net/core/utils.c20
-rw-r--r--net/decnet/af_decnet.c2
-rw-r--r--net/dsa/Kconfig6
-rw-r--r--net/dsa/Makefile1
-rw-r--r--net/dsa/dsa2.c67
-rw-r--r--net/dsa/dsa_priv.h21
-rw-r--r--net/dsa/master.c30
-rw-r--r--net/dsa/port.c83
-rw-r--r--net/dsa/slave.c49
-rw-r--r--net/dsa/tag_ar9331.c96
-rw-r--r--net/dsa/tag_qca.c2
-rw-r--r--net/dsa/tag_sja1105.c18
-rw-r--r--net/ethernet/eth.c16
-rw-r--r--net/ethtool/Makefile8
-rw-r--r--net/ethtool/bitset.c739
-rw-r--r--net/ethtool/bitset.h30
-rw-r--r--net/ethtool/common.c259
-rw-r--r--net/ethtool/common.h31
-rw-r--r--net/ethtool/debug.c134
-rw-r--r--net/ethtool/ioctl.c (renamed from net/core/ethtool.c)171
-rw-r--r--net/ethtool/linkinfo.c167
-rw-r--r--net/ethtool/linkmodes.c375
-rw-r--r--net/ethtool/linkstate.c74
-rw-r--r--net/ethtool/netlink.c729
-rw-r--r--net/ethtool/netlink.h345
-rw-r--r--net/ethtool/strset.c437
-rw-r--r--net/ethtool/wol.c177
-rw-r--r--net/hsr/hsr_framereg.c4
-rw-r--r--net/hsr/hsr_main.h2
-rw-r--r--net/hsr/hsr_slave.c2
-rw-r--r--net/ieee802154/nl_policy.c6
-rw-r--r--net/ipv4/Kconfig11
-rw-r--r--net/ipv4/Makefile4
-rw-r--r--net/ipv4/bpf_tcp_ca.c252
-rw-r--r--net/ipv4/cipso_ipv4.c7
-rw-r--r--net/ipv4/esp4.c264
-rw-r--r--net/ipv4/esp4_offload.c2
-rw-r--r--net/ipv4/fib_lookup.h8
-rw-r--r--net/ipv4/fib_semantics.c33
-rw-r--r--net/ipv4/fib_trie.c194
-rw-r--r--net/ipv4/fou.c4
-rw-r--r--net/ipv4/gre_demux.c12
-rw-r--r--net/ipv4/gre_offload.c2
-rw-r--r--net/ipv4/icmp.c33
-rw-r--r--net/ipv4/inet_connection_sock.c40
-rw-r--r--net/ipv4/inet_diag.c44
-rw-r--r--net/ipv4/ip_output.c8
-rw-r--r--net/ipv4/ip_tunnel.c4
-rw-r--r--net/ipv4/ip_vti.c13
-rw-r--r--net/ipv4/ipconfig.c10
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c16
-rw-r--r--net/ipv4/nexthop.c4
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/raw_diag.c5
-rw-r--r--net/ipv4/route.c56
-rw-r--r--net/ipv4/syncookies.c4
-rw-r--r--net/ipv4/sysctl_net_ipv4.c9
-rw-r--r--net/ipv4/tcp.c22
-rw-r--r--net/ipv4/tcp_bbr.c3
-rw-r--r--net/ipv4/tcp_cong.c16
-rw-r--r--net/ipv4/tcp_cubic.c83
-rw-r--r--net/ipv4/tcp_input.c54
-rw-r--r--net/ipv4/tcp_ipv4.c135
-rw-r--r--net/ipv4/tcp_metrics.c13
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/tcp_output.c74
-rw-r--r--net/ipv4/tcp_timer.c6
-rw-r--r--net/ipv4/udp.c15
-rw-r--r--net/ipv4/udp_diag.c5
-rw-r--r--net/ipv4/udp_offload.c106
-rw-r--r--net/ipv4/xfrm4_protocol.c9
-rw-r--r--net/ipv6/addrconf.c54
-rw-r--r--net/ipv6/esp6_offload.c2
-rw-r--r--net/ipv6/ip6_fib.c122
-rw-r--r--net/ipv6/ip6_gre.c11
-rw-r--r--net/ipv6/ip6_icmp.c34
-rw-r--r--net/ipv6/ip6_tunnel.c85
-rw-r--r--net/ipv6/ip6_vti.c13
-rw-r--r--net/ipv6/ipv6_sockglue.c10
-rw-r--r--net/ipv6/route.c94
-rw-r--r--net/ipv6/seg6_iptunnel.c2
-rw-r--r--net/ipv6/seg6_local.c6
-rw-r--r--net/ipv6/syncookies.c3
-rw-r--r--net/ipv6/tcp_ipv6.c124
-rw-r--r--net/ipv6/udp.c3
-rw-r--r--net/ipv6/udp_offload.c29
-rw-r--r--net/l2tp/l2tp_core.c9
-rw-r--r--net/mac80211/cfg.c2
-rw-r--r--net/mac80211/mesh_hwmp.c3
-rw-r--r--net/mac80211/mlme.c14
-rw-r--r--net/mac80211/rx.c2
-rw-r--r--net/mac80211/trace.h28
-rw-r--r--net/mac80211/tx.c15
-rw-r--r--net/mac80211/util.c34
-rw-r--r--net/mptcp/Kconfig29
-rw-r--r--net/mptcp/Makefile4
-rw-r--r--net/mptcp/crypto.c152
-rw-r--r--net/mptcp/ctrl.c130
-rw-r--r--net/mptcp/options.c601
-rw-r--r--net/mptcp/protocol.c1248
-rw-r--r--net/mptcp/protocol.h240
-rw-r--r--net/mptcp/subflow.c865
-rw-r--r--net/mptcp/token.c195
-rw-r--r--net/ncsi/internal.h20
-rw-r--r--net/ncsi/ncsi-cmd.c10
-rw-r--r--net/ncsi/ncsi-manage.c72
-rw-r--r--net/ncsi/ncsi-rsp.c6
-rw-r--r--net/netfilter/Makefile3
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h2
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c6
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c6
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c6
-rw-r--r--net/netfilter/ipset/ip_set_core.c75
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h635
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c196
-rw-r--r--net/netfilter/nf_conntrack_extend.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c6
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c20
-rw-r--r--net/netfilter/nf_conntrack_standalone.c2
-rw-r--r--net/netfilter/nf_flow_table_core.c34
-rw-r--r--net/netfilter/nf_flow_table_ip.c21
-rw-r--r--net/netfilter/nf_flow_table_offload.c165
-rw-r--r--net/netfilter/nf_synproxy_core.c2
-rw-r--r--net/netfilter/nf_tables_api.c423
-rw-r--r--net/netfilter/nf_tables_offload.c2
-rw-r--r--net/netfilter/nf_tables_set_core.c2
-rw-r--r--net/netfilter/nfnetlink.c6
-rw-r--r--net/netfilter/nfnetlink_cthelper.c2
-rw-r--r--net/netfilter/nfnetlink_queue.c8
-rw-r--r--net/netfilter/nft_bitwise.c224
-rw-r--r--net/netfilter/nft_chain_nat.c1
-rw-r--r--net/netfilter/nft_dynset.c2
-rw-r--r--net/netfilter/nft_meta.c440
-rw-r--r--net/netfilter/nft_osf.c3
-rw-r--r--net/netfilter/nft_payload.c1
-rw-r--r--net/netfilter/nft_set_bitmap.c4
-rw-r--r--net/netfilter/nft_set_hash.c2
-rw-r--r--net/netfilter/nft_set_pipapo.c2104
-rw-r--r--net/netfilter/nft_set_rbtree.c3
-rw-r--r--net/netfilter/nft_tunnel.c54
-rw-r--r--net/netfilter/x_tables.c10
-rw-r--r--net/netfilter/xt_hashlimit.c58
-rw-r--r--net/netfilter/xt_recent.c19
-rw-r--r--net/netlabel/netlabel_domainhash.c3
-rw-r--r--net/netlabel/netlabel_unlabeled.c3
-rw-r--r--net/netlink/af_netlink.c7
-rw-r--r--net/netlink/genetlink.c5
-rw-r--r--net/nfc/hci/core.c19
-rw-r--r--net/nfc/netlink.c4
-rw-r--r--net/openvswitch/actions.c30
-rw-r--r--net/openvswitch/datapath.c21
-rw-r--r--net/openvswitch/flow_netlink.c52
-rw-r--r--net/openvswitch/flow_table.c6
-rw-r--r--net/openvswitch/meter.c3
-rw-r--r--net/openvswitch/vport.c3
-rw-r--r--net/packet/af_packet.c70
-rw-r--r--net/phonet/pn_dev.c2
-rw-r--r--net/qrtr/qrtr.c319
-rw-r--r--net/rds/ib.c7
-rw-r--r--net/rds/ib.h3
-rw-r--r--net/rds/ib_mr.h7
-rw-r--r--net/rds/ib_rdma.c84
-rw-r--r--net/rds/ib_send.c44
-rw-r--r--net/rds/rdma.c169
-rw-r--r--net/rds/rds.h13
-rw-r--r--net/rose/af_rose.c2
-rw-r--r--net/rose/rose_route.c1
-rw-r--r--net/rxrpc/af_rxrpc.c2
-rw-r--r--net/rxrpc/ar-internal.h11
-rw-r--r--net/rxrpc/call_object.c26
-rw-r--r--net/rxrpc/conn_client.c3
-rw-r--r--net/rxrpc/conn_event.c30
-rw-r--r--net/rxrpc/conn_object.c3
-rw-r--r--net/rxrpc/input.c18
-rw-r--r--net/rxrpc/local_object.c23
-rw-r--r--net/rxrpc/output.c27
-rw-r--r--net/rxrpc/peer_event.c42
-rw-r--r--net/sched/Kconfig30
-rw-r--r--net/sched/Makefile2
-rw-r--r--net/sched/act_api.c1
-rw-r--r--net/sched/cls_api.c5
-rw-r--r--net/sched/cls_basic.c11
-rw-r--r--net/sched/cls_bpf.c11
-rw-r--r--net/sched/cls_flower.c13
-rw-r--r--net/sched/cls_fw.c11
-rw-r--r--net/sched/cls_matchall.c12
-rw-r--r--net/sched/cls_route.c11
-rw-r--r--net/sched/cls_rsvp.h17
-rw-r--r--net/sched/cls_tcindex.c54
-rw-r--r--net/sched/cls_u32.c11
-rw-r--r--net/sched/ematch.c5
-rw-r--r--net/sched/sch_api.c47
-rw-r--r--net/sched/sch_cake.c63
-rw-r--r--net/sched/sch_choke.c2
-rw-r--r--net/sched/sch_ets.c828
-rw-r--r--net/sched/sch_fq.c1
-rw-r--r--net/sched/sch_fq_pie.c562
-rw-r--r--net/sched/sch_generic.c2
-rw-r--r--net/sched/sch_pie.c289
-rw-r--r--net/sched/sch_taprio.c105
-rw-r--r--net/sched/sch_tbf.c60
-rw-r--r--net/sctp/associola.c10
-rw-r--r--net/sctp/chunk.c2
-rw-r--r--net/sctp/diag.c8
-rw-r--r--net/sctp/endpointola.c6
-rw-r--r--net/sctp/input.c5
-rw-r--r--net/sctp/output.c2
-rw-r--r--net/sctp/outqueue.c13
-rw-r--r--net/sctp/sm_make_chunk.c7
-rw-r--r--net/sctp/sm_sideeffect.c16
-rw-r--r--net/sctp/sm_statefuns.c49
-rw-r--r--net/sctp/socket.c12
-rw-r--r--net/sctp/stream.c3
-rw-r--r--net/sctp/stream_interleave.c23
-rw-r--r--net/sctp/transport.c2
-rw-r--r--net/sctp/ulpqueue.c15
-rw-r--r--net/smc/af_smc.c27
-rw-r--r--net/smc/smc_clc.c4
-rw-r--r--net/smc/smc_core.c15
-rw-r--r--net/smc/smc_core.h2
-rw-r--r--net/smc/smc_diag.c5
-rw-r--r--net/smc/smc_ib.c3
-rw-r--r--net/smc/smc_pnet.c2
-rw-r--r--net/socket.c21
-rw-r--r--net/sunrpc/addr.c2
-rw-r--r--net/sunrpc/auth.c49
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c1
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c12
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seal.c8
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_unseal.c6
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c16
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c31
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c20
-rw-r--r--net/sunrpc/cache.c109
-rw-r--r--net/sunrpc/clnt.c1
-rw-r--r--net/sunrpc/sched.c4
-rw-r--r--net/sunrpc/stats.c21
-rw-r--r--net/sunrpc/svcauth_unix.c10
-rw-r--r--net/sunrpc/xdr.c2
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c4
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c117
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c20
-rw-r--r--net/sunrpc/xprtrdma/transport.c17
-rw-r--r--net/sunrpc/xprtrdma/verbs.c213
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h14
-rw-r--r--net/tipc/bcast.c11
-rw-r--r--net/tipc/bearer.c11
-rw-r--r--net/tipc/bearer.h6
-rw-r--r--net/tipc/crypto.c5
-rw-r--r--net/tipc/eth_media.c3
-rw-r--r--net/tipc/ib_media.c5
-rw-r--r--net/tipc/link.c199
-rw-r--r--net/tipc/link.h9
-rw-r--r--net/tipc/net.c56
-rw-r--r--net/tipc/net.h1
-rw-r--r--net/tipc/netlink.c7
-rw-r--r--net/tipc/node.c23
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/tipc/udp_media.c3
-rw-r--r--net/tls/tls_device.c25
-rw-r--r--net/unix/af_unix.c74
-rw-r--r--net/vmw_vsock/Kconfig12
-rw-r--r--net/vmw_vsock/Makefile1
-rw-r--r--net/vmw_vsock/af_vsock.c65
-rw-r--r--net/vmw_vsock/hyperv_transport.c3
-rw-r--r--net/vmw_vsock/virtio_transport.c61
-rw-r--r--net/vmw_vsock/virtio_transport_common.c5
-rw-r--r--net/vmw_vsock/vmci_transport.c2
-rw-r--r--net/vmw_vsock/vsock_loopback.c180
-rw-r--r--net/wireless/ethtool.c8
-rw-r--r--net/wireless/nl80211.c14
-rw-r--r--net/wireless/reg.c2
-rw-r--r--net/wireless/trace.h6
-rw-r--r--net/xdp/xdp_umem.c11
-rw-r--r--net/xdp/xsk.c83
-rw-r--r--net/xdp/xsk_queue.c15
-rw-r--r--net/xdp/xsk_queue.h372
-rw-r--r--net/xfrm/Makefile1
-rw-r--r--net/xfrm/espintcp.c509
-rw-r--r--net/xfrm/xfrm_device.c15
-rw-r--r--net/xfrm/xfrm_input.c21
-rw-r--r--net/xfrm/xfrm_interface.c40
-rw-r--r--net/xfrm/xfrm_output.c9
-rw-r--r--net/xfrm/xfrm_policy.c9
-rw-r--r--net/xfrm/xfrm_state.c3
404 files changed, 19851 insertions, 4093 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 2a78da4072de..990b9fde28c6 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -647,8 +647,8 @@ static int vlan_ethtool_get_ts_info(struct net_device *dev,
const struct ethtool_ops *ops = vlan->real_dev->ethtool_ops;
struct phy_device *phydev = vlan->real_dev->phydev;
- if (phydev && phydev->drv && phydev->drv->ts_info) {
- return phydev->drv->ts_info(phydev, info);
+ if (phy_has_tsinfo(phydev)) {
+ return phy_ts_info(phydev, info);
} else if (ops->get_ts_info) {
return ops->get_ts_info(vlan->real_dev, info);
} else {
diff --git a/net/Kconfig b/net/Kconfig
index bd191f978a23..2eeb0e55f7c9 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -91,6 +91,7 @@ if INET
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
source "net/netlabel/Kconfig"
+source "net/mptcp/Kconfig"
endif # if INET
@@ -108,9 +109,10 @@ config NETWORK_PHY_TIMESTAMPING
bool "Timestamping in PHY devices"
select NET_PTP_CLASSIFY
help
- This allows timestamping of network packets by PHYs with
- hardware timestamping capabilities. This option adds some
- overhead in the transmit and receive paths.
+ This allows timestamping of network packets by PHYs (or
+ other MII bus snooping devices) with hardware timestamping
+ capabilities. This option adds some overhead in the transmit
+ and receive paths.
If you are unsure how to answer this question, answer N.
@@ -187,7 +189,6 @@ config BRIDGE_NETFILTER
depends on NETFILTER_ADVANCED
select NETFILTER_FAMILY_BRIDGE
select SKB_EXTENSIONS
- default m
---help---
Enabling this option will let arptables resp. iptables see bridged
ARP resp. IP traffic. If you want a bridging firewall, you probably
@@ -448,6 +449,14 @@ config FAILOVER
migration of VMs with direct attached VFs by failing over to the
paravirtual datapath when the VF is unplugged.
+config ETHTOOL_NETLINK
+ bool "Netlink interface for ethtool"
+ default y
+ help
+ An alternative userspace interface for ethtool based on generic
+ netlink. It provides better extensibility and some new features,
+ e.g. notification messages.
+
endif # if NET
# Used by archs to tell that they support BPF JIT compiler plus which flavour.
diff --git a/net/Makefile b/net/Makefile
index 449fc0b221f8..07ea48160874 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_NET) += $(tmp-y)
# LLC has to be linked before the files in net/802/
obj-$(CONFIG_LLC) += llc/
-obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/
+obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/ ethtool/
obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_INET) += ipv4/
obj-$(CONFIG_TLS) += tls/
@@ -87,3 +87,4 @@ endif
obj-$(CONFIG_QRTR) += qrtr/
obj-$(CONFIG_NET_NCSI) += ncsi/
obj-$(CONFIG_XDP_SOCKETS) += xdp/
+obj-$(CONFIG_MPTCP) += mptcp/
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 39b94ca5f65d..aa1b57161f3b 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -33,23 +33,17 @@ static ssize_t show_atmaddress(struct device *cdev,
unsigned long flags;
struct atm_dev *adev = to_atm_dev(cdev);
struct atm_dev_addr *aaddr;
- int bin[] = { 1, 2, 10, 6, 1 }, *fmt = bin;
- int i, j, count = 0;
+ int count = 0;
spin_lock_irqsave(&adev->lock, flags);
list_for_each_entry(aaddr, &adev->local, entry) {
- for (i = 0, j = 0; i < ATM_ESA_LEN; ++i, ++j) {
- if (j == *fmt) {
- count += scnprintf(buf + count,
- PAGE_SIZE - count, ".");
- ++fmt;
- j = 0;
- }
- count += scnprintf(buf + count,
- PAGE_SIZE - count, "%02x",
- aaddr->addr.sas_addr.prv[i]);
- }
- count += scnprintf(buf + count, PAGE_SIZE - count, "\n");
+ count += scnprintf(buf + count, PAGE_SIZE - count,
+ "%1phN.%2phN.%10phN.%6phN.%1phN\n",
+ &aaddr->addr.sas_addr.prv[0],
+ &aaddr->addr.sas_addr.prv[1],
+ &aaddr->addr.sas_addr.prv[3],
+ &aaddr->addr.sas_addr.prv[13],
+ &aaddr->addr.sas_addr.prv[19]);
}
spin_unlock_irqrestore(&adev->lock, flags);
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 5a77c235a212..25fa3a7b72bd 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -194,7 +194,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb)
dev->stats.tx_bytes += skb->len;
}
-static void lec_tx_timeout(struct net_device *dev)
+static void lec_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
pr_info("%s\n", dev->name);
netif_trans_update(dev);
@@ -799,14 +799,9 @@ static const char *lec_arp_get_status_string(unsigned char status)
static void lec_info(struct seq_file *seq, struct lec_arp_table *entry)
{
- int i;
-
- for (i = 0; i < ETH_ALEN; i++)
- seq_printf(seq, "%2.2x", entry->mac_addr[i] & 0xff);
- seq_printf(seq, " ");
- for (i = 0; i < ATM_ESA_LEN; i++)
- seq_printf(seq, "%2.2x", entry->atm_addr[i] & 0xff);
- seq_printf(seq, " %s %4.4x", lec_arp_get_status_string(entry->status),
+ seq_printf(seq, "%pM ", entry->mac_addr);
+ seq_printf(seq, "%*phN ", ATM_ESA_LEN, entry->atm_addr);
+ seq_printf(seq, "%s %4.4x", lec_arp_get_status_string(entry->status),
entry->flags & 0xffff);
if (entry->vcc)
seq_printf(seq, "%3d %3d ", entry->vcc->vpi, entry->vcc->vci);
@@ -1354,7 +1349,7 @@ static void dump_arp_table(struct lec_priv *priv)
{
struct lec_arp_table *rulla;
char buf[256];
- int i, j, offset;
+ int i, offset;
pr_info("Dump %p:\n", priv);
for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
@@ -1362,14 +1357,10 @@ static void dump_arp_table(struct lec_priv *priv)
&priv->lec_arp_tables[i], next) {
offset = 0;
offset += sprintf(buf, "%d: %p\n", i, rulla);
- offset += sprintf(buf + offset, "Mac: %pM",
+ offset += sprintf(buf + offset, "Mac: %pM ",
rulla->mac_addr);
- offset += sprintf(buf + offset, " Atm:");
- for (j = 0; j < ATM_ESA_LEN; j++) {
- offset += sprintf(buf + offset,
- "%2.2x ",
- rulla->atm_addr[j] & 0xff);
- }
+ offset += sprintf(buf + offset, "Atm: %*ph ", ATM_ESA_LEN,
+ rulla->atm_addr);
offset += sprintf(buf + offset,
"Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ",
rulla->vcc ? rulla->vcc->vpi : 0,
@@ -1392,12 +1383,9 @@ static void dump_arp_table(struct lec_priv *priv)
pr_info("No forward\n");
hlist_for_each_entry(rulla, &priv->lec_no_forward, next) {
offset = 0;
- offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr);
- offset += sprintf(buf + offset, " Atm:");
- for (j = 0; j < ATM_ESA_LEN; j++) {
- offset += sprintf(buf + offset, "%2.2x ",
- rulla->atm_addr[j] & 0xff);
- }
+ offset += sprintf(buf + offset, "Mac: %pM ", rulla->mac_addr);
+ offset += sprintf(buf + offset, "Atm: %*ph ", ATM_ESA_LEN,
+ rulla->atm_addr);
offset += sprintf(buf + offset,
"Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ",
rulla->vcc ? rulla->vcc->vpi : 0,
@@ -1417,12 +1405,9 @@ static void dump_arp_table(struct lec_priv *priv)
pr_info("Empty ones\n");
hlist_for_each_entry(rulla, &priv->lec_arp_empty_ones, next) {
offset = 0;
- offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr);
- offset += sprintf(buf + offset, " Atm:");
- for (j = 0; j < ATM_ESA_LEN; j++) {
- offset += sprintf(buf + offset, "%2.2x ",
- rulla->atm_addr[j] & 0xff);
- }
+ offset += sprintf(buf + offset, "Mac: %pM ", rulla->mac_addr);
+ offset += sprintf(buf + offset, "Atm: %*ph ", ATM_ESA_LEN,
+ rulla->atm_addr);
offset += sprintf(buf + offset,
"Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ",
rulla->vcc ? rulla->vcc->vpi : 0,
@@ -1442,12 +1427,9 @@ static void dump_arp_table(struct lec_priv *priv)
pr_info("Multicast Forward VCCs\n");
hlist_for_each_entry(rulla, &priv->mcast_fwds, next) {
offset = 0;
- offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr);
- offset += sprintf(buf + offset, " Atm:");
- for (j = 0; j < ATM_ESA_LEN; j++) {
- offset += sprintf(buf + offset, "%2.2x ",
- rulla->atm_addr[j] & 0xff);
- }
+ offset += sprintf(buf + offset, "Mac: %pM ", rulla->mac_addr);
+ offset += sprintf(buf + offset, "Atm: %*ph ", ATM_ESA_LEN,
+ rulla->atm_addr);
offset += sprintf(buf + offset,
"Vcc vpi:%d vci:%d, Recv_vcc vpi:%d vci:%d Last_used:%lx, Timestamp:%lx, No_tries:%d ",
rulla->vcc ? rulla->vcc->vpi : 0,
@@ -1973,17 +1955,8 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
* Vcc which we don't want to make default vcc,
* attach it anyway.
*/
- pr_debug("LEC_ARP:Attaching data direct, not default: %2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n",
- ioc_data->atm_addr[0], ioc_data->atm_addr[1],
- ioc_data->atm_addr[2], ioc_data->atm_addr[3],
- ioc_data->atm_addr[4], ioc_data->atm_addr[5],
- ioc_data->atm_addr[6], ioc_data->atm_addr[7],
- ioc_data->atm_addr[8], ioc_data->atm_addr[9],
- ioc_data->atm_addr[10], ioc_data->atm_addr[11],
- ioc_data->atm_addr[12], ioc_data->atm_addr[13],
- ioc_data->atm_addr[14], ioc_data->atm_addr[15],
- ioc_data->atm_addr[16], ioc_data->atm_addr[17],
- ioc_data->atm_addr[18], ioc_data->atm_addr[19]);
+ pr_debug("LEC_ARP:Attaching data direct, not default: %*phN\n",
+ ATM_ESA_LEN, ioc_data->atm_addr);
entry = make_entry(priv, bus_mac);
if (entry == NULL)
goto out;
@@ -1999,17 +1972,8 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
dump_arp_table(priv);
goto out;
}
- pr_debug("LEC_ARP:Attaching data direct, default: %2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n",
- ioc_data->atm_addr[0], ioc_data->atm_addr[1],
- ioc_data->atm_addr[2], ioc_data->atm_addr[3],
- ioc_data->atm_addr[4], ioc_data->atm_addr[5],
- ioc_data->atm_addr[6], ioc_data->atm_addr[7],
- ioc_data->atm_addr[8], ioc_data->atm_addr[9],
- ioc_data->atm_addr[10], ioc_data->atm_addr[11],
- ioc_data->atm_addr[12], ioc_data->atm_addr[13],
- ioc_data->atm_addr[14], ioc_data->atm_addr[15],
- ioc_data->atm_addr[16], ioc_data->atm_addr[17],
- ioc_data->atm_addr[18], ioc_data->atm_addr[19]);
+ pr_debug("LEC_ARP:Attaching data direct, default: %*phN\n",
+ ATM_ESA_LEN, ioc_data->atm_addr);
for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
hlist_for_each_entry(entry,
&priv->lec_arp_tables[i], next) {
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
index 46d6cd9a36ae..829db9eba0cb 100644
--- a/net/atm/mpoa_proc.c
+++ b/net/atm/mpoa_proc.c
@@ -53,15 +53,12 @@ static ssize_t proc_mpc_write(struct file *file, const char __user *buff,
static int parse_qos(const char *buff);
-/*
- * Define allowed FILE OPERATIONS
- */
-static const struct file_operations mpc_file_operations = {
- .open = proc_mpc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = proc_mpc_write,
- .release = seq_release,
+static const struct proc_ops mpc_proc_ops = {
+ .proc_open = proc_mpc_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = proc_mpc_write,
+ .proc_release = seq_release,
};
/*
@@ -290,7 +287,7 @@ int mpc_proc_init(void)
{
struct proc_dir_entry *p;
- p = proc_create(STAT_FILE_NAME, 0, atm_proc_root, &mpc_file_operations);
+ p = proc_create(STAT_FILE_NAME, 0, atm_proc_root, &mpc_proc_ops);
if (!p) {
pr_err("Unable to initialize /proc/atm/%s\n", STAT_FILE_NAME);
return -ENOMEM;
diff --git a/net/atm/proc.c b/net/atm/proc.c
index d79221fd4dae..4369ffa3302a 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -36,9 +36,9 @@
static ssize_t proc_dev_atm_read(struct file *file, char __user *buf,
size_t count, loff_t *pos);
-static const struct file_operations proc_atm_dev_ops = {
- .read = proc_dev_atm_read,
- .llseek = noop_llseek,
+static const struct proc_ops atm_dev_proc_ops = {
+ .proc_read = proc_dev_atm_read,
+ .proc_lseek = noop_llseek,
};
static void add_stats(struct seq_file *seq, const char *aal,
@@ -134,8 +134,7 @@ static void vcc_seq_stop(struct seq_file *seq, void *v)
static void *vcc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
v = vcc_walk(seq, 1);
- if (v)
- (*pos)++;
+ (*pos)++;
return v;
}
@@ -360,7 +359,7 @@ int atm_proc_dev_register(struct atm_dev *dev)
goto err_out;
dev->proc_entry = proc_create_data(dev->proc_name, 0, atm_proc_root,
- &proc_atm_dev_ops, dev);
+ &atm_dev_proc_ops, dev);
if (!dev->proc_entry)
goto err_free_name;
return 0;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 324306d6fde0..ff57ea89c27e 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -808,7 +808,7 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,
struct sock *sk;
ax25_cb *ax25;
- if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+ if (protocol < 0 || protocol > U8_MAX)
return -EINVAL;
if (!net_eq(net, &init_net))
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index d5028af750d5..c762758a4649 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
@@ -100,7 +100,6 @@ config BATMAN_ADV_DEBUG
config BATMAN_ADV_SYSFS
bool "batman-adv sysfs entries"
depends on BATMAN_ADV
- default y
help
Say Y here if you want to enable batman-adv device configuration and
status interface through sysfs attributes. It is replaced by the
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index fd63e116d9ff..daa49af7ff40 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
index fa39eaaab9d7..382fbe51fd34 100644
--- a/net/batman-adv/bat_algo.c
+++ b/net/batman-adv/bat_algo.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 37898da8ad48..686a60bc9492 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Linus Lüssing
*/
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 5b0b20e6da95..a7c8dd7ae513 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
@@ -789,6 +789,10 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex);
+ /* interface already disabled by batadv_iv_ogm_iface_disable */
+ if (!*ogm_buff)
+ return;
+
/* the interface gets activated here to avoid race conditions between
* the moment of activating the interface in
* hardif_activate_interface() where the originator mac is set and
diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h
index c7a9ba305bfc..0c57c1000c64 100644
--- a/net/batman-adv/bat_iv_ogm.h
+++ b/net/batman-adv/bat_iv_ogm.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 4ff6cf1ecae7..0ecaf1bb0068 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2020 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*/
diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h
index 37833db098e6..5e0be10bc84e 100644
--- a/net/batman-adv/bat_v.h
+++ b/net/batman-adv/bat_v.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Linus Lüssing
*/
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 2614a9caee00..1e3172db7492 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*/
@@ -107,10 +107,17 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
}
if (ret)
goto default_throughput;
- if (!(sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT)))
- goto default_throughput;
- return sinfo.expected_throughput / 100;
+ if (sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT))
+ return sinfo.expected_throughput / 100;
+
+ /* try to estimate the expected throughput based on reported tx
+ * rates
+ */
+ if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE))
+ return cfg80211_calculate_bitrate(&sinfo.txrate) / 3;
+
+ goto default_throughput;
}
/* if not a wifi interface, check if this device provides data via
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index 1a29505f4f66..4358d436be2a 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2020 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*/
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 714ce56cfcc8..969466218999 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2020 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*/
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
index bf16d040461d..0ae2575f70bb 100644
--- a/net/batman-adv/bat_v_ogm.h
+++ b/net/batman-adv/bat_v_ogm.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2020 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*/
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index 7f04a6acf14e..4bc695cda397 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2020 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*/
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index 84ad2d2b6ac9..533c6d44cb58 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2020 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*/
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 663a53b6d36e..41cc87f06b14 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
*/
@@ -844,7 +844,7 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
/* handle as ANNOUNCE frame */
backbone_gw->lasttime = jiffies;
- crc = ntohs(*((__be16 *)(&an_addr[4])));
+ crc = ntohs(*((__force __be16 *)(&an_addr[4])));
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"%s(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n",
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 02b24a861a85..41edb2c4a327 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
*/
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 38c4d8e51155..452856c27d20 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*/
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 1c5afd301ce9..7e2e8f586f42 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*/
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index ec7bf5a4a9fc..3d21dd83f8cc 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*/
@@ -246,7 +246,7 @@ static u8 *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size)
*/
static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size)
{
- return *(__be32 *)(batadv_arp_hw_src(skb, hdr_size) + ETH_ALEN);
+ return *(__force __be32 *)(batadv_arp_hw_src(skb, hdr_size) + ETH_ALEN);
}
/**
@@ -270,7 +270,9 @@ static u8 *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size)
*/
static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size)
{
- return *(__be32 *)(batadv_arp_hw_src(skb, hdr_size) + ETH_ALEN * 2 + 4);
+ u8 *dst = batadv_arp_hw_src(skb, hdr_size) + ETH_ALEN * 2 + 4;
+
+ return *(__force __be32 *)dst;
}
/**
@@ -288,7 +290,7 @@ static u32 batadv_hash_dat(const void *data, u32 size)
__be16 vid;
u32 i;
- key = (const unsigned char *)&dat->ip;
+ key = (__force const unsigned char *)&dat->ip;
for (i = 0; i < sizeof(dat->ip); i++) {
hash += key[i];
hash += (hash << 10);
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index 67c7729add55..2bff2f4a325c 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*/
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 385fccdcf69d..7cad97644d05 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2020 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <martin@hundeboll.net>
*/
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index abfe8c6556de..881ef328b6cd 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2020 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <martin@hundeboll.net>
*/
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 47df4c678988..e22e49289677 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*/
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 0be8e7178ec7..88b5dba84354 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*/
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index fc55750542e4..16cd9450ceb1 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*/
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index 211b14b37db8..c3a0c5a7f7e9 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*/
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index afb52282d5bd..c7e98a40dd33 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index bbb8a6f18d6b..bad2e50135e8 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index a9d4e176f4de..68638e0450a6 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2020 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*/
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 57877f0b78e0..91ae9f32b580 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2020 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*/
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 0a70b66e8770..ccb535c77e5d 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*/
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index 27fafff586df..6abd0f4742ef 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*/
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index 11941cf1adcc..a67b2b091447 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*/
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index 741cfa3719ff..f9884dc56cf3 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
@@ -74,7 +74,7 @@ __printf(2, 3);
* @bat_priv: the bat priv with all the soft interface information
* @ratelimited: whether output should be rate limited
* @fmt: format string
- * @arg...: variable arguments
+ * @arg: variable arguments
*/
#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \
do { \
@@ -98,7 +98,7 @@ static inline void _batadv_dbg(int type __always_unused,
* batadv_dbg() - Store debug output without ratelimiting
* @type: type of debug message
* @bat_priv: the bat priv with all the soft interface information
- * @arg...: format string and variable arguments
+ * @arg: format string and variable arguments
*/
#define batadv_dbg(type, bat_priv, arg...) \
_batadv_dbg(type, bat_priv, 0, ## arg)
@@ -107,7 +107,7 @@ static inline void _batadv_dbg(int type __always_unused,
* batadv_dbg_ratelimited() - Store debug output with ratelimiting
* @type: type of debug message
* @bat_priv: the bat priv with all the soft interface information
- * @arg...: format string and variable arguments
+ * @arg: format string and variable arguments
*/
#define batadv_dbg_ratelimited(type, bat_priv, arg...) \
_batadv_dbg(type, bat_priv, 1, ## arg)
@@ -116,7 +116,7 @@ static inline void _batadv_dbg(int type __always_unused,
* batadv_info() - Store message in debug buffer and print it to kmsg buffer
* @net_dev: the soft interface net device
* @fmt: format string
- * @arg...: variable arguments
+ * @arg: variable arguments
*/
#define batadv_info(net_dev, fmt, arg...) \
do { \
@@ -130,7 +130,7 @@ static inline void _batadv_dbg(int type __always_unused,
* batadv_err() - Store error in debug buffer and print it to kmsg buffer
* @net_dev: the soft interface net device
* @fmt: format string
- * @arg...: variable arguments
+ * @arg: variable arguments
*/
#define batadv_err(net_dev, fmt, arg...) \
do { \
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 4811ec65bc43..d8a255c85e77 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index c7b340ddd0e7..692306df7b6f 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2019.5"
+#define BATADV_SOURCE_VERSION "2020.0"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index f9ec8e7507b6..9ebdc1e864b9 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2014-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2020 B.A.T.M.A.N. contributors:
*
* Linus Lüssing
*/
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 5d9e2bb29c97..ebf825991ecd 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2014-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2020 B.A.T.M.A.N. contributors:
*
* Linus Lüssing
*/
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 7e052d6f759b..02ed073f95a9 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2016-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2020 B.A.T.M.A.N. contributors:
*
* Matthias Schiffer
*/
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index ddc674e47dbb..7ee48f916997 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2016-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2020 B.A.T.M.A.N. contributors:
*
* Matthias Schiffer
*/
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 580609389f0f..8f0717c3f7b5 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2020 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
*/
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index 753fa49723cf..334289084127 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2020 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
*/
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 38613487fb1b..5b0c2fffc214 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 512a1f99dd75..7bc01c138b3a 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index f0f864820dea..3632bd976c56 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index c20feac95107..2ed49db6eff5 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 3ce5f7bad369..7f8ade04e08e 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 5fc0fd1e5d08..0d36e15589f6 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 832e156c519e..5f05a728f347 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 29139ad769fe..534e08d6ad91 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*/
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index e5bbc28ed12c..c45962d8527b 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*/
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index 5e466093dfa5..d987f8b30a98 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*/
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index dd6a9a40dbb9..bd2ac570c42c 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2020 B.A.T.M.A.N. contributors:
*
* Edo Monticelli, Antonio Quartulli
*/
diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h
index 78d310da0ad3..140105215aa2 100644
--- a/net/batman-adv/tp_meter.h
+++ b/net/batman-adv/tp_meter.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2020 B.A.T.M.A.N. contributors:
*
* Edo Monticelli, Antonio Quartulli
*/
diff --git a/net/batman-adv/trace.c b/net/batman-adv/trace.c
index 3cedd2c36528..3444d9e4e90d 100644
--- a/net/batman-adv/trace.c
+++ b/net/batman-adv/trace.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2020 B.A.T.M.A.N. contributors:
*
* Sven Eckelmann
*/
diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h
index d8f764521c0b..f631b1e01b89 100644
--- a/net/batman-adv/trace.h
+++ b/net/batman-adv/trace.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2020 B.A.T.M.A.N. contributors:
*
* Sven Eckelmann
*/
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 8a482c5ec67b..852932838ddc 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
*/
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 4a98860d7f0e..b24d35b9226a 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
*/
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index aae63f0d21eb..0963a43ad996 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
index 36985000a0a8..d509d00c7a23 100644
--- a/net/batman-adv/tvlv.h
+++ b/net/batman-adv/tvlv.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 47718a82eaf2..4a17a66cc572 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
@@ -457,7 +457,7 @@ struct batadv_orig_node {
/**
* @tt_lock: prevents from updating the table while reading it. Table
* update is made up by two operations (data structure update and
- * metdata -CRC/TTVN-recalculation) and they have to be executed
+ * metadata -CRC/TTVN-recalculation) and they have to be executed
* atomically in order to avoid another thread to read the
* table/metadata between those.
*/
@@ -1011,7 +1011,7 @@ struct batadv_priv_tt {
/**
* @commit_lock: prevents from executing a local TT commit while reading
* the local table. The local TT commit is made up by two operations
- * (data structure update and metdata -CRC/TTVN- recalculation) and
+ * (data structure update and metadata -CRC/TTVN- recalculation) and
* they have to be executed atomically in order to avoid another thread
* to read the table/metadata between those.
*/
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 1d4d7d415730..cc1cff63194f 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -112,7 +112,7 @@ static int bnep_net_set_mac_addr(struct net_device *dev, void *arg)
return 0;
}
-static void bnep_net_timeout(struct net_device *dev)
+static void bnep_net_timeout(struct net_device *dev, unsigned int txqueue)
{
BT_DBG("net_timeout");
netif_wake_queue(dev);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 9e19d5a3aac8..cbbc34a006d1 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2311,6 +2311,33 @@ void hci_smp_irks_clear(struct hci_dev *hdev)
}
}
+/* Remove and free every entry on hdev->blocked_keys.
+ *
+ * This is the update side of the list: each entry is unlinked with
+ * list_del_rcu() and released with kfree_rcu().
+ */
+void hci_blocked_keys_clear(struct hci_dev *hdev)
+{
+	struct blocked_key *b, *tmp;
+
+	/* Entries are unlinked while the list is being walked, so use the
+	 * _safe iterator, which fetches the next element before the current
+	 * one is removed. The _rcu iterator is intended for readers running
+	 * inside an RCU read-side critical section.
+	 */
+	list_for_each_entry_safe(b, tmp, &hdev->blocked_keys, list) {
+		list_del_rcu(&b->list);
+		kfree_rcu(b, rcu);
+	}
+}
+
+/* Check whether a key is present on the device's blocked-key list.
+ *
+ * @hdev: device whose blocked_keys list is searched
+ * @type: key type to match against blocked_key::type
+ * @val:  16-byte key value to match against blocked_key::val
+ *
+ * Returns true if an entry with the same type and value is found,
+ * false otherwise.
+ */
+bool hci_is_blocked_key(struct hci_dev *hdev, u8 type, u8 val[16])
+{
+	bool blocked = false;
+	struct blocked_key *b;
+
+	rcu_read_lock();
+	/* Entries are removed from this list with list_del_rcu()/kfree_rcu(),
+	 * so a reader inside the RCU read-side section must traverse it with
+	 * the _rcu iterator rather than the plain one.
+	 */
+	list_for_each_entry_rcu(b, &hdev->blocked_keys, list) {
+		if (b->type == type && !memcmp(b->val, val, sizeof(b->val))) {
+			blocked = true;
+			break;
+		}
+	}
+
+	rcu_read_unlock();
+	return blocked;
+}
+
struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr)
{
struct link_key *k;
@@ -2319,6 +2346,16 @@ struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr)
list_for_each_entry_rcu(k, &hdev->link_keys, list) {
if (bacmp(bdaddr, &k->bdaddr) == 0) {
rcu_read_unlock();
+
+ if (hci_is_blocked_key(hdev,
+ HCI_BLOCKED_KEY_TYPE_LINKKEY,
+ k->val)) {
+ bt_dev_warn_ratelimited(hdev,
+ "Link key blocked for %pMR",
+ &k->bdaddr);
+ return NULL;
+ }
+
return k;
}
}
@@ -2387,6 +2424,15 @@ struct smp_ltk *hci_find_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr,
if (smp_ltk_is_sc(k) || ltk_role(k->type) == role) {
rcu_read_unlock();
+
+ if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_LTK,
+ k->val)) {
+ bt_dev_warn_ratelimited(hdev,
+ "LTK blocked for %pMR",
+ &k->bdaddr);
+ return NULL;
+ }
+
return k;
}
}
@@ -2397,31 +2443,42 @@ struct smp_ltk *hci_find_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr,
struct smp_irk *hci_find_irk_by_rpa(struct hci_dev *hdev, bdaddr_t *rpa)
{
+ struct smp_irk *irk_to_return = NULL;
struct smp_irk *irk;
rcu_read_lock();
list_for_each_entry_rcu(irk, &hdev->identity_resolving_keys, list) {
if (!bacmp(&irk->rpa, rpa)) {
- rcu_read_unlock();
- return irk;
+ irk_to_return = irk;
+ goto done;
}
}
list_for_each_entry_rcu(irk, &hdev->identity_resolving_keys, list) {
if (smp_irk_matches(hdev, irk->val, rpa)) {
bacpy(&irk->rpa, rpa);
- rcu_read_unlock();
- return irk;
+ irk_to_return = irk;
+ goto done;
}
}
+
+done:
+ if (irk_to_return && hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_IRK,
+ irk_to_return->val)) {
+ bt_dev_warn_ratelimited(hdev, "Identity key blocked for %pMR",
+ &irk_to_return->bdaddr);
+ irk_to_return = NULL;
+ }
+
rcu_read_unlock();
- return NULL;
+ return irk_to_return;
}
struct smp_irk *hci_find_irk_by_addr(struct hci_dev *hdev, bdaddr_t *bdaddr,
u8 addr_type)
{
+ struct smp_irk *irk_to_return = NULL;
struct smp_irk *irk;
/* Identity Address must be public or static random */
@@ -2432,13 +2489,23 @@ struct smp_irk *hci_find_irk_by_addr(struct hci_dev *hdev, bdaddr_t *bdaddr,
list_for_each_entry_rcu(irk, &hdev->identity_resolving_keys, list) {
if (addr_type == irk->addr_type &&
bacmp(bdaddr, &irk->bdaddr) == 0) {
- rcu_read_unlock();
- return irk;
+ irk_to_return = irk;
+ goto done;
}
}
+
+done:
+
+ if (irk_to_return && hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_IRK,
+ irk_to_return->val)) {
+ bt_dev_warn_ratelimited(hdev, "Identity key blocked for %pMR",
+ &irk_to_return->bdaddr);
+ irk_to_return = NULL;
+ }
+
rcu_read_unlock();
- return NULL;
+ return irk_to_return;
}
struct link_key *hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn,
@@ -3244,6 +3311,7 @@ struct hci_dev *hci_alloc_dev(void)
INIT_LIST_HEAD(&hdev->pend_le_reports);
INIT_LIST_HEAD(&hdev->conn_hash.list);
INIT_LIST_HEAD(&hdev->adv_instances);
+ INIT_LIST_HEAD(&hdev->blocked_keys);
INIT_WORK(&hdev->rx_work, hci_rx_work);
INIT_WORK(&hdev->cmd_work, hci_cmd_work);
@@ -3443,6 +3511,7 @@ void hci_unregister_dev(struct hci_dev *hdev)
hci_bdaddr_list_clear(&hdev->le_resolv_list);
hci_conn_params_clear_all(hdev);
hci_discovery_filter_clear(hdev);
+ hci_blocked_keys_clear(hdev);
hci_dev_unlock(hdev);
hci_dev_put(hdev);
@@ -3496,7 +3565,8 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb)
if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT &&
hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT &&
- hci_skb_pkt_type(skb) != HCI_SCODATA_PKT) {
+ hci_skb_pkt_type(skb) != HCI_SCODATA_PKT &&
+ hci_skb_pkt_type(skb) != HCI_ISODATA_PKT) {
kfree_skb(skb);
return -EINVAL;
}
@@ -4218,15 +4288,10 @@ static void hci_sched_le(struct hci_dev *hdev)
if (!hci_conn_num(hdev, LE_LINK))
return;
- if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
- /* LE tx timeout must be longer than maximum
- * link supervision timeout (40.9 seconds) */
- if (!hdev->le_cnt && hdev->le_pkts &&
- time_after(jiffies, hdev->le_last_tx + HZ * 45))
- hci_link_tx_to(hdev, LE_LINK);
- }
-
cnt = hdev->le_pkts ? hdev->le_cnt : hdev->acl_cnt;
+
+ __check_timeout(hdev, cnt);
+
tmp = cnt;
while (cnt && (chan = hci_chan_sent(hdev, LE_LINK, &quote))) {
u32 priority = (skb_peek(&chan->data_q))->priority;
@@ -4479,6 +4544,7 @@ static void hci_rx_work(struct work_struct *work)
switch (hci_skb_pkt_type(skb)) {
case HCI_ACLDATA_PKT:
case HCI_SCODATA_PKT:
+ case HCI_ISODATA_PKT:
kfree_skb(skb);
continue;
}
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 402e2cc54044..6b1314c738b8 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -26,6 +26,7 @@
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
+#include "smp.h"
#include "hci_debugfs.h"
#define DEFINE_QUIRK_ATTRIBUTE(__name, __quirk) \
@@ -152,6 +153,21 @@ static int blacklist_show(struct seq_file *f, void *p)
DEFINE_SHOW_ATTRIBUTE(blacklist);
+/* seq_file show callback for the "blocked_keys" debugfs entry.
+ *
+ * Emits one line per entry on hdev->blocked_keys: the key type followed by
+ * the 16 bytes of the key value formatted with %*phN.
+ */
+static int blocked_keys_show(struct seq_file *f, void *p)
+{
+	struct hci_dev *hdev = f->private;
+	struct blocked_key *entry;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(entry, &hdev->blocked_keys, list) {
+		seq_printf(f, "%u %*phN\n", entry->type, 16, entry->val);
+	}
+	rcu_read_unlock();
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(blocked_keys);
+
static int uuids_show(struct seq_file *f, void *p)
{
struct hci_dev *hdev = f->private;
@@ -308,6 +324,8 @@ void hci_debugfs_create_common(struct hci_dev *hdev)
&device_list_fops);
debugfs_create_file("blacklist", 0444, hdev->debugfs, hdev,
&blacklist_fops);
+ debugfs_create_file("blocked_keys", 0444, hdev->debugfs, hdev,
+ &blocked_keys_fops);
debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops);
debugfs_create_file("remote_oob", 0400, hdev->debugfs, hdev,
&remote_oob_fops);
@@ -972,6 +990,62 @@ static int adv_max_interval_get(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(adv_max_interval_fops, adv_max_interval_get,
adv_max_interval_set, "%llu\n");
+/* debugfs setter for hdev->le_min_key_size.
+ *
+ * @data: the struct hci_dev the attribute belongs to
+ * @val:  requested minimum key size
+ *
+ * Returns 0 on success, or -EINVAL if @val is below SMP_MIN_ENC_KEY_SIZE or
+ * above the currently configured hdev->le_max_key_size.
+ */
+static int min_key_size_set(void *data, u64 val)
+{
+	struct hci_dev *hdev = data;
+	int err = 0;
+
+	hci_dev_lock(hdev);
+	/* Validate while holding the lock: le_max_key_size is written under
+	 * the same lock by max_key_size_set(), so checking it before locking
+	 * could let the minimum end up larger than the maximum.
+	 */
+	if (val > hdev->le_max_key_size || val < SMP_MIN_ENC_KEY_SIZE)
+		err = -EINVAL;
+	else
+		hdev->le_min_key_size = val;
+	hci_dev_unlock(hdev);
+
+	return err;
+}
+
+/* debugfs getter: store the current hdev->le_min_key_size in *val.
+ * The field is read while holding hci_dev_lock(). Always returns 0.
+ */
+static int min_key_size_get(void *data, u64 *val)
+{
+	struct hci_dev *hdev = data;
+	u64 size;
+
+	hci_dev_lock(hdev);
+	size = hdev->le_min_key_size;
+	hci_dev_unlock(hdev);
+
+	*val = size;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(min_key_size_fops, min_key_size_get,
+ min_key_size_set, "%llu\n");
+
+/* debugfs setter for hdev->le_max_key_size.
+ *
+ * @data: the struct hci_dev the attribute belongs to
+ * @val:  requested maximum key size
+ *
+ * Returns 0 on success, or -EINVAL if @val is above SMP_MAX_ENC_KEY_SIZE or
+ * below the currently configured hdev->le_min_key_size.
+ */
+static int max_key_size_set(void *data, u64 val)
+{
+	struct hci_dev *hdev = data;
+	int err = 0;
+
+	hci_dev_lock(hdev);
+	/* Validate while holding the lock: le_min_key_size is written under
+	 * the same lock by min_key_size_set(), so checking it before locking
+	 * could let the maximum end up smaller than the minimum.
+	 */
+	if (val > SMP_MAX_ENC_KEY_SIZE || val < hdev->le_min_key_size)
+		err = -EINVAL;
+	else
+		hdev->le_max_key_size = val;
+	hci_dev_unlock(hdev);
+
+	return err;
+}
+
+/* debugfs getter: store the current hdev->le_max_key_size in *val.
+ * The field is read while holding hci_dev_lock(). Always returns 0.
+ */
+static int max_key_size_get(void *data, u64 *val)
+{
+	struct hci_dev *hdev = data;
+	u64 size;
+
+	hci_dev_lock(hdev);
+	size = hdev->le_max_key_size;
+	hci_dev_unlock(hdev);
+
+	*val = size;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(max_key_size_fops, max_key_size_get,
+ max_key_size_set, "%llu\n");
+
static int auth_payload_timeout_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
@@ -1054,6 +1128,10 @@ void hci_debugfs_create_le(struct hci_dev *hdev)
&adv_max_interval_fops);
debugfs_create_u16("discov_interleaved_timeout", 0644, hdev->debugfs,
&hdev->discov_interleaved_timeout);
+ debugfs_create_file("min_key_size", 0644, hdev->debugfs, hdev,
+ &min_key_size_fops);
+ debugfs_create_file("max_key_size", 0644, hdev->debugfs, hdev,
+ &max_key_size_fops);
debugfs_create_file("auth_payload_timeout", 0644, hdev->debugfs, hdev,
&auth_payload_timeout_fops);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index c1d3a303d97f..6ddc4a74a5e4 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5451,7 +5451,7 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_dev_unlock(hdev);
}
-static u8 ext_evt_type_to_legacy(u16 evt_type)
+static u8 ext_evt_type_to_legacy(struct hci_dev *hdev, u16 evt_type)
{
if (evt_type & LE_EXT_ADV_LEGACY_PDU) {
switch (evt_type) {
@@ -5468,10 +5468,7 @@ static u8 ext_evt_type_to_legacy(u16 evt_type)
return LE_ADV_SCAN_RSP;
}
- BT_ERR_RATELIMITED("Unknown advertising packet type: 0x%02x",
- evt_type);
-
- return LE_ADV_INVALID;
+ goto invalid;
}
if (evt_type & LE_EXT_ADV_CONN_IND) {
@@ -5491,8 +5488,9 @@ static u8 ext_evt_type_to_legacy(u16 evt_type)
evt_type & LE_EXT_ADV_DIRECT_IND)
return LE_ADV_NONCONN_IND;
- BT_ERR_RATELIMITED("Unknown advertising packet type: 0x%02x",
- evt_type);
+invalid:
+ bt_dev_err_ratelimited(hdev, "Unknown advertising packet type: 0x%02x",
+ evt_type);
return LE_ADV_INVALID;
}
@@ -5510,7 +5508,7 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
u16 evt_type;
evt_type = __le16_to_cpu(ev->evt_type);
- legacy_evt_type = ext_evt_type_to_legacy(evt_type);
+ legacy_evt_type = ext_evt_type_to_legacy(hdev, evt_type);
if (legacy_evt_type != LE_ADV_INVALID) {
process_adv_report(hdev, legacy_evt_type, &ev->bdaddr,
ev->bdaddr_type, NULL, 0, ev->rssi,
@@ -5720,6 +5718,29 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev,
hci_dev_unlock(hdev);
}
+/* Handle the LE PHY Update Complete meta event.
+ *
+ * @hdev: controller that delivered the event
+ * @skb:  event buffer; skb->data is read as
+ *        struct hci_ev_le_phy_update_complete
+ *
+ * When the update succeeded, the TX/RX PHY values carried by the event are
+ * stored on the connection matching the event's handle.  Failed updates
+ * and unknown handles are ignored.
+ */
+static void hci_le_phy_update_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_le_phy_update_complete *ev = (void *) skb->data;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
+
+	/* A non-zero HCI status reports a failed update; only a successful
+	 * event carries PHY values worth caching.
+	 */
+	if (ev->status)
+		return;
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
+	if (!conn)
+		goto unlock;
+
+	conn->le_tx_phy = ev->tx_phy;
+	conn->le_rx_phy = ev->rx_phy;
+
+unlock:
+	hci_dev_unlock(hdev);
+}
+
static void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_le_meta *le_ev = (void *) skb->data;
@@ -5755,6 +5776,10 @@ static void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_le_direct_adv_report_evt(hdev, skb);
break;
+ case HCI_EV_LE_PHY_UPDATE_COMPLETE:
+ hci_le_phy_update_evt(hdev, skb);
+ break;
+
case HCI_EV_LE_EXT_ADV_REPORT:
hci_le_ext_adv_report_evt(hdev, skb);
break;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 5d0ed28c0d3a..9c4a093f8960 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -211,7 +211,8 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
if (hci_skb_pkt_type(skb) != HCI_COMMAND_PKT &&
hci_skb_pkt_type(skb) != HCI_EVENT_PKT &&
hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT &&
- hci_skb_pkt_type(skb) != HCI_SCODATA_PKT)
+ hci_skb_pkt_type(skb) != HCI_SCODATA_PKT &&
+ hci_skb_pkt_type(skb) != HCI_ISODATA_PKT)
continue;
if (is_filtered_packet(sk, skb))
continue;
@@ -220,7 +221,8 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
continue;
if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT &&
hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT &&
- hci_skb_pkt_type(skb) != HCI_SCODATA_PKT)
+ hci_skb_pkt_type(skb) != HCI_SCODATA_PKT &&
+ hci_skb_pkt_type(skb) != HCI_ISODATA_PKT)
continue;
} else {
/* Don't send frame to other channel types */
@@ -324,6 +326,12 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
else
opcode = cpu_to_le16(HCI_MON_SCO_TX_PKT);
break;
+ case HCI_ISODATA_PKT:
+ if (bt_cb(skb)->incoming)
+ opcode = cpu_to_le16(HCI_MON_ISO_RX_PKT);
+ else
+ opcode = cpu_to_le16(HCI_MON_ISO_TX_PKT);
+ break;
case HCI_DIAG_PKT:
opcode = cpu_to_le16(HCI_MON_VENDOR_DIAG);
break;
@@ -831,6 +839,8 @@ static int hci_sock_release(struct socket *sock)
if (!sk)
return 0;
+ lock_sock(sk);
+
switch (hci_pi(sk)->channel) {
case HCI_CHANNEL_MONITOR:
atomic_dec(&monitor_promisc);
@@ -878,6 +888,7 @@ static int hci_sock_release(struct socket *sock)
skb_queue_purge(&sk->sk_receive_queue);
skb_queue_purge(&sk->sk_write_queue);
+ release_sock(sk);
sock_put(sk);
return 0;
}
@@ -1762,7 +1773,8 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
*/
if (hci_skb_pkt_type(skb) != HCI_COMMAND_PKT &&
hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT &&
- hci_skb_pkt_type(skb) != HCI_SCODATA_PKT) {
+ hci_skb_pkt_type(skb) != HCI_SCODATA_PKT &&
+ hci_skb_pkt_type(skb) != HCI_ISODATA_PKT) {
err = -EINVAL;
goto drop;
}
@@ -1806,7 +1818,8 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
}
if (hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT &&
- hci_skb_pkt_type(skb) != HCI_SCODATA_PKT) {
+ hci_skb_pkt_type(skb) != HCI_SCODATA_PKT &&
+ hci_skb_pkt_type(skb) != HCI_ISODATA_PKT) {
err = -EINVAL;
goto drop;
}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index a845786258a0..195459a1e53e 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1289,6 +1289,9 @@ static void l2cap_le_connect(struct l2cap_chan *chan)
if (test_and_set_bit(FLAG_LE_CONN_REQ_SENT, &chan->flags))
return;
+ if (!chan->imtu)
+ chan->imtu = chan->conn->mtu;
+
l2cap_le_flowctl_init(chan, 0);
req.psm = chan->psm;
@@ -3226,6 +3229,49 @@ static inline void l2cap_txwin_setup(struct l2cap_chan *chan)
chan->ack_win = chan->tx_win;
}
+/* Choose chan->imtu automatically from the link's packet types.
+ *
+ * Starts from L2CAP_DEFAULT_MIN_MTU and then walks the EDR packet types in
+ * ascending payload order; every entry whose bit is clear in
+ * conn->pkt_type overrides the MTU, so the last such entry wins.
+ * NOTE(review): presumably a clear bit means the packet type may be used
+ * on this link - confirm against the HCI packet type definition.
+ */
+static void l2cap_mtu_auto(struct l2cap_chan *chan)
+{
+	/* Each MTU is the packet's maximum number of information bytes
+	 * minus the 2-byte payload header.
+	 */
+	static const struct {
+		u16 pkt_type;
+		u16 imtu;
+	} edr_mtu[] = {
+		{ HCI_2DH1,   54 },	/* 2-DH1: 2..56 information bytes */
+		{ HCI_3DH1,   83 },	/* 3-DH1: 2..85 information bytes */
+		{ HCI_2DH3,  367 },	/* 2-DH3: 2..369 information bytes */
+		{ HCI_3DH3,  552 },	/* 3-DH3: 2..554 information bytes */
+		{ HCI_2DH5,  679 },	/* 2-DH5: 2..681 information bytes */
+		{ HCI_3DH5, 1021 },	/* 3-DH5: 2..1023 information bytes */
+	};
+	struct hci_conn *hcon = chan->conn->hcon;
+	unsigned int idx;
+
+	chan->imtu = L2CAP_DEFAULT_MIN_MTU;
+
+	for (idx = 0; idx < ARRAY_SIZE(edr_mtu); idx++) {
+		if (hcon->pkt_type & edr_mtu[idx].pkt_type)
+			continue;
+
+		chan->imtu = edr_mtu[idx].imtu;
+	}
+}
+
static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data, size_t data_size)
{
struct l2cap_conf_req *req = data;
@@ -3255,8 +3301,12 @@ static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data, size_t data
}
done:
- if (chan->imtu != L2CAP_DEFAULT_MTU)
- l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr);
+ if (chan->imtu != L2CAP_DEFAULT_MTU) {
+ if (!chan->imtu)
+ l2cap_mtu_auto(chan);
+ l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu,
+ endptr - ptr);
+ }
switch (chan->mode) {
case L2CAP_MODE_BASIC:
@@ -5031,7 +5081,6 @@ static inline int l2cap_move_channel_req(struct l2cap_conn *conn,
chan->move_role = L2CAP_MOVE_ROLE_RESPONDER;
l2cap_move_setup(chan);
chan->move_id = req->dest_amp_id;
- icid = chan->dcid;
if (req->dest_amp_id == AMP_ID_BREDR) {
/* Moving to BR/EDR */
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index 63e65d9b4b24..c09e0a3a0ed9 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -183,6 +183,22 @@ void bt_err(const char *format, ...)
}
EXPORT_SYMBOL(bt_err);
+/* Emit a printf-style message through pr_warn_ratelimited().
+ *
+ * @format: printf-style format string
+ * @...:    arguments consumed by @format
+ *
+ * The format and its argument list are wrapped in a struct va_format and
+ * printed with the "%pV" specifier.
+ */
+void bt_warn_ratelimited(const char *format, ...)
+{
+	va_list ap;
+	struct va_format msg = {
+		.fmt = format,
+		.va = &ap,
+	};
+
+	va_start(ap, format);
+	pr_warn_ratelimited("%pV", &msg);
+	va_end(ap);
+}
+EXPORT_SYMBOL(bt_warn_ratelimited);
+
void bt_err_ratelimited(const char *format, ...)
{
struct va_format vaf;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index acb7c6d5643f..3074363c68df 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -38,7 +38,7 @@
#include "mgmt_util.h"
#define MGMT_VERSION 1
-#define MGMT_REVISION 14
+#define MGMT_REVISION 15
static const u16 mgmt_commands[] = {
MGMT_OP_READ_INDEX_LIST,
@@ -106,6 +106,7 @@ static const u16 mgmt_commands[] = {
MGMT_OP_START_LIMITED_DISCOVERY,
MGMT_OP_READ_EXT_INFO,
MGMT_OP_SET_APPEARANCE,
+ MGMT_OP_SET_BLOCKED_KEYS,
};
static const u16 mgmt_events[] = {
@@ -175,7 +176,7 @@ static const u16 mgmt_untrusted_events[] = {
"\x00\x00\x00\x00\x00\x00\x00\x00"
/* HCI to MGMT error code conversion table */
-static u8 mgmt_status_table[] = {
+static const u8 mgmt_status_table[] = {
MGMT_STATUS_SUCCESS,
MGMT_STATUS_UNKNOWN_COMMAND, /* Unknown Command */
MGMT_STATUS_NOT_CONNECTED, /* No Connection */
@@ -2341,6 +2342,14 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
for (i = 0; i < key_count; i++) {
struct mgmt_link_key_info *key = &cp->keys[i];
+ if (hci_is_blocked_key(hdev,
+ HCI_BLOCKED_KEY_TYPE_LINKKEY,
+ key->val)) {
+ bt_dev_warn(hdev, "Skipping blocked link key for %pMR",
+ &key->addr.bdaddr);
+ continue;
+ }
+
/* Always ignore debug keys and require a new pairing if
* the user wants to use them.
*/
@@ -3282,7 +3291,7 @@ static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
struct mgmt_cp_set_appearance *cp = data;
- u16 apperance;
+ u16 appearance;
int err;
BT_DBG("");
@@ -3291,12 +3300,12 @@ static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data,
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_APPEARANCE,
MGMT_STATUS_NOT_SUPPORTED);
- apperance = le16_to_cpu(cp->appearance);
+ appearance = le16_to_cpu(cp->appearance);
hci_dev_lock(hdev);
- if (hdev->appearance != apperance) {
- hdev->appearance = apperance;
+ if (hdev->appearance != appearance) {
+ hdev->appearance = appearance;
if (hci_dev_test_flag(hdev, HCI_LE_ADV))
adv_expire(hdev, MGMT_ADV_FLAG_APPEARANCE);
@@ -3531,6 +3540,55 @@ unlock:
return err;
}
+/* MGMT_OP_SET_BLOCKED_KEYS handler: replace the controller's blocked key
+ * list with the one supplied by user space.
+ *
+ * @sk:   socket the command arrived on
+ * @hdev: controller the command addresses
+ * @data: command payload, read as struct mgmt_cp_set_blocked_keys
+ * @len:  payload length in bytes
+ *
+ * The payload is rejected with MGMT_STATUS_INVALID_PARAMS when key_count
+ * is too large or does not match @len.  Otherwise the current list is
+ * cleared and one entry is added per supplied key; if an allocation fails
+ * the loop stops and MGMT_STATUS_NO_RESOURCES is reported, leaving the
+ * entries added so far in place.
+ *
+ * Returns the result of sending the mgmt status/complete reply.
+ */
+static int set_blocked_keys(struct sock *sk, struct hci_dev *hdev, void *data,
+			    u16 len)
+{
+	int err = MGMT_STATUS_SUCCESS;
+	struct mgmt_cp_set_blocked_keys *keys = data;
+	const u16 max_key_count = ((U16_MAX - sizeof(*keys)) /
+				   sizeof(struct mgmt_blocked_key_info));
+	u16 key_count, expected_len;
+	int i;
+
+	BT_DBG("request for %s", hdev->name);
+
+	key_count = __le16_to_cpu(keys->key_count);
+	if (key_count > max_key_count) {
+		bt_dev_err(hdev, "too big key_count value %u", key_count);
+		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BLOCKED_KEYS,
+				       MGMT_STATUS_INVALID_PARAMS);
+	}
+
+	expected_len = struct_size(keys, keys, key_count);
+	if (expected_len != len) {
+		bt_dev_err(hdev, "expected %u bytes, got %u bytes",
+			   expected_len, len);
+		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BLOCKED_KEYS,
+				       MGMT_STATUS_INVALID_PARAMS);
+	}
+
+	hci_dev_lock(hdev);
+
+	hci_blocked_keys_clear(hdev);
+
+	/* Iterate with the CPU-endian count that was validated against @len;
+	 * the raw keys->key_count field is little-endian and would give a
+	 * wrong bound on big-endian hosts.
+	 */
+	for (i = 0; i < key_count; ++i) {
+		struct blocked_key *b = kzalloc(sizeof(*b), GFP_KERNEL);
+
+		if (!b) {
+			err = MGMT_STATUS_NO_RESOURCES;
+			break;
+		}
+
+		b->type = keys->keys[i].type;
+		memcpy(b->val, keys->keys[i].val, sizeof(b->val));
+		list_add_rcu(&b->list, &hdev->blocked_keys);
+	}
+	hci_dev_unlock(hdev);
+
+	return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_BLOCKED_KEYS,
+				 err, NULL, 0);
+}
+
static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status,
u16 opcode, struct sk_buff *skb)
{
@@ -5051,6 +5109,14 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
for (i = 0; i < irk_count; i++) {
struct mgmt_irk_info *irk = &cp->irks[i];
+ if (hci_is_blocked_key(hdev,
+ HCI_BLOCKED_KEY_TYPE_IRK,
+ irk->val)) {
+ bt_dev_warn(hdev, "Skipping blocked IRK for %pMR",
+ &irk->addr.bdaddr);
+ continue;
+ }
+
hci_add_irk(hdev, &irk->addr.bdaddr,
le_addr_type(irk->addr.type), irk->val,
BDADDR_ANY);
@@ -5134,6 +5200,14 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
struct mgmt_ltk_info *key = &cp->keys[i];
u8 type, authenticated;
+ if (hci_is_blocked_key(hdev,
+ HCI_BLOCKED_KEY_TYPE_LTK,
+ key->val)) {
+ bt_dev_warn(hdev, "Skipping blocked LTK for %pMR",
+ &key->addr.bdaddr);
+ continue;
+ }
+
switch (key->type) {
case MGMT_LTK_UNAUTHENTICATED:
authenticated = 0x00;
@@ -6914,6 +6988,8 @@ static const struct hci_mgmt_handler mgmt_handlers[] = {
{ set_appearance, MGMT_SET_APPEARANCE_SIZE },
{ get_phy_configuration, MGMT_GET_PHY_CONFIGURATION_SIZE },
{ set_phy_configuration, MGMT_SET_PHY_CONFIGURATION_SIZE },
+ { set_blocked_keys, MGMT_OP_SET_BLOCKED_KEYS_SIZE,
+ HCI_MGMT_VAR_LEN },
};
void mgmt_index_added(struct hci_dev *hdev)
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 6b42be4b5861..204f14f8b507 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -2453,6 +2453,15 @@ static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb)
if (skb->len < sizeof(*rp))
return SMP_INVALID_PARAMS;
+ /* Pairing is aborted if any blocked keys are distributed */
+ if (hci_is_blocked_key(conn->hcon->hdev, HCI_BLOCKED_KEY_TYPE_LTK,
+ rp->ltk)) {
+ bt_dev_warn_ratelimited(conn->hcon->hdev,
+ "LTK blocked for %pMR",
+ &conn->hcon->dst);
+ return SMP_INVALID_PARAMS;
+ }
+
SMP_ALLOW_CMD(smp, SMP_CMD_MASTER_IDENT);
skb_pull(skb, sizeof(*rp));
@@ -2509,6 +2518,15 @@ static int smp_cmd_ident_info(struct l2cap_conn *conn, struct sk_buff *skb)
if (skb->len < sizeof(*info))
return SMP_INVALID_PARAMS;
+ /* Pairing is aborted if any blocked keys are distributed */
+ if (hci_is_blocked_key(conn->hcon->hdev, HCI_BLOCKED_KEY_TYPE_IRK,
+ info->irk)) {
+ bt_dev_warn_ratelimited(conn->hcon->hdev,
+ "Identity key blocked for %pMR",
+ &conn->hcon->dst);
+ return SMP_INVALID_PARAMS;
+ }
+
SMP_ALLOW_CMD(smp, SMP_CMD_IDENT_ADDR_INFO);
skb_pull(skb, sizeof(*info));
@@ -3355,94 +3373,6 @@ static const struct file_operations force_bredr_smp_fops = {
.llseek = default_llseek,
};
-static ssize_t le_min_key_size_read(struct file *file,
- char __user *user_buf,
- size_t count, loff_t *ppos)
-{
- struct hci_dev *hdev = file->private_data;
- char buf[4];
-
- snprintf(buf, sizeof(buf), "%2u\n", hdev->le_min_key_size);
-
- return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
-}
-
-static ssize_t le_min_key_size_write(struct file *file,
- const char __user *user_buf,
- size_t count, loff_t *ppos)
-{
- struct hci_dev *hdev = file->private_data;
- char buf[32];
- size_t buf_size = min(count, (sizeof(buf) - 1));
- u8 key_size;
-
- if (copy_from_user(buf, user_buf, buf_size))
- return -EFAULT;
-
- buf[buf_size] = '\0';
-
- sscanf(buf, "%hhu", &key_size);
-
- if (key_size > hdev->le_max_key_size ||
- key_size < SMP_MIN_ENC_KEY_SIZE)
- return -EINVAL;
-
- hdev->le_min_key_size = key_size;
-
- return count;
-}
-
-static const struct file_operations le_min_key_size_fops = {
- .open = simple_open,
- .read = le_min_key_size_read,
- .write = le_min_key_size_write,
- .llseek = default_llseek,
-};
-
-static ssize_t le_max_key_size_read(struct file *file,
- char __user *user_buf,
- size_t count, loff_t *ppos)
-{
- struct hci_dev *hdev = file->private_data;
- char buf[4];
-
- snprintf(buf, sizeof(buf), "%2u\n", hdev->le_max_key_size);
-
- return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
-}
-
-static ssize_t le_max_key_size_write(struct file *file,
- const char __user *user_buf,
- size_t count, loff_t *ppos)
-{
- struct hci_dev *hdev = file->private_data;
- char buf[32];
- size_t buf_size = min(count, (sizeof(buf) - 1));
- u8 key_size;
-
- if (copy_from_user(buf, user_buf, buf_size))
- return -EFAULT;
-
- buf[buf_size] = '\0';
-
- sscanf(buf, "%hhu", &key_size);
-
- if (key_size > SMP_MAX_ENC_KEY_SIZE ||
- key_size < hdev->le_min_key_size)
- return -EINVAL;
-
- hdev->le_max_key_size = key_size;
-
- return count;
-}
-
-static const struct file_operations le_max_key_size_fops = {
- .open = simple_open,
- .read = le_max_key_size_read,
- .write = le_max_key_size_write,
- .llseek = default_llseek,
-};
-
int smp_register(struct hci_dev *hdev)
{
struct l2cap_chan *chan;
@@ -3467,11 +3397,6 @@ int smp_register(struct hci_dev *hdev)
hdev->smp_data = chan;
- debugfs_create_file("le_min_key_size", 0644, hdev->debugfs, hdev,
- &le_min_key_size_fops);
- debugfs_create_file("le_max_key_size", 0644, hdev->debugfs, hdev,
- &le_max_key_size_fops);
-
/* If the controller does not support BR/EDR Secure Connections
* feature, then the BR/EDR SMP channel shall not be present.
*
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index f79205d4444f..d555c0d8657d 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -15,7 +15,7 @@
#include <trace/events/bpf_test_run.h>
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
- u32 *retval, u32 *time)
+ u32 *retval, u32 *time, bool xdp)
{
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
enum bpf_cgroup_storage_type stype;
@@ -41,7 +41,11 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
time_start = ktime_get_ns();
for (i = 0; i < repeat; i++) {
bpf_cgroup_storage_set(storage);
- *retval = BPF_PROG_RUN(prog, ctx);
+
+ if (xdp)
+ *retval = bpf_prog_run_xdp(prog, ctx);
+ else
+ *retval = BPF_PROG_RUN(prog, ctx);
if (signal_pending(current)) {
ret = -EINTR;
@@ -247,34 +251,53 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
return 0;
/* make sure the fields we don't use are zeroed */
- if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, priority)))
+ if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, mark)))
+ return -EINVAL;
+
+ /* mark is allowed */
+
+ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, mark),
+ offsetof(struct __sk_buff, priority)))
return -EINVAL;
/* priority is allowed */
- if (!range_is_zero(__skb, offsetof(struct __sk_buff, priority) +
- sizeof_field(struct __sk_buff, priority),
+ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, priority),
offsetof(struct __sk_buff, cb)))
return -EINVAL;
/* cb is allowed */
- if (!range_is_zero(__skb, offsetof(struct __sk_buff, cb) +
- sizeof_field(struct __sk_buff, cb),
+ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, cb),
offsetof(struct __sk_buff, tstamp)))
return -EINVAL;
/* tstamp is allowed */
+ /* wire_len is allowed */
+ /* gso_segs is allowed */
- if (!range_is_zero(__skb, offsetof(struct __sk_buff, tstamp) +
- sizeof_field(struct __sk_buff, tstamp),
+ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs),
sizeof(struct __sk_buff)))
return -EINVAL;
+ skb->mark = __skb->mark;
skb->priority = __skb->priority;
skb->tstamp = __skb->tstamp;
memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN);
+ if (__skb->wire_len == 0) {
+ cb->pkt_len = skb->len;
+ } else {
+ if (__skb->wire_len < skb->len ||
+ __skb->wire_len > GSO_MAX_SIZE)
+ return -EINVAL;
+ cb->pkt_len = __skb->wire_len;
+ }
+
+ if (__skb->gso_segs > GSO_MAX_SEGS)
+ return -EINVAL;
+ skb_shinfo(skb)->gso_segs = __skb->gso_segs;
+
return 0;
}
@@ -285,9 +308,12 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
if (!__skb)
return;
+ __skb->mark = skb->mark;
__skb->priority = skb->priority;
__skb->tstamp = skb->tstamp;
memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
+ __skb->wire_len = cb->pkt_len;
+ __skb->gso_segs = skb_shinfo(skb)->gso_segs;
}
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
@@ -359,7 +385,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
ret = convert___skb_to_skb(skb, ctx);
if (ret)
goto out;
- ret = bpf_test_run(prog, skb, repeat, &retval, &duration);
+ ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false);
if (ret)
goto out;
if (!is_l2) {
@@ -416,8 +442,8 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
xdp.rxq = &rxqueue->xdp_rxq;
-
- ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration);
+ bpf_prog_change_xdp(NULL, prog);
+ ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
if (ret)
goto out;
if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN ||
@@ -425,6 +451,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
size = xdp.data_end - xdp.data;
ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
out:
+ bpf_prog_change_xdp(prog, NULL);
kfree(data);
return ret;
}
@@ -437,8 +464,7 @@ static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx)
/* flags is allowed */
- if (!range_is_zero(ctx, offsetof(struct bpf_flow_keys, flags) +
- sizeof_field(struct bpf_flow_keys, flags),
+ if (!range_is_zero(ctx, offsetofend(struct bpf_flow_keys, flags),
sizeof(struct bpf_flow_keys)))
return -EINVAL;
diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
index aa945ab5b655..36580301da70 100644
--- a/net/bpfilter/Makefile
+++ b/net/bpfilter/Makefile
@@ -3,7 +3,7 @@
# Makefile for the Linux BPFILTER layer.
#
-hostprogs-y := bpfilter_umh
+hostprogs := bpfilter_umh
bpfilter_umh-objs := main.o
KBUILD_HOSTCFLAGS += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi
HOSTCC := $(CC)
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index ac9ef337f0fa..49da7ae6f077 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -20,7 +20,7 @@ obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
-bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o br_vlan_tunnel.o
+bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o br_vlan_tunnel.o br_vlan_options.o
bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index fb38add21b37..0e3dbc5f3c34 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -32,8 +32,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
struct net_bridge_mdb_entry *mdst;
struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats);
const struct nf_br_ops *nf_ops;
+ u8 state = BR_STATE_FORWARDING;
const unsigned char *dest;
- struct ethhdr *eth;
u16 vid = 0;
rcu_read_lock();
@@ -53,15 +53,14 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
BR_INPUT_SKB_CB(skb)->frag_max_size = 0;
skb_reset_mac_header(skb);
- eth = eth_hdr(skb);
skb_pull(skb, ETH_HLEN);
- if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
+ if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid, &state))
goto out;
if (IS_ENABLED(CONFIG_INET) &&
- (eth->h_proto == htons(ETH_P_ARP) ||
- eth->h_proto == htons(ETH_P_RARP)) &&
+ (eth_hdr(skb)->h_proto == htons(ETH_P_ARP) ||
+ eth_hdr(skb)->h_proto == htons(ETH_P_RARP)) &&
br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) {
br_do_proxy_suppress_arp(skb, br, vid, NULL);
} else if (IS_ENABLED(CONFIG_IPV6) &&
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 86637000f275..7629b63f6f30 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -25,7 +25,7 @@ static inline int should_deliver(const struct net_bridge_port *p,
vg = nbp_vlan_group_rcu(p);
return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) &&
- br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING &&
+ p->state == BR_STATE_FORWARDING && br_allowed_egress(vg, skb) &&
nbp_switchdev_allowed_egress(p, skb) &&
!br_skb_isolated(p, skb);
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 8944ceb47fe9..fcc260840028 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -76,11 +76,14 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
bool local_rcv, mcast_hit = false;
struct net_bridge *br;
u16 vid = 0;
+ u8 state;
if (!p || p->state == BR_STATE_DISABLED)
goto drop;
- if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid))
+ state = p->state;
+ if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid,
+ &state))
goto out;
nbp_switchdev_frame_mark(p, skb);
@@ -103,7 +106,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
}
}
- if (p->state == BR_STATE_LEARNING)
+ if (state == BR_STATE_LEARNING)
goto drop;
BR_INPUT_SKB_CB(skb)->brdev = br->dev;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index a0a54482aabc..43dab4066f91 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -561,52 +561,73 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
return err;
}
-static int br_process_vlan_info(struct net_bridge *br,
- struct net_bridge_port *p, int cmd,
- struct bridge_vlan_info *vinfo_curr,
- struct bridge_vlan_info **vinfo_last,
- bool *changed,
- struct netlink_ext_ack *extack)
+int br_process_vlan_info(struct net_bridge *br,
+ struct net_bridge_port *p, int cmd,
+ struct bridge_vlan_info *vinfo_curr,
+ struct bridge_vlan_info **vinfo_last,
+ bool *changed,
+ struct netlink_ext_ack *extack)
{
- if (!vinfo_curr->vid || vinfo_curr->vid >= VLAN_VID_MASK)
+ int err, rtm_cmd;
+
+ if (!br_vlan_valid_id(vinfo_curr->vid, extack))
return -EINVAL;
+ /* needed for vlan-only NEWVLAN/DELVLAN notifications */
+ rtm_cmd = br_afspec_cmd_to_rtm(cmd);
+
if (vinfo_curr->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
- /* check if we are already processing a range */
- if (*vinfo_last)
+ if (!br_vlan_valid_range(vinfo_curr, *vinfo_last, extack))
return -EINVAL;
*vinfo_last = vinfo_curr;
- /* don't allow range of pvids */
- if ((*vinfo_last)->flags & BRIDGE_VLAN_INFO_PVID)
- return -EINVAL;
return 0;
}
if (*vinfo_last) {
struct bridge_vlan_info tmp_vinfo;
- int v, err;
-
- if (!(vinfo_curr->flags & BRIDGE_VLAN_INFO_RANGE_END))
- return -EINVAL;
+ int v, v_change_start = 0;
- if (vinfo_curr->vid <= (*vinfo_last)->vid)
+ if (!br_vlan_valid_range(vinfo_curr, *vinfo_last, extack))
return -EINVAL;
memcpy(&tmp_vinfo, *vinfo_last,
sizeof(struct bridge_vlan_info));
for (v = (*vinfo_last)->vid; v <= vinfo_curr->vid; v++) {
+ bool curr_change = false;
+
tmp_vinfo.vid = v;
- err = br_vlan_info(br, p, cmd, &tmp_vinfo, changed,
+ err = br_vlan_info(br, p, cmd, &tmp_vinfo, &curr_change,
extack);
if (err)
break;
+ if (curr_change) {
+ *changed = curr_change;
+ if (!v_change_start)
+ v_change_start = v;
+ } else {
+ /* nothing to notify yet */
+ if (!v_change_start)
+ continue;
+ br_vlan_notify(br, p, v_change_start,
+ v - 1, rtm_cmd);
+ v_change_start = 0;
+ }
}
+ /* v_change_start is set only if the last/whole range changed */
+ if (v_change_start)
+ br_vlan_notify(br, p, v_change_start,
+ v - 1, rtm_cmd);
+
*vinfo_last = NULL;
return err;
}
- return br_vlan_info(br, p, cmd, vinfo_curr, changed, extack);
+ err = br_vlan_info(br, p, cmd, vinfo_curr, changed, extack);
+ if (*changed)
+ br_vlan_notify(br, p, vinfo_curr->vid, 0, rtm_cmd);
+
+ return err;
}
static int br_afspec(struct net_bridge *br,
@@ -1607,6 +1628,19 @@ static int br_fill_linkxstats(struct sk_buff *skb,
br_multicast_get_stats(br, p, nla_data(nla));
}
#endif
+
+ if (p) {
+ nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_STP,
+ sizeof(p->stp_xstats),
+ BRIDGE_XSTATS_PAD);
+ if (!nla)
+ goto nla_put_failure;
+
+ spin_lock_bh(&br->lock);
+ memcpy(nla_data(nla), &p->stp_xstats, sizeof(p->stp_xstats));
+ spin_unlock_bh(&br->lock);
+ }
+
nla_nest_end(skb, nest);
*prividx = 0;
@@ -1651,6 +1685,7 @@ int __init br_netlink_init(void)
int err;
br_mdb_init();
+ br_vlan_rtnl_init();
rtnl_af_register(&br_af_ops);
err = rtnl_link_register(&br_link_ops);
@@ -1668,6 +1703,7 @@ out_af:
void br_netlink_fini(void)
{
br_mdb_uninit();
+ br_vlan_rtnl_uninit();
rtnl_af_unregister(&br_af_ops);
rtnl_link_unregister(&br_link_ops);
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 36b0367ca1e0..5153ffe79a01 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -113,6 +113,7 @@ enum {
* @vid: VLAN id
* @flags: bridge vlan flags
* @priv_flags: private (in-kernel) bridge vlan flags
+ * @state: STP state (e.g. blocking, learning, forwarding)
* @stats: per-cpu VLAN statistics
* @br: if MASTER flag set, this points to a bridge struct
* @port: if MASTER flag unset, this points to a port struct
@@ -133,6 +134,7 @@ struct net_bridge_vlan {
u16 vid;
u16 flags;
u16 priv_flags;
+ u8 state;
struct br_vlan_stats __percpu *stats;
union {
struct net_bridge *br;
@@ -157,6 +159,7 @@ struct net_bridge_vlan {
* @vlan_list: sorted VLAN entry list
* @num_vlans: number of total VLAN entries
* @pvid: PVID VLAN id
+ * @pvid_state: PVID's STP state (e.g. forwarding, learning, blocking)
*
* IMPORTANT: Be careful when checking if there're VLAN entries using list
* primitives because the bridge can have entries in its list which
@@ -170,6 +173,7 @@ struct net_bridge_vlan_group {
struct list_head vlan_list;
u16 num_vlans;
u16 pvid;
+ u8 pvid_state;
};
/* bridge fdb flags */
@@ -283,6 +287,8 @@ struct net_bridge_port {
#endif
u16 group_fwd_mask;
u16 backup_redirected_cnt;
+
+ struct bridge_stp_xstats stp_xstats;
};
#define kobj_to_brport(obj) container_of(obj, struct net_bridge_port, kobj)
@@ -505,6 +511,65 @@ static inline bool nbp_state_should_learn(const struct net_bridge_port *p)
return p->state == BR_STATE_LEARNING || p->state == BR_STATE_FORWARDING;
}
+/* Check that @vid is strictly between 0 and VLAN_VID_MASK.
+ *
+ * Returns true for a valid id; otherwise sets an error message on @extack
+ * and returns false.
+ */
+static inline bool br_vlan_valid_id(u16 vid, struct netlink_ext_ack *extack)
+{
+	if (vid > 0 && vid < VLAN_VID_MASK)
+		return true;
+
+	NL_SET_ERR_MSG_MOD(extack, "Vlan id is invalid");
+
+	return false;
+}
+
+/* Validate one endpoint of a vlan range.
+ *
+ * @cur:    entry being processed
+ * @last:   pending range start, or NULL when no range is open
+ * @extack: receives an error message describing the first failed check
+ *
+ * Returns true when @cur is acceptable, false otherwise.
+ */
+static inline bool br_vlan_valid_range(const struct bridge_vlan_info *cur,
+				       const struct bridge_vlan_info *last,
+				       struct netlink_ext_ack *extack)
+{
+	const u16 range_flags = BRIDGE_VLAN_INFO_RANGE_BEGIN |
+				BRIDGE_VLAN_INFO_RANGE_END;
+
+	/* a range entry must never carry the pvid flag */
+	if (cur->flags & BRIDGE_VLAN_INFO_PVID) {
+		NL_SET_ERR_MSG_MOD(extack, "Pvid isn't allowed in a range");
+		return false;
+	}
+
+	/* A range is already open, so cur has to close it: it must not
+	 * start another range, it must carry the end flag, and its id must
+	 * be greater than the start id.
+	 */
+	if (last) {
+		if (cur->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
+			NL_SET_ERR_MSG_MOD(extack, "Found a new vlan range start while processing one");
+			return false;
+		}
+
+		if (!(cur->flags & BRIDGE_VLAN_INFO_RANGE_END)) {
+			NL_SET_ERR_MSG_MOD(extack, "Vlan range end flag is missing");
+			return false;
+		}
+
+		if (cur->vid <= last->vid) {
+			NL_SET_ERR_MSG_MOD(extack, "End vlan id is less than or equal to start vlan id");
+			return false;
+		}
+	}
+
+	/* at least one of the range flags has to be present */
+	if (!(cur->flags & range_flags)) {
+		NL_SET_ERR_MSG_MOD(extack, "Both vlan range flags are missing");
+		return false;
+	}
+
+	return true;
+}
+
+/* Translate a link command into the matching vlan notification type:
+ * RTM_SETLINK -> RTM_NEWVLAN, RTM_DELLINK -> RTM_DELVLAN, anything else 0.
+ */
+static inline int br_afspec_cmd_to_rtm(int cmd)
+{
+	if (cmd == RTM_SETLINK)
+		return RTM_NEWVLAN;
+
+	if (cmd == RTM_DELLINK)
+		return RTM_DELVLAN;
+
+	return 0;
+}
+
static inline int br_opt_get(const struct net_bridge *br,
enum net_bridge_opts opt)
{
@@ -874,7 +939,7 @@ static inline int br_multicast_igmp_type(const struct sk_buff *skb)
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
bool br_allowed_ingress(const struct net_bridge *br,
struct net_bridge_vlan_group *vg, struct sk_buff *skb,
- u16 *vid);
+ u16 *vid, u8 *state);
bool br_allowed_egress(struct net_bridge_vlan_group *vg,
const struct sk_buff *skb);
bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid);
@@ -909,6 +974,14 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v,
void br_vlan_port_event(struct net_bridge_port *p, unsigned long event);
int br_vlan_bridge_event(struct net_device *dev, unsigned long event,
void *ptr);
+void br_vlan_rtnl_init(void);
+void br_vlan_rtnl_uninit(void);
+void br_vlan_notify(const struct net_bridge *br,
+ const struct net_bridge_port *p,
+ u16 vid, u16 vid_range,
+ int cmd);
+bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
+ const struct net_bridge_vlan *range_end);
static inline struct net_bridge_vlan_group *br_vlan_group(
const struct net_bridge *br)
@@ -960,11 +1033,15 @@ static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg)
return vg->pvid;
}
+/* Return the flags of @v, with BRIDGE_VLAN_INFO_PVID added when the vlan's
+ * id equals @pvid.
+ */
+static inline u16 br_vlan_flags(const struct net_bridge_vlan *v, u16 pvid)
+{
+	u16 flags = v->flags;
+
+	if (v->vid == pvid)
+		flags |= BRIDGE_VLAN_INFO_PVID;
+
+	return flags;
+}
#else
static inline bool br_allowed_ingress(const struct net_bridge *br,
struct net_bridge_vlan_group *vg,
struct sk_buff *skb,
- u16 *vid)
+ u16 *vid, u8 *state)
{
return true;
}
@@ -1103,6 +1180,70 @@ static inline int br_vlan_bridge_event(struct net_device *dev,
{
return 0;
}
+
+/* No-op variant of br_vlan_rtnl_init().
+ * NOTE(review): presumably the stub used when CONFIG_BRIDGE_VLAN_FILTERING
+ * is disabled - confirm against the full header.
+ */
+static inline void br_vlan_rtnl_init(void)
+{
+}
+
+/* No-op variant of br_vlan_rtnl_uninit().
+ * NOTE(review): presumably the stub used when CONFIG_BRIDGE_VLAN_FILTERING
+ * is disabled - confirm against the full header.
+ */
+static inline void br_vlan_rtnl_uninit(void)
+{
+}
+
+/* No-op variant of br_vlan_notify(): all arguments are ignored and no
+ * notification is sent.
+ * NOTE(review): presumably the stub used when CONFIG_BRIDGE_VLAN_FILTERING
+ * is disabled - confirm against the full header.
+ */
+static inline void br_vlan_notify(const struct net_bridge *br,
+ const struct net_bridge_port *p,
+ u16 vid, u16 vid_range,
+ int cmd)
+{
+}
+#endif
+
+/* br_vlan_options.c */
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+bool br_vlan_opts_eq(const struct net_bridge_vlan *v1,
+ const struct net_bridge_vlan *v2);
+bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v);
+size_t br_vlan_opts_nl_size(void);
+int br_vlan_process_options(const struct net_bridge *br,
+ const struct net_bridge_port *p,
+ struct net_bridge_vlan *range_start,
+ struct net_bridge_vlan *range_end,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack);
+
+/* vlan state manipulation helpers using *_ONCE to annotate lock-free access */
+/* Read v->state once via READ_ONCE() and return it */
+static inline u8 br_vlan_get_state(const struct net_bridge_vlan *v)
+{
+ return READ_ONCE(v->state);
+}
+
+/* Store @state into v->state via WRITE_ONCE() */
+static inline void br_vlan_set_state(struct net_bridge_vlan *v, u8 state)
+{
+ WRITE_ONCE(v->state, state);
+}
+
+/* Read the vlan group's pvid_state field once via READ_ONCE() */
+static inline u8 br_vlan_get_pvid_state(const struct net_bridge_vlan_group *vg)
+{
+ return READ_ONCE(vg->pvid_state);
+}
+
+/* Store @state into the vlan group's pvid_state field via WRITE_ONCE() */
+static inline void br_vlan_set_pvid_state(struct net_bridge_vlan_group *vg,
+ u8 state)
+{
+ WRITE_ONCE(vg->pvid_state, state);
+}
+
+/* Decide whether a vlan in @state lets traffic through: forwarding always
+ * does, learning only when @learn_allow is set (true at ingress and false
+ * at egress), every other state never does.
+ */
+static inline bool br_vlan_state_allowed(u8 state, bool learn_allow)
+{
+ if (state == BR_STATE_FORWARDING)
+ return true;
+ if (state == BR_STATE_LEARNING)
+ return learn_allow;
+
+ return false;
+}
#endif
struct nf_br_ops {
@@ -1174,6 +1315,12 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags,
int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev,
u32 filter_mask, int nlflags);
+int br_process_vlan_info(struct net_bridge *br,
+ struct net_bridge_port *p, int cmd,
+ struct bridge_vlan_info *vinfo_curr,
+ struct bridge_vlan_info **vinfo_last,
+ bool *changed,
+ struct netlink_ext_ack *extack);
#ifdef CONFIG_SYSFS
/* br_sysfs_if.c */
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 1f1410f8d312..1f14b8455345 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -45,6 +45,17 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
br_info(p->br, "port %u(%s) entered %s state\n",
(unsigned int) p->port_no, p->dev->name,
br_port_state_names[p->state]);
+
+ if (p->br->stp_enabled == BR_KERNEL_STP) {
+ switch (p->state) {
+ case BR_STATE_BLOCKING:
+ p->stp_xstats.transition_blk++;
+ break;
+ case BR_STATE_FORWARDING:
+ p->stp_xstats.transition_fwd++;
+ break;
+ }
+ }
}
/* called under bridge lock */
@@ -52,7 +63,8 @@ struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no)
{
struct net_bridge_port *p;
- list_for_each_entry_rcu(p, &br->port_list, list) {
+ list_for_each_entry_rcu(p, &br->port_list, list,
+ lockdep_is_held(&br->lock)) {
if (p->port_no == port_no)
return p;
}
@@ -484,6 +496,8 @@ void br_received_config_bpdu(struct net_bridge_port *p,
struct net_bridge *br;
int was_root;
+ p->stp_xstats.rx_bpdu++;
+
br = p->br;
was_root = br_is_root_bridge(br);
@@ -517,6 +531,8 @@ void br_received_config_bpdu(struct net_bridge_port *p,
/* called under bridge lock */
void br_received_tcn_bpdu(struct net_bridge_port *p)
{
+ p->stp_xstats.rx_tcn++;
+
if (br_is_designated_port(p)) {
br_info(p->br, "port %u(%s) received tcn bpdu\n",
(unsigned int) p->port_no, p->dev->name);
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 7796dd9d42d7..0e4572f31330 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -118,6 +118,8 @@ void br_send_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu)
br_set_ticks(buf+33, bpdu->forward_delay);
br_send_bpdu(p, buf, 35);
+
+ p->stp_xstats.tx_bpdu++;
}
/* called under bridge lock */
@@ -133,6 +135,8 @@ void br_send_tcn_bpdu(struct net_bridge_port *p)
buf[2] = 0;
buf[3] = BPDU_TYPE_TCN;
br_send_bpdu(p, buf, 4);
+
+ p->stp_xstats.tx_tcn++;
}
/*
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index bb98984cd27d..6b5deca08b89 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -34,13 +34,15 @@ static struct net_bridge_vlan *br_vlan_lookup(struct rhashtable *tbl, u16 vid)
return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params);
}
-static bool __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid)
+static bool __vlan_add_pvid(struct net_bridge_vlan_group *vg,
+ const struct net_bridge_vlan *v)
{
- if (vg->pvid == vid)
+ if (vg->pvid == v->vid)
return false;
smp_wmb();
- vg->pvid = vid;
+ br_vlan_set_pvid_state(vg, v->state);
+ vg->pvid = v->vid;
return true;
}
@@ -69,7 +71,7 @@ static bool __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
vg = nbp_vlan_group(v->port);
if (flags & BRIDGE_VLAN_INFO_PVID)
- ret = __vlan_add_pvid(vg, v->vid);
+ ret = __vlan_add_pvid(vg, v);
else
ret = __vlan_delete_pvid(vg, v->vid);
@@ -257,6 +259,10 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
&changed, extack);
if (err)
goto out_filt;
+
+ if (changed)
+ br_vlan_notify(br, NULL, v->vid, 0,
+ RTM_NEWVLAN);
}
masterv = br_vlan_get_master(br, v->vid, extack);
@@ -289,6 +295,9 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
vg->num_vlans++;
}
+ /* set the state before publishing */
+ v->state = BR_STATE_FORWARDING;
+
err = rhashtable_lookup_insert_fast(&vg->vlan_hash, &v->vnode,
br_vlan_rht_params);
if (err)
@@ -380,13 +389,31 @@ static void __vlan_group_free(struct net_bridge_vlan_group *vg)
kfree(vg);
}
-static void __vlan_flush(struct net_bridge_vlan_group *vg)
+static void __vlan_flush(const struct net_bridge *br,
+ const struct net_bridge_port *p,
+ struct net_bridge_vlan_group *vg)
{
struct net_bridge_vlan *vlan, *tmp;
+ u16 v_start = 0, v_end = 0;
__vlan_delete_pvid(vg, vg->pvid);
- list_for_each_entry_safe(vlan, tmp, &vg->vlan_list, vlist)
+ list_for_each_entry_safe(vlan, tmp, &vg->vlan_list, vlist) {
+ /* take care of disjoint ranges */
+ if (!v_start) {
+ v_start = vlan->vid;
+ } else if (vlan->vid - v_end != 1) {
+ /* found range end, notify and start next one */
+ br_vlan_notify(br, p, v_start, v_end, RTM_DELVLAN);
+ v_start = vlan->vid;
+ }
+ v_end = vlan->vid;
+
__vlan_del(vlan);
+ }
+
+ /* notify about the last/whole vlan range */
+ if (v_start)
+ br_vlan_notify(br, p, v_start, v_end, RTM_DELVLAN);
}
struct sk_buff *br_handle_vlan(struct net_bridge *br,
@@ -444,7 +471,8 @@ out:
/* Called under RCU */
static bool __allowed_ingress(const struct net_bridge *br,
struct net_bridge_vlan_group *vg,
- struct sk_buff *skb, u16 *vid)
+ struct sk_buff *skb, u16 *vid,
+ u8 *state)
{
struct br_vlan_stats *stats;
struct net_bridge_vlan *v;
@@ -510,13 +538,25 @@ static bool __allowed_ingress(const struct net_bridge *br,
skb->vlan_tci |= pvid;
/* if stats are disabled we can avoid the lookup */
- if (!br_opt_get(br, BROPT_VLAN_STATS_ENABLED))
- return true;
+ if (!br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
+ if (*state == BR_STATE_FORWARDING) {
+ *state = br_vlan_get_pvid_state(vg);
+ return br_vlan_state_allowed(*state, true);
+ } else {
+ return true;
+ }
+ }
}
v = br_vlan_find(vg, *vid);
if (!v || !br_vlan_should_use(v))
goto drop;
+ if (*state == BR_STATE_FORWARDING) {
+ *state = br_vlan_get_state(v);
+ if (!br_vlan_state_allowed(*state, true))
+ goto drop;
+ }
+
if (br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
stats = this_cpu_ptr(v->stats);
u64_stats_update_begin(&stats->syncp);
@@ -534,7 +574,7 @@ drop:
bool br_allowed_ingress(const struct net_bridge *br,
struct net_bridge_vlan_group *vg, struct sk_buff *skb,
- u16 *vid)
+ u16 *vid, u8 *state)
{
/* If VLAN filtering is disabled on the bridge, all packets are
* permitted.
@@ -544,7 +584,7 @@ bool br_allowed_ingress(const struct net_bridge *br,
return true;
}
- return __allowed_ingress(br, vg, skb, vid);
+ return __allowed_ingress(br, vg, skb, vid, state);
}
/* Called under RCU. */
@@ -560,7 +600,8 @@ bool br_allowed_egress(struct net_bridge_vlan_group *vg,
br_vlan_get_tag(skb, &vid);
v = br_vlan_find(vg, vid);
- if (v && br_vlan_should_use(v))
+ if (v && br_vlan_should_use(v) &&
+ br_vlan_state_allowed(br_vlan_get_state(v), false))
return true;
return false;
@@ -571,6 +612,7 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
{
struct net_bridge_vlan_group *vg;
struct net_bridge *br = p->br;
+ struct net_bridge_vlan *v;
/* If filtering was disabled at input, let it pass. */
if (!br_opt_get(br, BROPT_VLAN_ENABLED))
@@ -585,13 +627,15 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
if (!*vid) {
*vid = br_get_pvid(vg);
- if (!*vid)
+ if (!*vid ||
+ !br_vlan_state_allowed(br_vlan_get_pvid_state(vg), true))
return false;
return true;
}
- if (br_vlan_find(vg, *vid))
+ v = br_vlan_find(vg, *vid);
+ if (v && br_vlan_state_allowed(br_vlan_get_state(v), true))
return true;
return false;
@@ -716,7 +760,7 @@ void br_vlan_flush(struct net_bridge *br)
ASSERT_RTNL();
vg = br_vlan_group(br);
- __vlan_flush(vg);
+ __vlan_flush(br, NULL, vg);
RCU_INIT_POINTER(br->vlgrp, NULL);
synchronize_rcu();
__vlan_group_free(vg);
@@ -925,12 +969,15 @@ static void br_vlan_disable_default_pvid(struct net_bridge *br)
/* Disable default_pvid on all ports where it is still
* configured.
*/
- if (vlan_default_pvid(br_vlan_group(br), pvid))
- br_vlan_delete(br, pvid);
+ if (vlan_default_pvid(br_vlan_group(br), pvid)) {
+ if (!br_vlan_delete(br, pvid))
+ br_vlan_notify(br, NULL, pvid, 0, RTM_DELVLAN);
+ }
list_for_each_entry(p, &br->port_list, list) {
- if (vlan_default_pvid(nbp_vlan_group(p), pvid))
- nbp_vlan_delete(p, pvid);
+ if (vlan_default_pvid(nbp_vlan_group(p), pvid) &&
+ !nbp_vlan_delete(p, pvid))
+ br_vlan_notify(br, p, pvid, 0, RTM_DELVLAN);
}
br->default_pvid = 0;
@@ -972,7 +1019,10 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid,
&vlchange, extack);
if (err)
goto out;
- br_vlan_delete(br, old_pvid);
+
+ if (br_vlan_delete(br, old_pvid))
+ br_vlan_notify(br, NULL, old_pvid, 0, RTM_DELVLAN);
+ br_vlan_notify(br, NULL, pvid, 0, RTM_NEWVLAN);
set_bit(0, changed);
}
@@ -992,7 +1042,9 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid,
&vlchange, extack);
if (err)
goto err_port;
- nbp_vlan_delete(p, old_pvid);
+ if (nbp_vlan_delete(p, old_pvid))
+ br_vlan_notify(br, p, old_pvid, 0, RTM_DELVLAN);
+ br_vlan_notify(p->br, p, pvid, 0, RTM_NEWVLAN);
set_bit(p->port_no, changed);
}
@@ -1007,22 +1059,28 @@ err_port:
if (!test_bit(p->port_no, changed))
continue;
- if (old_pvid)
+ if (old_pvid) {
nbp_vlan_add(p, old_pvid,
BRIDGE_VLAN_INFO_PVID |
BRIDGE_VLAN_INFO_UNTAGGED,
&vlchange, NULL);
+ br_vlan_notify(p->br, p, old_pvid, 0, RTM_NEWVLAN);
+ }
nbp_vlan_delete(p, pvid);
+ br_vlan_notify(br, p, pvid, 0, RTM_DELVLAN);
}
if (test_bit(0, changed)) {
- if (old_pvid)
+ if (old_pvid) {
br_vlan_add(br, old_pvid,
BRIDGE_VLAN_INFO_PVID |
BRIDGE_VLAN_INFO_UNTAGGED |
BRIDGE_VLAN_INFO_BRENTRY,
&vlchange, NULL);
+ br_vlan_notify(br, NULL, old_pvid, 0, RTM_NEWVLAN);
+ }
br_vlan_delete(br, pvid);
+ br_vlan_notify(br, NULL, pvid, 0, RTM_DELVLAN);
}
goto out;
}
@@ -1115,6 +1173,7 @@ int nbp_vlan_init(struct net_bridge_port *p, struct netlink_ext_ack *extack)
&changed, extack);
if (ret)
goto err_vlan_add;
+ br_vlan_notify(p->br, p, p->br->default_pvid, 0, RTM_NEWVLAN);
}
out:
return ret;
@@ -1196,7 +1255,7 @@ void nbp_vlan_flush(struct net_bridge_port *port)
ASSERT_RTNL();
vg = nbp_vlan_group(port);
- __vlan_flush(vg);
+ __vlan_flush(port->br, port, vg);
RCU_INIT_POINTER(port->vlgrp, NULL);
synchronize_rcu();
__vlan_group_free(vg);
@@ -1462,8 +1521,8 @@ int br_vlan_bridge_event(struct net_device *dev, unsigned long event, void *ptr)
{
struct netdev_notifier_changeupper_info *info;
struct net_bridge *br = netdev_priv(dev);
- bool changed;
- int ret = 0;
+ int vlcmd = 0, ret = 0;
+ bool changed = false;
switch (event) {
case NETDEV_REGISTER:
@@ -1471,9 +1530,11 @@ int br_vlan_bridge_event(struct net_device *dev, unsigned long event, void *ptr)
BRIDGE_VLAN_INFO_PVID |
BRIDGE_VLAN_INFO_UNTAGGED |
BRIDGE_VLAN_INFO_BRENTRY, &changed, NULL);
+ vlcmd = RTM_NEWVLAN;
break;
case NETDEV_UNREGISTER:
- br_vlan_delete(br, br->default_pvid);
+ changed = !br_vlan_delete(br, br->default_pvid);
+ vlcmd = RTM_DELVLAN;
break;
case NETDEV_CHANGEUPPER:
info = ptr;
@@ -1487,6 +1548,8 @@ int br_vlan_bridge_event(struct net_device *dev, unsigned long event, void *ptr)
br_vlan_link_state_change(dev, br);
break;
}
+ if (changed)
+ br_vlan_notify(br, NULL, br->default_pvid, 0, vlcmd);
return ret;
}
@@ -1505,3 +1568,441 @@ void br_vlan_port_event(struct net_bridge_port *p, unsigned long event)
break;
}
}
+
+/* Emit one BRIDGE_VLANDB_ENTRY nest describing @vid, or the range
+ * @vid..@vid_range. v_opts is used to dump the options which must be equal
+ * in the whole range; when it is NULL no options are added.
+ * Returns true on success. On any failure the nest is cancelled and false
+ * is returned.
+ */
+static bool br_vlan_fill_vids(struct sk_buff *skb, u16 vid, u16 vid_range,
+ const struct net_bridge_vlan *v_opts,
+ u16 flags)
+{
+ struct bridge_vlan_info info;
+ struct nlattr *nest;
+ bool is_range;
+
+ nest = nla_nest_start(skb, BRIDGE_VLANDB_ENTRY);
+ if (!nest)
+ return false;
+
+ /* only the untagged and pvid bits are exported */
+ memset(&info, 0, sizeof(info));
+ info.vid = vid;
+ info.flags = flags & (BRIDGE_VLAN_INFO_UNTAGGED | BRIDGE_VLAN_INFO_PVID);
+
+ if (nla_put(skb, BRIDGE_VLANDB_ENTRY_INFO, sizeof(info), &info))
+ goto out_err;
+
+ /* the range end is only reported for a real range without the pvid flag */
+ is_range = vid_range && vid < vid_range &&
+ !(flags & BRIDGE_VLAN_INFO_PVID);
+ if (is_range && nla_put_u16(skb, BRIDGE_VLANDB_ENTRY_RANGE, vid_range))
+ goto out_err;
+
+ if (v_opts && !br_vlan_opts_fill(skb, v_opts))
+ goto out_err;
+
+ nla_nest_end(skb, nest);
+
+ return true;
+
+out_err:
+ nla_nest_cancel(skb, nest);
+ return false;
+}
+
+/* Size of a notification message carrying a single vlan entry: header, the
+ * entry nest, its info and range attributes, plus the vlan options.
+ */
+static size_t rtnl_vlan_nlmsg_size(void)
+{
+ size_t size = NLMSG_ALIGN(sizeof(struct br_vlan_msg));
+
+ size += nla_total_size(0); /* BRIDGE_VLANDB_ENTRY */
+ size += nla_total_size(sizeof(u16)); /* BRIDGE_VLANDB_ENTRY_RANGE */
+ size += nla_total_size(sizeof(struct bridge_vlan_info)); /* BRIDGE_VLANDB_ENTRY_INFO */
+ size += br_vlan_opts_nl_size(); /* bridge vlan options */
+
+ return size;
+}
+
+/* Send an RTM_NEWVLAN / RTM_DELVLAN notification to RTNLGRP_BRVLAN.
+ *
+ * @br: bridge the vlan belongs to
+ * @p: port the vlan belongs to, or NULL for a vlan of the bridge itself
+ * @vid: vlan id (or range start)
+ * @vid_range: range end, 0 when a single vlan is reported
+ * @cmd: RTM_NEWVLAN or RTM_DELVLAN; any other value sends nothing
+ *
+ * For RTM_NEWVLAN the vlan is looked up to get its flags and options; if it
+ * cannot be found (or should not be used) the message is dropped without
+ * reporting an error. Allocation and fill failures are reported through
+ * rtnl_set_sk_err().
+ */
+void br_vlan_notify(const struct net_bridge *br,
+ const struct net_bridge_port *p,
+ u16 vid, u16 vid_range,
+ int cmd)
+{
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *v = NULL;
+ struct br_vlan_msg *bvm;
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+ struct net *net;
+ u16 flags = 0;
+ int ifindex;
+
+ /* right now notifications are done only with rtnl held */
+ ASSERT_RTNL();
+
+ /* pick the device, vlan group and netns of the port or of the bridge */
+ if (p) {
+ ifindex = p->dev->ifindex;
+ vg = nbp_vlan_group(p);
+ net = dev_net(p->dev);
+ } else {
+ ifindex = br->dev->ifindex;
+ vg = br_vlan_group(br);
+ net = dev_net(br->dev);
+ }
+
+ skb = nlmsg_new(rtnl_vlan_nlmsg_size(), GFP_KERNEL);
+ if (!skb)
+ goto out_err;
+
+ /* from here on a failure means the message did not fit */
+ err = -EMSGSIZE;
+ nlh = nlmsg_put(skb, 0, 0, cmd, sizeof(*bvm), 0);
+ if (!nlh)
+ goto out_err;
+ bvm = nlmsg_data(nlh);
+ memset(bvm, 0, sizeof(*bvm));
+ bvm->family = AF_BRIDGE;
+ bvm->ifindex = ifindex;
+
+ switch (cmd) {
+ case RTM_NEWVLAN:
+ /* need to find the vlan due to flags/options */
+ v = br_vlan_find(vg, vid);
+ if (!v || !br_vlan_should_use(v))
+ goto out_kfree;
+
+ flags = v->flags;
+ if (br_get_pvid(vg) == v->vid)
+ flags |= BRIDGE_VLAN_INFO_PVID;
+ break;
+ case RTM_DELVLAN:
+ /* v stays NULL and flags stay 0: only the vid(s) are reported */
+ break;
+ default:
+ goto out_kfree;
+ }
+
+ if (!br_vlan_fill_vids(skb, vid, vid_range, v, flags))
+ goto out_err;
+
+ nlmsg_end(skb, nlh);
+ rtnl_notify(skb, net, 0, RTNLGRP_BRVLAN, NULL, GFP_KERNEL);
+ return;
+
+out_err:
+ rtnl_set_sk_err(net, RTNLGRP_BRVLAN, err);
+out_kfree:
+ /* skb may be NULL here when the allocation failed */
+ kfree_skb(skb);
+}
+
+/* check if v_curr can enter a range ending in range_end: its vid must
+ * directly follow range_end's and both must carry equal flags and options
+ */
+bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
+ const struct net_bridge_vlan *range_end)
+{
+ if (v_curr->vid - range_end->vid != 1)
+ return false;
+ if (range_end->flags != v_curr->flags)
+ return false;
+
+ return br_vlan_opts_eq(v_curr, range_end);
+}
+
+/* Dump the vlans of one bridge or bridge port device into @skb as a single
+ * RTM_NEWVLAN message, compressing consecutive vlans with equal flags and
+ * options into ranges.
+ *
+ * cb->args[1] holds the number of vlans already dumped for this device by a
+ * previous call; it is updated to the resume position when the skb fills up
+ * and reset to 0 otherwise.
+ *
+ * Returns 0 on success (also when the device has no vlan group), -EINVAL if
+ * @dev is neither a bridge nor a bridge port, -EMSGSIZE if @skb is full.
+ * Called under RCU (uses the _rcu accessors).
+ */
+static int br_vlan_dump_dev(const struct net_device *dev,
+ struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct net_bridge_vlan *v, *range_start = NULL, *range_end = NULL;
+ struct net_bridge_vlan_group *vg;
+ int idx = 0, s_idx = cb->args[1];
+ struct nlmsghdr *nlh = NULL;
+ struct net_bridge_port *p;
+ struct br_vlan_msg *bvm;
+ struct net_bridge *br;
+ int err = 0;
+ u16 pvid;
+
+ if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev))
+ return -EINVAL;
+
+ if (netif_is_bridge_master(dev)) {
+ br = netdev_priv(dev);
+ vg = br_vlan_group_rcu(br);
+ p = NULL;
+ } else {
+ p = br_port_get_rcu(dev);
+ if (WARN_ON(!p))
+ return -EINVAL;
+ vg = nbp_vlan_group_rcu(p);
+ br = p->br;
+ }
+
+ if (!vg)
+ return 0;
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ RTM_NEWVLAN, sizeof(*bvm), NLM_F_MULTI);
+ if (!nlh)
+ return -EMSGSIZE;
+ bvm = nlmsg_data(nlh);
+ memset(bvm, 0, sizeof(*bvm));
+ bvm->family = PF_BRIDGE;
+ bvm->ifindex = dev->ifindex;
+ pvid = br_get_pvid(vg);
+
+ /* idx must stay at range's beginning until it is filled in */
+ list_for_each_entry_rcu(v, &vg->vlan_list, vlist) {
+ if (!br_vlan_should_use(v))
+ continue;
+ /* skip the vlans that were already dumped by a previous call */
+ if (idx < s_idx) {
+ idx++;
+ continue;
+ }
+
+ if (!range_start) {
+ range_start = v;
+ range_end = v;
+ continue;
+ }
+
+ /* the pvid vlan never joins a range; otherwise close the current
+ * range as soon as v cannot extend it
+ */
+ if (v->vid == pvid || !br_vlan_can_enter_range(v, range_end)) {
+ u16 flags = br_vlan_flags(range_start, pvid);
+
+ if (!br_vlan_fill_vids(skb, range_start->vid,
+ range_end->vid, range_start,
+ flags)) {
+ err = -EMSGSIZE;
+ break;
+ }
+ /* advance number of filled vlans */
+ idx += range_end->vid - range_start->vid + 1;
+
+ range_start = v;
+ }
+ range_end = v;
+ }
+
+ /* err will be 0 and range_start will be set in 3 cases here:
+ * - first vlan (range_start == range_end)
+ * - last vlan (range_start == range_end, not in range)
+ * - last vlan range (range_start != range_end, in range)
+ */
+ if (!err && range_start &&
+ !br_vlan_fill_vids(skb, range_start->vid, range_end->vid,
+ range_start, br_vlan_flags(range_start, pvid)))
+ err = -EMSGSIZE;
+
+ /* remember where to resume only if the dump was cut short */
+ cb->args[1] = err ? idx : 0;
+
+ nlmsg_end(skb, nlh);
+
+ return err;
+}
+
+/* RTM_GETVLAN dump callback.
+ *
+ * If the request names an ifindex only that device is dumped, otherwise all
+ * devices of the netns are walked and every bridge / bridge port is dumped.
+ * cb->args[0] is the index of the device to resume from and cb->args[1] is
+ * the per-device resume position kept by br_vlan_dump_dev().
+ *
+ * Returns skb->len while more data may follow, 0 when a single-device dump
+ * has completed, or a negative errno on failure.
+ */
+static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int idx = 0, err = 0, s_idx = cb->args[0];
+ struct net *net = sock_net(skb->sk);
+ struct br_vlan_msg *bvm;
+ struct net_device *dev;
+
+ err = nlmsg_parse(cb->nlh, sizeof(*bvm), NULL, 0, NULL, cb->extack);
+ if (err < 0)
+ return err;
+
+ bvm = nlmsg_data(cb->nlh);
+
+ rcu_read_lock();
+ if (bvm->ifindex) {
+ dev = dev_get_by_index_rcu(net, bvm->ifindex);
+ if (!dev) {
+ err = -ENODEV;
+ goto out_err;
+ }
+ err = br_vlan_dump_dev(dev, skb, cb);
+ /* if the dump completed without an error we return 0 here;
+ * returning skb->len would make netlink call us again and,
+ * with cb->args[1] reset to 0, dump the same device forever
+ */
+ if (err != -EMSGSIZE)
+ goto out_err;
+ } else {
+ for_each_netdev_rcu(net, dev) {
+ if (idx < s_idx)
+ goto skip;
+
+ /* errors other than a full skb (e.g. not a bridge device)
+ * are ignored and the walk continues
+ */
+ err = br_vlan_dump_dev(dev, skb, cb);
+ if (err == -EMSGSIZE)
+ break;
+skip:
+ idx++;
+ }
+ }
+ cb->args[0] = idx;
+ rcu_read_unlock();
+
+ return skb->len;
+
+out_err:
+ rcu_read_unlock();
+
+ return err;
+}
+
+/* Validation policy for the attributes nested in a BRIDGE_VLANDB_ENTRY:
+ * the info attribute must be exactly one struct bridge_vlan_info, the range
+ * end is a u16 and the state is a u8.
+ */
+static const struct nla_policy br_vlan_db_policy[BRIDGE_VLANDB_ENTRY_MAX + 1] = {
+ [BRIDGE_VLANDB_ENTRY_INFO] = { .type = NLA_EXACT_LEN,
+ .len = sizeof(struct bridge_vlan_info) },
+ [BRIDGE_VLANDB_ENTRY_RANGE] = { .type = NLA_U16 },
+ [BRIDGE_VLANDB_ENTRY_STATE] = { .type = NLA_U8 },
+};
+
+/* Process one BRIDGE_VLANDB_ENTRY attribute of an RTM_NEWVLAN / RTM_DELVLAN
+ * request for @dev (a bridge or a bridge port).
+ *
+ * The entry is validated, converted to the old-style begin/end vlan info
+ * pair when a range attribute is present, and handed to
+ * br_process_vlan_info() unless BRIDGE_VLAN_INFO_ONLY_OPTS is set on a
+ * RTM_NEWVLAN. For RTM_NEWVLAN the vlan options are processed afterwards.
+ *
+ * Returns 0 on success or a negative errno; @extack carries the reason.
+ */
+static int br_vlan_rtm_process_one(struct net_device *dev,
+ const struct nlattr *attr,
+ int cmd, struct netlink_ext_ack *extack)
+{
+ struct bridge_vlan_info *vinfo, vrange_end, *vinfo_last = NULL;
+ struct nlattr *tb[BRIDGE_VLANDB_ENTRY_MAX + 1];
+ bool changed = false, skip_processing = false;
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_port *p = NULL;
+ int err = 0, cmdmap = 0;
+ struct net_bridge *br;
+
+ if (netif_is_bridge_master(dev)) {
+ br = netdev_priv(dev);
+ vg = br_vlan_group(br);
+ } else {
+ p = br_port_get_rtnl(dev);
+ if (WARN_ON(!p))
+ return -ENODEV;
+ br = p->br;
+ vg = nbp_vlan_group(p);
+ }
+
+ if (WARN_ON(!vg))
+ return -ENODEV;
+
+ err = nla_parse_nested(tb, BRIDGE_VLANDB_ENTRY_MAX, attr,
+ br_vlan_db_policy, extack);
+ if (err)
+ return err;
+
+ if (!tb[BRIDGE_VLANDB_ENTRY_INFO]) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing vlan entry info");
+ return -EINVAL;
+ }
+ memset(&vrange_end, 0, sizeof(vrange_end));
+
+ vinfo = nla_data(tb[BRIDGE_VLANDB_ENTRY_INFO]);
+ if (vinfo->flags & (BRIDGE_VLAN_INFO_RANGE_BEGIN |
+ BRIDGE_VLAN_INFO_RANGE_END)) {
+ NL_SET_ERR_MSG_MOD(extack, "Old-style vlan ranges are not allowed when using RTM vlan calls");
+ return -EINVAL;
+ }
+ if (!br_vlan_valid_id(vinfo->vid, extack))
+ return -EINVAL;
+
+ if (tb[BRIDGE_VLANDB_ENTRY_RANGE]) {
+ vrange_end.vid = nla_get_u16(tb[BRIDGE_VLANDB_ENTRY_RANGE]);
+ /* validate user-provided flags without RANGE_BEGIN */
+ vrange_end.flags = BRIDGE_VLAN_INFO_RANGE_END | vinfo->flags;
+ /* note: this writes into the attribute payload returned by nla_data() */
+ vinfo->flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
+
+ /* vinfo_last is the range start, vinfo the range end */
+ vinfo_last = vinfo;
+ vinfo = &vrange_end;
+
+ if (!br_vlan_valid_id(vinfo->vid, extack) ||
+ !br_vlan_valid_range(vinfo, vinfo_last, extack))
+ return -EINVAL;
+ }
+
+ /* map the vlan command to the link command br_process_vlan_info() takes */
+ switch (cmd) {
+ case RTM_NEWVLAN:
+ cmdmap = RTM_SETLINK;
+ skip_processing = !!(vinfo->flags & BRIDGE_VLAN_INFO_ONLY_OPTS);
+ break;
+ case RTM_DELVLAN:
+ cmdmap = RTM_DELLINK;
+ break;
+ }
+
+ if (!skip_processing) {
+ struct bridge_vlan_info *tmp_last = vinfo_last;
+
+ /* br_process_vlan_info may overwrite vinfo_last */
+ err = br_process_vlan_info(br, p, cmdmap, vinfo, &tmp_last,
+ &changed, extack);
+
+ /* notify first if anything changed */
+ if (changed)
+ br_ifinfo_notify(cmdmap, br, p);
+
+ if (err)
+ return err;
+ }
+
+ /* deal with options */
+ if (cmd == RTM_NEWVLAN) {
+ struct net_bridge_vlan *range_start, *range_end;
+
+ if (vinfo_last) {
+ range_start = br_vlan_find(vg, vinfo_last->vid);
+ range_end = br_vlan_find(vg, vinfo->vid);
+ } else {
+ range_start = br_vlan_find(vg, vinfo->vid);
+ range_end = range_start;
+ }
+
+ /* missing vlans are rejected by br_vlan_process_options() */
+ err = br_vlan_process_options(br, p, range_start, range_end,
+ tb, extack);
+ }
+
+ return err;
+}
+
+/* Handler registered for RTM_NEWVLAN and RTM_DELVLAN.
+ *
+ * Looks up the device named by the message's ifindex, checks that it is a
+ * bridge or a bridge port and processes every BRIDGE_VLANDB_ENTRY attribute
+ * in order, stopping at the first error. Entries processed before the
+ * failing one are not rolled back here.
+ *
+ * Returns 0 on success, -ENODEV if the device does not exist, -EINVAL if it
+ * is not a bridge device or no vlan entry was present, or the error of the
+ * failing entry.
+ */
+static int br_vlan_rtm_process(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct br_vlan_msg *bvm;
+ struct net_device *dev;
+ struct nlattr *attr;
+ int err, vlans = 0;
+ int rem;
+
+ /* this should validate the header and check for remaining bytes */
+ err = nlmsg_parse(nlh, sizeof(*bvm), NULL, BRIDGE_VLANDB_MAX, NULL,
+ extack);
+ if (err < 0)
+ return err;
+
+ bvm = nlmsg_data(nlh);
+ dev = __dev_get_by_index(net, bvm->ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "The device is not a valid bridge or bridge port");
+ return -EINVAL;
+ }
+
+ nlmsg_for_each_attr(attr, nlh, sizeof(*bvm), rem) {
+ /* attributes of any other type are silently ignored */
+ if (nla_type(attr) != BRIDGE_VLANDB_ENTRY)
+ continue;
+
+ vlans++;
+ err = br_vlan_rtm_process_one(dev, attr, nlh->nlmsg_type,
+ extack);
+ if (err)
+ break;
+ }
+ if (!vlans) {
+ NL_SET_ERR_MSG_MOD(extack, "No vlans found to process");
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+/* Register the PF_BRIDGE vlan rtnetlink handlers: RTM_GETVLAN is served by
+ * the dump callback, RTM_NEWVLAN and RTM_DELVLAN share br_vlan_rtm_process().
+ * The return values of rtnl_register_module() are not checked.
+ */
+void br_vlan_rtnl_init(void)
+{
+ rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETVLAN, NULL,
+ br_vlan_rtm_dump, 0);
+ rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWVLAN,
+ br_vlan_rtm_process, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELVLAN,
+ br_vlan_rtm_process, NULL, 0);
+}
+
+/* Unregister the three PF_BRIDGE vlan message types registered by
+ * br_vlan_rtnl_init().
+ */
+void br_vlan_rtnl_uninit(void)
+{
+ rtnl_unregister(PF_BRIDGE, RTM_GETVLAN);
+ rtnl_unregister(PF_BRIDGE, RTM_NEWVLAN);
+ rtnl_unregister(PF_BRIDGE, RTM_DELVLAN);
+}
diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c
new file mode 100644
index 000000000000..cd2eb194eb98
--- /dev/null
+++ b/net/bridge/br_vlan_options.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (c) 2020, Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+
+#include "br_private.h"
+
+/* check if the options between two vlans are equal; the state is the only
+ * option compared here
+ */
+bool br_vlan_opts_eq(const struct net_bridge_vlan *v1,
+ const struct net_bridge_vlan *v2)
+{
+ return v1->state == v2->state;
+}
+
+/* Add v's options (the state attribute) to @skb.
+ * Returns true on success, false if the attribute could not be added.
+ */
+bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v)
+{
+ u8 state = br_vlan_get_state(v);
+
+ if (nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_STATE, state))
+ return false;
+
+ return true;
+}
+
+/* Netlink space needed for the options written by br_vlan_opts_fill() */
+size_t br_vlan_opts_nl_size(void)
+{
+ return nla_total_size(sizeof(u8)); /* BRIDGE_VLANDB_ENTRY_STATE */
+}
+
+/* Set the state of vlan @v to @state, keeping the group's cached pvid state
+ * in sync when @v is the pvid. *changed is set to true only when the state
+ * really changed and is left untouched otherwise.
+ *
+ * Returns 0 on success, -EINVAL for a state above BR_STATE_BLOCKING and
+ * -EBUSY when the bridge runs kernel STP. Must be called with rtnl held.
+ */
+static int br_vlan_modify_state(struct net_bridge_vlan_group *vg,
+ struct net_bridge_vlan *v,
+ u8 state,
+ bool *changed,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge *br;
+
+ ASSERT_RTNL();
+
+ if (state > BR_STATE_BLOCKING) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid vlan state");
+ return -EINVAL;
+ }
+
+ /* a bridge entry points at the bridge, a port entry at its port */
+ br = br_vlan_is_brentry(v) ? v->br : v->port->br;
+ if (br->stp_enabled == BR_KERNEL_STP) {
+ NL_SET_ERR_MSG_MOD(extack, "Can't modify vlan state when using kernel STP");
+ return -EBUSY;
+ }
+
+ if (v->state != state) {
+ if (v->vid == br_get_pvid(vg))
+ br_vlan_set_pvid_state(vg, state);
+
+ br_vlan_set_state(v, state);
+ *changed = true;
+ }
+
+ return 0;
+}
+
+/* Apply the options found in @tb to a single vlan @v. *changed is reset to
+ * false first and set to true if an option was really modified.
+ * Returns 0 on success or the error of the failing option.
+ */
+static int br_vlan_process_one_opts(const struct net_bridge *br,
+ const struct net_bridge_port *p,
+ struct net_bridge_vlan_group *vg,
+ struct net_bridge_vlan *v,
+ struct nlattr **tb,
+ bool *changed,
+ struct netlink_ext_ack *extack)
+{
+ u8 state;
+
+ *changed = false;
+ if (!tb[BRIDGE_VLANDB_ENTRY_STATE])
+ return 0;
+
+ state = nla_get_u8(tb[BRIDGE_VLANDB_ENTRY_STATE]);
+
+ return br_vlan_modify_state(vg, v, state, changed, extack);
+}
+
+/* Apply the options in @tb to every vlan from @range_start to @range_end
+ * (inclusive) of the port @p, or of the bridge @br when @p is NULL, and send
+ * RTM_NEWVLAN notifications for the vlans whose options changed, compressed
+ * into ranges where possible.
+ *
+ * Returns 0 on success, -ENOENT if the range start, the range end or any
+ * vlan in between does not exist, or the error returned while applying the
+ * options. Processing stops at the first error; vlans changed before it stay
+ * changed and are still notified.
+ */
+int br_vlan_process_options(const struct net_bridge *br,
+ const struct net_bridge_port *p,
+ struct net_bridge_vlan *range_start,
+ struct net_bridge_vlan *range_end,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_vlan *v, *curr_start = NULL, *curr_end = NULL;
+ struct net_bridge_vlan_group *vg;
+ int vid, err = 0;
+ u16 pvid;
+
+ if (p)
+ vg = nbp_vlan_group(p);
+ else
+ vg = br_vlan_group(br);
+
+ if (!range_start || !br_vlan_should_use(range_start)) {
+ NL_SET_ERR_MSG_MOD(extack, "Vlan range start doesn't exist, can't process options");
+ return -ENOENT;
+ }
+ if (!range_end || !br_vlan_should_use(range_end)) {
+ NL_SET_ERR_MSG_MOD(extack, "Vlan range end doesn't exist, can't process options");
+ return -ENOENT;
+ }
+
+ pvid = br_get_pvid(vg);
+ for (vid = range_start->vid; vid <= range_end->vid; vid++) {
+ bool changed = false;
+
+ v = br_vlan_find(vg, vid);
+ if (!v || !br_vlan_should_use(v)) {
+ NL_SET_ERR_MSG_MOD(extack, "Vlan in range doesn't exist, can't process options");
+ err = -ENOENT;
+ break;
+ }
+
+ err = br_vlan_process_one_opts(br, p, vg, v, tb, &changed,
+ extack);
+ if (err)
+ break;
+
+ /* curr_start..curr_end is the pending range of changed vlans that
+ * has not been notified yet
+ */
+ if (changed) {
+ /* vlan options changed, check for range */
+ if (!curr_start) {
+ curr_start = v;
+ curr_end = v;
+ continue;
+ }
+
+ /* flush the pending range if v cannot extend it */
+ if (v->vid == pvid ||
+ !br_vlan_can_enter_range(v, curr_end)) {
+ br_vlan_notify(br, p, curr_start->vid,
+ curr_end->vid, RTM_NEWVLAN);
+ curr_start = v;
+ }
+ curr_end = v;
+ } else {
+ /* nothing changed and nothing to notify yet */
+ if (!curr_start)
+ continue;
+
+ /* an unchanged vlan ends the pending range */
+ br_vlan_notify(br, p, curr_start->vid, curr_end->vid,
+ RTM_NEWVLAN);
+ curr_start = NULL;
+ curr_end = NULL;
+ }
+ }
+ /* notify the last pending range, also after an error */
+ if (curr_start)
+ br_vlan_notify(br, p, curr_start->vid, curr_end->vid,
+ RTM_NEWVLAN);
+
+ return err;
+}
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 03c7cdd8e4cb..195d2d67be8a 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -112,7 +112,8 @@ static struct caif_device_entry *caif_get(struct net_device *dev)
caif_device_list(dev_net(dev));
struct caif_device_entry *caifd;
- list_for_each_entry_rcu(caifd, &caifdevs->list, list) {
+ list_for_each_entry_rcu(caifd, &caifdevs->list, list,
+ lockdep_rtnl_is_held()) {
if (caifd->netdev == dev)
return caifd;
}
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index 76bd67891fb3..a0116b9503d9 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -62,7 +62,7 @@ static int cfusbl_transmit(struct cflayer *layr, struct cfpkt *pkt)
hpad = (info->hdr_len + CFUSB_PAD_DESCR_SZ) & (CFUSB_ALIGNMENT - 1);
if (skb_headroom(skb) < ETH_HLEN + CFUSB_PAD_DESCR_SZ + hpad) {
- pr_warn("Headroom to small\n");
+ pr_warn("Headroom too small\n");
kfree_skb(skb);
return -EIO;
}
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index 59d0ba2072de..ce09bb4fb249 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -13,5 +13,5 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
auth.o auth_none.o \
crypto.o armor.o \
auth_x.o \
- ceph_fs.o ceph_strings.o ceph_hash.o \
+ ceph_strings.o ceph_hash.o \
pagevec.o snapshot.o string_table.o
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index a9d6c97b5b0d..a0e97f6c1072 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -269,7 +269,7 @@ enum {
Opt_abort_on_full,
};
-static const struct fs_parameter_spec ceph_param_specs[] = {
+static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_flag ("abort_on_full", Opt_abort_on_full),
fsparam_flag_no ("cephx_require_signatures", Opt_cephx_require_signatures),
fsparam_flag_no ("cephx_sign_messages", Opt_cephx_sign_messages),
@@ -283,18 +283,13 @@ static const struct fs_parameter_spec ceph_param_specs[] = {
fsparam_u32 ("osd_request_timeout", Opt_osd_request_timeout),
fsparam_u32 ("osdkeepalive", Opt_osdkeepalivetimeout),
__fsparam (fs_param_is_s32, "osdtimeout", Opt_osdtimeout,
- fs_param_deprecated),
+ fs_param_deprecated, NULL),
fsparam_string ("secret", Opt_secret),
fsparam_flag_no ("share", Opt_share),
fsparam_flag_no ("tcp_nodelay", Opt_tcp_nodelay),
{}
};
-static const struct fs_parameter_description ceph_parameters = {
- .name = "libceph",
- .specs = ceph_param_specs,
-};
-
struct ceph_options *ceph_alloc_options(void)
{
struct ceph_options *opt;
@@ -337,7 +332,7 @@ EXPORT_SYMBOL(ceph_destroy_options);
/* get secret from key store */
static int get_secret(struct ceph_crypto_key *dst, const char *name,
- struct fs_context *fc)
+ struct p_log *log)
{
struct key *ukey;
int key_err;
@@ -351,19 +346,19 @@ static int get_secret(struct ceph_crypto_key *dst, const char *name,
key_err = PTR_ERR(ukey);
switch (key_err) {
case -ENOKEY:
- errorf(fc, "libceph: Failed due to key not found: %s",
+ error_plog(log, "Failed due to key not found: %s",
name);
break;
case -EKEYEXPIRED:
- errorf(fc, "libceph: Failed due to expired key: %s",
+ error_plog(log, "Failed due to expired key: %s",
name);
break;
case -EKEYREVOKED:
- errorf(fc, "libceph: Failed due to revoked key: %s",
+ error_plog(log, "Failed due to revoked key: %s",
name);
break;
default:
- errorf(fc, "libceph: Failed due to key error %d: %s",
+ error_plog(log, "Failed due to key error %d: %s",
key_err, name);
}
err = -EPERM;
@@ -383,15 +378,16 @@ out:
}
int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt,
- struct fs_context *fc)
+ struct fc_log *l)
{
+ struct p_log log = {.prefix = "libceph", .log = l};
int ret;
/* ip1[:port1][,ip2[:port2]...] */
ret = ceph_parse_ips(buf, buf + len, opt->mon_addr, CEPH_MAX_MON,
&opt->num_mon);
if (ret) {
- errorf(fc, "libceph: Failed to parse monitor IPs: %d", ret);
+ error_plog(&log, "Failed to parse monitor IPs: %d", ret);
return ret;
}
@@ -400,12 +396,13 @@ int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt,
EXPORT_SYMBOL(ceph_parse_mon_ips);
int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
- struct fs_context *fc)
+ struct fc_log *l)
{
struct fs_parse_result result;
int token, err;
+ struct p_log log = {.prefix = "libceph", .log = l};
- token = fs_parse(fc, &ceph_parameters, param, &result);
+ token = __fs_parse(&log, ceph_parameters, param, &result);
dout("%s fs_parse '%s' token %d\n", __func__, param->key, token);
if (token < 0)
return token;
@@ -417,7 +414,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
&opt->my_addr,
1, NULL);
if (err) {
- errorf(fc, "libceph: Failed to parse ip: %d", err);
+ error_plog(&log, "Failed to parse ip: %d", err);
return err;
}
opt->flags |= CEPH_OPT_MYIP;
@@ -426,7 +423,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
case Opt_fsid:
err = parse_fsid(param->string, &opt->fsid);
if (err) {
- errorf(fc, "libceph: Failed to parse fsid: %d", err);
+ error_plog(&log, "Failed to parse fsid: %d", err);
return err;
}
opt->flags |= CEPH_OPT_FSID;
@@ -445,7 +442,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
return -ENOMEM;
err = ceph_crypto_key_unarmor(opt->key, param->string);
if (err) {
- errorf(fc, "libceph: Failed to parse secret: %d", err);
+ error_plog(&log, "Failed to parse secret: %d", err);
return err;
}
break;
@@ -456,10 +453,10 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL);
if (!opt->key)
return -ENOMEM;
- return get_secret(opt->key, param->string, fc);
+ return get_secret(opt->key, param->string, &log);
case Opt_osdtimeout:
- warnf(fc, "libceph: Ignoring osdtimeout");
+ warn_plog(&log, "Ignoring osdtimeout");
break;
case Opt_osdkeepalivetimeout:
/* 0 isn't well defined right now, reject it */
@@ -530,7 +527,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
return 0;
out_of_range:
- return invalf(fc, "libceph: %s out of range", param->key);
+ return inval_plog(&log, "%s out of range", param->key);
}
EXPORT_SYMBOL(ceph_parse_param);
diff --git a/net/ceph/ceph_fs.c b/net/ceph/ceph_fs.c
deleted file mode 100644
index 756a2dc10d27..000000000000
--- a/net/ceph/ceph_fs.c
+++ /dev/null
@@ -1,104 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Some non-inline ceph helpers
- */
-#include <linux/module.h>
-#include <linux/ceph/types.h>
-
-/*
- * return true if @layout appears to be valid
- */
-int ceph_file_layout_is_valid(const struct ceph_file_layout *layout)
-{
- __u32 su = layout->stripe_unit;
- __u32 sc = layout->stripe_count;
- __u32 os = layout->object_size;
-
- /* stripe unit, object size must be non-zero, 64k increment */
- if (!su || (su & (CEPH_MIN_STRIPE_UNIT-1)))
- return 0;
- if (!os || (os & (CEPH_MIN_STRIPE_UNIT-1)))
- return 0;
- /* object size must be a multiple of stripe unit */
- if (os < su || os % su)
- return 0;
- /* stripe count must be non-zero */
- if (!sc)
- return 0;
- return 1;
-}
-
-void ceph_file_layout_from_legacy(struct ceph_file_layout *fl,
- struct ceph_file_layout_legacy *legacy)
-{
- fl->stripe_unit = le32_to_cpu(legacy->fl_stripe_unit);
- fl->stripe_count = le32_to_cpu(legacy->fl_stripe_count);
- fl->object_size = le32_to_cpu(legacy->fl_object_size);
- fl->pool_id = le32_to_cpu(legacy->fl_pg_pool);
- if (fl->pool_id == 0 && fl->stripe_unit == 0 &&
- fl->stripe_count == 0 && fl->object_size == 0)
- fl->pool_id = -1;
-}
-EXPORT_SYMBOL(ceph_file_layout_from_legacy);
-
-void ceph_file_layout_to_legacy(struct ceph_file_layout *fl,
- struct ceph_file_layout_legacy *legacy)
-{
- legacy->fl_stripe_unit = cpu_to_le32(fl->stripe_unit);
- legacy->fl_stripe_count = cpu_to_le32(fl->stripe_count);
- legacy->fl_object_size = cpu_to_le32(fl->object_size);
- if (fl->pool_id >= 0)
- legacy->fl_pg_pool = cpu_to_le32(fl->pool_id);
- else
- legacy->fl_pg_pool = 0;
-}
-EXPORT_SYMBOL(ceph_file_layout_to_legacy);
-
-int ceph_flags_to_mode(int flags)
-{
- int mode;
-
-#ifdef O_DIRECTORY /* fixme */
- if ((flags & O_DIRECTORY) == O_DIRECTORY)
- return CEPH_FILE_MODE_PIN;
-#endif
-
- switch (flags & O_ACCMODE) {
- case O_WRONLY:
- mode = CEPH_FILE_MODE_WR;
- break;
- case O_RDONLY:
- mode = CEPH_FILE_MODE_RD;
- break;
- case O_RDWR:
- case O_ACCMODE: /* this is what the VFS does */
- mode = CEPH_FILE_MODE_RDWR;
- break;
- }
-#ifdef O_LAZY
- if (flags & O_LAZY)
- mode |= CEPH_FILE_MODE_LAZY;
-#endif
-
- return mode;
-}
-EXPORT_SYMBOL(ceph_flags_to_mode);
-
-int ceph_caps_for_mode(int mode)
-{
- int caps = CEPH_CAP_PIN;
-
- if (mode & CEPH_FILE_MODE_RD)
- caps |= CEPH_CAP_FILE_SHARED |
- CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE;
- if (mode & CEPH_FILE_MODE_WR)
- caps |= CEPH_CAP_FILE_EXCL |
- CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER |
- CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL |
- CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL;
- if (mode & CEPH_FILE_MODE_LAZY)
- caps |= CEPH_CAP_FILE_LAZYIO;
-
- return caps;
-}
-EXPORT_SYMBOL(ceph_caps_for_mode);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index ba45b074a362..b68b376d8c2f 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -402,7 +402,7 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
case CEPH_OSD_OP_LIST_WATCHERS:
ceph_osd_data_release(&op->list_watchers.response_data);
break;
- case CEPH_OSD_OP_COPY_FROM:
+ case CEPH_OSD_OP_COPY_FROM2:
ceph_osd_data_release(&op->copy_from.osd_data);
break;
default:
@@ -697,7 +697,7 @@ static void get_num_data_items(struct ceph_osd_request *req,
case CEPH_OSD_OP_SETXATTR:
case CEPH_OSD_OP_CMPXATTR:
case CEPH_OSD_OP_NOTIFY_ACK:
- case CEPH_OSD_OP_COPY_FROM:
+ case CEPH_OSD_OP_COPY_FROM2:
*num_request_data_items += 1;
break;
@@ -1029,7 +1029,7 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
case CEPH_OSD_OP_CREATE:
case CEPH_OSD_OP_DELETE:
break;
- case CEPH_OSD_OP_COPY_FROM:
+ case CEPH_OSD_OP_COPY_FROM2:
dst->copy_from.snapid = cpu_to_le64(src->copy_from.snapid);
dst->copy_from.src_version =
cpu_to_le64(src->copy_from.src_version);
@@ -1966,7 +1966,7 @@ static void setup_request_data(struct ceph_osd_request *req)
ceph_osdc_msg_data_add(request_msg,
&op->notify_ack.request_data);
break;
- case CEPH_OSD_OP_COPY_FROM:
+ case CEPH_OSD_OP_COPY_FROM2:
ceph_osdc_msg_data_add(request_msg,
&op->copy_from.osd_data);
break;
@@ -5315,6 +5315,7 @@ static int osd_req_op_copy_from_init(struct ceph_osd_request *req,
struct ceph_object_locator *src_oloc,
u32 src_fadvise_flags,
u32 dst_fadvise_flags,
+ u32 truncate_seq, u64 truncate_size,
u8 copy_from_flags)
{
struct ceph_osd_req_op *op;
@@ -5325,7 +5326,8 @@ static int osd_req_op_copy_from_init(struct ceph_osd_request *req,
if (IS_ERR(pages))
return PTR_ERR(pages);
- op = _osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM, dst_fadvise_flags);
+ op = _osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM2,
+ dst_fadvise_flags);
op->copy_from.snapid = src_snapid;
op->copy_from.src_version = src_version;
op->copy_from.flags = copy_from_flags;
@@ -5335,6 +5337,8 @@ static int osd_req_op_copy_from_init(struct ceph_osd_request *req,
end = p + PAGE_SIZE;
ceph_encode_string(&p, end, src_oid->name, src_oid->name_len);
encode_oloc(&p, end, src_oloc);
+ ceph_encode_32(&p, truncate_seq);
+ ceph_encode_64(&p, truncate_size);
op->indata_len = PAGE_SIZE - (end - p);
ceph_osd_data_pages_init(&op->copy_from.osd_data, pages,
@@ -5350,6 +5354,7 @@ int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
struct ceph_object_id *dst_oid,
struct ceph_object_locator *dst_oloc,
u32 dst_fadvise_flags,
+ u32 truncate_seq, u64 truncate_size,
u8 copy_from_flags)
{
struct ceph_osd_request *req;
@@ -5366,7 +5371,8 @@ int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
ret = osd_req_op_copy_from_init(req, src_snapid, src_version, src_oid,
src_oloc, src_fadvise_flags,
- dst_fadvise_flags, copy_from_flags);
+ dst_fadvise_flags, truncate_seq,
+ truncate_size, copy_from_flags);
if (ret)
goto out;
diff --git a/net/core/Makefile b/net/core/Makefile
index a104dc8faafc..3e2c378e5f31 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -8,7 +8,7 @@ obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
-obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
+obj-y += dev.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
fib_notifier.o xdp.o flow_offload.o
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 458be6b3eda9..3ab23f698221 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -643,9 +643,10 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&smap->map, attr);
+ nbuckets = roundup_pow_of_two(num_possible_cpus());
/* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
- smap->bucket_log = max_t(u32, 1, ilog2(roundup_pow_of_two(num_possible_cpus())));
- nbuckets = 1U << smap->bucket_log;
+ nbuckets = max_t(u32, 2, nbuckets);
+ smap->bucket_log = ilog2(nbuckets);
cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
ret = bpf_map_charge_init(&smap->map.memory, cost);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index da3c24ed129c..a78e7f864c1e 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -84,7 +84,8 @@ static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, i
/*
* Wait for the last received packet to be different from skb
*/
-int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
+int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue,
+ int *err, long *timeo_p,
const struct sk_buff *skb)
{
int error;
@@ -97,7 +98,7 @@ int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
if (error)
goto out_err;
- if (READ_ONCE(sk->sk_receive_queue.prev) != skb)
+ if (READ_ONCE(queue->prev) != skb)
goto out;
/* Socket shut down? */
@@ -209,6 +210,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
/**
* __skb_try_recv_datagram - Receive a datagram skbuff
* @sk: socket
+ * @queue: socket queue from which to receive
* @flags: MSG\_ flags
* @destructor: invoked under the receive lock on successful dequeue
* @off: an offset in bytes to peek skb from. Returns an offset
@@ -241,13 +243,14 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
* quite explicitly by POSIX 1003.1g, don't change them without having
* the standard around please.
*/
-struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
+struct sk_buff *__skb_try_recv_datagram(struct sock *sk,
+ struct sk_buff_head *queue,
+ unsigned int flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb),
int *off, int *err,
struct sk_buff **last)
{
- struct sk_buff_head *queue = &sk->sk_receive_queue;
struct sk_buff *skb;
unsigned long cpu_flags;
/*
@@ -278,7 +281,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
break;
sk_busy_loop(sk, flags & MSG_DONTWAIT);
- } while (READ_ONCE(sk->sk_receive_queue.prev) != *last);
+ } while (READ_ONCE(queue->prev) != *last);
error = -EAGAIN;
@@ -288,7 +291,9 @@ no_packet:
}
EXPORT_SYMBOL(__skb_try_recv_datagram);
-struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
+struct sk_buff *__skb_recv_datagram(struct sock *sk,
+ struct sk_buff_head *sk_queue,
+ unsigned int flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb),
int *off, int *err)
@@ -299,15 +304,16 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
do {
- skb = __skb_try_recv_datagram(sk, flags, destructor, off, err,
- &last);
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, destructor,
+ off, err, &last);
if (skb)
return skb;
if (*err != -EAGAIN)
break;
} while (timeo &&
- !__skb_wait_for_more_packets(sk, err, &timeo, last));
+ !__skb_wait_for_more_packets(sk, sk_queue, err,
+ &timeo, last));
return NULL;
}
@@ -318,7 +324,8 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
{
int off = 0;
- return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
+ return __skb_recv_datagram(sk, &sk->sk_receive_queue,
+ flags | (noblock ? MSG_DONTWAIT : 0),
NULL, &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);
diff --git a/net/core/dev.c b/net/core/dev.c
index 7e885d069707..c6c985fe7b1b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -146,7 +146,6 @@
#include "net-sysfs.h"
#define MAX_GRO_SKBS 8
-#define MAX_NEST_DEV 8
/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)
@@ -331,6 +330,12 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
name_node = netdev_name_node_lookup(net, name);
if (!name_node)
return -ENOENT;
+ /* lookup might have found our primary name or a name belonging
+ * to another device.
+ */
+ if (name_node == dev->name_node || name_node->dev != dev)
+ return -EINVAL;
+
__netdev_name_node_alt_destroy(name_node);
return 0;
@@ -928,7 +933,7 @@ EXPORT_SYMBOL(dev_get_by_napi_id);
*
* The use of raw_seqcount_begin() and cond_resched() before
* retrying is required as we want to give the writers a chance
- * to complete when CONFIG_PREEMPT is not set.
+ * to complete when CONFIG_PREEMPTION is not set.
*/
int netdev_get_name(struct net *net, char *name, int ifindex)
{
@@ -1764,7 +1769,6 @@ EXPORT_SYMBOL(register_netdevice_notifier);
int unregister_netdevice_notifier(struct notifier_block *nb)
{
- struct net_device *dev;
struct net *net;
int err;
@@ -1775,16 +1779,9 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
if (err)
goto unlock;
- for_each_net(net) {
- for_each_netdev(net, dev) {
- if (dev->flags & IFF_UP) {
- call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
- dev);
- call_netdevice_notifier(nb, NETDEV_DOWN, dev);
- }
- call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
- }
- }
+ for_each_net(net)
+ call_netdevice_unregister_net_notifiers(nb, net);
+
unlock:
rtnl_unlock();
up_write(&pernet_ops_rwsem);
@@ -1792,6 +1789,42 @@ unlock:
}
EXPORT_SYMBOL(unregister_netdevice_notifier);
+static int __register_netdevice_notifier_net(struct net *net,
+ struct notifier_block *nb,
+ bool ignore_call_fail)
+{
+ int err;
+
+ err = raw_notifier_chain_register(&net->netdev_chain, nb);
+ if (err)
+ return err;
+ if (dev_boot_phase)
+ return 0;
+
+ err = call_netdevice_register_net_notifiers(nb, net);
+ if (err && !ignore_call_fail)
+ goto chain_unregister;
+
+ return 0;
+
+chain_unregister:
+ raw_notifier_chain_unregister(&net->netdev_chain, nb);
+ return err;
+}
+
+static int __unregister_netdevice_notifier_net(struct net *net,
+ struct notifier_block *nb)
+{
+ int err;
+
+ err = raw_notifier_chain_unregister(&net->netdev_chain, nb);
+ if (err)
+ return err;
+
+ call_netdevice_unregister_net_notifiers(nb, net);
+ return 0;
+}
+
/**
* register_netdevice_notifier_net - register a per-netns network notifier block
* @net: network namespace
@@ -1812,23 +1845,9 @@ int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb)
int err;
rtnl_lock();
- err = raw_notifier_chain_register(&net->netdev_chain, nb);
- if (err)
- goto unlock;
- if (dev_boot_phase)
- goto unlock;
-
- err = call_netdevice_register_net_notifiers(nb, net);
- if (err)
- goto chain_unregister;
-
-unlock:
+ err = __register_netdevice_notifier_net(net, nb, false);
rtnl_unlock();
return err;
-
-chain_unregister:
- raw_notifier_chain_unregister(&netdev_chain, nb);
- goto unlock;
}
EXPORT_SYMBOL(register_netdevice_notifier_net);
@@ -1854,17 +1873,53 @@ int unregister_netdevice_notifier_net(struct net *net,
int err;
rtnl_lock();
- err = raw_notifier_chain_unregister(&net->netdev_chain, nb);
- if (err)
- goto unlock;
+ err = __unregister_netdevice_notifier_net(net, nb);
+ rtnl_unlock();
+ return err;
+}
+EXPORT_SYMBOL(unregister_netdevice_notifier_net);
- call_netdevice_unregister_net_notifiers(nb, net);
+int register_netdevice_notifier_dev_net(struct net_device *dev,
+ struct notifier_block *nb,
+ struct netdev_net_notifier *nn)
+{
+ int err;
-unlock:
+ rtnl_lock();
+ err = __register_netdevice_notifier_net(dev_net(dev), nb, false);
+ if (!err) {
+ nn->nb = nb;
+ list_add(&nn->list, &dev->net_notifier_list);
+ }
rtnl_unlock();
return err;
}
-EXPORT_SYMBOL(unregister_netdevice_notifier_net);
+EXPORT_SYMBOL(register_netdevice_notifier_dev_net);
+
+int unregister_netdevice_notifier_dev_net(struct net_device *dev,
+ struct notifier_block *nb,
+ struct netdev_net_notifier *nn)
+{
+ int err;
+
+ rtnl_lock();
+ list_del(&nn->list);
+ err = __unregister_netdevice_notifier_net(dev_net(dev), nb);
+ rtnl_unlock();
+ return err;
+}
+EXPORT_SYMBOL(unregister_netdevice_notifier_dev_net);
+
+static void move_netdevice_notifiers_dev_net(struct net_device *dev,
+ struct net *net)
+{
+ struct netdev_net_notifier *nn;
+
+ list_for_each_entry(nn, &dev->net_notifier_list, list) {
+ __unregister_netdevice_notifier_net(dev_net(dev), nn->nb);
+ __register_netdevice_notifier_net(net, nn->nb, true);
+ }
+}
/**
* call_netdevice_notifiers_info - call all network notifier blocks
@@ -3021,6 +3076,8 @@ static u16 skb_tx_hash(const struct net_device *dev,
if (skb_rx_queue_recorded(skb)) {
hash = skb_get_rx_queue(skb);
+ if (hash >= qoffset)
+ hash -= qoffset;
while (unlikely(hash >= qcount))
hash -= qcount;
return hash + qoffset;
@@ -3249,7 +3306,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
segs = skb_mac_gso_segment(skb, features);
- if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
+ if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
skb_warn_bad_offload(skb);
return segs;
@@ -3607,26 +3664,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_calculate_pkt_len(skb, q);
if (q->flags & TCQ_F_NOLOCK) {
- if ((q->flags & TCQ_F_CAN_BYPASS) && READ_ONCE(q->empty) &&
- qdisc_run_begin(q)) {
- if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
- &q->state))) {
- __qdisc_drop(skb, &to_free);
- rc = NET_XMIT_DROP;
- goto end_run;
- }
- qdisc_bstats_cpu_update(q, skb);
-
- rc = NET_XMIT_SUCCESS;
- if (sch_direct_xmit(skb, q, dev, txq, NULL, true))
- __qdisc_run(q);
-
-end_run:
- qdisc_run_end(q);
- } else {
- rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
- qdisc_run(q);
- }
+ rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+ qdisc_run(q);
if (unlikely(to_free))
kfree_skb_list(to_free);
@@ -4477,14 +4516,14 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
/* Reinjected packets coming from act_mirred or similar should
* not get XDP generic processing.
*/
- if (skb_cloned(skb) || skb_is_tc_redirected(skb))
+ if (skb_is_tc_redirected(skb))
return XDP_PASS;
/* XDP packets must be linear and must have sufficient headroom
* of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
* native XDP provides, thus we need to do it here as well.
*/
- if (skb_is_nonlinear(skb) ||
+ if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
skb_headroom(skb) < XDP_PACKET_HEADROOM) {
int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
int troom = skb->tail + skb->data_len - skb->end;
@@ -4932,7 +4971,6 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
int *ret, struct net_device *orig_dev)
{
-#ifdef CONFIG_NETFILTER_INGRESS
if (nf_hook_ingress_active(skb)) {
int ingress_retval;
@@ -4946,7 +4984,6 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
rcu_read_unlock();
return ingress_retval;
}
-#endif /* CONFIG_NETFILTER_INGRESS */
return 0;
}
@@ -5491,9 +5528,29 @@ static void flush_all_backlogs(void)
put_online_cpus();
}
+/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
+static void gro_normal_list(struct napi_struct *napi)
+{
+ if (!napi->rx_count)
+ return;
+ netif_receive_skb_list_internal(&napi->rx_list);
+ INIT_LIST_HEAD(&napi->rx_list);
+ napi->rx_count = 0;
+}
+
+/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
+ * pass the whole batch up to the stack.
+ */
+static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
+{
+ list_add_tail(&skb->list, &napi->rx_list);
+ if (++napi->rx_count >= gro_normal_batch)
+ gro_normal_list(napi);
+}
+
INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
-static int napi_gro_complete(struct sk_buff *skb)
+static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
{
struct packet_offload *ptype;
__be16 type = skb->protocol;
@@ -5526,7 +5583,8 @@ static int napi_gro_complete(struct sk_buff *skb)
}
out:
- return netif_receive_skb_internal(skb);
+ gro_normal_one(napi, skb);
+ return NET_RX_SUCCESS;
}
static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
@@ -5539,7 +5597,7 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
return;
skb_list_del_init(skb);
- napi_gro_complete(skb);
+ napi_gro_complete(napi, skb);
napi->gro_hash[index].count--;
}
@@ -5641,7 +5699,7 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
}
}
-static void gro_flush_oldest(struct list_head *head)
+static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
{
struct sk_buff *oldest;
@@ -5657,7 +5715,7 @@ static void gro_flush_oldest(struct list_head *head)
* SKB to the chain.
*/
skb_list_del_init(oldest);
- napi_gro_complete(oldest);
+ napi_gro_complete(napi, oldest);
}
INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
@@ -5723,7 +5781,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (&ptype->list == head)
goto normal;
- if (IS_ERR(pp) && PTR_ERR(pp) == -EINPROGRESS) {
+ if (PTR_ERR(pp) == -EINPROGRESS) {
ret = GRO_CONSUMED;
goto ok;
}
@@ -5733,7 +5791,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (pp) {
skb_list_del_init(pp);
- napi_gro_complete(pp);
+ napi_gro_complete(napi, pp);
napi->gro_hash[hash].count--;
}
@@ -5744,7 +5802,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
goto normal;
if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
- gro_flush_oldest(gro_head);
+ gro_flush_oldest(napi, gro_head);
} else {
napi->gro_hash[hash].count++;
}
@@ -5802,26 +5860,6 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
}
EXPORT_SYMBOL(gro_find_complete_by_type);
-/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
-static void gro_normal_list(struct napi_struct *napi)
-{
- if (!napi->rx_count)
- return;
- netif_receive_skb_list_internal(&napi->rx_list);
- INIT_LIST_HEAD(&napi->rx_list);
- napi->rx_count = 0;
-}
-
-/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
- * pass the whole batch up to the stack.
- */
-static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
-{
- list_add_tail(&skb->list, &napi->rx_list);
- if (++napi->rx_count >= gro_normal_batch)
- gro_normal_list(napi);
-}
-
static void napi_skb_free_stolen_head(struct sk_buff *skb)
{
skb_dst_drop(skb);
@@ -6200,8 +6238,6 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
NAPIF_STATE_IN_BUSY_POLL)))
return false;
- gro_normal_list(n);
-
if (n->gro_bitmask) {
unsigned long timeout = 0;
@@ -6217,6 +6253,9 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
hrtimer_start(&n->timer, ns_to_ktime(timeout),
HRTIMER_MODE_REL_PINNED);
}
+
+ gro_normal_list(n);
+
if (unlikely(!list_empty(&n->poll_list))) {
/* If n->poll_list is not empty, we need to mask irqs */
local_irq_save(flags);
@@ -6548,8 +6587,6 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
goto out_unlock;
}
- gro_normal_list(n);
-
if (n->gro_bitmask) {
/* flush too old packets
* If HZ < 1000, flush all packets.
@@ -6557,6 +6594,8 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
napi_gro_flush(n, HZ >= 1000);
}
+ gro_normal_list(n);
+
/* Some drivers may have called napi_schedule
* prior to exhausting their budget.
*/
@@ -7151,8 +7190,8 @@ static int __netdev_walk_all_lower_dev(struct net_device *dev,
return 0;
}
-static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
- struct list_head **iter)
+struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
{
struct netdev_adjacent *lower;
@@ -7164,6 +7203,7 @@ static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
return lower->dev;
}
+EXPORT_SYMBOL(netdev_next_lower_dev_rcu);
static u8 __netdev_upper_depth(struct net_device *dev)
{
@@ -8194,6 +8234,22 @@ int __dev_set_mtu(struct net_device *dev, int new_mtu)
}
EXPORT_SYMBOL(__dev_set_mtu);
+int dev_validate_mtu(struct net_device *dev, int new_mtu,
+ struct netlink_ext_ack *extack)
+{
+ /* MTU must be positive, and in range */
+ if (new_mtu < 0 || new_mtu < dev->min_mtu) {
+ NL_SET_ERR_MSG(extack, "mtu less than device minimum");
+ return -EINVAL;
+ }
+
+ if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) {
+ NL_SET_ERR_MSG(extack, "mtu greater than device maximum");
+ return -EINVAL;
+ }
+ return 0;
+}
+
/**
* dev_set_mtu_ext - Change maximum transfer unit
* @dev: device
@@ -8210,16 +8266,9 @@ int dev_set_mtu_ext(struct net_device *dev, int new_mtu,
if (new_mtu == dev->mtu)
return 0;
- /* MTU must be positive, and in range */
- if (new_mtu < 0 || new_mtu < dev->min_mtu) {
- NL_SET_ERR_MSG(extack, "mtu less than device minimum");
- return -EINVAL;
- }
-
- if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) {
- NL_SET_ERR_MSG(extack, "mtu greater than device maximum");
- return -EINVAL;
- }
+ err = dev_validate_mtu(dev, new_mtu, extack);
+ if (err)
+ return err;
if (!netif_device_present(dev))
return -ENODEV;
@@ -8542,7 +8591,17 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
struct netlink_ext_ack *extack, u32 flags,
struct bpf_prog *prog)
{
+ bool non_hw = !(flags & XDP_FLAGS_HW_MODE);
+ struct bpf_prog *prev_prog = NULL;
struct netdev_bpf xdp;
+ int err;
+
+ if (non_hw) {
+ prev_prog = bpf_prog_by_id(__dev_xdp_query(dev, bpf_op,
+ XDP_QUERY_PROG));
+ if (IS_ERR(prev_prog))
+ prev_prog = NULL;
+ }
memset(&xdp, 0, sizeof(xdp));
if (flags & XDP_FLAGS_HW_MODE)
@@ -8553,7 +8612,14 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
xdp.flags = flags;
xdp.prog = prog;
- return bpf_op(dev, &xdp);
+ err = bpf_op(dev, &xdp);
+ if (!err && non_hw)
+ bpf_prog_change_xdp(prev_prog, prog);
+
+ if (prev_prog)
+ bpf_prog_put(prev_prog);
+
+ return err;
}
static void dev_xdp_uninstall(struct net_device *dev)
@@ -9257,7 +9323,7 @@ int register_netdevice(struct net_device *dev)
/* Transfer changeable features to wanted_features and enable
* software offloads (GSO and GRO).
*/
- dev->hw_features |= NETIF_F_SOFT_FEATURES;
+ dev->hw_features |= (NETIF_F_SOFT_FEATURES | NETIF_F_SOFT_FEATURES_OFF);
dev->features |= NETIF_F_SOFT_FEATURES;
if (dev->netdev_ops->ndo_udp_tunnel_add) {
@@ -9302,8 +9368,10 @@ int register_netdevice(struct net_device *dev)
goto err_uninit;
ret = netdev_register_kobject(dev);
- if (ret)
+ if (ret) {
+ dev->reg_state = NETREG_UNREGISTERED;
goto err_uninit;
+ }
dev->reg_state = NETREG_REGISTERED;
__netdev_update_features(dev);
@@ -9750,6 +9818,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
INIT_LIST_HEAD(&dev->adj_list.lower);
INIT_LIST_HEAD(&dev->ptype_all);
INIT_LIST_HEAD(&dev->ptype_specific);
+ INIT_LIST_HEAD(&dev->net_notifier_list);
#ifdef CONFIG_NET_SCHED
hash_init(dev->qdisc_hash);
#endif
@@ -9820,6 +9889,8 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->pcpu_refcnt);
dev->pcpu_refcnt = NULL;
+ free_percpu(dev->xdp_bulkq);
+ dev->xdp_bulkq = NULL;
netdev_unregister_lockdep_key(dev);
@@ -10011,6 +10082,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
netdev_adjacent_del_links(dev);
+ /* Move per-net netdevice notifiers that are following the netdevice */
+ move_netdevice_notifiers_dev_net(dev, net);
+
/* Actually switch the network namespace */
dev_net_set(dev, net);
dev->ifindex = new_ifindex;
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 5163d900bb4f..dbaebbe573f0 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -187,6 +187,7 @@ static int net_hwtstamp_validate(struct ifreq *ifr)
case HWTSTAMP_TX_OFF:
case HWTSTAMP_TX_ON:
case HWTSTAMP_TX_ONESTEP_SYNC:
+ case HWTSTAMP_TX_ONESTEP_P2P:
tx_type_valid = 1;
break;
}
diff --git a/net/core/devlink.c b/net/core/devlink.c
index f76219bf0c21..b831c5545d6a 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2103,11 +2103,11 @@ err_action_values_put:
static struct devlink_dpipe_table *
devlink_dpipe_table_find(struct list_head *dpipe_tables,
- const char *table_name)
+ const char *table_name, struct devlink *devlink)
{
struct devlink_dpipe_table *table;
-
- list_for_each_entry_rcu(table, dpipe_tables, list) {
+ list_for_each_entry_rcu(table, dpipe_tables, list,
+ lockdep_is_held(&devlink->lock)) {
if (!strcmp(table->name, table_name))
return table;
}
@@ -2226,7 +2226,7 @@ static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table)
return -EINVAL;
@@ -2382,7 +2382,7 @@ static int devlink_dpipe_table_counters_set(struct devlink *devlink,
struct devlink_dpipe_table *table;
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table)
return -EINVAL;
@@ -3352,34 +3352,41 @@ devlink_param_value_get_from_info(const struct devlink_param *param,
struct genl_info *info,
union devlink_param_value *value)
{
+ struct nlattr *param_data;
int len;
- if (param->type != DEVLINK_PARAM_TYPE_BOOL &&
- !info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])
+ param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA];
+
+ if (param->type != DEVLINK_PARAM_TYPE_BOOL && !param_data)
return -EINVAL;
switch (param->type) {
case DEVLINK_PARAM_TYPE_U8:
- value->vu8 = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ if (nla_len(param_data) != sizeof(u8))
+ return -EINVAL;
+ value->vu8 = nla_get_u8(param_data);
break;
case DEVLINK_PARAM_TYPE_U16:
- value->vu16 = nla_get_u16(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ if (nla_len(param_data) != sizeof(u16))
+ return -EINVAL;
+ value->vu16 = nla_get_u16(param_data);
break;
case DEVLINK_PARAM_TYPE_U32:
- value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ if (nla_len(param_data) != sizeof(u32))
+ return -EINVAL;
+ value->vu32 = nla_get_u32(param_data);
break;
case DEVLINK_PARAM_TYPE_STRING:
- len = strnlen(nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]),
- nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]));
- if (len == nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) ||
+ len = strnlen(nla_data(param_data), nla_len(param_data));
+ if (len == nla_len(param_data) ||
len >= __DEVLINK_PARAM_MAX_STRING_VALUE)
return -EINVAL;
- strcpy(value->vstr,
- nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]));
+ strcpy(value->vstr, nla_data(param_data));
break;
case DEVLINK_PARAM_TYPE_BOOL:
- value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ?
- true : false;
+ if (param_data && nla_len(param_data))
+ return -EINVAL;
+ value->vbool = nla_get_flag(param_data);
break;
}
return 0;
@@ -3986,6 +3993,12 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
goto out_unlock;
}
+ /* return 0 if there is no further data to read */
+ if (start_offset >= region->size) {
+ err = 0;
+ goto out_unlock;
+ }
+
hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
&devlink_nl_family, NLM_F_ACK | NLM_F_MULTI,
DEVLINK_CMD_REGION_READ);
@@ -4843,22 +4856,100 @@ devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
}
EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
-void
-devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
- enum devlink_health_reporter_state state)
+static int
+devlink_nl_health_reporter_fill(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct devlink_health_reporter *reporter,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags)
{
- if (WARN_ON(state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY &&
- state != DEVLINK_HEALTH_REPORTER_STATE_ERROR))
+ struct nlattr *reporter_attr;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto genlmsg_cancel;
+
+ reporter_attr = nla_nest_start_noflag(msg,
+ DEVLINK_ATTR_HEALTH_REPORTER);
+ if (!reporter_attr)
+ goto genlmsg_cancel;
+ if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME,
+ reporter->ops->name))
+ goto reporter_nest_cancel;
+ if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
+ reporter->health_state))
+ goto reporter_nest_cancel;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT,
+ reporter->error_count, DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT,
+ reporter->recovery_count, DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+ if (reporter->ops->recover &&
+ nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
+ reporter->graceful_period,
+ DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+ if (reporter->ops->recover &&
+ nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,
+ reporter->auto_recover))
+ goto reporter_nest_cancel;
+ if (reporter->dump_fmsg &&
+ nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
+ jiffies_to_msecs(reporter->dump_ts),
+ DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+ if (reporter->dump_fmsg &&
+ nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
+ reporter->dump_real_ts, DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+
+ nla_nest_end(msg, reporter_attr);
+ genlmsg_end(msg, hdr);
+ return 0;
+
+reporter_nest_cancel:
+ nla_nest_end(msg, reporter_attr);
+genlmsg_cancel:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void devlink_recover_notify(struct devlink_health_reporter *reporter,
+ enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
return;
- if (reporter->health_state == state)
+ err = devlink_nl_health_reporter_fill(msg, reporter->devlink,
+ reporter, cmd, 0, 0, 0);
+ if (err) {
+ nlmsg_free(msg);
return;
+ }
- reporter->health_state = state;
- trace_devlink_health_reporter_state_update(reporter->devlink,
- reporter->ops->name, state);
+ genlmsg_multicast_netns(&devlink_nl_family,
+ devlink_net(reporter->devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
+
+void
+devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter)
+{
+ reporter->recovery_count++;
+ reporter->last_recovery_ts = jiffies;
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_recovery_done);
static int
devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
@@ -4876,9 +4967,9 @@ devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
if (err)
return err;
- reporter->recovery_count++;
+ devlink_health_reporter_recovery_done(reporter);
reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
- reporter->last_recovery_ts = jiffies;
+ devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
return 0;
}
@@ -4945,6 +5036,7 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
reporter->error_count++;
prev_health_state = reporter->health_state;
reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
+ devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
/* abort if the previous error wasn't recovered */
if (reporter->auto_recover &&
@@ -5027,68 +5119,23 @@ devlink_health_reporter_put(struct devlink_health_reporter *reporter)
refcount_dec(&reporter->refcount);
}
-static int
-devlink_nl_health_reporter_fill(struct sk_buff *msg,
- struct devlink *devlink,
- struct devlink_health_reporter *reporter,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags)
+void
+devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
+ enum devlink_health_reporter_state state)
{
- struct nlattr *reporter_attr;
- void *hdr;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto genlmsg_cancel;
-
- reporter_attr = nla_nest_start_noflag(msg,
- DEVLINK_ATTR_HEALTH_REPORTER);
- if (!reporter_attr)
- goto genlmsg_cancel;
- if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME,
- reporter->ops->name))
- goto reporter_nest_cancel;
- if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
- reporter->health_state))
- goto reporter_nest_cancel;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT,
- reporter->error_count, DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT,
- reporter->recovery_count, DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
- if (reporter->ops->recover &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
- reporter->graceful_period,
- DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
- if (reporter->ops->recover &&
- nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,
- reporter->auto_recover))
- goto reporter_nest_cancel;
- if (reporter->dump_fmsg &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
- jiffies_to_msecs(reporter->dump_ts),
- DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
- if (reporter->dump_fmsg &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
- reporter->dump_real_ts, DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
+ if (WARN_ON(state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY &&
+ state != DEVLINK_HEALTH_REPORTER_STATE_ERROR))
+ return;
- nla_nest_end(msg, reporter_attr);
- genlmsg_end(msg, hdr);
- return 0;
+ if (reporter->health_state == state)
+ return;
-reporter_nest_cancel:
- nla_nest_end(msg, reporter_attr);
-genlmsg_cancel:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
+ reporter->health_state = state;
+ trace_devlink_health_reporter_state_update(reporter->devlink,
+ reporter->ops->name, state);
+ devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
struct genl_info *info)
@@ -5911,6 +5958,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
[DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 },
[DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
@@ -6814,7 +6863,7 @@ bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
rcu_read_lock();
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
enabled = false;
if (table)
enabled = table->counters_enabled;
@@ -6838,26 +6887,34 @@ int devlink_dpipe_table_register(struct devlink *devlink,
void *priv, bool counter_control_extern)
{
struct devlink_dpipe_table *table;
-
- if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name))
- return -EEXIST;
+ int err = 0;
if (WARN_ON(!table_ops->size_get))
return -EINVAL;
+ mutex_lock(&devlink->lock);
+
+ if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name,
+ devlink)) {
+ err = -EEXIST;
+ goto unlock;
+ }
+
table = kzalloc(sizeof(*table), GFP_KERNEL);
- if (!table)
- return -ENOMEM;
+ if (!table) {
+ err = -ENOMEM;
+ goto unlock;
+ }
table->name = table_name;
table->table_ops = table_ops;
table->priv = priv;
table->counter_control_extern = counter_control_extern;
- mutex_lock(&devlink->lock);
list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
+unlock:
mutex_unlock(&devlink->lock);
- return 0;
+ return err;
}
EXPORT_SYMBOL_GPL(devlink_dpipe_table_register);
@@ -6874,7 +6931,7 @@ void devlink_dpipe_table_unregister(struct devlink *devlink,
mutex_lock(&devlink->lock);
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table)
goto unlock;
list_del_rcu(&table->list);
@@ -7031,7 +7088,7 @@ int devlink_dpipe_table_resource_set(struct devlink *devlink,
mutex_lock(&devlink->lock);
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table) {
err = -EINVAL;
goto out;
@@ -7674,6 +7731,9 @@ static const struct devlink_trap devlink_trap_generic[] = {
DEVLINK_TRAP(REJECT_ROUTE, EXCEPTION),
DEVLINK_TRAP(IPV4_LPM_UNICAST_MISS, EXCEPTION),
DEVLINK_TRAP(IPV6_LPM_UNICAST_MISS, EXCEPTION),
+ DEVLINK_TRAP(NON_ROUTABLE, DROP),
+ DEVLINK_TRAP(DECAP_ERROR, EXCEPTION),
+ DEVLINK_TRAP(OVERLAY_SMAC_MC, DROP),
};
#define DEVLINK_TRAP_GROUP(_id) \
@@ -7686,6 +7746,7 @@ static const struct devlink_trap_group devlink_trap_group_generic[] = {
DEVLINK_TRAP_GROUP(L2_DROPS),
DEVLINK_TRAP_GROUP(L3_DROPS),
DEVLINK_TRAP_GROUP(BUFFER_DROPS),
+ DEVLINK_TRAP_GROUP(TUNNEL_DROPS),
};
static int devlink_trap_generic_verify(const struct devlink_trap *trap)
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 536e032d95c8..31700e0c3928 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -802,16 +802,12 @@ net_dm_hw_metadata_clone(const struct net_dm_hw_metadata *hw_metadata)
if (!n_hw_metadata)
return NULL;
- trap_group_name = kmemdup(hw_metadata->trap_group_name,
- strlen(hw_metadata->trap_group_name) + 1,
- GFP_ATOMIC | __GFP_ZERO);
+ trap_group_name = kstrdup(hw_metadata->trap_group_name, GFP_ATOMIC);
if (!trap_group_name)
goto free_hw_metadata;
n_hw_metadata->trap_group_name = trap_group_name;
- trap_name = kmemdup(hw_metadata->trap_name,
- strlen(hw_metadata->trap_name) + 1,
- GFP_ATOMIC | __GFP_ZERO);
+ trap_name = kstrdup(hw_metadata->trap_name, GFP_ATOMIC);
if (!trap_name)
goto free_trap_group;
n_hw_metadata->trap_name = trap_name;
@@ -1004,8 +1000,10 @@ static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack)
{
int cpu;
- if (!monitor_hw)
+ if (!monitor_hw) {
NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled");
+ return;
+ }
monitor_hw = false;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 3e7e15278c46..bd7eba9066f8 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -974,7 +974,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh = nlmsg_data(nlh);
frh->family = ops->family;
- frh->table = rule->table;
+ frh->table = rule->table < 256 ? rule->table : RT_TABLE_COMPAT;
if (nla_put_u32(skb, FRA_TABLE, rule->table))
goto nla_put_failure;
if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
diff --git a/net/core/filter.c b/net/core/filter.c
index 538f6a735a19..c180871e606d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1573,7 +1573,7 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
return -EPERM;
prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
- if (IS_ERR(prog) && PTR_ERR(prog) == -EINVAL)
+ if (PTR_ERR(prog) == -EINVAL)
prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
if (IS_ERR(prog))
return PTR_ERR(prog);
@@ -3459,119 +3459,30 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
.arg2_type = ARG_ANYTHING,
};
-static int __bpf_tx_xdp(struct net_device *dev,
- struct bpf_map *map,
- struct xdp_buff *xdp,
- u32 index)
-{
- struct xdp_frame *xdpf;
- int err, sent;
-
- if (!dev->netdev_ops->ndo_xdp_xmit) {
- return -EOPNOTSUPP;
- }
-
- err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
- if (unlikely(err))
- return err;
-
- xdpf = convert_to_xdp_frame(xdp);
- if (unlikely(!xdpf))
- return -EOVERFLOW;
-
- sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf, XDP_XMIT_FLUSH);
- if (sent <= 0)
- return sent;
- return 0;
-}
-
-static noinline int
-xdp_do_redirect_slow(struct net_device *dev, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog, struct bpf_redirect_info *ri)
-{
- struct net_device *fwd;
- u32 index = ri->tgt_index;
- int err;
-
- fwd = dev_get_by_index_rcu(dev_net(dev), index);
- ri->tgt_index = 0;
- if (unlikely(!fwd)) {
- err = -EINVAL;
- goto err;
- }
-
- err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
- if (unlikely(err))
- goto err;
-
- _trace_xdp_redirect(dev, xdp_prog, index);
- return 0;
-err:
- _trace_xdp_redirect_err(dev, xdp_prog, index, err);
- return err;
-}
-
static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
- struct bpf_map *map,
- struct xdp_buff *xdp,
- u32 index)
+ struct bpf_map *map, struct xdp_buff *xdp)
{
- int err;
-
switch (map->map_type) {
case BPF_MAP_TYPE_DEVMAP:
- case BPF_MAP_TYPE_DEVMAP_HASH: {
- struct bpf_dtab_netdev *dst = fwd;
-
- err = dev_map_enqueue(dst, xdp, dev_rx);
- if (unlikely(err))
- return err;
- break;
- }
- case BPF_MAP_TYPE_CPUMAP: {
- struct bpf_cpu_map_entry *rcpu = fwd;
-
- err = cpu_map_enqueue(rcpu, xdp, dev_rx);
- if (unlikely(err))
- return err;
- break;
- }
- case BPF_MAP_TYPE_XSKMAP: {
- struct xdp_sock *xs = fwd;
-
- err = __xsk_map_redirect(map, xdp, xs);
- return err;
- }
+ case BPF_MAP_TYPE_DEVMAP_HASH:
+ return dev_map_enqueue(fwd, xdp, dev_rx);
+ case BPF_MAP_TYPE_CPUMAP:
+ return cpu_map_enqueue(fwd, xdp, dev_rx);
+ case BPF_MAP_TYPE_XSKMAP:
+ return __xsk_map_redirect(fwd, xdp);
default:
- break;
+ return -EBADRQC;
}
return 0;
}
-void xdp_do_flush_map(void)
+void xdp_do_flush(void)
{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
- struct bpf_map *map = ri->map_to_flush;
-
- ri->map_to_flush = NULL;
- if (map) {
- switch (map->map_type) {
- case BPF_MAP_TYPE_DEVMAP:
- case BPF_MAP_TYPE_DEVMAP_HASH:
- __dev_map_flush(map);
- break;
- case BPF_MAP_TYPE_CPUMAP:
- __cpu_map_flush(map);
- break;
- case BPF_MAP_TYPE_XSKMAP:
- __xsk_map_flush(map);
- break;
- default:
- break;
- }
- }
+ __dev_flush();
+ __cpu_map_flush();
+ __xsk_map_flush();
}
-EXPORT_SYMBOL_GPL(xdp_do_flush_map);
+EXPORT_SYMBOL_GPL(xdp_do_flush);
static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
{
@@ -3606,10 +3517,11 @@ void bpf_clear_redirect_map(struct bpf_map *map)
}
}
-static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog, struct bpf_map *map,
- struct bpf_redirect_info *ri)
+int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ struct bpf_map *map = READ_ONCE(ri->map);
u32 index = ri->tgt_index;
void *fwd = ri->tgt_value;
int err;
@@ -3618,32 +3530,27 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
ri->tgt_value = NULL;
WRITE_ONCE(ri->map, NULL);
- if (ri->map_to_flush && unlikely(ri->map_to_flush != map))
- xdp_do_flush_map();
+ if (unlikely(!map)) {
+ fwd = dev_get_by_index_rcu(dev_net(dev), index);
+ if (unlikely(!fwd)) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ err = dev_xdp_enqueue(fwd, xdp, dev);
+ } else {
+ err = __bpf_tx_xdp_map(dev, fwd, map, xdp);
+ }
- err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
if (unlikely(err))
goto err;
- ri->map_to_flush = map;
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
return 0;
err:
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
return err;
}
-
-int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
-{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
- struct bpf_map *map = READ_ONCE(ri->map);
-
- if (likely(map))
- return xdp_do_redirect_map(dev, xdp, xdp_prog, map, ri);
-
- return xdp_do_redirect_slow(dev, xdp, xdp_prog, ri);
-}
EXPORT_SYMBOL_GPL(xdp_do_redirect);
static int xdp_do_generic_redirect_map(struct net_device *dev,
@@ -5976,7 +5883,7 @@ bool bpf_helper_changes_pkt_data(void *func)
return false;
}
-static const struct bpf_func_proto *
+const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
@@ -6016,6 +5923,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_spin_unlock_proto;
case BPF_FUNC_trace_printk:
return bpf_get_trace_printk_proto();
+ case BPF_FUNC_jiffies64:
+ return &bpf_jiffies64_proto;
default:
return NULL;
}
@@ -7648,21 +7557,21 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock, type):
- BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2);
- *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sock, __sk_flags_offset));
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
- *target_size = 2;
+ *insn++ = BPF_LDX_MEM(
+ BPF_FIELD_SIZEOF(struct sock, sk_type),
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock, sk_type,
+ sizeof_field(struct sock, sk_type),
+ target_size));
break;
case offsetof(struct bpf_sock, protocol):
- BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
- *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sock, __sk_flags_offset));
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
- *target_size = 1;
+ *insn++ = BPF_LDX_MEM(
+ BPF_FIELD_SIZEOF(struct sock, sk_protocol),
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock, sk_protocol,
+ sizeof_field(struct sock, sk_protocol),
+ target_size));
break;
case offsetof(struct bpf_sock, src_ip4):
@@ -7944,20 +7853,13 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_addr, type):
- SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
- struct bpf_sock_addr_kern, struct sock, sk,
- __sk_flags_offset, BPF_W, 0);
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sock, sk, sk_type);
break;
case offsetof(struct bpf_sock_addr, protocol):
- SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
- struct bpf_sock_addr_kern, struct sock, sk,
- __sk_flags_offset, BPF_W, 0);
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
- SK_FL_PROTO_SHIFT);
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sock, sk, sk_protocol);
break;
case offsetof(struct bpf_sock_addr, msg_src_ip4):
@@ -8876,11 +8778,11 @@ sk_reuseport_is_valid_access(int off, int size,
skb, \
SKB_FIELD)
-#define SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(SK_FIELD, BPF_SIZE, EXTRA_OFF) \
- SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(struct sk_reuseport_kern, \
- struct sock, \
- sk, \
- SK_FIELD, BPF_SIZE, EXTRA_OFF)
+#define SK_REUSEPORT_LOAD_SK_FIELD(SK_FIELD) \
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \
+ struct sock, \
+ sk, \
+ SK_FIELD)
static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
@@ -8904,16 +8806,7 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct sk_reuseport_md, ip_protocol):
- BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
- SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset,
- BPF_W, 0);
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
- SK_FL_PROTO_SHIFT);
- /* SK_FL_PROTO_MASK and SK_FL_PROTO_SHIFT are endian
- * aware. No further narrowing or masking is needed.
- */
- *target_size = 1;
+ SK_REUSEPORT_LOAD_SK_FIELD(sk_protocol);
break;
case offsetof(struct sk_reuseport_md, data_end):
@@ -8941,3 +8834,11 @@ const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
const struct bpf_prog_ops sk_reuseport_prog_ops = {
};
#endif /* CONFIG_INET */
+
+DEFINE_BPF_DISPATCHER(bpf_dispatcher_xdp)
+
+void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
+{
+ bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(bpf_dispatcher_xdp),
+ prev_prog, prog);
+}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 2dbbb030fbed..a1670dff0629 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -233,7 +233,7 @@ static bool icmp_has_id(u8 type)
* @skb: sk_buff to extract from
* @key_icmp: struct flow_dissector_key_icmp to fill
* @data: raw buffer pointer to the packet
- * @toff: offset to extract at
+ * @thoff: offset to extract at
* @hlen: packet header length
*/
void skb_flow_get_icmp_tci(const struct sk_buff *skb,
@@ -834,10 +834,10 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
struct flow_dissector *flow_dissector,
void *target_container)
{
+ struct flow_dissector_key_ports *key_ports = NULL;
struct flow_dissector_key_control *key_control;
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
- struct flow_dissector_key_ports *key_ports;
struct flow_dissector_key_tags *key_tags;
key_control = skb_flow_dissector_target(flow_dissector,
@@ -876,10 +876,17 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
}
- if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS))
key_ports = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_PORTS,
target_container);
+ else if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS_RANGE))
+ key_ports = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS_RANGE,
+ target_container);
+
+ if (key_ports) {
key_ports->src = flow_keys->sport;
key_ports->dst = flow_keys->dport;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 920784a9b7ff..789a73aa7bd8 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3290,6 +3290,7 @@ static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
*pos = cpu+1;
return per_cpu_ptr(tbl->stats, cpu);
}
+ (*pos)++;
return NULL;
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 39402840025e..757cc1d084e7 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -211,16 +211,10 @@ static int net_eq_idr(int id, void *net, void *peer)
return 0;
}
-/* Should be called with nsid_lock held. If a new id is assigned, the bool alloc
- * is set to true, thus the caller knows that the new id must be notified via
- * rtnl.
- */
-static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc)
+/* Must be called from RCU-critical section or with nsid_lock held */
+static int __peernet2id(const struct net *net, struct net *peer)
{
int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
- bool alloc_it = *alloc;
-
- *alloc = false;
/* Magic value for id 0. */
if (id == NET_ID_ZERO)
@@ -228,23 +222,9 @@ static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc)
if (id > 0)
return id;
- if (alloc_it) {
- id = alloc_netid(net, peer, -1);
- *alloc = true;
- return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
- }
-
return NETNSA_NSID_NOT_ASSIGNED;
}
-/* should be called with nsid_lock held */
-static int __peernet2id(struct net *net, struct net *peer)
-{
- bool no = false;
-
- return __peernet2id_alloc(net, peer, &no);
-}
-
static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
struct nlmsghdr *nlh, gfp_t gfp);
/* This function returns the id of a peer netns. If no id is assigned, one will
@@ -252,38 +232,50 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
*/
int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
{
- bool alloc = false, alive = false;
int id;
if (refcount_read(&net->count) == 0)
return NETNSA_NSID_NOT_ASSIGNED;
- spin_lock_bh(&net->nsid_lock);
- /*
- * When peer is obtained from RCU lists, we may race with
+
+ spin_lock(&net->nsid_lock);
+ id = __peernet2id(net, peer);
+ if (id >= 0) {
+ spin_unlock(&net->nsid_lock);
+ return id;
+ }
+
+ /* When peer is obtained from RCU lists, we may race with
* its cleanup. Check whether it's alive, and this guarantees
* we never hash a peer back to net->netns_ids, after it has
* just been idr_remove()'d from there in cleanup_net().
*/
- if (maybe_get_net(peer))
- alive = alloc = true;
- id = __peernet2id_alloc(net, peer, &alloc);
- spin_unlock_bh(&net->nsid_lock);
- if (alloc && id >= 0)
- rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp);
- if (alive)
- put_net(peer);
+ if (!maybe_get_net(peer)) {
+ spin_unlock(&net->nsid_lock);
+ return NETNSA_NSID_NOT_ASSIGNED;
+ }
+
+ id = alloc_netid(net, peer, -1);
+ spin_unlock(&net->nsid_lock);
+
+ put_net(peer);
+ if (id < 0)
+ return NETNSA_NSID_NOT_ASSIGNED;
+
+ rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp);
+
return id;
}
EXPORT_SYMBOL_GPL(peernet2id_alloc);
/* This function returns, if assigned, the id of a peer netns. */
-int peernet2id(struct net *net, struct net *peer)
+int peernet2id(const struct net *net, struct net *peer)
{
int id;
- spin_lock_bh(&net->nsid_lock);
+ rcu_read_lock();
id = __peernet2id(net, peer);
- spin_unlock_bh(&net->nsid_lock);
+ rcu_read_unlock();
+
return id;
}
EXPORT_SYMBOL(peernet2id);
@@ -291,12 +283,12 @@ EXPORT_SYMBOL(peernet2id);
/* This function returns true is the peer netns has an id assigned into the
* current netns.
*/
-bool peernet_has_id(struct net *net, struct net *peer)
+bool peernet_has_id(const struct net *net, struct net *peer)
{
return peernet2id(net, peer) >= 0;
}
-struct net *get_net_ns_by_id(struct net *net, int id)
+struct net *get_net_ns_by_id(const struct net *net, int id)
{
struct net *peer;
@@ -528,20 +520,20 @@ static void unhash_nsid(struct net *net, struct net *last)
for_each_net(tmp) {
int id;
- spin_lock_bh(&tmp->nsid_lock);
+ spin_lock(&tmp->nsid_lock);
id = __peernet2id(tmp, net);
if (id >= 0)
idr_remove(&tmp->netns_ids, id);
- spin_unlock_bh(&tmp->nsid_lock);
+ spin_unlock(&tmp->nsid_lock);
if (id >= 0)
rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
GFP_KERNEL);
if (tmp == last)
break;
}
- spin_lock_bh(&net->nsid_lock);
+ spin_lock(&net->nsid_lock);
idr_destroy(&net->netns_ids);
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock(&net->nsid_lock);
}
static LLIST_HEAD(cleanup_list);
@@ -754,9 +746,9 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
return PTR_ERR(peer);
}
- spin_lock_bh(&net->nsid_lock);
+ spin_lock(&net->nsid_lock);
if (__peernet2id(net, peer) >= 0) {
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock(&net->nsid_lock);
err = -EEXIST;
NL_SET_BAD_ATTR(extack, nla);
NL_SET_ERR_MSG(extack,
@@ -765,7 +757,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
}
err = alloc_netid(net, peer, nsid);
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock(&net->nsid_lock);
if (err >= 0) {
rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid,
nlh, GFP_KERNEL);
@@ -950,6 +942,7 @@ struct rtnl_net_dump_cb {
int s_idx;
};
+/* Runs in RCU-critical section. */
static int rtnl_net_dumpid_one(int id, void *peer, void *data)
{
struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
@@ -1034,19 +1027,9 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
goto end;
}
- spin_lock_bh(&net_cb.tgt_net->nsid_lock);
- if (net_cb.fillargs.add_ref &&
- !net_eq(net_cb.ref_net, net_cb.tgt_net) &&
- !spin_trylock_bh(&net_cb.ref_net->nsid_lock)) {
- spin_unlock_bh(&net_cb.tgt_net->nsid_lock);
- err = -EAGAIN;
- goto end;
- }
+ rcu_read_lock();
idr_for_each(&net_cb.tgt_net->netns_ids, rtnl_net_dumpid_one, &net_cb);
- if (net_cb.fillargs.add_ref &&
- !net_eq(net_cb.ref_net, net_cb.tgt_net))
- spin_unlock_bh(&net_cb.ref_net->nsid_lock);
- spin_unlock_bh(&net_cb.tgt_net->nsid_lock);
+ rcu_read_unlock();
cb->args[0] = net_cb.idx;
end:
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 0642f91c4038..b4c87fe31be2 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -53,30 +53,60 @@ static void cgrp_css_free(struct cgroup_subsys_state *css)
kfree(css_cls_state(css));
}
+/*
+ * To avoid freezing socket creation for tasks with a large number of threads
+ * and open sockets, let's release file_lock every 1000 iterated descriptors.
+ * New sockets will already have been created with the new classid.
+ */
+
+struct update_classid_context {
+ u32 classid;
+ unsigned int batch;
+};
+
+#define UPDATE_CLASSID_BATCH 1000
+
static int update_classid_sock(const void *v, struct file *file, unsigned n)
{
int err;
+ struct update_classid_context *ctx = (void *)v;
struct socket *sock = sock_from_file(file, &err);
if (sock) {
spin_lock(&cgroup_sk_update_lock);
- sock_cgroup_set_classid(&sock->sk->sk_cgrp_data,
- (unsigned long)v);
+ sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid);
spin_unlock(&cgroup_sk_update_lock);
}
+ if (--ctx->batch == 0) {
+ ctx->batch = UPDATE_CLASSID_BATCH;
+ return n + 1;
+ }
return 0;
}
+static void update_classid_task(struct task_struct *p, u32 classid)
+{
+ struct update_classid_context ctx = {
+ .classid = classid,
+ .batch = UPDATE_CLASSID_BATCH
+ };
+ unsigned int fd = 0;
+
+ do {
+ task_lock(p);
+ fd = iterate_fd(p->files, fd, update_classid_sock, &ctx);
+ task_unlock(p);
+ cond_resched();
+ } while (fd);
+}
+
static void cgrp_attach(struct cgroup_taskset *tset)
{
struct cgroup_subsys_state *css;
struct task_struct *p;
cgroup_taskset_for_each(p, css, tset) {
- task_lock(p);
- iterate_fd(p->files, 0, update_classid_sock,
- (void *)(unsigned long)css_cls_state(css)->classid);
- task_unlock(p);
+ update_classid_task(p, css_cls_state(css)->classid);
}
}
@@ -98,10 +128,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
css_task_iter_start(css, 0, &it);
while ((p = css_task_iter_next(&it))) {
- task_lock(p);
- iterate_fd(p->files, 0, update_classid_sock,
- (void *)(unsigned long)cs->classid);
- task_unlock(p);
+ update_classid_task(p, cs->classid);
cond_resched();
}
css_task_iter_end(&it);
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index a6aefe989043..10d2b255df5e 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -96,40 +96,76 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
}
EXPORT_SYMBOL(page_pool_create);
-/* fast path */
-static struct page *__page_pool_get_cached(struct page_pool *pool)
+static void __page_pool_return_page(struct page_pool *pool, struct page *page);
+
+noinline
+static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
{
struct ptr_ring *r = &pool->ring;
- bool refill = false;
struct page *page;
-
- /* Test for safe-context, caller should provide this guarantee */
- if (likely(in_serving_softirq())) {
- if (likely(pool->alloc.count)) {
- /* Fast-path */
- page = pool->alloc.cache[--pool->alloc.count];
- return page;
- }
- refill = true;
- }
+ int pref_nid; /* preferred NUMA node */
/* Quicker fallback, avoid locks when ring is empty */
if (__ptr_ring_empty(r))
return NULL;
- /* Slow-path: Get page from locked ring queue,
- * refill alloc array if requested.
+ /* Softirq guarantees CPU and thus NUMA node is stable. This
+ * assumes the CPU refilling the driver RX-ring will also run RX-NAPI.
*/
+#ifdef CONFIG_NUMA
+ pref_nid = (pool->p.nid == NUMA_NO_NODE) ? numa_mem_id() : pool->p.nid;
+#else
+ /* Ignore pool->p.nid setting if !CONFIG_NUMA, helps compiler */
+ pref_nid = numa_mem_id(); /* will be zero like page_to_nid() */
+#endif
+
+ /* Slower-path: Get pages from locked ring queue */
spin_lock(&r->consumer_lock);
- page = __ptr_ring_consume(r);
- if (refill)
- pool->alloc.count = __ptr_ring_consume_batched(r,
- pool->alloc.cache,
- PP_ALLOC_CACHE_REFILL);
+
+ /* Refill alloc array, but only if NUMA match */
+ do {
+ page = __ptr_ring_consume(r);
+ if (unlikely(!page))
+ break;
+
+ if (likely(page_to_nid(page) == pref_nid)) {
+ pool->alloc.cache[pool->alloc.count++] = page;
+ } else {
+ /* NUMA mismatch;
+ * (1) release 1 page to page-allocator and
+ * (2) break out to fallthrough to alloc_pages_node.
+ * This limits stress on the page buddy allocator.
+ */
+ __page_pool_return_page(pool, page);
+ page = NULL;
+ break;
+ }
+ } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);
+
+ /* Return last page */
+ if (likely(pool->alloc.count > 0))
+ page = pool->alloc.cache[--pool->alloc.count];
+
spin_unlock(&r->consumer_lock);
return page;
}
+/* fast path */
+static struct page *__page_pool_get_cached(struct page_pool *pool)
+{
+ struct page *page;
+
+ /* Caller MUST guarantee safe non-concurrent access, e.g. softirq */
+ if (likely(pool->alloc.count)) {
+ /* Fast-path */
+ page = pool->alloc.cache[--pool->alloc.count];
+ } else {
+ page = page_pool_refill_alloc_cache(pool);
+ }
+
+ return page;
+}
+
static void page_pool_dma_sync_for_device(struct page_pool *pool,
struct page *page,
unsigned int dma_sync_size)
@@ -163,7 +199,11 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
*/
/* Cache was empty, do real allocation */
+#ifdef CONFIG_NUMA
page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
+#else
+ page = alloc_pages(gfp, pool->p.order);
+#endif
if (!page)
return NULL;
@@ -311,13 +351,10 @@ static bool __page_pool_recycle_direct(struct page *page,
/* page is NOT reusable when:
* 1) allocated when system is under some pressure. (page_is_pfmemalloc)
- * 2) belongs to a different NUMA node than pool->p.nid.
- *
- * To update pool->p.nid users must call page_pool_update_nid.
*/
static bool pool_page_reusable(struct page_pool *pool, struct page *page)
{
- return !page_is_pfmemalloc(page) && page_to_nid(page) == pool->p.nid;
+ return !page_is_pfmemalloc(page);
}
void __page_pool_put_page(struct page_pool *pool, struct page *page,
@@ -484,7 +521,15 @@ EXPORT_SYMBOL(page_pool_destroy);
/* Caller must provide appropriate safe context, e.g. NAPI. */
void page_pool_update_nid(struct page_pool *pool, int new_nid)
{
+ struct page *page;
+
trace_page_pool_update_nid(pool, new_nid);
pool->p.nid = new_nid;
+
+ /* Flush pool alloc cache, as refill will check NUMA node */
+ while (pool->alloc.count) {
+ page = pool->alloc.cache[--pool->alloc.count];
+ __page_pool_return_page(pool, page);
+ }
}
EXPORT_SYMBOL(page_pool_update_nid);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 294bfcf0ce0e..acc849df60b5 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -535,12 +535,12 @@ static int pgctrl_open(struct inode *inode, struct file *file)
return single_open(file, pgctrl_show, PDE_DATA(inode));
}
-static const struct file_operations pktgen_fops = {
- .open = pgctrl_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = pgctrl_write,
- .release = single_release,
+static const struct proc_ops pktgen_proc_ops = {
+ .proc_open = pgctrl_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = pgctrl_write,
+ .proc_release = single_release,
};
static int pktgen_if_show(struct seq_file *seq, void *v)
@@ -1707,12 +1707,12 @@ static int pktgen_if_open(struct inode *inode, struct file *file)
return single_open(file, pktgen_if_show, PDE_DATA(inode));
}
-static const struct file_operations pktgen_if_fops = {
- .open = pktgen_if_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = pktgen_if_write,
- .release = single_release,
+static const struct proc_ops pktgen_if_proc_ops = {
+ .proc_open = pktgen_if_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = pktgen_if_write,
+ .proc_release = single_release,
};
static int pktgen_thread_show(struct seq_file *seq, void *v)
@@ -1844,12 +1844,12 @@ static int pktgen_thread_open(struct inode *inode, struct file *file)
return single_open(file, pktgen_thread_show, PDE_DATA(inode));
}
-static const struct file_operations pktgen_thread_fops = {
- .open = pktgen_thread_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = pktgen_thread_write,
- .release = single_release,
+static const struct proc_ops pktgen_thread_proc_ops = {
+ .proc_open = pktgen_thread_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = pktgen_thread_write,
+ .proc_release = single_release,
};
/* Think find or remove for NN */
@@ -1926,7 +1926,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
pkt_dev->entry = proc_create_data(dev->name, 0600,
pn->proc_dir,
- &pktgen_if_fops,
+ &pktgen_if_proc_ops,
pkt_dev);
if (!pkt_dev->entry)
pr_err("can't move proc entry for '%s'\n",
@@ -3638,7 +3638,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
pkt_dev->clone_skb = pg_clone_skb_d;
pkt_dev->entry = proc_create_data(ifname, 0600, t->net->proc_dir,
- &pktgen_if_fops, pkt_dev);
+ &pktgen_if_proc_ops, pkt_dev);
if (!pkt_dev->entry) {
pr_err("cannot create %s/%s procfs entry\n",
PG_PROC_DIR, ifname);
@@ -3708,7 +3708,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn)
t->tsk = p;
pe = proc_create_data(t->tsk->comm, 0600, pn->proc_dir,
- &pktgen_thread_fops, t);
+ &pktgen_thread_proc_ops, t);
if (!pe) {
pr_err("cannot create %s/%s procfs entry\n",
PG_PROC_DIR, t->tsk->comm);
@@ -3793,7 +3793,7 @@ static int __net_init pg_net_init(struct net *net)
pr_warn("cannot create /proc/net/%s\n", PG_PROC_DIR);
return -ENODEV;
}
- pe = proc_create(PGCTRL, 0600, pn->proc_dir, &pktgen_fops);
+ pe = proc_create(PGCTRL, 0600, pn->proc_dir, &pktgen_proc_ops);
if (pe == NULL) {
pr_err("cannot create %s procfs entry\n", PGCTRL);
ret = -EINVAL;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 02916f43bf63..e1152f4ffe33 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1041,6 +1041,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_MIN_MTU */
+ nla_total_size(4) /* IFLA_MAX_MTU */
+ rtnl_prop_list_size(dev)
+ + nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */
+ 0;
}
@@ -1241,6 +1242,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
return 0;
memset(&vf_vlan_info, 0, sizeof(vf_vlan_info));
+ memset(&node_guid, 0, sizeof(node_guid));
+ memset(&port_guid, 0, sizeof(port_guid));
vf_mac.vf =
vf_vlan.vf =
@@ -1289,8 +1292,6 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
sizeof(vf_trust), &vf_trust))
goto nla_put_vf_failure;
- memset(&node_guid, 0, sizeof(node_guid));
- memset(&port_guid, 0, sizeof(port_guid));
if (dev->netdev_ops->ndo_get_vf_guid &&
!dev->netdev_ops->ndo_get_vf_guid(dev, vfs_num, &node_guid,
&port_guid)) {
@@ -1757,6 +1758,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
nla_put_s32(skb, IFLA_NEW_IFINDEX, new_ifindex) < 0)
goto nla_put_failure;
+ if (memchr_inv(dev->perm_addr, '\0', dev->addr_len) &&
+ nla_put(skb, IFLA_PERM_ADDRESS, dev->addr_len, dev->perm_addr))
+ goto nla_put_failure;
rcu_read_lock();
if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
@@ -1822,6 +1826,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_PROP_LIST] = { .type = NLA_NESTED },
[IFLA_ALT_IFNAME] = { .type = NLA_STRING,
.len = ALTIFNAMSIZ - 1 },
+ [IFLA_PERM_ADDRESS] = { .type = NLA_REJECT },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -3048,8 +3053,17 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
dev->rtnl_link_ops = ops;
dev->rtnl_link_state = RTNL_LINK_INITIALIZING;
- if (tb[IFLA_MTU])
- dev->mtu = nla_get_u32(tb[IFLA_MTU]);
+ if (tb[IFLA_MTU]) {
+ u32 mtu = nla_get_u32(tb[IFLA_MTU]);
+ int err;
+
+ err = dev_validate_mtu(dev, mtu, extack);
+ if (err) {
+ free_netdev(dev);
+ return ERR_PTR(err);
+ }
+ dev->mtu = mtu;
+ }
if (tb[IFLA_ADDRESS]) {
memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]),
nla_len(tb[IFLA_ADDRESS]));
@@ -3490,27 +3504,25 @@ static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr,
if (err)
return err;
- alt_ifname = nla_data(attr);
+ alt_ifname = nla_strdup(attr, GFP_KERNEL);
+ if (!alt_ifname)
+ return -ENOMEM;
+
if (cmd == RTM_NEWLINKPROP) {
- alt_ifname = kstrdup(alt_ifname, GFP_KERNEL);
- if (!alt_ifname)
- return -ENOMEM;
err = netdev_name_node_alt_create(dev, alt_ifname);
- if (err) {
- kfree(alt_ifname);
- return err;
- }
+ if (!err)
+ alt_ifname = NULL;
} else if (cmd == RTM_DELLINKPROP) {
err = netdev_name_node_alt_destroy(dev, alt_ifname);
- if (err)
- return err;
} else {
- WARN_ON(1);
- return 0;
+ WARN_ON_ONCE(1);
+ err = -EINVAL;
}
- *changed = true;
- return 0;
+ kfree(alt_ifname);
+ if (!err)
+ *changed = true;
+ return err;
}
static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 973a71f4bc89..e1101a4f90a6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -68,6 +68,7 @@
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
#include <net/mpls.h>
+#include <net/mptcp.h>
#include <linux/uaccess.h>
#include <trace/events/skb.h>
@@ -466,7 +467,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
return NULL;
}
- /* use OR instead of assignment to avoid clearing of bits in mask */
if (pfmemalloc)
skb->pfmemalloc = 1;
skb->head_frag = 1;
@@ -526,7 +526,6 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
return NULL;
}
- /* use OR instead of assignment to avoid clearing of bits in mask */
if (nc->page.pfmemalloc)
skb->pfmemalloc = 1;
skb->head_frag = 1;
@@ -3638,6 +3637,97 @@ static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
return head_frag;
}
+struct sk_buff *skb_segment_list(struct sk_buff *skb,
+ netdev_features_t features,
+ unsigned int offset)
+{
+ struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
+ unsigned int tnl_hlen = skb_tnl_header_len(skb);
+ unsigned int delta_truesize = 0;
+ unsigned int delta_len = 0;
+ struct sk_buff *tail = NULL;
+ struct sk_buff *nskb;
+
+ skb_push(skb, -skb_network_offset(skb) + offset);
+
+ skb_shinfo(skb)->frag_list = NULL;
+
+ do {
+ nskb = list_skb;
+ list_skb = list_skb->next;
+
+ if (!tail)
+ skb->next = nskb;
+ else
+ tail->next = nskb;
+
+ tail = nskb;
+
+ delta_len += nskb->len;
+ delta_truesize += nskb->truesize;
+
+ skb_push(nskb, -skb_network_offset(nskb) + offset);
+
+ __copy_skb_header(nskb, skb);
+
+ skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
+ skb_copy_from_linear_data_offset(skb, -tnl_hlen,
+ nskb->data - tnl_hlen,
+ offset + tnl_hlen);
+
+ if (skb_needs_linearize(nskb, features) &&
+ __skb_linearize(nskb))
+ goto err_linearize;
+
+ } while (list_skb);
+
+ skb->truesize = skb->truesize - delta_truesize;
+ skb->data_len = skb->data_len - delta_len;
+ skb->len = skb->len - delta_len;
+
+ skb_gso_reset(skb);
+
+ skb->prev = tail;
+
+ if (skb_needs_linearize(skb, features) &&
+ __skb_linearize(skb))
+ goto err_linearize;
+
+ skb_get(skb);
+
+ return skb;
+
+err_linearize:
+ kfree_skb_list(skb->next);
+ skb->next = NULL;
+ return ERR_PTR(-ENOMEM);
+}
+EXPORT_SYMBOL_GPL(skb_segment_list);
+
+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
+{
+ if (unlikely(p->len + skb->len >= 65536))
+ return -E2BIG;
+
+ if (NAPI_GRO_CB(p)->last == p)
+ skb_shinfo(p)->frag_list = skb;
+ else
+ NAPI_GRO_CB(p)->last->next = skb;
+
+ skb_pull(skb, skb_gro_offset(skb));
+
+ NAPI_GRO_CB(p)->last = skb;
+ NAPI_GRO_CB(p)->count++;
+ p->data_len += skb->len;
+ p->truesize += skb->truesize;
+ p->len += skb->len;
+
+ NAPI_GRO_CB(skb)->same_flow = 1;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(skb_gro_receive_list);
+
/**
* skb_segment - Perform protocol segmentation on skb.
* @head_skb: buffer to segment
@@ -4109,6 +4199,9 @@ static const u8 skb_ext_type_len[] = {
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
[TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
#endif
+#if IS_ENABLED(CONFIG_MPTCP)
+ [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext),
+#endif
};
static __always_inline unsigned int skb_ext_total_length(void)
@@ -4123,6 +4216,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
skb_ext_type_len[TC_SKB_EXT] +
#endif
+#if IS_ENABLED(CONFIG_MPTCP)
+ skb_ext_type_len[SKB_EXT_MPTCP] +
+#endif
0;
}
@@ -4707,9 +4803,9 @@ static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb,
typeof(IPPROTO_IP) proto,
unsigned int off)
{
- switch (proto) {
- int err;
+ int err;
+ switch (proto) {
case IPPROTO_TCP:
err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr),
off + MAX_TCP_HDR_LEN);
@@ -5472,12 +5568,15 @@ static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr,
}
/**
- * skb_mpls_push() - push a new MPLS header after the mac header
+ * skb_mpls_push() - push a new MPLS header after mac_len bytes from start of
+ * the packet
*
* @skb: buffer
* @mpls_lse: MPLS label stack entry to push
* @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848)
* @mac_len: length of the MAC header
+ * @ethernet: flag to indicate if the resulting packet after skb_mpls_push is
+ * ethernet
*
* Expects skb->data at mac header.
*
@@ -5501,7 +5600,7 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
return err;
if (!skb->inner_protocol) {
- skb_set_inner_network_header(skb, mac_len);
+ skb_set_inner_network_header(skb, skb_network_offset(skb));
skb_set_inner_protocol(skb, skb->protocol);
}
@@ -5510,6 +5609,7 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
mac_len);
skb_reset_mac_header(skb);
skb_set_network_header(skb, mac_len);
+ skb_reset_mac_len(skb);
lse = mpls_hdr(skb);
lse->label_stack_entry = mpls_lse;
@@ -5529,7 +5629,7 @@ EXPORT_SYMBOL_GPL(skb_mpls_push);
* @skb: buffer
* @next_proto: ethertype of header after popped MPLS header
* @mac_len: length of the MAC header
- * @ethernet: flag to indicate if ethernet header is present in packet
+ * @ethernet: flag to indicate if the packet is ethernet
*
* Expects skb->data at mac header.
*
@@ -5976,7 +6076,14 @@ static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id)
return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE);
}
-static struct skb_ext *skb_ext_alloc(void)
+/**
+ * __skb_ext_alloc - allocate a new skb extensions storage
+ *
+ * Returns the newly allocated pointer. The pointer can later be attached to a
+ * skb via __skb_ext_set().
+ * Note: caller must handle the skb_ext as opaque data.
+ */
+struct skb_ext *__skb_ext_alloc(void)
{
struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
@@ -6017,6 +6124,30 @@ static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old,
}
/**
+ * __skb_ext_set - attach the specified extension storage to this skb
+ * @skb: buffer
+ * @id: extension id
+ * @ext: extension storage previously allocated via __skb_ext_alloc()
+ *
+ * Existing extensions, if any, are cleared.
+ *
+ * Returns the pointer to the extension.
+ */
+void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id,
+ struct skb_ext *ext)
+{
+ unsigned int newlen, newoff = SKB_EXT_CHUNKSIZEOF(*ext);
+
+ skb_ext_put(skb);
+ newlen = newoff + skb_ext_type_len[id];
+ ext->chunks = newlen;
+ ext->offset[id] = newoff;
+ skb->extensions = ext;
+ skb->active_extensions = 1 << id;
+ return skb_ext_get_ptr(ext, id);
+}
+
+/**
* skb_ext_add - allocate space for given extension, COW if needed
* @skb: buffer
* @id: extension to allocate space for
@@ -6049,7 +6180,7 @@ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
} else {
newoff = SKB_EXT_CHUNKSIZEOF(*new);
- new = skb_ext_alloc();
+ new = __skb_ext_alloc();
if (!new)
return NULL;
}
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 3866d7e20c07..ded2d5227678 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -594,8 +594,6 @@ EXPORT_SYMBOL_GPL(sk_psock_destroy);
void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
{
- sock_owned_by_me(sk);
-
sk_psock_cork_free(psock);
sk_psock_zap_ingress(psock);
diff --git a/net/core/sock.c b/net/core/sock.c
index 8459ad579f73..8f71684305c3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1830,7 +1830,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
atomic_set(&newsk->sk_zckey, 0);
sock_reset_flag(newsk, SOCK_DONE);
- mem_cgroup_sk_alloc(newsk);
+
+ /* sk->sk_memcg will be populated at accept() time */
+ newsk->sk_memcg = NULL;
+
cgroup_sk_alloc(&newsk->sk_cgrp_data);
rcu_read_lock();
@@ -2786,7 +2789,7 @@ static void sock_def_error_report(struct sock *sk)
rcu_read_unlock();
}
-static void sock_def_readable(struct sock *sk)
+void sock_def_readable(struct sock *sk)
{
struct socket_wq *wq;
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 8998e356f423..085cef5857bb 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -234,7 +234,6 @@ static void sock_map_free(struct bpf_map *map)
int i;
synchronize_rcu();
- rcu_read_lock();
raw_spin_lock_bh(&stab->lock);
for (i = 0; i < stab->map.max_entries; i++) {
struct sock **psk = &stab->sks[i];
@@ -243,13 +242,15 @@ static void sock_map_free(struct bpf_map *map)
sk = xchg(psk, NULL);
if (sk) {
lock_sock(sk);
+ rcu_read_lock();
sock_map_unref(sk, psk);
+ rcu_read_unlock();
release_sock(sk);
}
}
raw_spin_unlock_bh(&stab->lock);
- rcu_read_unlock();
+ /* wait for psock readers accessing its map link */
synchronize_rcu();
bpf_map_area_free(stab->sks);
@@ -416,14 +417,16 @@ static int sock_map_update_elem(struct bpf_map *map, void *key,
ret = -EINVAL;
goto out;
}
- if (!sock_map_sk_is_suitable(sk) ||
- sk->sk_state != TCP_ESTABLISHED) {
+ if (!sock_map_sk_is_suitable(sk)) {
ret = -EOPNOTSUPP;
goto out;
}
sock_map_sk_acquire(sk);
- ret = sock_map_update_common(map, idx, sk, flags);
+ if (sk->sk_state != TCP_ESTABLISHED)
+ ret = -EOPNOTSUPP;
+ else
+ ret = sock_map_update_common(map, idx, sk, flags);
sock_map_sk_release(sk);
out:
fput(sock->file);
@@ -739,14 +742,16 @@ static int sock_hash_update_elem(struct bpf_map *map, void *key,
ret = -EINVAL;
goto out;
}
- if (!sock_map_sk_is_suitable(sk) ||
- sk->sk_state != TCP_ESTABLISHED) {
+ if (!sock_map_sk_is_suitable(sk)) {
ret = -EOPNOTSUPP;
goto out;
}
sock_map_sk_acquire(sk);
- ret = sock_hash_update_common(map, key, sk, flags);
+ if (sk->sk_state != TCP_ESTABLISHED)
+ ret = -EOPNOTSUPP;
+ else
+ ret = sock_hash_update_common(map, key, sk, flags);
sock_map_sk_release(sk);
out:
fput(sock->file);
@@ -859,19 +864,22 @@ static void sock_hash_free(struct bpf_map *map)
int i;
synchronize_rcu();
- rcu_read_lock();
for (i = 0; i < htab->buckets_num; i++) {
bucket = sock_hash_select_bucket(htab, i);
raw_spin_lock_bh(&bucket->lock);
hlist_for_each_entry_safe(elem, node, &bucket->head, node) {
hlist_del_rcu(&elem->node);
lock_sock(elem->sk);
+ rcu_read_lock();
sock_map_unref(elem->sk, elem);
+ rcu_read_unlock();
release_sock(elem->sk);
}
raw_spin_unlock_bh(&bucket->lock);
}
- rcu_read_unlock();
+
+ /* wait for psock readers accessing its map link */
+ synchronize_rcu();
bpf_map_area_free(htab->buckets);
kfree(htab);
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index f19f179538b9..91e9f2223c39 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -107,7 +107,6 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
if (!more_reuse)
return NULL;
- more_reuse->max_socks = more_socks_size;
more_reuse->num_socks = reuse->num_socks;
more_reuse->prog = reuse->prog;
more_reuse->reuseport_id = reuse->reuseport_id;
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 7911235706a9..04840697fe79 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -13,7 +13,7 @@
static unsigned int classify(const struct sk_buff *skb)
{
if (likely(skb->dev && skb->dev->phydev &&
- skb->dev->phydev->drv))
+ skb->dev->phydev->mii_ts))
return ptp_classify_raw(skb);
else
return PTP_CLASS_NONE;
@@ -21,7 +21,7 @@ static unsigned int classify(const struct sk_buff *skb)
void skb_clone_tx_timestamp(struct sk_buff *skb)
{
- struct phy_device *phydev;
+ struct mii_timestamper *mii_ts;
struct sk_buff *clone;
unsigned int type;
@@ -32,22 +32,22 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
if (type == PTP_CLASS_NONE)
return;
- phydev = skb->dev->phydev;
- if (likely(phydev->drv->txtstamp)) {
+ mii_ts = skb->dev->phydev->mii_ts;
+ if (likely(mii_ts->txtstamp)) {
clone = skb_clone_sk(skb);
if (!clone)
return;
- phydev->drv->txtstamp(phydev, clone, type);
+ mii_ts->txtstamp(mii_ts, clone, type);
}
}
EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp);
bool skb_defer_rx_timestamp(struct sk_buff *skb)
{
- struct phy_device *phydev;
+ struct mii_timestamper *mii_ts;
unsigned int type;
- if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->drv)
+ if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->mii_ts)
return false;
if (skb_headroom(skb) < ETH_HLEN)
@@ -62,9 +62,9 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
if (type == PTP_CLASS_NONE)
return false;
- phydev = skb->dev->phydev;
- if (likely(phydev->drv->rxtstamp))
- return phydev->drv->rxtstamp(phydev, skb, type);
+ mii_ts = skb->dev->phydev->mii_ts;
+ if (likely(mii_ts->rxtstamp))
+ return mii_ts->rxtstamp(mii_ts, skb, type);
return false;
}
diff --git a/net/core/utils.c b/net/core/utils.c
index 6b6e51db9f3b..1f31a39236d5 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -438,6 +438,23 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
}
EXPORT_SYMBOL(inet_proto_csum_replace4);
+/**
+ * inet_proto_csum_replace16 - update layer 4 header checksum field
+ * @sum: Layer 4 header checksum field
+ * @skb: sk_buff for the packet
+ * @from: old IPv6 address
+ * @to: new IPv6 address
+ * @pseudohdr: True if layer 4 header checksum includes pseudoheader
+ *
+ * Update layer 4 header as per the update in IPv6 src/dst address.
+ *
+ * There is no need to update skb->csum in this function, because the updates
+ * to two fields, a.) IPv6 src/dst address and b.) L4 header checksum, cancel
+ * each other out for the skb->csum calculation. inet_proto_csum_replace4, by
+ * contrast, needs to update skb->csum, because the updates to 3 fields,
+ * a.) IPv4 src/dst address, b.) IPv4 header checksum and c.) L4 header
+ * checksum, result in the same diff as the L4 header checksum for skb->csum.
+ */
void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
const __be32 *from, const __be32 *to,
bool pseudohdr)
@@ -449,9 +466,6 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
if (skb->ip_summed != CHECKSUM_PARTIAL) {
*sum = csum_fold(csum_partial(diff, sizeof(diff),
~csum_unfold(*sum)));
- if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
- skb->csum = ~csum_partial(diff, sizeof(diff),
- ~skb->csum);
} else if (pseudohdr)
*sum = ~csum_fold(csum_partial(diff, sizeof(diff),
csum_unfold(*sum)));
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index e19a92a62e14..0a46ea3bddd5 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -670,7 +670,7 @@ static int dn_create(struct net *net, struct socket *sock, int protocol,
{
struct sock *sk;
- if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+ if (protocol < 0 || protocol > U8_MAX)
return -EINVAL;
if (!net_eq(net, &init_net))
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 1e6c3cac11e6..92663dcb3aa2 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -29,6 +29,12 @@ config NET_DSA_TAG_8021Q
Drivers which use these helpers should select this as dependency.
+config NET_DSA_TAG_AR9331
+ tristate "Tag driver for Atheros AR9331 SoC with built-in switch"
+ help
+ Say Y or M if you want to enable support for tagging frames for
+ the Atheros AR9331 SoC with built-in switch.
+
config NET_DSA_TAG_BRCM_COMMON
tristate
default n
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 9a482c38bdb1..108486cfdeef 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -5,6 +5,7 @@ dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o
# tagging formats
obj-$(CONFIG_NET_DSA_TAG_8021Q) += tag_8021q.o
+obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o
obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o
obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index c66abbed4daf..e7c30b472034 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -614,6 +614,32 @@ static int dsa_port_parse_dsa(struct dsa_port *dp)
return 0;
}
+static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp,
+ struct net_device *master)
+{
+ enum dsa_tag_protocol tag_protocol = DSA_TAG_PROTO_NONE;
+ struct dsa_switch *mds, *ds = dp->ds;
+ unsigned int mdp_upstream;
+ struct dsa_port *mdp;
+
+ /* It is possible to stack DSA switches onto one another; when that
+ * happens, the switch driver may want to know if its tagging protocol
+ * is going to work in such a configuration.
+ */
+ if (dsa_slave_dev_check(master)) {
+ mdp = dsa_slave_to_port(master);
+ mds = mdp->ds;
+ mdp_upstream = dsa_upstream_port(mds, mdp->index);
+ tag_protocol = mds->ops->get_tag_protocol(mds, mdp_upstream,
+ DSA_TAG_PROTO_NONE);
+ }
+
+ /* If the master device is not itself a DSA slave in a disjoint DSA
+ * tree, then return immediately; tag_protocol is DSA_TAG_PROTO_NONE.
+ */
+ return ds->ops->get_tag_protocol(ds, dp->index, tag_protocol);
+}
+
static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master)
{
struct dsa_switch *ds = dp->ds;
@@ -621,20 +647,21 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master)
const struct dsa_device_ops *tag_ops;
enum dsa_tag_protocol tag_protocol;
- tag_protocol = ds->ops->get_tag_protocol(ds, dp->index);
+ tag_protocol = dsa_get_tag_protocol(dp, master);
tag_ops = dsa_tag_driver_get(tag_protocol);
if (IS_ERR(tag_ops)) {
if (PTR_ERR(tag_ops) == -ENOPROTOOPT)
return -EPROBE_DEFER;
dev_warn(ds->dev, "No tagger for this switch\n");
+ dp->master = NULL;
return PTR_ERR(tag_ops);
}
+ dp->master = master;
dp->type = DSA_PORT_TYPE_CPU;
dp->filter = tag_ops->filter;
dp->rcv = tag_ops->rcv;
dp->tag_ops = tag_ops;
- dp->master = master;
dp->dst = dst;
return 0;
@@ -822,6 +849,19 @@ static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
return dsa_switch_parse_ports(ds, cd);
}
+static void dsa_switch_release_ports(struct dsa_switch *ds)
+{
+ struct dsa_switch_tree *dst = ds->dst;
+ struct dsa_port *dp, *next;
+
+ list_for_each_entry_safe(dp, next, &dst->ports, list) {
+ if (dp->ds != ds)
+ continue;
+ list_del(&dp->list);
+ kfree(dp);
+ }
+}
+
static int dsa_switch_probe(struct dsa_switch *ds)
{
struct dsa_switch_tree *dst;
@@ -838,12 +878,17 @@ static int dsa_switch_probe(struct dsa_switch *ds)
if (!ds->num_ports)
return -EINVAL;
- if (np)
+ if (np) {
err = dsa_switch_parse_of(ds, np);
- else if (pdata)
+ if (err)
+ dsa_switch_release_ports(ds);
+ } else if (pdata) {
err = dsa_switch_parse(ds, pdata);
- else
+ if (err)
+ dsa_switch_release_ports(ds);
+ } else {
err = -ENODEV;
+ }
if (err)
return err;
@@ -851,8 +896,10 @@ static int dsa_switch_probe(struct dsa_switch *ds)
dst = ds->dst;
dsa_tree_get(dst);
err = dsa_tree_setup(dst);
- if (err)
+ if (err) {
+ dsa_switch_release_ports(ds);
dsa_tree_put(dst);
+ }
return err;
}
@@ -873,15 +920,9 @@ EXPORT_SYMBOL_GPL(dsa_register_switch);
static void dsa_switch_remove(struct dsa_switch *ds)
{
struct dsa_switch_tree *dst = ds->dst;
- struct dsa_port *dp, *next;
dsa_tree_teardown(dst);
-
- list_for_each_entry_safe(dp, next, &dst->ports, list) {
- list_del(&dp->list);
- kfree(dp);
- }
-
+ dsa_switch_release_ports(ds);
dsa_tree_put(dst);
}
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 2dd86d9bcda9..760e6ea3178a 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -117,7 +117,9 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
/* port.c */
int dsa_port_set_state(struct dsa_port *dp, u8 state,
struct switchdev_trans *trans);
+int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
+void dsa_port_disable_rt(struct dsa_port *dp);
void dsa_port_disable(struct dsa_port *dp);
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
@@ -150,22 +152,6 @@ int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags);
int dsa_port_vid_del(struct dsa_port *dp, u16 vid);
int dsa_port_link_register_of(struct dsa_port *dp);
void dsa_port_link_unregister_of(struct dsa_port *dp);
-void dsa_port_phylink_validate(struct phylink_config *config,
- unsigned long *supported,
- struct phylink_link_state *state);
-void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config,
- struct phylink_link_state *state);
-void dsa_port_phylink_mac_config(struct phylink_config *config,
- unsigned int mode,
- const struct phylink_link_state *state);
-void dsa_port_phylink_mac_an_restart(struct phylink_config *config);
-void dsa_port_phylink_mac_link_down(struct phylink_config *config,
- unsigned int mode,
- phy_interface_t interface);
-void dsa_port_phylink_mac_link_up(struct phylink_config *config,
- unsigned int mode,
- phy_interface_t interface,
- struct phy_device *phydev);
extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
/* slave.c */
@@ -173,13 +159,12 @@ extern const struct dsa_device_ops notag_netdev_ops;
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
int dsa_slave_create(struct dsa_port *dp);
void dsa_slave_destroy(struct net_device *slave_dev);
+bool dsa_slave_dev_check(const struct net_device *dev);
int dsa_slave_suspend(struct net_device *slave_dev);
int dsa_slave_resume(struct net_device *slave_dev);
int dsa_slave_register_notifier(void);
void dsa_slave_unregister_notifier(void);
-void *dsa_defer_xmit(struct sk_buff *skb, struct net_device *dev);
-
static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 3255dfc97f86..bd44bde272f4 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -197,6 +197,35 @@ static int dsa_master_get_phys_port_name(struct net_device *dev,
return 0;
}
+static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ struct dsa_switch *ds = cpu_dp->ds;
+ struct dsa_switch_tree *dst;
+ int err = -EOPNOTSUPP;
+ struct dsa_port *dp;
+
+ dst = ds->dst;
+
+ switch (cmd) {
+ case SIOCGHWTSTAMP:
+ case SIOCSHWTSTAMP:
+ /* Deny PTP operations on master if there is at least one
+ * switch in the tree that is PTP capable.
+ */
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dp->ds->ops->port_hwtstamp_get ||
+ dp->ds->ops->port_hwtstamp_set)
+ return -EBUSY;
+ break;
+ }
+
+ if (cpu_dp->orig_ndo_ops && cpu_dp->orig_ndo_ops->ndo_do_ioctl)
+ err = cpu_dp->orig_ndo_ops->ndo_do_ioctl(dev, ifr, cmd);
+
+ return err;
+}
+
static int dsa_master_ethtool_setup(struct net_device *dev)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
@@ -249,6 +278,7 @@ static int dsa_master_ndo_setup(struct net_device *dev)
memcpy(ops, cpu_dp->orig_ndo_ops, sizeof(*ops));
ops->ndo_get_phys_port_name = dsa_master_get_phys_port_name;
+ ops->ndo_do_ioctl = dsa_master_ioctl;
dev->netdev_ops = ops;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 46ac9ba21987..ec13dc666788 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -63,7 +63,7 @@ static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
}
-int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
+int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy)
{
struct dsa_switch *ds = dp->ds;
int port = dp->index;
@@ -78,14 +78,31 @@ int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
if (!dp->bridge_dev)
dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+ if (dp->pl)
+ phylink_start(dp->pl);
+
return 0;
}
-void dsa_port_disable(struct dsa_port *dp)
+/* Variant of dsa_port_enable_rt() for callers that do not hold the RTNL
+ * lock: the lock is taken around the call and released afterwards.
+ */
+int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
+{
+	int ret;
+
+	rtnl_lock();
+	ret = dsa_port_enable_rt(dp, phy);
+	rtnl_unlock();
+
+	return ret;
+}
+
+void dsa_port_disable_rt(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
int port = dp->index;
+ if (dp->pl)
+ phylink_stop(dp->pl);
+
if (!dp->bridge_dev)
dsa_port_set_state_now(dp, BR_STATE_DISABLED);
@@ -93,6 +110,13 @@ void dsa_port_disable(struct dsa_port *dp)
ds->ops->port_disable(ds, port);
}
+/* Variant of dsa_port_disable_rt() for callers that do not hold the RTNL
+ * lock: the lock is taken around the call and released afterwards.
+ */
+void dsa_port_disable(struct dsa_port *dp)
+{
+ rtnl_lock();
+ dsa_port_disable_rt(dp);
+ rtnl_unlock();
+}
+
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
{
struct dsa_notifier_bridge_info info = {
@@ -415,9 +439,9 @@ static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp)
return phydev;
}
-void dsa_port_phylink_validate(struct phylink_config *config,
- unsigned long *supported,
- struct phylink_link_state *state)
+static void dsa_port_phylink_validate(struct phylink_config *config,
+ unsigned long *supported,
+ struct phylink_link_state *state)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
struct dsa_switch *ds = dp->ds;
@@ -427,10 +451,9 @@ void dsa_port_phylink_validate(struct phylink_config *config,
ds->ops->phylink_validate(ds, dp->index, supported, state);
}
-EXPORT_SYMBOL_GPL(dsa_port_phylink_validate);
-void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config,
- struct phylink_link_state *state)
+static void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config,
+ struct phylink_link_state *state)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
struct dsa_switch *ds = dp->ds;
@@ -444,11 +467,10 @@ void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config,
if (ds->ops->phylink_mac_link_state(ds, dp->index, state) < 0)
state->link = 0;
}
-EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_pcs_get_state);
-void dsa_port_phylink_mac_config(struct phylink_config *config,
- unsigned int mode,
- const struct phylink_link_state *state)
+static void dsa_port_phylink_mac_config(struct phylink_config *config,
+ unsigned int mode,
+ const struct phylink_link_state *state)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
struct dsa_switch *ds = dp->ds;
@@ -458,9 +480,8 @@ void dsa_port_phylink_mac_config(struct phylink_config *config,
ds->ops->phylink_mac_config(ds, dp->index, mode, state);
}
-EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_config);
-void dsa_port_phylink_mac_an_restart(struct phylink_config *config)
+static void dsa_port_phylink_mac_an_restart(struct phylink_config *config)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
struct dsa_switch *ds = dp->ds;
@@ -470,11 +491,10 @@ void dsa_port_phylink_mac_an_restart(struct phylink_config *config)
ds->ops->phylink_mac_an_restart(ds, dp->index);
}
-EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_an_restart);
-void dsa_port_phylink_mac_link_down(struct phylink_config *config,
- unsigned int mode,
- phy_interface_t interface)
+static void dsa_port_phylink_mac_link_down(struct phylink_config *config,
+ unsigned int mode,
+ phy_interface_t interface)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
struct phy_device *phydev = NULL;
@@ -491,12 +511,11 @@ void dsa_port_phylink_mac_link_down(struct phylink_config *config,
ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface);
}
-EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_link_down);
-void dsa_port_phylink_mac_link_up(struct phylink_config *config,
- unsigned int mode,
- phy_interface_t interface,
- struct phy_device *phydev)
+static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
+ unsigned int mode,
+ phy_interface_t interface,
+ struct phy_device *phydev)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
struct dsa_switch *ds = dp->ds;
@@ -509,7 +528,6 @@ void dsa_port_phylink_mac_link_up(struct phylink_config *config,
ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev);
}
-EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_link_up);
const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
.validate = dsa_port_phylink_validate,
@@ -605,6 +623,7 @@ static int dsa_port_phylink_register(struct dsa_port *dp)
dp->pl_config.dev = ds->dev;
dp->pl_config.type = PHYLINK_DEV;
+ dp->pl_config.pcs_poll = ds->pcs_poll;
dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(port_dn),
mode, &dsa_port_phylink_mac_ops);
@@ -619,10 +638,6 @@ static int dsa_port_phylink_register(struct dsa_port *dp)
goto err_phy_connect;
}
- rtnl_lock();
- phylink_start(dp->pl);
- rtnl_unlock();
-
return 0;
err_phy_connect:
@@ -633,9 +648,14 @@ err_phy_connect:
int dsa_port_link_register_of(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
+ struct device_node *phy_np;
- if (!ds->ops->adjust_link)
- return dsa_port_phylink_register(dp);
+	if (!ds->ops->adjust_link) {
+		int err = 0;
+
+		/* Only register with phylink when the port node describes a
+		 * link (fixed-link or phy-handle); otherwise there is nothing
+		 * to manage and the port is left without a phylink instance.
+		 */
+		phy_np = of_parse_phandle(dp->dn, "phy-handle", 0);
+		if (of_phy_is_fixed_link(dp->dn) || phy_np)
+			err = dsa_port_phylink_register(dp);
+
+		/* of_parse_phandle() returns the node with its refcount
+		 * raised; drop it on every path (of_node_put(NULL) is a
+		 * no-op) so the reference is not leaked.
+		 */
+		of_node_put(phy_np);
+		return err;
+	}
dev_warn(ds->dev,
"Using legacy PHYLIB callbacks. Please migrate to PHYLINK!\n");
@@ -650,11 +670,12 @@ void dsa_port_link_unregister_of(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
- if (!ds->ops->adjust_link) {
+ if (!ds->ops->adjust_link && dp->pl) {
rtnl_lock();
phylink_disconnect_phy(dp->pl);
rtnl_unlock();
phylink_destroy(dp->pl);
+ dp->pl = NULL;
return;
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 78ffc87dc25e..ddc0f9236928 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -22,8 +22,6 @@
#include "dsa_priv.h"
-static bool dsa_slave_dev_check(const struct net_device *dev);
-
/* slave mii_bus handling ***************************************************/
static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
{
@@ -90,12 +88,10 @@ static int dsa_slave_open(struct net_device *dev)
goto clear_allmulti;
}
- err = dsa_port_enable(dp, dev->phydev);
+ err = dsa_port_enable_rt(dp, dev->phydev);
if (err)
goto clear_promisc;
- phylink_start(dp->pl);
-
return 0;
clear_promisc:
@@ -116,12 +112,7 @@ static int dsa_slave_close(struct net_device *dev)
struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
- cancel_work_sync(&dp->xmit_work);
- skb_queue_purge(&dp->xmit_queue);
-
- phylink_stop(dp->pl);
-
- dsa_port_disable(dp);
+ dsa_port_disable_rt(dp);
dev_mc_unsync(master, dev);
dev_uc_unsync(master, dev);
@@ -518,7 +509,6 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
s->tx_bytes += skb->len;
u64_stats_update_end(&s->syncp);
- DSA_SKB_CB(skb)->deferred_xmit = false;
DSA_SKB_CB(skb)->clone = NULL;
/* Identify PTP protocol packets, clone them, and pass them to the
@@ -531,39 +521,13 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
*/
nskb = p->xmit(skb, dev);
if (!nskb) {
- if (!DSA_SKB_CB(skb)->deferred_xmit)
- kfree_skb(skb);
+ kfree_skb(skb);
return NETDEV_TX_OK;
}
return dsa_enqueue_skb(nskb, dev);
}
-void *dsa_defer_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- struct dsa_port *dp = dsa_slave_to_port(dev);
-
- DSA_SKB_CB(skb)->deferred_xmit = true;
-
- skb_queue_tail(&dp->xmit_queue, skb);
- schedule_work(&dp->xmit_work);
- return NULL;
-}
-EXPORT_SYMBOL_GPL(dsa_defer_xmit);
-
-static void dsa_port_xmit_work(struct work_struct *work)
-{
- struct dsa_port *dp = container_of(work, struct dsa_port, xmit_work);
- struct dsa_switch *ds = dp->ds;
- struct sk_buff *skb;
-
- if (unlikely(!ds->ops->port_deferred_xmit))
- return;
-
- while ((skb = skb_dequeue(&dp->xmit_queue)) != NULL)
- ds->ops->port_deferred_xmit(ds, dp->index, skb);
-}
-
/* ethtool operations *******************************************************/
static void dsa_slave_get_drvinfo(struct net_device *dev,
@@ -1367,9 +1331,6 @@ int dsa_slave_suspend(struct net_device *slave_dev)
if (!netif_running(slave_dev))
return 0;
- cancel_work_sync(&dp->xmit_work);
- skb_queue_purge(&dp->xmit_queue);
-
netif_device_detach(slave_dev);
rtnl_lock();
@@ -1455,8 +1416,6 @@ int dsa_slave_create(struct dsa_port *port)
}
p->dp = port;
INIT_LIST_HEAD(&p->mall_tc_list);
- INIT_WORK(&port->xmit_work, dsa_port_xmit_work);
- skb_queue_head_init(&port->xmit_queue);
p->xmit = cpu_dp->tag_ops->xmit;
port->slave = slave_dev;
@@ -1508,7 +1467,7 @@ void dsa_slave_destroy(struct net_device *slave_dev)
free_netdev(slave_dev);
}
-static bool dsa_slave_dev_check(const struct net_device *dev)
+bool dsa_slave_dev_check(const struct net_device *dev)
{
return dev->netdev_ops == &dsa_slave_netdev_ops;
}
diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c
new file mode 100644
index 000000000000..55b00694cdba
--- /dev/null
+++ b/net/dsa/tag_ar9331.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Pengutronix, Oleksij Rempel <kernel@pengutronix.de>
+ */
+
+
+#include <linux/bitfield.h>
+#include <linux/etherdevice.h>
+
+#include "dsa_priv.h"
+
+#define AR9331_HDR_LEN 2
+#define AR9331_HDR_VERSION 1
+
+#define AR9331_HDR_VERSION_MASK GENMASK(15, 14)
+#define AR9331_HDR_PRIORITY_MASK GENMASK(13, 12)
+#define AR9331_HDR_TYPE_MASK GENMASK(10, 8)
+#define AR9331_HDR_BROADCAST BIT(7)
+#define AR9331_HDR_FROM_CPU BIT(6)
+/* AR9331_HDR_RESERVED - not used or may be version field.
+ * According to the AR8216 doc it should be 0b10. On AR9331 it is 0b11 on the
+ * RX path and should be set to 0b11 to make it work.
+ */
+#define AR9331_HDR_RESERVED_MASK GENMASK(5, 4)
+#define AR9331_HDR_PORT_NUM_MASK GENMASK(3, 0)
+
+/* Prepend the 2-byte little-endian AR9331 header to an outgoing frame.
+ * Returns the tagged skb, or NULL if headroom for the tag could not be made
+ * available.
+ */
+static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb,
+				       struct net_device *dev)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	__le16 *tag;
+	u16 val;
+
+	if (skb_cow_head(skb, AR9331_HDR_LEN) < 0)
+		return NULL;
+
+	tag = skb_push(skb, AR9331_HDR_LEN);
+
+	/* version | from-cpu flag | port index; the reserved field is
+	 * 0b10 for AR8216 and 0b11 for AR9331
+	 */
+	val = FIELD_PREP(AR9331_HDR_VERSION_MASK, AR9331_HDR_VERSION);
+	val |= AR9331_HDR_FROM_CPU;
+	val |= dp->index;
+	val |= AR9331_HDR_RESERVED_MASK;
+
+	*tag = cpu_to_le16(val);
+
+	return skb;
+}
+
+/* Validate and strip the 2-byte AR9331 header of a received frame and point
+ * skb->dev at the slave device that dsa_master_find_slave() returns for the
+ * port number carried in the header.
+ *
+ * Returns the skb on success. Returns NULL when the header cannot be pulled,
+ * the version field is not AR9331_HDR_VERSION, the FROM_CPU bit is set, or
+ * no slave device is found for the port.
+ */
+static struct sk_buff *ar9331_tag_rcv(struct sk_buff *skb,
+ struct net_device *ndev,
+ struct packet_type *pt)
+{
+ u8 ver, port;
+ u16 hdr;
+
+ if (unlikely(!pskb_may_pull(skb, AR9331_HDR_LEN)))
+ return NULL;
+
+ /* the header is read from the MAC header position as little endian */
+ hdr = le16_to_cpu(*(__le16 *)skb_mac_header(skb));
+
+ ver = FIELD_GET(AR9331_HDR_VERSION_MASK, hdr);
+ if (unlikely(ver != AR9331_HDR_VERSION)) {
+ netdev_warn_once(ndev, "%s:%i wrong header version 0x%2x\n",
+ __func__, __LINE__, hdr);
+ return NULL;
+ }
+
+ if (unlikely(hdr & AR9331_HDR_FROM_CPU)) {
+ netdev_warn_once(ndev, "%s:%i packet should not be from cpu 0x%2x\n",
+ __func__, __LINE__, hdr);
+ return NULL;
+ }
+
+ skb_pull_rcsum(skb, AR9331_HDR_LEN);
+
+ /* Get source port information */
+ port = FIELD_GET(AR9331_HDR_PORT_NUM_MASK, hdr);
+
+ skb->dev = dsa_master_find_slave(ndev, 0, port);
+ if (!skb->dev)
+ return NULL;
+
+ return skb;
+}
+
+static const struct dsa_device_ops ar9331_netdev_ops = {
+ .name = "ar9331",
+ .proto = DSA_TAG_PROTO_AR9331,
+ .xmit = ar9331_tag_xmit,
+ .rcv = ar9331_tag_rcv,
+ .overhead = AR9331_HDR_LEN,
+};
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_AR9331);
+module_dsa_tag_driver(ar9331_netdev_ops);
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index c8a128c9e5e0..70db7c909f74 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -33,7 +33,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
struct dsa_port *dp = dsa_slave_to_port(dev);
u16 *phdr, hdr;
- if (skb_cow_head(skb, 0) < 0)
+ if (skb_cow_head(skb, QCA_HDR_LEN) < 0)
return NULL;
skb_push(skb, QCA_HDR_LEN);
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 63ef2a14c934..5366ea430349 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -83,12 +83,24 @@ static bool sja1105_filter(const struct sk_buff *skb, struct net_device *dev)
return false;
}
+/* Hand the frame over to the per-port kthread worker instead of sending it
+ * from the xmit path. Calls sja1105_port_deferred_xmit in sja1105_main.c.
+ * Always returns NULL.
+ */
+static struct sk_buff *sja1105_defer_xmit(struct sja1105_port *sp,
+					  struct sk_buff *skb)
+{
+	/* Take an extra reference first: dsa_slave_xmit calls kfree_skb on
+	 * the skb when xmit returns NULL, and that must not really free the
+	 * packet which is now sitting on the queue.
+	 */
+	struct sk_buff *held = skb_get(skb);
+
+	skb_queue_tail(&sp->xmit_queue, held);
+	kthread_queue_work(sp->xmit_worker, &sp->xmit_work);
+
+	return NULL;
+}
+
static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
struct net_device *netdev)
{
struct dsa_port *dp = dsa_slave_to_port(netdev);
- struct dsa_switch *ds = dp->ds;
- u16 tx_vid = dsa_8021q_tx_vid(ds, dp->index);
+ u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
@@ -97,7 +109,7 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
* is the .port_deferred_xmit driver callback.
*/
if (unlikely(sja1105_is_link_local(skb)))
- return dsa_defer_xmit(skb, netdev);
+ return sja1105_defer_xmit(dp->priv, skb);
/* If we are under a vlan_filtering bridge, IP termination on
* switch ports based on 802.1Q tags is simply too brittle to
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 9040fe55e0f5..c8b903302ff2 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -335,22 +335,6 @@ int eth_mac_addr(struct net_device *dev, void *p)
}
EXPORT_SYMBOL(eth_mac_addr);
-/**
- * eth_change_mtu - set new MTU size
- * @dev: network device
- * @new_mtu: new Maximum Transfer Unit
- *
- * Allow changing MTU size. Needs to be overridden for devices
- * supporting jumbo frames.
- */
-int eth_change_mtu(struct net_device *dev, int new_mtu)
-{
- netdev_warn(dev, "%s is deprecated\n", __func__);
- dev->mtu = new_mtu;
- return 0;
-}
-EXPORT_SYMBOL(eth_change_mtu);
-
int eth_validate_addr(struct net_device *dev)
{
if (!is_valid_ether_addr(dev->dev_addr))
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
new file mode 100644
index 000000000000..424545a4aaec
--- /dev/null
+++ b/net/ethtool/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-y += ioctl.o common.o
+
+obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o
+
+ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
+ linkstate.o debug.o wol.o
diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c
new file mode 100644
index 000000000000..ef9197541cb3
--- /dev/null
+++ b/net/ethtool/bitset.c
@@ -0,0 +1,739 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/ethtool_netlink.h>
+#include <linux/bitmap.h>
+#include "netlink.h"
+#include "bitset.h"
+
+/* Some bitmaps are internally represented as an array of unsigned long, some
+ * as an array of u32 (some even as single u32 for now). To avoid the need of
+ * wrappers on caller side, we provide two set of functions: those with "32"
+ * suffix in their names expect u32 based bitmaps, those without it expect
+ * unsigned long bitmaps.
+ */
+
+/* Mask with the (n % 32) lowest bits set. For n % 32 == 0 the shift count
+ * would be 32, so this must only be called when n % 32 != 0.
+ */
+static u32 ethnl_lower_bits(unsigned int n)
+{
+ return ~(u32)0 >> (32 - n % 32);
+}
+
+/* Mask with all bits from position (n % 32) upwards set, i.e. the lowest
+ * (n % 32) bits cleared.
+ */
+static u32 ethnl_upper_bits(unsigned int n)
+{
+ return ~(u32)0 << (n % 32);
+}
+
+/**
+ * ethnl_bitmap32_clear() - Clear an interval of a u32 based bitmap
+ * @dst:   bitmap to clear
+ * @start: index of the first bit to clear
+ * @end:   index one past the last bit to clear
+ * @mod:   set to true if a bit was actually cleared, untouched otherwise
+ *
+ * Clear bits with indices @start <= i < @end. An empty interval
+ * (@end <= @start) is a no-op.
+ */
+static void ethnl_bitmap32_clear(u32 *dst, unsigned int start, unsigned int end,
+				 bool *mod)
+{
+	unsigned int first = start / 32;
+	unsigned int last = end / 32;
+	unsigned int w;
+	u32 bits;
+
+	if (end <= start)
+		return;
+
+	/* partial leading word, possibly also the trailing one */
+	if (start % 32) {
+		bool single_word = (last == first);
+
+		bits = ethnl_upper_bits(start);
+		if (single_word)
+			bits &= ethnl_lower_bits(end);
+		if (dst[first] & bits) {
+			dst[first] &= ~bits;
+			*mod = true;
+		}
+		if (single_word)
+			return;
+		first++;
+	}
+
+	/* whole words */
+	for (w = first; w < last; w++) {
+		if (!dst[w])
+			continue;
+		dst[w] = 0;
+		*mod = true;
+	}
+
+	/* partial trailing word */
+	if (!(end % 32))
+		return;
+	bits = ethnl_lower_bits(end);
+	if (dst[last] & bits) {
+		dst[last] &= ~bits;
+		*mod = true;
+	}
+}
+
+/**
+ * ethnl_bitmap32_not_zero() - Check if any bit is set in an interval
+ * @map:   bitmap to test
+ * @start: beginning of the interval
+ * @end:   end of the interval
+ *
+ * Return: true if there is non-zero bit with index @start <= i < @end,
+ *         false if the whole interval is zero (an empty interval is zero)
+ */
+static bool ethnl_bitmap32_not_zero(const u32 *map, unsigned int start,
+				    unsigned int end)
+{
+	unsigned int start_word = start / 32;
+	unsigned int end_word = end / 32;
+	u32 mask;
+
+	/* an empty interval contains no set bit */
+	if (end <= start)
+		return false;
+
+	/* partial leading word, possibly also the trailing one */
+	if (start % 32) {
+		mask = ethnl_upper_bits(start);
+		if (end_word == start_word) {
+			mask &= ethnl_lower_bits(end);
+			return map[start_word] & mask;
+		}
+		if (map[start_word] & mask)
+			return true;
+		start_word++;
+	}
+
+	/* memchr_inv() returns a pointer to the first byte differing from
+	 * the pattern, i.e. non-NULL exactly when some full word is non-zero
+	 */
+	if (memchr_inv(map + start_word, '\0',
+		       (end_word - start_word) * sizeof(u32)))
+		return true;
+	/* no partial trailing word left to inspect: everything was zero */
+	if (end % 32 == 0)
+		return false;
+	return map[end_word] & ethnl_lower_bits(end);
+}
+
+/**
+ * ethnl_bitmap32_update() - Apply a value/mask pair to a u32 based bitmap
+ * @dst:   bitmap to update
+ * @nbits: bit size of the bitmap
+ * @value: values to set
+ * @mask:  mask of bits to set; NULL means all @nbits bits are selected
+ * @mod:   set to true if bitmap is modified, preserve if not
+ *
+ * Every bit of @dst selected by @mask takes the value of the corresponding
+ * bit in @value; unselected bits and bits at or above @nbits are left alone.
+ */
+static void ethnl_bitmap32_update(u32 *dst, unsigned int nbits,
+				  const u32 *value, const u32 *mask, bool *mod)
+{
+	unsigned int w;
+
+	for (w = 0; nbits > 0; w++, nbits -= 32) {
+		u32 sel = mask ? mask[w] : ~(u32)0;
+		u32 cur = dst[w];
+		u32 next;
+
+		/* last, partial word: never touch bits past the bitmap */
+		if (nbits < 32)
+			sel &= ethnl_lower_bits(nbits);
+
+		next = (cur & ~sel) | (value[w] & sel);
+		if (next != cur) {
+			dst[w] = next;
+			*mod = true;
+		}
+
+		/* stop before the decrement could wrap nbits around */
+		if (nbits <= 32)
+			break;
+	}
+}
+
+/* Return true if bit @index is set in the u32 based bitmap @map. */
+static bool ethnl_bitmap32_test_bit(const u32 *map, unsigned int index)
+{
+ return map[index / 32] & (1U << (index % 32));
+}
+
+/**
+ * ethnl_bitset32_size() - Estimate the size of a bitset nested attribute
+ * @val:     value bitmap (u32 based)
+ * @mask:    mask bitmap (u32 based, optional)
+ * @nbits:   bit length of the bitset
+ * @names:   array of bit names (optional)
+ * @compact: assume compact format for output
+ *
+ * Compute the length of the attribute that a later ethnl_put_bitset32() call
+ * with the same arguments would compose.
+ *
+ * Return: negative error code or attribute length estimate
+ */
+int ethnl_bitset32_size(const u32 *val, const u32 *mask, unsigned int nbits,
+			ethnl_string_array_t names, bool compact)
+{
+	unsigned int total;
+
+	/* size attribute is always present */
+	total = nla_total_size(sizeof(u32));
+	/* list flag is only present when there is no mask */
+	if (!mask)
+		total += nla_total_size(sizeof(u32));
+
+	if (compact) {
+		unsigned int nwords = DIV_ROUND_UP(nbits, 32);
+		unsigned int bitmap_len = nla_total_size(nwords * sizeof(u32));
+
+		/* value, plus mask if there is one */
+		total += bitmap_len;
+		if (mask)
+			total += bitmap_len;
+	} else {
+		const u32 *listed = mask ?: val;
+		unsigned int nest_len = 0;
+		unsigned int i;
+
+		for (i = 0; i < nbits; i++) {
+			const char *name = names ? names[i] : NULL;
+			unsigned int entry_len;
+
+			if (!ethnl_bitmap32_test_bit(listed, i))
+				continue;
+
+			/* index */
+			entry_len = nla_total_size(sizeof(u32));
+			/* name */
+			if (name)
+				entry_len += ethnl_strz_size(name);
+			/* value flag */
+			if (mask && ethnl_bitmap32_test_bit(val, i))
+				entry_len += nla_total_size(0);
+
+			/* nest around this bit */
+			nest_len += nla_total_size(entry_len);
+		}
+		/* nest around all bits */
+		total += nla_total_size(nest_len);
+	}
+
+	/* outermost nest */
+	return nla_total_size(total);
+}
+
+/**
+ * ethnl_put_bitset32() - Put a bitset nest into a message
+ * @skb: skb with the message
+ * @attrtype: attribute type for the bitset nest
+ * @val: value bitmap (u32 based)
+ * @mask: mask bitmap (u32 based, optional)
+ * @nbits: bit length of the bitset
+ * @names: array of bit names (optional)
+ * @compact: use compact format for the output
+ *
+ * Compose a nested attribute representing a bitset. If @mask is null, simple
+ * bitmap (bit list) is created, if @mask is provided, represent a value/mask
+ * pair. Bit names are only used in verbose mode and when provided by caller.
+ *
+ * Return: 0 on success, -EMSGSIZE if the attribute does not fit into @skb
+ * (the partially composed nest is cancelled in that case)
+ */
+int ethnl_put_bitset32(struct sk_buff *skb, int attrtype, const u32 *val,
+ const u32 *mask, unsigned int nbits,
+ ethnl_string_array_t names, bool compact)
+{
+ struct nlattr *nest;
+ struct nlattr *attr;
+
+ nest = nla_nest_start(skb, attrtype);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (!mask && nla_put_flag(skb, ETHTOOL_A_BITSET_NOMASK))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, ETHTOOL_A_BITSET_SIZE, nbits))
+ goto nla_put_failure;
+ if (compact) {
+ unsigned int nwords = DIV_ROUND_UP(nbits, 32);
+ unsigned int nbytes = nwords * sizeof(u32);
+ u32 *dst;
+
+ attr = nla_reserve(skb, ETHTOOL_A_BITSET_VALUE, nbytes);
+ if (!attr)
+ goto nla_put_failure;
+ dst = nla_data(attr);
+ memcpy(dst, val, nbytes);
+ /* zero the bits of the last word which are past nbits */
+ if (nbits % 32)
+ dst[nwords - 1] &= ethnl_lower_bits(nbits);
+
+ if (mask) {
+ attr = nla_reserve(skb, ETHTOOL_A_BITSET_MASK, nbytes);
+ if (!attr)
+ goto nla_put_failure;
+ dst = nla_data(attr);
+ memcpy(dst, mask, nbytes);
+ if (nbits % 32)
+ dst[nwords - 1] &= ethnl_lower_bits(nbits);
+ }
+ } else {
+ struct nlattr *bits;
+ unsigned int i;
+
+ bits = nla_nest_start(skb, ETHTOOL_A_BITSET_BITS);
+ if (!bits)
+ goto nla_put_failure;
+ for (i = 0; i < nbits; i++) {
+ const char *name = names ? names[i] : NULL;
+
+ /* only bits set in mask (or in val if no mask) are listed */
+ if (!ethnl_bitmap32_test_bit(mask ?: val, i))
+ continue;
+ attr = nla_nest_start(skb, ETHTOOL_A_BITSET_BITS_BIT);
+ if (!attr)
+ goto nla_put_failure;
+ if (nla_put_u32(skb, ETHTOOL_A_BITSET_BIT_INDEX, i))
+ goto nla_put_failure;
+ if (name &&
+ ethnl_put_strz(skb, ETHTOOL_A_BITSET_BIT_NAME, name))
+ goto nla_put_failure;
+ if (mask && ethnl_bitmap32_test_bit(val, i) &&
+ nla_put_flag(skb, ETHTOOL_A_BITSET_BIT_VALUE))
+ goto nla_put_failure;
+ nla_nest_end(skb, attr);
+ }
+ nla_nest_end(skb, bits);
+ }
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy bitset_policy[ETHTOOL_A_BITSET_MAX + 1] = {
+ [ETHTOOL_A_BITSET_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_BITSET_NOMASK] = { .type = NLA_FLAG },
+ [ETHTOOL_A_BITSET_SIZE] = NLA_POLICY_MAX(NLA_U32,
+ ETHNL_MAX_BITSET_SIZE),
+ [ETHTOOL_A_BITSET_BITS] = { .type = NLA_NESTED },
+ [ETHTOOL_A_BITSET_VALUE] = { .type = NLA_BINARY },
+ [ETHTOOL_A_BITSET_MASK] = { .type = NLA_BINARY },
+};
+
+static const struct nla_policy bit_policy[ETHTOOL_A_BITSET_BIT_MAX + 1] = {
+ [ETHTOOL_A_BITSET_BIT_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_BITSET_BIT_INDEX] = { .type = NLA_U32 },
+ [ETHTOOL_A_BITSET_BIT_NAME] = { .type = NLA_NUL_STRING },
+ [ETHTOOL_A_BITSET_BIT_VALUE] = { .type = NLA_FLAG },
+};
+
+/**
+ * ethnl_bitset_is_compact() - tell compact and verbose bitsets apart
+ * @bitset:  nested attribute representing a bitset
+ * @compact: set to true for a compact bitset, false for a verbose one
+ *
+ * A bitset with ETHTOOL_A_BITSET_BITS is verbose and must not carry value or
+ * mask; a bitset without it is compact and must carry size and value.
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int ethnl_bitset_is_compact(const struct nlattr *bitset, bool *compact)
+{
+	struct nlattr *tb[ETHTOOL_A_BITSET_MAX + 1];
+	bool verbose;
+	int ret;
+
+	ret = nla_parse_nested(tb, ETHTOOL_A_BITSET_MAX, bitset,
+			       bitset_policy, NULL);
+	if (ret < 0)
+		return ret;
+
+	verbose = tb[ETHTOOL_A_BITSET_BITS];
+	if (verbose) {
+		if (tb[ETHTOOL_A_BITSET_VALUE] || tb[ETHTOOL_A_BITSET_MASK])
+			return -EINVAL;
+	} else {
+		if (!tb[ETHTOOL_A_BITSET_SIZE] || !tb[ETHTOOL_A_BITSET_VALUE])
+			return -EINVAL;
+	}
+
+	*compact = !verbose;
+	return 0;
+}
+
+/**
+ * ethnl_name_to_idx() - find the index of a name in a string array
+ * @names:   array of ETH_GSTRING_LEN sized strings
+ * @n_names: number of strings in the array
+ * @name:    name to look up
+ *
+ * Return: index of the string if found, -ENOENT if not found
+ */
+static int ethnl_name_to_idx(ethnl_string_array_t names, unsigned int n_names,
+			     const char *name)
+{
+	unsigned int idx;
+
+	if (!names)
+		return -ENOENT;
+	/* a name longer than an array entry can never match one */
+	if (strlen(name) > ETH_GSTRING_LEN)
+		return -ENOENT;
+
+	for (idx = 0; idx < n_names; idx++) {
+		/* names[idx] may not be null terminated */
+		if (strncmp(names[idx], name, ETH_GSTRING_LEN) == 0)
+			return idx;
+	}
+
+	return -ENOENT;
+}
+
+/* Parse one bit nest of a verbose bitset.
+ *
+ * The bit is identified by its index if ETHTOOL_A_BITSET_BIT_INDEX is
+ * present (a name given in addition must match names[index] when @names is
+ * provided), otherwise by looking up ETHTOOL_A_BITSET_BIT_NAME in @names.
+ * On success the bit index is stored in *@index and *@val is set to true if
+ * @no_mask is set or the ETHTOOL_A_BITSET_BIT_VALUE flag is present.
+ *
+ * Return: 0 on success; -EOPNOTSUPP if the index is not below @nbits or the
+ * name is not found; -EINVAL if index and name disagree or neither is given;
+ * or the error returned by nla_parse_nested().
+ */
+static int ethnl_parse_bit(unsigned int *index, bool *val, unsigned int nbits,
+ const struct nlattr *bit_attr, bool no_mask,
+ ethnl_string_array_t names,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[ETHTOOL_A_BITSET_BIT_MAX + 1];
+ int ret, idx;
+
+ ret = nla_parse_nested(tb, ETHTOOL_A_BITSET_BIT_MAX, bit_attr,
+ bit_policy, extack);
+ if (ret < 0)
+ return ret;
+
+ if (tb[ETHTOOL_A_BITSET_BIT_INDEX]) {
+ const char *name;
+
+ idx = nla_get_u32(tb[ETHTOOL_A_BITSET_BIT_INDEX]);
+ if (idx >= nbits) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ tb[ETHTOOL_A_BITSET_BIT_INDEX],
+ "bit index too high");
+ return -EOPNOTSUPP;
+ }
+ name = names ? names[idx] : NULL;
+ if (tb[ETHTOOL_A_BITSET_BIT_NAME] && name &&
+ strncmp(nla_data(tb[ETHTOOL_A_BITSET_BIT_NAME]), name,
+ nla_len(tb[ETHTOOL_A_BITSET_BIT_NAME]))) {
+ NL_SET_ERR_MSG_ATTR(extack, bit_attr,
+ "bit index and name mismatch");
+ return -EINVAL;
+ }
+ } else if (tb[ETHTOOL_A_BITSET_BIT_NAME]) {
+ idx = ethnl_name_to_idx(names, nbits,
+ nla_data(tb[ETHTOOL_A_BITSET_BIT_NAME]));
+ if (idx < 0) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ tb[ETHTOOL_A_BITSET_BIT_NAME],
+ "bit name not found");
+ return -EOPNOTSUPP;
+ }
+ } else {
+ NL_SET_ERR_MSG_ATTR(extack, bit_attr,
+ "neither bit index nor name specified");
+ return -EINVAL;
+ }
+
+ *index = idx;
+ *val = no_mask || tb[ETHTOOL_A_BITSET_BIT_VALUE];
+ return 0;
+}
+
+/* Apply a verbose (bit by bit) bitset nest to a u32 based bitmap.
+ *
+ * With ETHTOOL_A_BITSET_NOMASK the bitmap is replaced by the listed bits:
+ * it is cleared first and the listed bits are then set. Otherwise each
+ * listed bit is set or cleared according to its value flag.
+ *
+ * *@mod is set to true only if the resulting bitmap differs from the
+ * original one. In the no_mask case this cannot be decided bit by bit (the
+ * initial clear may be undone by the list), so the original bitmap is saved
+ * and compared with the result at the end.
+ *
+ * Return: 0 on success, -ENOMEM if the saved copy cannot be allocated,
+ * -EINVAL on malformed nest or the error returned by ethnl_parse_bit().
+ */
+static int
+ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits,
+			      const struct nlattr *attr, struct nlattr **tb,
+			      ethnl_string_array_t names,
+			      struct netlink_ext_ack *extack, bool *mod)
+{
+	unsigned int nwords = DIV_ROUND_UP(nbits, 32);
+	u32 *saved_bitmap = NULL;
+	struct nlattr *bit_attr;
+	bool no_mask;
+	int ret = 0;
+	int rem;
+
+	if (tb[ETHTOOL_A_BITSET_VALUE]) {
+		NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_VALUE],
+				    "value only allowed in compact bitset");
+		return -EINVAL;
+	}
+	if (tb[ETHTOOL_A_BITSET_MASK]) {
+		NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_MASK],
+				    "mask only allowed in compact bitset");
+		return -EINVAL;
+	}
+
+	no_mask = tb[ETHTOOL_A_BITSET_NOMASK];
+	if (no_mask) {
+		bool cleared = false;
+
+		saved_bitmap = kmalloc_array(nwords, sizeof(u32), GFP_KERNEL);
+		if (!saved_bitmap)
+			return -ENOMEM;
+		memcpy(saved_bitmap, bitmap, nwords * sizeof(u32));
+		/* do not report the clear itself as a modification */
+		ethnl_bitmap32_clear(bitmap, 0, nbits, &cleared);
+	}
+
+	nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) {
+		bool old_val, new_val;
+		unsigned int idx;
+
+		if (nla_type(bit_attr) != ETHTOOL_A_BITSET_BITS_BIT) {
+			NL_SET_ERR_MSG_ATTR(extack, bit_attr,
+					    "only ETHTOOL_A_BITSET_BITS_BIT allowed in ETHTOOL_A_BITSET_BITS");
+			ret = -EINVAL;
+			goto out;
+		}
+		ret = ethnl_parse_bit(&idx, &new_val, nbits, bit_attr, no_mask,
+				      names, extack);
+		if (ret < 0)
+			goto out;
+		old_val = bitmap[idx / 32] & ((u32)1 << (idx % 32));
+		if (new_val != old_val) {
+			if (new_val)
+				bitmap[idx / 32] |= ((u32)1 << (idx % 32));
+			else
+				bitmap[idx / 32] &= ~((u32)1 << (idx % 32));
+			if (!no_mask)
+				*mod = true;
+		}
+	}
+	ret = 0;
+
+	/* Bits past nbits are never touched above, so comparing whole words
+	 * compares exactly the first nbits bits.
+	 */
+	if (no_mask && memcmp(saved_bitmap, bitmap, nwords * sizeof(u32)))
+		*mod = true;
+
+out:
+	kfree(saved_bitmap);
+	return ret;
+}
+
+/* Validate a parsed compact bitset nest against a kernel bitmap of @nbits
+ * bits: size and value must be present, mask must be present exactly when
+ * ETHTOOL_A_BITSET_NOMASK is not, value and mask lengths must match the
+ * size attribute rounded up to whole u32 words, and if the bitset is longer
+ * than @nbits, no bit past @nbits may be set in the mask (or in the value
+ * for a bitset without mask).
+ *
+ * Return: 0 if the nest is acceptable, -EINVAL (with extack set) otherwise
+ */
+static int ethnl_compact_sanity_checks(unsigned int nbits,
+ const struct nlattr *nest,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ bool no_mask = tb[ETHTOOL_A_BITSET_NOMASK];
+ unsigned int attr_nbits, attr_nwords;
+ const struct nlattr *test_attr;
+
+ if (no_mask && tb[ETHTOOL_A_BITSET_MASK]) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_MASK],
+ "mask not allowed in list bitset");
+ return -EINVAL;
+ }
+ if (!tb[ETHTOOL_A_BITSET_SIZE]) {
+ NL_SET_ERR_MSG_ATTR(extack, nest,
+ "missing size in compact bitset");
+ return -EINVAL;
+ }
+ if (!tb[ETHTOOL_A_BITSET_VALUE]) {
+ NL_SET_ERR_MSG_ATTR(extack, nest,
+ "missing value in compact bitset");
+ return -EINVAL;
+ }
+ if (!no_mask && !tb[ETHTOOL_A_BITSET_MASK]) {
+ NL_SET_ERR_MSG_ATTR(extack, nest,
+ "missing mask in compact nonlist bitset");
+ return -EINVAL;
+ }
+
+ attr_nbits = nla_get_u32(tb[ETHTOOL_A_BITSET_SIZE]);
+ attr_nwords = DIV_ROUND_UP(attr_nbits, 32);
+ if (nla_len(tb[ETHTOOL_A_BITSET_VALUE]) != attr_nwords * sizeof(u32)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_VALUE],
+ "bitset value length does not match size");
+ return -EINVAL;
+ }
+ if (tb[ETHTOOL_A_BITSET_MASK] &&
+ nla_len(tb[ETHTOOL_A_BITSET_MASK]) != attr_nwords * sizeof(u32)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_MASK],
+ "bitset mask length does not match size");
+ return -EINVAL;
+ }
+ /* nothing past the kernel bitmap to check */
+ if (attr_nbits <= nbits)
+ return 0;
+
+ /* the bitmap selecting which bits get modified: value for a list,
+ * mask otherwise
+ */
+ test_attr = no_mask ? tb[ETHTOOL_A_BITSET_VALUE] :
+ tb[ETHTOOL_A_BITSET_MASK];
+ if (ethnl_bitmap32_not_zero(nla_data(test_attr), nbits, attr_nbits)) {
+ NL_SET_ERR_MSG_ATTR(extack, test_attr,
+ "cannot modify bits past kernel bitset size");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/**
+ * ethnl_update_bitset32() - Apply a bitset nest to a u32 based bitmap
+ * @bitmap: bitmap to update
+ * @nbits: size of the updated bitmap in bits
+ * @attr: nest attribute to parse and apply
+ * @names: array of bit names; may be null for compact format
+ * @extack: extack for error reporting
+ * @mod: set this to true if bitmap is modified, leave as it is if not
+ *
+ * Apply bitset nested attribute to a bitmap. If the attribute represents
+ * a bit list, @bitmap is set to its contents; otherwise, bits in mask are
+ * set to values from value. Bitmaps in the attribute may be longer than
+ * @nbits but the message must not request modifying any bits past @nbits.
+ * A null @attr is accepted and leaves @bitmap untouched.
+ *
+ * Return: negative error code on failure, 0 on success
+ */
+int ethnl_update_bitset32(u32 *bitmap, unsigned int nbits,
+ const struct nlattr *attr, ethnl_string_array_t names,
+ struct netlink_ext_ack *extack, bool *mod)
+{
+ struct nlattr *tb[ETHTOOL_A_BITSET_MAX + 1];
+ unsigned int change_bits;
+ bool no_mask;
+ int ret;
+
+ if (!attr)
+ return 0;
+ ret = nla_parse_nested(tb, ETHTOOL_A_BITSET_MAX, attr, bitset_policy,
+ extack);
+ if (ret < 0)
+ return ret;
+
+ /* presence of the bits nest selects the verbose format */
+ if (tb[ETHTOOL_A_BITSET_BITS])
+ return ethnl_update_bitset32_verbose(bitmap, nbits, attr, tb,
+ names, extack, mod);
+ ret = ethnl_compact_sanity_checks(nbits, attr, tb, extack);
+ if (ret < 0)
+ return ret;
+
+ no_mask = tb[ETHTOOL_A_BITSET_NOMASK];
+ change_bits = min_t(unsigned int,
+ nla_get_u32(tb[ETHTOOL_A_BITSET_SIZE]), nbits);
+ ethnl_bitmap32_update(bitmap, change_bits,
+ nla_data(tb[ETHTOOL_A_BITSET_VALUE]),
+ no_mask ? NULL :
+ nla_data(tb[ETHTOOL_A_BITSET_MASK]),
+ mod);
+ /* a list shorter than the bitmap implicitly clears the remaining bits */
+ if (no_mask && change_bits < nbits)
+ ethnl_bitmap32_clear(bitmap, change_bits, nbits, mod);
+
+ return 0;
+}
+
+#if BITS_PER_LONG == 64 && defined(__BIG_ENDIAN)
+
+/* 64-bit big endian architectures are the only case when u32 based bitmaps
+ * and unsigned long based bitmaps have different memory layout so that we
+ * cannot simply cast the latter to the former and need actual wrappers
+ * converting the latter to the former.
+ *
+ * To reduce the number of slab allocations, the wrappers use fixed size local
+ * variables for bitmaps up to ETHNL_SMALL_BITMAP_BITS bits which is the
+ * majority of bitmaps used by ethtool.
+ */
+#define ETHNL_SMALL_BITMAP_BITS 128
+#define ETHNL_SMALL_BITMAP_WORDS DIV_ROUND_UP(ETHNL_SMALL_BITMAP_BITS, 32)
+
+/* unsigned long based counterpart of ethnl_bitset32_size(): convert @val and
+ * @mask (if any) to temporary u32 bitmaps and pass them on. Bitmaps of up to
+ * ETHNL_SMALL_BITMAP_BITS bits use on-stack buffers, longer ones a single
+ * allocation holding both value and mask.
+ *
+ * Return: result of ethnl_bitset32_size() or -ENOMEM if allocation fails
+ */
+int ethnl_bitset_size(const unsigned long *val, const unsigned long *mask,
+ unsigned int nbits, ethnl_string_array_t names,
+ bool compact)
+{
+ u32 small_mask32[ETHNL_SMALL_BITMAP_WORDS];
+ u32 small_val32[ETHNL_SMALL_BITMAP_WORDS];
+ u32 *mask32;
+ u32 *val32;
+ int ret;
+
+ if (nbits > ETHNL_SMALL_BITMAP_BITS) {
+ unsigned int nwords = DIV_ROUND_UP(nbits, 32);
+
+ val32 = kmalloc_array(2 * nwords, sizeof(u32), GFP_KERNEL);
+ if (!val32)
+ return -ENOMEM;
+ /* mask shares the allocation, placed right after the value */
+ mask32 = val32 + nwords;
+ } else {
+ val32 = small_val32;
+ mask32 = small_mask32;
+ }
+
+ bitmap_to_arr32(val32, val, nbits);
+ if (mask)
+ bitmap_to_arr32(mask32, mask, nbits);
+ else
+ mask32 = NULL;
+ ret = ethnl_bitset32_size(val32, mask32, nbits, names, compact);
+
+ if (nbits > ETHNL_SMALL_BITMAP_BITS)
+ kfree(val32);
+
+ return ret;
+}
+
+/* unsigned long based counterpart of ethnl_put_bitset32(): convert @val and
+ * @mask (if any) to temporary u32 bitmaps and pass them on. Bitmaps of up to
+ * ETHNL_SMALL_BITMAP_BITS bits use on-stack buffers, longer ones a single
+ * allocation holding both value and mask.
+ *
+ * Return: result of ethnl_put_bitset32() or -ENOMEM if allocation fails
+ */
+int ethnl_put_bitset(struct sk_buff *skb, int attrtype,
+ const unsigned long *val, const unsigned long *mask,
+ unsigned int nbits, ethnl_string_array_t names,
+ bool compact)
+{
+ u32 small_mask32[ETHNL_SMALL_BITMAP_WORDS];
+ u32 small_val32[ETHNL_SMALL_BITMAP_WORDS];
+ u32 *mask32;
+ u32 *val32;
+ int ret;
+
+ if (nbits > ETHNL_SMALL_BITMAP_BITS) {
+ unsigned int nwords = DIV_ROUND_UP(nbits, 32);
+
+ val32 = kmalloc_array(2 * nwords, sizeof(u32), GFP_KERNEL);
+ if (!val32)
+ return -ENOMEM;
+ /* mask shares the allocation, placed right after the value */
+ mask32 = val32 + nwords;
+ } else {
+ val32 = small_val32;
+ mask32 = small_mask32;
+ }
+
+ bitmap_to_arr32(val32, val, nbits);
+ if (mask)
+ bitmap_to_arr32(mask32, mask, nbits);
+ else
+ mask32 = NULL;
+ ret = ethnl_put_bitset32(skb, attrtype, val32, mask32, nbits, names,
+ compact);
+
+ if (nbits > ETHNL_SMALL_BITMAP_BITS)
+ kfree(val32);
+
+ return ret;
+}
+
+/* unsigned long based counterpart of ethnl_update_bitset32(): the bitmap is
+ * converted to a temporary u32 bitmap, updated there, and converted back
+ * only if the u32 copy was modified (in which case *@mod is set to true).
+ *
+ * Return: result of ethnl_update_bitset32() or -ENOMEM if allocation fails
+ */
+int ethnl_update_bitset(unsigned long *bitmap, unsigned int nbits,
+ const struct nlattr *attr, ethnl_string_array_t names,
+ struct netlink_ext_ack *extack, bool *mod)
+{
+ u32 small_bitmap32[ETHNL_SMALL_BITMAP_WORDS];
+ u32 *bitmap32 = small_bitmap32;
+ bool u32_mod = false;
+ int ret;
+
+ if (nbits > ETHNL_SMALL_BITMAP_BITS) {
+ unsigned int dst_words = DIV_ROUND_UP(nbits, 32);
+
+ bitmap32 = kmalloc_array(dst_words, sizeof(u32), GFP_KERNEL);
+ if (!bitmap32)
+ return -ENOMEM;
+ }
+
+ bitmap_to_arr32(bitmap32, bitmap, nbits);
+ ret = ethnl_update_bitset32(bitmap32, nbits, attr, names, extack,
+ &u32_mod);
+ if (u32_mod) {
+ bitmap_from_arr32(bitmap, bitmap32, nbits);
+ *mod = true;
+ }
+
+ if (nbits > ETHNL_SMALL_BITMAP_BITS)
+ kfree(bitmap32);
+
+ return ret;
+}
+
+#else
+
+/* On little endian 64-bit and all 32-bit architectures, an unsigned long
+ * based bitmap can be interpreted as u32 based one using a simple cast.
+ */
+
+/* unsigned long based counterpart of ethnl_bitset32_size(); the bitmaps are
+ * passed through with a plain cast to u32 arrays.
+ */
+int ethnl_bitset_size(const unsigned long *val, const unsigned long *mask,
+ unsigned int nbits, ethnl_string_array_t names,
+ bool compact)
+{
+ return ethnl_bitset32_size((const u32 *)val, (const u32 *)mask, nbits,
+ names, compact);
+}
+
+/* unsigned long based counterpart of ethnl_put_bitset32(); the bitmaps are
+ * passed through with a plain cast to u32 arrays.
+ */
+int ethnl_put_bitset(struct sk_buff *skb, int attrtype,
+ const unsigned long *val, const unsigned long *mask,
+ unsigned int nbits, ethnl_string_array_t names,
+ bool compact)
+{
+ return ethnl_put_bitset32(skb, attrtype, (const u32 *)val,
+ (const u32 *)mask, nbits, names, compact);
+}
+
+/* unsigned long based counterpart of ethnl_update_bitset32(); the bitmap is
+ * updated in place through a plain cast to a u32 array.
+ */
+int ethnl_update_bitset(unsigned long *bitmap, unsigned int nbits,
+ const struct nlattr *attr, ethnl_string_array_t names,
+ struct netlink_ext_ack *extack, bool *mod)
+{
+ return ethnl_update_bitset32((u32 *)bitmap, nbits, attr, names, extack,
+ mod);
+}
+
+#endif /* BITS_PER_LONG == 64 && defined(__BIG_ENDIAN) */
diff --git a/net/ethtool/bitset.h b/net/ethtool/bitset.h
new file mode 100644
index 000000000000..b849f9d19676
--- /dev/null
+++ b/net/ethtool/bitset.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _NET_ETHTOOL_BITSET_H
+#define _NET_ETHTOOL_BITSET_H
+
+/* NOTE(review): presumably the largest bitset size (in bits) the bitset code
+ * accepts - confirm against bitset.c
+ */
+#define ETHNL_MAX_BITSET_SIZE S16_MAX
+
+/* Const pointer to an array of fixed size (ETH_GSTRING_LEN bytes) constant
+ * strings; used to pass the table of bit names to the helpers below.
+ */
+typedef const char (*const ethnl_string_array_t)[ETH_GSTRING_LEN];
+
+/* The size/put/update helpers exist in two flavours: the plain ones take
+ * unsigned long based bitmaps, the *32 ones take u32 based bitmaps.
+ */
+int ethnl_bitset_is_compact(const struct nlattr *bitset, bool *compact);
+int ethnl_bitset_size(const unsigned long *val, const unsigned long *mask,
+ unsigned int nbits, ethnl_string_array_t names,
+ bool compact);
+int ethnl_bitset32_size(const u32 *val, const u32 *mask, unsigned int nbits,
+ ethnl_string_array_t names, bool compact);
+int ethnl_put_bitset(struct sk_buff *skb, int attrtype,
+ const unsigned long *val, const unsigned long *mask,
+ unsigned int nbits, ethnl_string_array_t names,
+ bool compact);
+int ethnl_put_bitset32(struct sk_buff *skb, int attrtype, const u32 *val,
+ const u32 *mask, unsigned int nbits,
+ ethnl_string_array_t names, bool compact);
+int ethnl_update_bitset(unsigned long *bitmap, unsigned int nbits,
+ const struct nlattr *attr, ethnl_string_array_t names,
+ struct netlink_ext_ack *extack, bool *mod);
+int ethnl_update_bitset32(u32 *bitmap, unsigned int nbits,
+ const struct nlattr *attr, ethnl_string_array_t names,
+ struct netlink_ext_ack *extack, bool *mod);
+
+#endif /* _NET_ETHTOOL_BITSET_H */
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
new file mode 100644
index 000000000000..636ec6d5110e
--- /dev/null
+++ b/net/ethtool/common.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "common.h"
+
+/* Names of netdev feature bits, indexed by NETIF_F_*_BIT. Each entry is a
+ * fixed size ETH_GSTRING_LEN byte string.
+ */
+const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
+ [NETIF_F_SG_BIT] = "tx-scatter-gather",
+ [NETIF_F_IP_CSUM_BIT] = "tx-checksum-ipv4",
+ [NETIF_F_HW_CSUM_BIT] = "tx-checksum-ip-generic",
+ [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6",
+ [NETIF_F_HIGHDMA_BIT] = "highdma",
+ [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist",
+ [NETIF_F_HW_VLAN_CTAG_TX_BIT] = "tx-vlan-hw-insert",
+
+ [NETIF_F_HW_VLAN_CTAG_RX_BIT] = "rx-vlan-hw-parse",
+ [NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-filter",
+ [NETIF_F_HW_VLAN_STAG_TX_BIT] = "tx-vlan-stag-hw-insert",
+ [NETIF_F_HW_VLAN_STAG_RX_BIT] = "rx-vlan-stag-hw-parse",
+ [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter",
+ [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged",
+ [NETIF_F_GSO_BIT] = "tx-generic-segmentation",
+ [NETIF_F_LLTX_BIT] = "tx-lockless",
+ [NETIF_F_NETNS_LOCAL_BIT] = "netns-local",
+ [NETIF_F_GRO_BIT] = "rx-gro",
+ [NETIF_F_GRO_HW_BIT] = "rx-gro-hw",
+ [NETIF_F_LRO_BIT] = "rx-lro",
+
+ [NETIF_F_TSO_BIT] = "tx-tcp-segmentation",
+ [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust",
+ [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation",
+ [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation",
+ [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
+ [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
+ [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
+ [NETIF_F_GSO_GRE_CSUM_BIT] = "tx-gre-csum-segmentation",
+ [NETIF_F_GSO_IPXIP4_BIT] = "tx-ipxip4-segmentation",
+ [NETIF_F_GSO_IPXIP6_BIT] = "tx-ipxip6-segmentation",
+ [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
+ [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
+ [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
+ [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
+ [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation",
+ [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation",
+
+ [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
+ [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
+ [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu",
+ [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter",
+ [NETIF_F_RXHASH_BIT] = "rx-hashing",
+ [NETIF_F_RXCSUM_BIT] = "rx-checksum",
+ [NETIF_F_NOCACHE_COPY_BIT] = "tx-nocache-copy",
+ [NETIF_F_LOOPBACK_BIT] = "loopback",
+ [NETIF_F_RXFCS_BIT] = "rx-fcs",
+ [NETIF_F_RXALL_BIT] = "rx-all",
+ [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
+ [NETIF_F_HW_TC_BIT] = "hw-tc-offload",
+ [NETIF_F_HW_ESP_BIT] = "esp-hw-offload",
+ [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload",
+ [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload",
+ [NETIF_F_HW_TLS_RECORD_BIT] = "tls-hw-record",
+ [NETIF_F_HW_TLS_TX_BIT] = "tls-hw-tx-offload",
+ [NETIF_F_HW_TLS_RX_BIT] = "tls-hw-rx-offload",
+ [NETIF_F_GRO_FRAGLIST_BIT] = "rx-gro-list",
+};
+
+/* Names of RSS hash functions, indexed by ETH_RSS_HASH_*_BIT. */
+const char
+rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = {
+ [ETH_RSS_HASH_TOP_BIT] = "toeplitz",
+ [ETH_RSS_HASH_XOR_BIT] = "xor",
+ [ETH_RSS_HASH_CRC32_BIT] = "crc32",
+};
+
+/* Names of device tunables, indexed by tunable id (ETHTOOL_ID_UNSPEC,
+ * ETHTOOL_RX_COPYBREAK, ...).
+ */
+const char
+tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
+ [ETHTOOL_ID_UNSPEC] = "Unspec",
+ [ETHTOOL_RX_COPYBREAK] = "rx-copybreak",
+ [ETHTOOL_TX_COPYBREAK] = "tx-copybreak",
+ [ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout",
+};
+
+/* Names of PHY tunables, indexed by PHY tunable id (ETHTOOL_ID_UNSPEC,
+ * ETHTOOL_PHY_DOWNSHIFT, ...).
+ */
+const char
+phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
+ [ETHTOOL_ID_UNSPEC] = "Unspec",
+ [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift",
+ [ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down",
+ [ETHTOOL_PHY_EDPD] = "phy-energy-detect-power-down",
+};
+
+/* Helpers for building the link_mode_names[] table.
+ *
+ * __LINK_MODE_NAME() stringifies its arguments into "<speed>base<type>/<duplex>",
+ * e.g. (10, T, Half) becomes "10baseT/Half".
+ * __DEFINE_LINK_MODE_NAME() emits a designated initializer placing that name
+ * at index ETHTOOL_LINK_MODE(speed, type, duplex).
+ * __DEFINE_SPECIAL_MODE_NAME() emits a designated initializer placing an
+ * explicitly given name at index ETHTOOL_LINK_MODE_<_mode>_BIT.
+ */
+#define __LINK_MODE_NAME(speed, type, duplex) \
+ #speed "base" #type "/" #duplex
+#define __DEFINE_LINK_MODE_NAME(speed, type, duplex) \
+ [ETHTOOL_LINK_MODE(speed, type, duplex)] = \
+ __LINK_MODE_NAME(speed, type, duplex)
+#define __DEFINE_SPECIAL_MODE_NAME(_mode, _name) \
+ [ETHTOOL_LINK_MODE_ ## _mode ## _BIT] = _name
+
+/* Names of link mode bits, indexed by ETHTOOL_LINK_MODE_*_BIT. The array
+ * size is derived from the highest index initialized here; the static_assert
+ * below the table fails the build if that size differs from
+ * __ETHTOOL_LINK_MODE_MASK_NBITS.
+ */
+const char link_mode_names[][ETH_GSTRING_LEN] = {
+ __DEFINE_LINK_MODE_NAME(10, T, Half),
+ __DEFINE_LINK_MODE_NAME(10, T, Full),
+ __DEFINE_LINK_MODE_NAME(100, T, Half),
+ __DEFINE_LINK_MODE_NAME(100, T, Full),
+ __DEFINE_LINK_MODE_NAME(1000, T, Half),
+ __DEFINE_LINK_MODE_NAME(1000, T, Full),
+ __DEFINE_SPECIAL_MODE_NAME(Autoneg, "Autoneg"),
+ __DEFINE_SPECIAL_MODE_NAME(TP, "TP"),
+ __DEFINE_SPECIAL_MODE_NAME(AUI, "AUI"),
+ __DEFINE_SPECIAL_MODE_NAME(MII, "MII"),
+ __DEFINE_SPECIAL_MODE_NAME(FIBRE, "FIBRE"),
+ __DEFINE_SPECIAL_MODE_NAME(BNC, "BNC"),
+ __DEFINE_LINK_MODE_NAME(10000, T, Full),
+ __DEFINE_SPECIAL_MODE_NAME(Pause, "Pause"),
+ __DEFINE_SPECIAL_MODE_NAME(Asym_Pause, "Asym_Pause"),
+ __DEFINE_LINK_MODE_NAME(2500, X, Full),
+ __DEFINE_SPECIAL_MODE_NAME(Backplane, "Backplane"),
+ __DEFINE_LINK_MODE_NAME(1000, KX, Full),
+ __DEFINE_LINK_MODE_NAME(10000, KX4, Full),
+ __DEFINE_LINK_MODE_NAME(10000, KR, Full),
+ __DEFINE_SPECIAL_MODE_NAME(10000baseR_FEC, "10000baseR_FEC"),
+ __DEFINE_LINK_MODE_NAME(20000, MLD2, Full),
+ __DEFINE_LINK_MODE_NAME(20000, KR2, Full),
+ __DEFINE_LINK_MODE_NAME(40000, KR4, Full),
+ __DEFINE_LINK_MODE_NAME(40000, CR4, Full),
+ __DEFINE_LINK_MODE_NAME(40000, SR4, Full),
+ __DEFINE_LINK_MODE_NAME(40000, LR4, Full),
+ __DEFINE_LINK_MODE_NAME(56000, KR4, Full),
+ __DEFINE_LINK_MODE_NAME(56000, CR4, Full),
+ __DEFINE_LINK_MODE_NAME(56000, SR4, Full),
+ __DEFINE_LINK_MODE_NAME(56000, LR4, Full),
+ __DEFINE_LINK_MODE_NAME(25000, CR, Full),
+ __DEFINE_LINK_MODE_NAME(25000, KR, Full),
+ __DEFINE_LINK_MODE_NAME(25000, SR, Full),
+ __DEFINE_LINK_MODE_NAME(50000, CR2, Full),
+ __DEFINE_LINK_MODE_NAME(50000, KR2, Full),
+ __DEFINE_LINK_MODE_NAME(100000, KR4, Full),
+ __DEFINE_LINK_MODE_NAME(100000, SR4, Full),
+ __DEFINE_LINK_MODE_NAME(100000, CR4, Full),
+ __DEFINE_LINK_MODE_NAME(100000, LR4_ER4, Full),
+ __DEFINE_LINK_MODE_NAME(50000, SR2, Full),
+ __DEFINE_LINK_MODE_NAME(1000, X, Full),
+ __DEFINE_LINK_MODE_NAME(10000, CR, Full),
+ __DEFINE_LINK_MODE_NAME(10000, SR, Full),
+ __DEFINE_LINK_MODE_NAME(10000, LR, Full),
+ __DEFINE_LINK_MODE_NAME(10000, LRM, Full),
+ __DEFINE_LINK_MODE_NAME(10000, ER, Full),
+ __DEFINE_LINK_MODE_NAME(2500, T, Full),
+ __DEFINE_LINK_MODE_NAME(5000, T, Full),
+ __DEFINE_SPECIAL_MODE_NAME(FEC_NONE, "None"),
+ __DEFINE_SPECIAL_MODE_NAME(FEC_RS, "RS"),
+ __DEFINE_SPECIAL_MODE_NAME(FEC_BASER, "BASER"),
+ __DEFINE_LINK_MODE_NAME(50000, KR, Full),
+ __DEFINE_LINK_MODE_NAME(50000, SR, Full),
+ __DEFINE_LINK_MODE_NAME(50000, CR, Full),
+ __DEFINE_LINK_MODE_NAME(50000, LR_ER_FR, Full),
+ __DEFINE_LINK_MODE_NAME(50000, DR, Full),
+ __DEFINE_LINK_MODE_NAME(100000, KR2, Full),
+ __DEFINE_LINK_MODE_NAME(100000, SR2, Full),
+ __DEFINE_LINK_MODE_NAME(100000, CR2, Full),
+ __DEFINE_LINK_MODE_NAME(100000, LR2_ER2_FR2, Full),
+ __DEFINE_LINK_MODE_NAME(100000, DR2, Full),
+ __DEFINE_LINK_MODE_NAME(200000, KR4, Full),
+ __DEFINE_LINK_MODE_NAME(200000, SR4, Full),
+ __DEFINE_LINK_MODE_NAME(200000, LR4_ER4_FR4, Full),
+ __DEFINE_LINK_MODE_NAME(200000, DR4, Full),
+ __DEFINE_LINK_MODE_NAME(200000, CR4, Full),
+ __DEFINE_LINK_MODE_NAME(100, T1, Full),
+ __DEFINE_LINK_MODE_NAME(1000, T1, Full),
+ __DEFINE_LINK_MODE_NAME(400000, KR8, Full),
+ __DEFINE_LINK_MODE_NAME(400000, SR8, Full),
+ __DEFINE_LINK_MODE_NAME(400000, LR8_ER8_FR8, Full),
+ __DEFINE_LINK_MODE_NAME(400000, DR8, Full),
+ __DEFINE_LINK_MODE_NAME(400000, CR8, Full),
+};
+static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+/* Names of message level classes, indexed by NETIF_MSG_*_BIT. The
+ * static_assert below the table fails the build if the array size differs
+ * from NETIF_MSG_CLASS_COUNT.
+ */
+const char netif_msg_class_names[][ETH_GSTRING_LEN] = {
+ [NETIF_MSG_DRV_BIT] = "drv",
+ [NETIF_MSG_PROBE_BIT] = "probe",
+ [NETIF_MSG_LINK_BIT] = "link",
+ [NETIF_MSG_TIMER_BIT] = "timer",
+ [NETIF_MSG_IFDOWN_BIT] = "ifdown",
+ [NETIF_MSG_IFUP_BIT] = "ifup",
+ [NETIF_MSG_RX_ERR_BIT] = "rx_err",
+ [NETIF_MSG_TX_ERR_BIT] = "tx_err",
+ [NETIF_MSG_TX_QUEUED_BIT] = "tx_queued",
+ [NETIF_MSG_INTR_BIT] = "intr",
+ [NETIF_MSG_TX_DONE_BIT] = "tx_done",
+ [NETIF_MSG_RX_STATUS_BIT] = "rx_status",
+ [NETIF_MSG_PKTDATA_BIT] = "pktdata",
+ [NETIF_MSG_HW_BIT] = "hw",
+ [NETIF_MSG_WOL_BIT] = "wol",
+};
+static_assert(ARRAY_SIZE(netif_msg_class_names) == NETIF_MSG_CLASS_COUNT);
+
+/* Names of wake-on-LAN modes. WAKE_* constants are flag values, so the
+ * index of each name is the bit position of its flag (const_ilog2()). The
+ * static_assert below the table fails the build if the array size differs
+ * from WOL_MODE_COUNT.
+ */
+const char wol_mode_names[][ETH_GSTRING_LEN] = {
+ [const_ilog2(WAKE_PHY)] = "phy",
+ [const_ilog2(WAKE_UCAST)] = "ucast",
+ [const_ilog2(WAKE_MCAST)] = "mcast",
+ [const_ilog2(WAKE_BCAST)] = "bcast",
+ [const_ilog2(WAKE_ARP)] = "arp",
+ [const_ilog2(WAKE_MAGIC)] = "magic",
+ [const_ilog2(WAKE_MAGICSECURE)] = "magicsecure",
+ [const_ilog2(WAKE_FILTER)] = "filter",
+};
+static_assert(ARRAY_SIZE(wol_mode_names) == WOL_MODE_COUNT);
+
+/* Fill @link_ksettings from the legacy struct ethtool_cmd in
+ * @legacy_settings. @link_ksettings is zeroed first and then always
+ * populated; the return value is false if the deprecated maxtxpkt/maxrxpkt
+ * fields of @legacy_settings are non-zero and true otherwise.
+ */
+bool
+convert_legacy_settings_to_link_ksettings(
+	struct ethtool_link_ksettings *link_ksettings,
+	const struct ethtool_cmd *legacy_settings)
+{
+	struct ethtool_link_settings *base = &link_ksettings->base;
+
+	memset(link_ksettings, 0, sizeof(*link_ksettings));
+
+	/* legacy u32 masks -> link mode bitmaps */
+	ethtool_convert_legacy_u32_to_link_mode(
+		link_ksettings->link_modes.supported,
+		legacy_settings->supported);
+	ethtool_convert_legacy_u32_to_link_mode(
+		link_ksettings->link_modes.advertising,
+		legacy_settings->advertising);
+	ethtool_convert_legacy_u32_to_link_mode(
+		link_ksettings->link_modes.lp_advertising,
+		legacy_settings->lp_advertising);
+
+	/* scalar settings */
+	base->speed = ethtool_cmd_speed(legacy_settings);
+	base->duplex = legacy_settings->duplex;
+	base->port = legacy_settings->port;
+	base->phy_address = legacy_settings->phy_address;
+	base->autoneg = legacy_settings->autoneg;
+	base->mdio_support = legacy_settings->mdio_support;
+	base->eth_tp_mdix = legacy_settings->eth_tp_mdix;
+	base->eth_tp_mdix_ctrl = legacy_settings->eth_tp_mdix_ctrl;
+
+	/* This is used to tell users that driver is still using these
+	 * deprecated legacy fields, and they should not use
+	 * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS
+	 */
+	return !(legacy_settings->maxtxpkt || legacy_settings->maxrxpkt);
+}
+
+/* Query link state of @dev. Returns -EOPNOTSUPP if the device has no
+ * ->get_link() callback; otherwise 1 if the device is running and
+ * ->get_link() reports a non-zero value, 0 if not.
+ */
+int __ethtool_get_link(struct net_device *dev)
+{
+	if (dev->ethtool_ops->get_link)
+		return netif_running(dev) && dev->ethtool_ops->get_link(dev);
+
+	return -EOPNOTSUPP;
+}
diff --git a/net/ethtool/common.h b/net/ethtool/common.h
new file mode 100644
index 000000000000..40ba74e0b9bb
--- /dev/null
+++ b/net/ethtool/common.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ETHTOOL_COMMON_H
+#define _ETHTOOL_COMMON_H
+
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+
+/* compose link mode index from speed, type and duplex, e.g.
+ * ETHTOOL_LINK_MODE(10, T, Half) expands to ETHTOOL_LINK_MODE_10baseT_Half_BIT
+ */
+#define ETHTOOL_LINK_MODE(speed, type, duplex) \
+ ETHTOOL_LINK_MODE_ ## speed ## base ## type ## _ ## duplex ## _BIT
+
+/* String tables defined in common.c; every entry is a fixed size
+ * ETH_GSTRING_LEN byte string.
+ */
+extern const char
+netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN];
+extern const char
+rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN];
+extern const char
+tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN];
+extern const char
+phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN];
+/* The tables below are declared with an incomplete type, so ARRAY_SIZE()
+ * cannot be applied to them by code that only sees this declaration.
+ */
+extern const char link_mode_names[][ETH_GSTRING_LEN];
+extern const char netif_msg_class_names[][ETH_GSTRING_LEN];
+extern const char wol_mode_names[][ETH_GSTRING_LEN];
+
+/* Returns -EOPNOTSUPP without a ->get_link() callback, else 0 or 1. */
+int __ethtool_get_link(struct net_device *dev);
+
+/* Returns false if the legacy maxtxpkt/maxrxpkt fields were non-zero. */
+bool convert_legacy_settings_to_link_ksettings(
+ struct ethtool_link_ksettings *link_ksettings,
+ const struct ethtool_cmd *legacy_settings);
+
+#endif /* _ETHTOOL_COMMON_H */
diff --git a/net/ethtool/debug.c b/net/ethtool/debug.c
new file mode 100644
index 000000000000..aaef4843e6ba
--- /dev/null
+++ b/net/ethtool/debug.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "netlink.h"
+#include "common.h"
+#include "bitset.h"
+
+/* Request state of DEBUG_GET; nothing beyond the common request info. */
+struct debug_req_info {
+ struct ethnl_req_info base;
+};
+
+/* Reply state of DEBUG_GET. */
+struct debug_reply_data {
+ struct ethnl_reply_data base;
+ /* value returned by ->get_msglevel(), see debug_prepare_data() */
+ u32 msg_mask;
+};
+
+/* Get the enclosing struct debug_reply_data from a pointer to its base. */
+#define DEBUG_REPDATA(__reply_base) \
+ container_of(__reply_base, struct debug_reply_data, base)
+
+/* Attribute policy of DEBUG_GET requests: only the nested request header is
+ * accepted, ETHTOOL_A_DEBUG_MSGMASK is rejected.
+ */
+static const struct nla_policy
+debug_get_policy[ETHTOOL_A_DEBUG_MAX + 1] = {
+ [ETHTOOL_A_DEBUG_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_DEBUG_HEADER] = { .type = NLA_NESTED },
+ [ETHTOOL_A_DEBUG_MSGMASK] = { .type = NLA_REJECT },
+};
+
+/* Read the device message level into the reply data.
+ *
+ * Returns -EOPNOTSUPP if the device has no ->get_msglevel() callback, the
+ * error returned by ethnl_ops_begin() if that fails, 0 otherwise.
+ */
+static int debug_prepare_data(const struct ethnl_req_info *req_base,
+			      struct ethnl_reply_data *reply_base,
+			      struct genl_info *info)
+{
+	struct net_device *dev = reply_base->dev;
+	struct debug_reply_data *reply;
+	int err;
+
+	if (!dev->ethtool_ops->get_msglevel)
+		return -EOPNOTSUPP;
+
+	err = ethnl_ops_begin(dev);
+	if (err < 0)
+		return err;
+
+	reply = DEBUG_REPDATA(reply_base);
+	reply->msg_mask = dev->ethtool_ops->get_msglevel(dev);
+	ethnl_ops_complete(dev);
+
+	return 0;
+}
+
+/* Size of the DEBUG_GET reply payload: one bitset built from msg_mask, in
+ * compact form if the request has ETHTOOL_FLAG_COMPACT_BITSETS set.
+ */
+static int debug_reply_size(const struct ethnl_req_info *req_base,
+			    const struct ethnl_reply_data *reply_base)
+{
+	const bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
+	const struct debug_reply_data *reply = DEBUG_REPDATA(reply_base);
+	const u32 *msg_mask = &reply->msg_mask;
+
+	return ethnl_bitset32_size(msg_mask, NULL, NETIF_MSG_CLASS_COUNT,
+				   netif_msg_class_names, compact);
+}
+
+/* Put msg_mask into @skb as an ETHTOOL_A_DEBUG_MSGMASK bitset, in compact
+ * form if the request has ETHTOOL_FLAG_COMPACT_BITSETS set. Returns the
+ * result of ethnl_put_bitset32().
+ */
+static int debug_fill_reply(struct sk_buff *skb,
+			    const struct ethnl_req_info *req_base,
+			    const struct ethnl_reply_data *reply_base)
+{
+	const bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
+	const struct debug_reply_data *reply = DEBUG_REPDATA(reply_base);
+	const u32 *msg_mask = &reply->msg_mask;
+
+	return ethnl_put_bitset32(skb, ETHTOOL_A_DEBUG_MSGMASK, msg_mask,
+				  NULL, NETIF_MSG_CLASS_COUNT,
+				  netif_msg_class_names, compact);
+}
+
+/* Description of the DEBUG_GET request: message and attribute ids, sizes of
+ * the per-request structures, the request policy and the callbacks defined
+ * above.
+ */
+const struct ethnl_request_ops ethnl_debug_request_ops = {
+ .request_cmd = ETHTOOL_MSG_DEBUG_GET,
+ .reply_cmd = ETHTOOL_MSG_DEBUG_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_DEBUG_HEADER,
+ .max_attr = ETHTOOL_A_DEBUG_MAX,
+ .req_info_size = sizeof(struct debug_req_info),
+ .reply_data_size = sizeof(struct debug_reply_data),
+ .request_policy = debug_get_policy,
+
+ .prepare_data = debug_prepare_data,
+ .reply_size = debug_reply_size,
+ .fill_reply = debug_fill_reply,
+};
+
+/* DEBUG_SET */
+
+/* Attribute policy of DEBUG_SET requests: the nested request header and a
+ * nested ETHTOOL_A_DEBUG_MSGMASK are accepted.
+ */
+static const struct nla_policy
+debug_set_policy[ETHTOOL_A_DEBUG_MAX + 1] = {
+ [ETHTOOL_A_DEBUG_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_DEBUG_HEADER] = { .type = NLA_NESTED },
+ [ETHTOOL_A_DEBUG_MSGMASK] = { .type = NLA_NESTED },
+};
+
+/* DEBUG_SET request handler: apply the ETHTOOL_A_DEBUG_MSGMASK bitset from
+ * the request to the current device message level and, if the value changed,
+ * pass it to ->set_msglevel() and send an ETHTOOL_MSG_DEBUG_NTF notification.
+ *
+ * Returns a negative error code if parsing fails, the device lacks the
+ * ->get_msglevel()/->set_msglevel() callbacks (-EOPNOTSUPP) or
+ * ethnl_ops_begin() fails; otherwise the result of ethnl_update_bitset32().
+ *
+ * Once ethnl_parse_header() has succeeded, req_info.dev is released with
+ * dev_put() on every exit path, including the -EOPNOTSUPP one.
+ */
+int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *tb[ETHTOOL_A_DEBUG_MAX + 1];
+	struct ethnl_req_info req_info = {};
+	struct net_device *dev;
+	bool mod = false;
+	u32 msg_mask;
+	int ret;
+
+	ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb,
+			  ETHTOOL_A_DEBUG_MAX, debug_set_policy,
+			  info->extack);
+	if (ret < 0)
+		return ret;
+	ret = ethnl_parse_header(&req_info, tb[ETHTOOL_A_DEBUG_HEADER],
+				 genl_info_net(info), info->extack, true);
+	if (ret < 0)
+		return ret;
+	dev = req_info.dev;
+	/* from here on, the device must be released before returning */
+	ret = -EOPNOTSUPP;
+	if (!dev->ethtool_ops->get_msglevel || !dev->ethtool_ops->set_msglevel)
+		goto out_dev;
+
+	rtnl_lock();
+	ret = ethnl_ops_begin(dev);
+	if (ret < 0)
+		goto out_rtnl;
+
+	msg_mask = dev->ethtool_ops->get_msglevel(dev);
+	ret = ethnl_update_bitset32(&msg_mask, NETIF_MSG_CLASS_COUNT,
+				    tb[ETHTOOL_A_DEBUG_MSGMASK],
+				    netif_msg_class_names, info->extack, &mod);
+	/* nothing to do on error or if the mask did not change */
+	if (ret < 0 || !mod)
+		goto out_ops;
+
+	dev->ethtool_ops->set_msglevel(dev, msg_mask);
+	ethtool_notify(dev, ETHTOOL_MSG_DEBUG_NTF, NULL);
+
+out_ops:
+	ethnl_ops_complete(dev);
+out_rtnl:
+	rtnl_unlock();
+out_dev:
+	dev_put(dev);
+	return ret;
+}
diff --git a/net/core/ethtool.c b/net/ethtool/ioctl.c
index cd9bc67381b2..b987052d91ef 100644
--- a/net/core/ethtool.c
+++ b/net/ethtool/ioctl.c
@@ -17,6 +17,7 @@
#include <linux/phy.h>
#include <linux/bitops.h>
#include <linux/uaccess.h>
+#include <linux/vermagic.h>
#include <linux/vmalloc.h>
#include <linux/sfp.h>
#include <linux/slab.h>
@@ -26,6 +27,9 @@
#include <net/devlink.h>
#include <net/xdp_sock.h>
#include <net/flow_offload.h>
+#include <linux/ethtool_netlink.h>
+
+#include "common.h"
/*
* Some useful ethtool_ops methods that're device independent.
@@ -54,88 +58,6 @@ EXPORT_SYMBOL(ethtool_op_get_ts_info);
#define ETHTOOL_DEV_FEATURE_WORDS ((NETDEV_FEATURE_COUNT + 31) / 32)
-static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
- [NETIF_F_SG_BIT] = "tx-scatter-gather",
- [NETIF_F_IP_CSUM_BIT] = "tx-checksum-ipv4",
- [NETIF_F_HW_CSUM_BIT] = "tx-checksum-ip-generic",
- [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6",
- [NETIF_F_HIGHDMA_BIT] = "highdma",
- [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist",
- [NETIF_F_HW_VLAN_CTAG_TX_BIT] = "tx-vlan-hw-insert",
-
- [NETIF_F_HW_VLAN_CTAG_RX_BIT] = "rx-vlan-hw-parse",
- [NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-filter",
- [NETIF_F_HW_VLAN_STAG_TX_BIT] = "tx-vlan-stag-hw-insert",
- [NETIF_F_HW_VLAN_STAG_RX_BIT] = "rx-vlan-stag-hw-parse",
- [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter",
- [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged",
- [NETIF_F_GSO_BIT] = "tx-generic-segmentation",
- [NETIF_F_LLTX_BIT] = "tx-lockless",
- [NETIF_F_NETNS_LOCAL_BIT] = "netns-local",
- [NETIF_F_GRO_BIT] = "rx-gro",
- [NETIF_F_GRO_HW_BIT] = "rx-gro-hw",
- [NETIF_F_LRO_BIT] = "rx-lro",
-
- [NETIF_F_TSO_BIT] = "tx-tcp-segmentation",
- [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust",
- [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation",
- [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation",
- [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
- [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
- [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
- [NETIF_F_GSO_GRE_CSUM_BIT] = "tx-gre-csum-segmentation",
- [NETIF_F_GSO_IPXIP4_BIT] = "tx-ipxip4-segmentation",
- [NETIF_F_GSO_IPXIP6_BIT] = "tx-ipxip6-segmentation",
- [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
- [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
- [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
- [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
- [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation",
- [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation",
-
- [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
- [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
- [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu",
- [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter",
- [NETIF_F_RXHASH_BIT] = "rx-hashing",
- [NETIF_F_RXCSUM_BIT] = "rx-checksum",
- [NETIF_F_NOCACHE_COPY_BIT] = "tx-nocache-copy",
- [NETIF_F_LOOPBACK_BIT] = "loopback",
- [NETIF_F_RXFCS_BIT] = "rx-fcs",
- [NETIF_F_RXALL_BIT] = "rx-all",
- [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
- [NETIF_F_HW_TC_BIT] = "hw-tc-offload",
- [NETIF_F_HW_ESP_BIT] = "esp-hw-offload",
- [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload",
- [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload",
- [NETIF_F_HW_TLS_RECORD_BIT] = "tls-hw-record",
- [NETIF_F_HW_TLS_TX_BIT] = "tls-hw-tx-offload",
- [NETIF_F_HW_TLS_RX_BIT] = "tls-hw-rx-offload",
-};
-
-static const char
-rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = {
- [ETH_RSS_HASH_TOP_BIT] = "toeplitz",
- [ETH_RSS_HASH_XOR_BIT] = "xor",
- [ETH_RSS_HASH_CRC32_BIT] = "crc32",
-};
-
-static const char
-tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
- [ETHTOOL_ID_UNSPEC] = "Unspec",
- [ETHTOOL_RX_COPYBREAK] = "rx-copybreak",
- [ETHTOOL_TX_COPYBREAK] = "tx-copybreak",
- [ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout",
-};
-
-static const char
-phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
- [ETHTOOL_ID_UNSPEC] = "Unspec",
- [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift",
- [ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down",
- [ETHTOOL_PHY_EDPD] = "phy-energy-detect-power-down",
-};
-
static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
{
struct ethtool_gfeatures cmd = {
@@ -234,6 +156,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
!ops->get_ethtool_phy_stats)
return phy_ethtool_get_sset_count(dev->phydev);
+ if (sset == ETH_SS_LINK_MODES)
+ return __ETHTOOL_LINK_MODE_MASK_NBITS;
+
if (ops->get_sset_count && ops->get_strings)
return ops->get_sset_count(dev, sset);
else
@@ -258,6 +183,9 @@ static void __ethtool_get_strings(struct net_device *dev,
else if (stringset == ETH_SS_PHY_STATS && dev->phydev &&
!ops->get_ethtool_phy_stats)
phy_ethtool_get_strings(dev->phydev, data);
+ else if (stringset == ETH_SS_LINK_MODES)
+ memcpy(data, link_mode_names,
+ __ETHTOOL_LINK_MODE_MASK_NBITS * ETH_GSTRING_LEN);
else
/* ops->get_strings is valid because checked earlier */
ops->get_strings(dev, stringset, data);
@@ -432,54 +360,6 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
}
EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32);
-/* return false if legacy contained non-0 deprecated fields
- * maxtxpkt/maxrxpkt. rest of ksettings always updated
- */
-static bool
-convert_legacy_settings_to_link_ksettings(
- struct ethtool_link_ksettings *link_ksettings,
- const struct ethtool_cmd *legacy_settings)
-{
- bool retval = true;
-
- memset(link_ksettings, 0, sizeof(*link_ksettings));
-
- /* This is used to tell users that driver is still using these
- * deprecated legacy fields, and they should not use
- * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS
- */
- if (legacy_settings->maxtxpkt ||
- legacy_settings->maxrxpkt)
- retval = false;
-
- ethtool_convert_legacy_u32_to_link_mode(
- link_ksettings->link_modes.supported,
- legacy_settings->supported);
- ethtool_convert_legacy_u32_to_link_mode(
- link_ksettings->link_modes.advertising,
- legacy_settings->advertising);
- ethtool_convert_legacy_u32_to_link_mode(
- link_ksettings->link_modes.lp_advertising,
- legacy_settings->lp_advertising);
- link_ksettings->base.speed
- = ethtool_cmd_speed(legacy_settings);
- link_ksettings->base.duplex
- = legacy_settings->duplex;
- link_ksettings->base.port
- = legacy_settings->port;
- link_ksettings->base.phy_address
- = legacy_settings->phy_address;
- link_ksettings->base.autoneg
- = legacy_settings->autoneg;
- link_ksettings->base.mdio_support
- = legacy_settings->mdio_support;
- link_ksettings->base.eth_tp_mdix
- = legacy_settings->eth_tp_mdix;
- link_ksettings->base.eth_tp_mdix_ctrl
- = legacy_settings->eth_tp_mdix_ctrl;
- return retval;
-}
-
/* return false if ksettings link modes had higher bits
* set. legacy_settings always updated (best effort)
*/
@@ -693,7 +573,12 @@ static int ethtool_set_link_ksettings(struct net_device *dev,
!= link_ksettings.base.link_mode_masks_nwords)
return -EINVAL;
- return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
+ err = dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
+ if (err >= 0) {
+ ethtool_notify(dev, ETHTOOL_MSG_LINKINFO_NTF, NULL);
+ ethtool_notify(dev, ETHTOOL_MSG_LINKMODES_NTF, NULL);
+ }
+ return err;
}
/* Query device for its ethtool_cmd settings.
@@ -742,6 +627,7 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
{
struct ethtool_link_ksettings link_ksettings;
struct ethtool_cmd cmd;
+ int ret;
ASSERT_RTNL();
@@ -754,7 +640,12 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
return -EINVAL;
link_ksettings.base.link_mode_masks_nwords =
__ETHTOOL_LINK_MODE_MASK_NU32;
- return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
+ ret = dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
+ if (ret >= 0) {
+ ethtool_notify(dev, ETHTOOL_MSG_LINKINFO_NTF, NULL);
+ ethtool_notify(dev, ETHTOOL_MSG_LINKMODES_NTF, NULL);
+ }
+ return ret;
}
static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
@@ -765,6 +656,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
memset(&info, 0, sizeof(info));
info.cmd = ETHTOOL_GDRVINFO;
+ strlcpy(info.version, UTS_RELEASE, sizeof(info.version));
if (ops->get_drvinfo) {
ops->get_drvinfo(dev, &info);
} else if (dev->dev.parent && dev->dev.parent->driver) {
@@ -1426,6 +1318,7 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
return ret;
dev->wol_enabled = !!wol.wolopts;
+ ethtool_notify(dev, ETHTOOL_MSG_WOL_NTF, NULL);
return 0;
}
@@ -1475,12 +1368,12 @@ static int ethtool_nway_reset(struct net_device *dev)
static int ethtool_get_link(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata = { .cmd = ETHTOOL_GLINK };
+ int link = __ethtool_get_link(dev);
- if (!dev->ethtool_ops->get_link)
- return -EOPNOTSUPP;
-
- edata.data = netif_running(dev) && dev->ethtool_ops->get_link(dev);
+ if (link < 0)
+ return link;
+ edata.data = link;
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
@@ -2170,8 +2063,8 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr)
memset(&info, 0, sizeof(info));
info.cmd = ETHTOOL_GET_TS_INFO;
- if (phydev && phydev->drv && phydev->drv->ts_info) {
- err = phydev->drv->ts_info(phydev, &info);
+ if (phy_has_tsinfo(phydev)) {
+ err = phy_ts_info(phydev, &info);
} else if (ops->get_ts_info) {
err = ops->get_ts_info(dev, &info);
} else {
@@ -2655,6 +2548,8 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SMSGLVL:
rc = ethtool_set_value_void(dev, useraddr,
dev->ethtool_ops->set_msglevel);
+ if (!rc)
+ ethtool_notify(dev, ETHTOOL_MSG_DEBUG_NTF, NULL);
break;
case ETHTOOL_GEEE:
rc = ethtool_get_eee(dev, useraddr);
diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c
new file mode 100644
index 000000000000..5d16cb4e8693
--- /dev/null
+++ b/net/ethtool/linkinfo.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "netlink.h"
+#include "common.h"
+
+/* Request state of LINKINFO_GET; nothing beyond the common request info. */
+struct linkinfo_req_info {
+ struct ethnl_req_info base;
+};
+
+/* Reply state of LINKINFO_GET. */
+struct linkinfo_reply_data {
+ struct ethnl_reply_data base;
+ /* filled by __ethtool_get_link_ksettings() in linkinfo_prepare_data() */
+ struct ethtool_link_ksettings ksettings;
+ /* shortcut, set to &ksettings.base in linkinfo_prepare_data() */
+ struct ethtool_link_settings *lsettings;
+};
+
+/* Get the enclosing struct linkinfo_reply_data from a pointer to its base. */
+#define LINKINFO_REPDATA(__reply_base) \
+ container_of(__reply_base, struct linkinfo_reply_data, base)
+
+/* Attribute policy of LINKINFO_GET requests: only the nested request header
+ * is accepted, all data attributes are rejected.
+ */
+static const struct nla_policy
+linkinfo_get_policy[ETHTOOL_A_LINKINFO_MAX + 1] = {
+ [ETHTOOL_A_LINKINFO_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKINFO_HEADER] = { .type = NLA_NESTED },
+ [ETHTOOL_A_LINKINFO_PORT] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKINFO_PHYADDR] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKINFO_TP_MDIX] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKINFO_TP_MDIX_CTRL] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKINFO_TRANSCEIVER] = { .type = NLA_REJECT },
+};
+
+/* Retrieve link settings of the device into the reply data.
+ *
+ * Returns the error from ethnl_ops_begin() if that fails, otherwise the
+ * return value of __ethtool_get_link_ksettings(). If the latter fails and
+ * @info is not NULL, an error message is set via GENL_SET_ERR_MSG().
+ */
+static int linkinfo_prepare_data(const struct ethnl_req_info *req_base,
+				 struct ethnl_reply_data *reply_base,
+				 struct genl_info *info)
+{
+	struct linkinfo_reply_data *reply = LINKINFO_REPDATA(reply_base);
+	struct ethtool_link_ksettings *ks = &reply->ksettings;
+	struct net_device *dev = reply_base->dev;
+	int err;
+
+	reply->lsettings = &ks->base;
+
+	err = ethnl_ops_begin(dev);
+	if (err < 0)
+		return err;
+
+	err = __ethtool_get_link_ksettings(dev, ks);
+	if (info && err < 0)
+		GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
+
+	ethnl_ops_complete(dev);
+	return err;
+}
+
+/* Size of the LINKINFO_GET reply payload: five u8 attributes. */
+static int linkinfo_reply_size(const struct ethnl_req_info *req_base,
+			       const struct ethnl_reply_data *reply_base)
+{
+	int len = 0;
+
+	len += nla_total_size(sizeof(u8));	/* LINKINFO_PORT */
+	len += nla_total_size(sizeof(u8));	/* LINKINFO_PHYADDR */
+	len += nla_total_size(sizeof(u8));	/* LINKINFO_TP_MDIX */
+	len += nla_total_size(sizeof(u8));	/* LINKINFO_TP_MDIX_CTRL */
+	len += nla_total_size(sizeof(u8));	/* LINKINFO_TRANSCEIVER */
+
+	return len;
+}
+
+/* Put the link info attributes into @skb. Stops at the first attribute
+ * which cannot be added and returns -EMSGSIZE; returns 0 if all were added.
+ */
+static int linkinfo_fill_reply(struct sk_buff *skb,
+			       const struct ethnl_req_info *req_base,
+			       const struct ethnl_reply_data *reply_base)
+{
+	const struct linkinfo_reply_data *reply = LINKINFO_REPDATA(reply_base);
+	const struct ethtool_link_settings *ls = reply->lsettings;
+
+	if (nla_put_u8(skb, ETHTOOL_A_LINKINFO_PORT, ls->port))
+		return -EMSGSIZE;
+	if (nla_put_u8(skb, ETHTOOL_A_LINKINFO_PHYADDR, ls->phy_address))
+		return -EMSGSIZE;
+	if (nla_put_u8(skb, ETHTOOL_A_LINKINFO_TP_MDIX, ls->eth_tp_mdix))
+		return -EMSGSIZE;
+	if (nla_put_u8(skb, ETHTOOL_A_LINKINFO_TP_MDIX_CTRL,
+		       ls->eth_tp_mdix_ctrl))
+		return -EMSGSIZE;
+	if (nla_put_u8(skb, ETHTOOL_A_LINKINFO_TRANSCEIVER, ls->transceiver))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+/* Description of the LINKINFO_GET request: message and attribute ids, sizes
+ * of the per-request structures, the request policy and the callbacks
+ * defined above.
+ */
+const struct ethnl_request_ops ethnl_linkinfo_request_ops = {
+ .request_cmd = ETHTOOL_MSG_LINKINFO_GET,
+ .reply_cmd = ETHTOOL_MSG_LINKINFO_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_LINKINFO_HEADER,
+ .max_attr = ETHTOOL_A_LINKINFO_MAX,
+ .req_info_size = sizeof(struct linkinfo_req_info),
+ .reply_data_size = sizeof(struct linkinfo_reply_data),
+ .request_policy = linkinfo_get_policy,
+
+ .prepare_data = linkinfo_prepare_data,
+ .reply_size = linkinfo_reply_size,
+ .fill_reply = linkinfo_fill_reply,
+};
+
+/* LINKINFO_SET */
+
+/* Attribute policy of LINKINFO_SET requests: besides the nested request
+ * header, PORT, PHYADDR and TP_MDIX_CTRL are accepted as u8 values; TP_MDIX
+ * and TRANSCEIVER are rejected.
+ */
+static const struct nla_policy
+linkinfo_set_policy[ETHTOOL_A_LINKINFO_MAX + 1] = {
+ [ETHTOOL_A_LINKINFO_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKINFO_HEADER] = { .type = NLA_NESTED },
+ [ETHTOOL_A_LINKINFO_PORT] = { .type = NLA_U8 },
+ [ETHTOOL_A_LINKINFO_PHYADDR] = { .type = NLA_U8 },
+ [ETHTOOL_A_LINKINFO_TP_MDIX] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKINFO_TP_MDIX_CTRL] = { .type = NLA_U8 },
+ [ETHTOOL_A_LINKINFO_TRANSCEIVER] = { .type = NLA_REJECT },
+};
+
+/* LINKINFO_SET request handler: read current link settings, apply the PORT,
+ * PHYADDR and TP_MDIX_CTRL attributes from the request and, if any value
+ * changed, pass the result to ->set_link_ksettings() and send an
+ * ETHTOOL_MSG_LINKINFO_NTF notification on success.
+ *
+ * Returns a negative error code if parsing fails, the device lacks the
+ * ->get_link_ksettings()/->set_link_ksettings() callbacks (-EOPNOTSUPP),
+ * ethnl_ops_begin() fails or the settings cannot be retrieved; 0 if nothing
+ * changed; otherwise the return value of ->set_link_ksettings().
+ *
+ * Once ethnl_parse_header() has succeeded, req_info.dev is released with
+ * dev_put() on every exit path, including the -EOPNOTSUPP one.
+ */
+int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *tb[ETHTOOL_A_LINKINFO_MAX + 1];
+	struct ethtool_link_ksettings ksettings = {};
+	struct ethtool_link_settings *lsettings;
+	struct ethnl_req_info req_info = {};
+	struct net_device *dev;
+	bool mod = false;
+	int ret;
+
+	ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb,
+			  ETHTOOL_A_LINKINFO_MAX, linkinfo_set_policy,
+			  info->extack);
+	if (ret < 0)
+		return ret;
+	ret = ethnl_parse_header(&req_info, tb[ETHTOOL_A_LINKINFO_HEADER],
+				 genl_info_net(info), info->extack, true);
+	if (ret < 0)
+		return ret;
+	dev = req_info.dev;
+	/* from here on, the device must be released before returning */
+	ret = -EOPNOTSUPP;
+	if (!dev->ethtool_ops->get_link_ksettings ||
+	    !dev->ethtool_ops->set_link_ksettings)
+		goto out_dev;
+
+	rtnl_lock();
+	ret = ethnl_ops_begin(dev);
+	if (ret < 0)
+		goto out_rtnl;
+
+	ret = __ethtool_get_link_ksettings(dev, &ksettings);
+	if (ret < 0) {
+		if (info)
+			GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
+		goto out_ops;
+	}
+	lsettings = &ksettings.base;
+
+	ethnl_update_u8(&lsettings->port, tb[ETHTOOL_A_LINKINFO_PORT], &mod);
+	ethnl_update_u8(&lsettings->phy_address, tb[ETHTOOL_A_LINKINFO_PHYADDR],
+			&mod);
+	ethnl_update_u8(&lsettings->eth_tp_mdix_ctrl,
+			tb[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL], &mod);
+	/* request changing nothing is a success, not an error */
+	ret = 0;
+	if (!mod)
+		goto out_ops;
+
+	ret = dev->ethtool_ops->set_link_ksettings(dev, &ksettings);
+	if (ret < 0)
+		GENL_SET_ERR_MSG(info, "link settings update failed");
+	else
+		ethtool_notify(dev, ETHTOOL_MSG_LINKINFO_NTF, NULL);
+
+out_ops:
+	ethnl_ops_complete(dev);
+out_rtnl:
+	rtnl_unlock();
+out_dev:
+	dev_put(dev);
+	return ret;
+}
diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
new file mode 100644
index 000000000000..96f20be64553
--- /dev/null
+++ b/net/ethtool/linkmodes.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "netlink.h"
+#include "common.h"
+#include "bitset.h"
+
+/* LINKMODES_GET request info: only the common part, no request specific data */
+struct linkmodes_req_info {
+ struct ethnl_req_info base;
+};
+
+/* Data gathered by linkmodes_prepare_data() for composing the reply */
+struct linkmodes_reply_data {
+ struct ethnl_reply_data base;
+ struct ethtool_link_ksettings ksettings;
+ struct ethtool_link_settings *lsettings; /* set to &ksettings.base */
+ bool peer_empty; /* no bit set in lp_advertising */
+};
+
+/* Get the linkmodes_reply_data a given embedded ethnl_reply_data belongs to */
+#define LINKMODES_REPDATA(__reply_base) \
+ container_of(__reply_base, struct linkmodes_reply_data, base)
+
+/* GET request policy: only the request header nest is accepted */
+static const struct nla_policy
+linkmodes_get_policy[ETHTOOL_A_LINKMODES_MAX + 1] = {
+ [ETHTOOL_A_LINKMODES_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKMODES_HEADER] = { .type = NLA_NESTED },
+ [ETHTOOL_A_LINKMODES_AUTONEG] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKMODES_OURS] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKMODES_PEER] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKMODES_SPEED] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKMODES_DUPLEX] = { .type = NLA_REJECT },
+};
+
+/* Retrieve link settings of the device into the reply data and note whether
+ * the link partner advertised anything (peer_empty).
+ * @req_base:   request info (not used here)
+ * @reply_base: embedded base of struct linkmodes_reply_data to fill
+ * @info:       genetlink info for extack error reporting; NULL when called
+ *              for a dump or a notification
+ *
+ * Return: 0 on success or negative error code
+ */
+static int linkmodes_prepare_data(const struct ethnl_req_info *req_base,
+				  struct ethnl_reply_data *reply_base,
+				  struct genl_info *info)
+{
+	struct linkmodes_reply_data *data = LINKMODES_REPDATA(reply_base);
+	struct net_device *dev = reply_base->dev;
+	int ret;
+
+	data->lsettings = &data->ksettings.base;
+
+	ret = ethnl_ops_begin(dev);
+	if (ret < 0)
+		return ret;
+
+	ret = __ethtool_get_link_ksettings(dev, &data->ksettings);
+	if (ret < 0) {
+		/* Bail out on any failure; only the error message depends on
+		 * info being available. Previously the NULL info case fell
+		 * through and evaluated settings that were never retrieved.
+		 */
+		if (info)
+			GENL_SET_ERR_MSG(info,
+					 "failed to retrieve link settings");
+		goto out;
+	}
+
+	data->peer_empty =
+		bitmap_empty(data->ksettings.link_modes.lp_advertising,
+			     __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+out:
+	ethnl_ops_complete(dev);
+	return ret;
+}
+
+/* Estimate payload size of a LINKMODES_GET reply: three scalar attributes,
+ * the OURS bitset and, unless the peer advertised nothing, the PEER bitset.
+ *
+ * Return: payload length or negative error code from ethnl_bitset_size()
+ */
+static int linkmodes_reply_size(const struct ethnl_req_info *req_base,
+				const struct ethnl_reply_data *reply_base)
+{
+	const struct linkmodes_reply_data *data = LINKMODES_REPDATA(reply_base);
+	const struct ethtool_link_ksettings *ks = &data->ksettings;
+	bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
+	int total, bs_len;
+
+	total = nla_total_size(sizeof(u8));	/* LINKMODES_AUTONEG */
+	total += nla_total_size(sizeof(u32));	/* LINKMODES_SPEED */
+	total += nla_total_size(sizeof(u8));	/* LINKMODES_DUPLEX */
+
+	/* LINKMODES_OURS */
+	bs_len = ethnl_bitset_size(ks->link_modes.advertising,
+				   ks->link_modes.supported,
+				   __ETHTOOL_LINK_MODE_MASK_NBITS,
+				   link_mode_names, compact);
+	if (bs_len < 0)
+		return bs_len;
+	total += bs_len;
+
+	if (data->peer_empty)
+		return total;
+
+	/* LINKMODES_PEER */
+	bs_len = ethnl_bitset_size(ks->link_modes.lp_advertising, NULL,
+				   __ETHTOOL_LINK_MODE_MASK_NBITS,
+				   link_mode_names, compact);
+	if (bs_len < 0)
+		return bs_len;
+
+	return total + bs_len;
+}
+
+/* Compose the LINKMODES_GET reply from data gathered by
+ * linkmodes_prepare_data(): AUTONEG, the OURS bitset, the PEER bitset (only
+ * if lp_advertising is not empty), SPEED and DUPLEX.
+ *
+ * Return: 0 on success, -EMSGSIZE if any attribute could not be put
+ */
+static int linkmodes_fill_reply(struct sk_buff *skb,
+ const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct linkmodes_reply_data *data = LINKMODES_REPDATA(reply_base);
+ const struct ethtool_link_ksettings *ksettings = &data->ksettings;
+ const struct ethtool_link_settings *lsettings = &ksettings->base;
+ bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
+ int ret;
+
+ if (nla_put_u8(skb, ETHTOOL_A_LINKMODES_AUTONEG, lsettings->autoneg))
+ return -EMSGSIZE;
+
+ ret = ethnl_put_bitset(skb, ETHTOOL_A_LINKMODES_OURS,
+ ksettings->link_modes.advertising,
+ ksettings->link_modes.supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS, link_mode_names,
+ compact);
+ if (ret < 0)
+ return -EMSGSIZE; /* any ethnl_put_bitset() error is reported as -EMSGSIZE */
+ if (!data->peer_empty) {
+ ret = ethnl_put_bitset(skb, ETHTOOL_A_LINKMODES_PEER,
+ ksettings->link_modes.lp_advertising,
+ NULL, __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_mode_names, compact);
+ if (ret < 0)
+ return -EMSGSIZE;
+ }
+
+ if (nla_put_u32(skb, ETHTOOL_A_LINKMODES_SPEED, lsettings->speed) ||
+ nla_put_u8(skb, ETHTOOL_A_LINKMODES_DUPLEX, lsettings->duplex))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+/* Request ops for LINKMODES_GET; referenced from the GET request and
+ * notification ops tables in netlink.c
+ */
+const struct ethnl_request_ops ethnl_linkmodes_request_ops = {
+ .request_cmd = ETHTOOL_MSG_LINKMODES_GET,
+ .reply_cmd = ETHTOOL_MSG_LINKMODES_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_LINKMODES_HEADER,
+ .max_attr = ETHTOOL_A_LINKMODES_MAX,
+ .req_info_size = sizeof(struct linkmodes_req_info),
+ .reply_data_size = sizeof(struct linkmodes_reply_data),
+ .request_policy = linkmodes_get_policy,
+
+ .prepare_data = linkmodes_prepare_data,
+ .reply_size = linkmodes_reply_size,
+ .fill_reply = linkmodes_fill_reply,
+};
+
+/* LINKMODES_SET */
+
+/* Speed and duplex assigned to one link mode bit */
+struct link_mode_info {
+ int speed;
+ u8 duplex;
+};
+
+/* Entry for a link mode bit named by speed, type and duplex; speed and
+ * duplex values are derived from the name parts.
+ */
+#define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \
+ [ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \
+ .speed = SPEED_ ## _speed, \
+ .duplex = __DUPLEX_ ## _duplex \
+ }
+#define __DUPLEX_Half DUPLEX_HALF
+#define __DUPLEX_Full DUPLEX_FULL
+/* Entry for a bit with SPEED_UNKNOWN / DUPLEX_UNKNOWN; such bits are skipped
+ * by ethnl_auto_linkmodes().
+ */
+#define __DEFINE_SPECIAL_MODE_PARAMS(_mode) \
+ [ETHTOOL_LINK_MODE_ ## _mode ## _BIT] = { \
+ .speed = SPEED_UNKNOWN, \
+ .duplex = DUPLEX_UNKNOWN, \
+ }
+
+/* Indexed by link mode bit; ethnl_auto_linkmodes() checks at build time that
+ * the table has exactly __ETHTOOL_LINK_MODE_MASK_NBITS entries.
+ */
+static const struct link_mode_info link_mode_params[] = {
+ __DEFINE_LINK_MODE_PARAMS(10, T, Half),
+ __DEFINE_LINK_MODE_PARAMS(10, T, Full),
+ __DEFINE_LINK_MODE_PARAMS(100, T, Half),
+ __DEFINE_LINK_MODE_PARAMS(100, T, Full),
+ __DEFINE_LINK_MODE_PARAMS(1000, T, Half),
+ __DEFINE_LINK_MODE_PARAMS(1000, T, Full),
+ __DEFINE_SPECIAL_MODE_PARAMS(Autoneg),
+ __DEFINE_SPECIAL_MODE_PARAMS(TP),
+ __DEFINE_SPECIAL_MODE_PARAMS(AUI),
+ __DEFINE_SPECIAL_MODE_PARAMS(MII),
+ __DEFINE_SPECIAL_MODE_PARAMS(FIBRE),
+ __DEFINE_SPECIAL_MODE_PARAMS(BNC),
+ __DEFINE_LINK_MODE_PARAMS(10000, T, Full),
+ __DEFINE_SPECIAL_MODE_PARAMS(Pause),
+ __DEFINE_SPECIAL_MODE_PARAMS(Asym_Pause),
+ __DEFINE_LINK_MODE_PARAMS(2500, X, Full),
+ __DEFINE_SPECIAL_MODE_PARAMS(Backplane),
+ __DEFINE_LINK_MODE_PARAMS(1000, KX, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, KX4, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, KR, Full),
+ /* spelled out: presumably the bit name does not fit the pattern the
+ * ETHTOOL_LINK_MODE() macro builds - TODO confirm
+ */
+ [ETHTOOL_LINK_MODE_10000baseR_FEC_BIT] = {
+ .speed = SPEED_10000,
+ .duplex = DUPLEX_FULL,
+ },
+ __DEFINE_LINK_MODE_PARAMS(20000, MLD2, Full),
+ __DEFINE_LINK_MODE_PARAMS(20000, KR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(40000, KR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(40000, CR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(40000, SR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(40000, LR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(56000, KR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(56000, CR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(56000, SR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(56000, LR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(25000, CR, Full),
+ __DEFINE_LINK_MODE_PARAMS(25000, KR, Full),
+ __DEFINE_LINK_MODE_PARAMS(25000, SR, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, CR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, KR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, KR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, SR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, CR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, LR4_ER4, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, SR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(1000, X, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, CR, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, SR, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, LR, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, LRM, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, ER, Full),
+ __DEFINE_LINK_MODE_PARAMS(2500, T, Full),
+ __DEFINE_LINK_MODE_PARAMS(5000, T, Full),
+ __DEFINE_SPECIAL_MODE_PARAMS(FEC_NONE),
+ __DEFINE_SPECIAL_MODE_PARAMS(FEC_RS),
+ __DEFINE_SPECIAL_MODE_PARAMS(FEC_BASER),
+ __DEFINE_LINK_MODE_PARAMS(50000, KR, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, SR, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, CR, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, LR_ER_FR, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, DR, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, KR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, SR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, CR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, LR2_ER2_FR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, DR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, KR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, SR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, LR4_ER4_FR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, DR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, CR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(100, T1, Full),
+ __DEFINE_LINK_MODE_PARAMS(1000, T1, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, KR8, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, SR8, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, LR8_ER8_FR8, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, DR8, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, CR8, Full),
+};
+
+/* SET request policy: AUTONEG, OURS, SPEED and DUPLEX may be set; PEER is
+ * rejected
+ */
+static const struct nla_policy
+linkmodes_set_policy[ETHTOOL_A_LINKMODES_MAX + 1] = {
+ [ETHTOOL_A_LINKMODES_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKMODES_HEADER] = { .type = NLA_NESTED },
+ [ETHTOOL_A_LINKMODES_AUTONEG] = { .type = NLA_U8 },
+ [ETHTOOL_A_LINKMODES_OURS] = { .type = NLA_NESTED },
+ [ETHTOOL_A_LINKMODES_PEER] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKMODES_SPEED] = { .type = NLA_U32 },
+ [ETHTOOL_A_LINKMODES_DUPLEX] = { .type = NLA_U8 },
+};
+
+/* Derive advertised link modes from requested speed and/or duplex: each
+ * supported mode matching the requested values gets advertised, every other
+ * mode with a known speed is dropped. Bits with SPEED_UNKNOWN in
+ * link_mode_params[] are left as they are. Called when autonegotiation is
+ * on and speed or duplex is requested without a link mode change. The
+ * ioctl() interface leaves this to userspace, for netlink it is done in
+ * kernel.
+ * Returns true if advertised modes bitmap was modified.
+ */
+static bool ethnl_auto_linkmodes(struct ethtool_link_ksettings *ksettings,
+				 bool req_speed, bool req_duplex)
+{
+	unsigned long *advertising = ksettings->link_modes.advertising;
+	unsigned long *supported = ksettings->link_modes.supported;
+	DECLARE_BITMAP(adv_before, __ETHTOOL_LINK_MODE_MASK_NBITS);
+	unsigned int bit;
+
+	BUILD_BUG_ON(ARRAY_SIZE(link_mode_params) !=
+		     __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+	bitmap_copy(adv_before, advertising, __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+	for (bit = 0; bit < __ETHTOOL_LINK_MODE_MASK_NBITS; bit++) {
+		const struct link_mode_info *params = &link_mode_params[bit];
+		bool advertise;
+
+		if (params->speed == SPEED_UNKNOWN)
+			continue;
+
+		advertise = test_bit(bit, supported);
+		if (req_speed && params->speed != ksettings->base.speed)
+			advertise = false;
+		if (req_duplex && params->duplex != ksettings->base.duplex)
+			advertise = false;
+
+		if (advertise)
+			set_bit(bit, advertising);
+		else
+			clear_bit(bit, advertising);
+	}
+
+	return !bitmap_equal(adv_before, advertising,
+			     __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+/* Apply LINKMODES_SET attributes to link settings.
+ * @info:      genetlink info of the request (its extack is used for bitset
+ *             parsing errors)
+ * @tb:        parsed request attributes
+ * @ksettings: current link settings, updated in place
+ * @mod:       set to true if anything in @ksettings was changed, false
+ *             otherwise
+ *
+ * Return: 0 on success or negative error code from ethnl_update_bitset()
+ */
+static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb,
+ struct ethtool_link_ksettings *ksettings,
+ bool *mod)
+{
+ struct ethtool_link_settings *lsettings = &ksettings->base;
+ bool req_speed, req_duplex;
+ int ret;
+
+ *mod = false;
+ /* true if the attribute is present in the request */
+ req_speed = tb[ETHTOOL_A_LINKMODES_SPEED];
+ req_duplex = tb[ETHTOOL_A_LINKMODES_DUPLEX];
+
+ ethnl_update_u8(&lsettings->autoneg, tb[ETHTOOL_A_LINKMODES_AUTONEG],
+ mod);
+ ret = ethnl_update_bitset(ksettings->link_modes.advertising,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ tb[ETHTOOL_A_LINKMODES_OURS], link_mode_names,
+ info->extack, mod);
+ if (ret < 0)
+ return ret;
+ ethnl_update_u32(&lsettings->speed, tb[ETHTOOL_A_LINKMODES_SPEED],
+ mod);
+ ethnl_update_u8(&lsettings->duplex, tb[ETHTOOL_A_LINKMODES_DUPLEX],
+ mod);
+
+ /* no explicit link modes but autoneg on (after the update above) and
+ * speed or duplex requested: derive advertised modes from them
+ */
+ if (!tb[ETHTOOL_A_LINKMODES_OURS] && lsettings->autoneg &&
+ (req_speed || req_duplex) &&
+ ethnl_auto_linkmodes(ksettings, req_speed, req_duplex))
+ *mod = true;
+
+ return 0;
+}
+
+/* ETHTOOL_MSG_LINKMODES_SET handler: read current link settings, apply the
+ * request attributes via ethnl_update_linkmodes() and, only if something
+ * changed, pass the result to ->set_link_ksettings() and send an
+ * ETHTOOL_MSG_LINKMODES_NTF notification.
+ *
+ * Return: 0 on success (also when there is nothing to change) or negative
+ * error code
+ */
+int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *tb[ETHTOOL_A_LINKMODES_MAX + 1];
+	struct ethtool_link_ksettings ksettings = {};
+	struct ethnl_req_info req_info = {};
+	struct net_device *dev;
+	bool mod = false;
+	int ret;
+
+	ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb,
+			  ETHTOOL_A_LINKMODES_MAX, linkmodes_set_policy,
+			  info->extack);
+	if (ret < 0)
+		return ret;
+	ret = ethnl_parse_header(&req_info, tb[ETHTOOL_A_LINKMODES_HEADER],
+				 genl_info_net(info), info->extack, true);
+	if (ret < 0)
+		return ret;
+	/* ethnl_parse_header() took a reference to the device; every exit
+	 * path from here on has to drop it
+	 */
+	dev = req_info.dev;
+	ret = -EOPNOTSUPP;
+	if (!dev->ethtool_ops->get_link_ksettings ||
+	    !dev->ethtool_ops->set_link_ksettings)
+		goto out_dev;
+
+	rtnl_lock();
+	ret = ethnl_ops_begin(dev);
+	if (ret < 0)
+		goto out_rtnl;
+
+	ret = __ethtool_get_link_ksettings(dev, &ksettings);
+	if (ret < 0) {
+		/* info was already dereferenced above, no need to test it */
+		GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
+		goto out_ops;
+	}
+
+	ret = ethnl_update_linkmodes(info, tb, &ksettings, &mod);
+	if (ret < 0)
+		goto out_ops;
+
+	if (mod) {
+		ret = dev->ethtool_ops->set_link_ksettings(dev, &ksettings);
+		if (ret < 0)
+			GENL_SET_ERR_MSG(info, "link settings update failed");
+		else
+			ethtool_notify(dev, ETHTOOL_MSG_LINKMODES_NTF, NULL);
+	}
+
+out_ops:
+	ethnl_ops_complete(dev);
+out_rtnl:
+	rtnl_unlock();
+out_dev:
+	dev_put(dev);
+	return ret;
+}
diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c
new file mode 100644
index 000000000000..2740cde0a182
--- /dev/null
+++ b/net/ethtool/linkstate.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "netlink.h"
+#include "common.h"
+
+/* LINKSTATE_GET request info: only the common part, no request specific data */
+struct linkstate_req_info {
+ struct ethnl_req_info base;
+};
+
+/* Data gathered by linkstate_prepare_data() for composing the reply */
+struct linkstate_reply_data {
+ struct ethnl_reply_data base;
+ int link; /* result of __ethtool_get_link(); not reported if negative */
+};
+
+/* Get the linkstate_reply_data a given embedded ethnl_reply_data belongs to */
+#define LINKSTATE_REPDATA(__reply_base) \
+ container_of(__reply_base, struct linkstate_reply_data, base)
+
+/* GET request policy: only the request header nest is accepted */
+static const struct nla_policy
+linkstate_get_policy[ETHTOOL_A_LINKSTATE_MAX + 1] = {
+ [ETHTOOL_A_LINKSTATE_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKSTATE_HEADER] = { .type = NLA_NESTED },
+ [ETHTOOL_A_LINKSTATE_LINK] = { .type = NLA_REJECT },
+};
+
+/* Store the result of __ethtool_get_link() for the device in reply data; the
+ * call is bracketed by ethnl_ops_begin() and ethnl_ops_complete().
+ * @req_base and @info are not used.
+ *
+ * Return: 0 on success or negative error code from ethnl_ops_begin()
+ */
+static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base,
+ struct genl_info *info)
+{
+ struct linkstate_reply_data *data = LINKSTATE_REPDATA(reply_base);
+ struct net_device *dev = reply_base->dev;
+ int ret;
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ return ret;
+ data->link = __ethtool_get_link(dev);
+ ethnl_ops_complete(dev);
+
+ return 0;
+}
+
+/* Payload size of a LINKSTATE_GET reply; does not depend on request or data */
+static int linkstate_reply_size(const struct ethnl_req_info *req_base,
+				const struct ethnl_reply_data *reply_base)
+{
+	int len = 0;
+
+	len += nla_total_size(sizeof(u8));	/* LINKSTATE_LINK */
+
+	return len;
+}
+
+/* Put ETHTOOL_A_LINKSTATE_LINK (normalized to 0/1) into the reply; the
+ * attribute is omitted if the stored link value is negative.
+ *
+ * Return: 0 on success, -EMSGSIZE if the attribute does not fit
+ */
+static int linkstate_fill_reply(struct sk_buff *skb,
+				const struct ethnl_req_info *req_base,
+				const struct ethnl_reply_data *reply_base)
+{
+	struct linkstate_reply_data *data = LINKSTATE_REPDATA(reply_base);
+
+	if (data->link < 0)
+		return 0;
+
+	if (nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+/* Request ops for LINKSTATE_GET; referenced from the GET request ops table
+ * in netlink.c
+ */
+const struct ethnl_request_ops ethnl_linkstate_request_ops = {
+ .request_cmd = ETHTOOL_MSG_LINKSTATE_GET,
+ .reply_cmd = ETHTOOL_MSG_LINKSTATE_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_LINKSTATE_HEADER,
+ .max_attr = ETHTOOL_A_LINKSTATE_MAX,
+ .req_info_size = sizeof(struct linkstate_req_info),
+ .reply_data_size = sizeof(struct linkstate_reply_data),
+ .request_policy = linkstate_get_policy,
+
+ .prepare_data = linkstate_prepare_data,
+ .reply_size = linkstate_reply_size,
+ .fill_reply = linkstate_fill_reply,
+};
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
new file mode 100644
index 000000000000..180c194fab07
--- /dev/null
+++ b/net/ethtool/netlink.c
@@ -0,0 +1,729 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <net/sock.h>
+#include <linux/ethtool_netlink.h>
+#include "netlink.h"
+
+static struct genl_family ethtool_genl_family; /* initialized further below */
+
+/* set once the genetlink family is registered; ethtool_notify() does nothing
+ * until then
+ */
+static bool ethnl_ok __read_mostly;
+/* sequence number of the last broadcast message, see ethnl_bcastmsg_put() */
+static u32 ethnl_bcast_seq;
+
+/* policy for the nested request header parsed by ethnl_parse_header() */
+static const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_MAX + 1] = {
+ [ETHTOOL_A_HEADER_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 },
+ [ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = ALTIFNAMSIZ - 1 },
+ [ETHTOOL_A_HEADER_FLAGS] = { .type = NLA_U32 },
+};
+
+/**
+ * ethnl_parse_header() - parse request header
+ * @req_info: structure to put results into
+ * @header: nest attribute with request header
+ * @net: request netns
+ * @extack: netlink extack for error reporting
+ * @require_dev: fail if no device identified in header
+ *
+ * Parse request header in nested attribute @header and put results into
+ * the structure pointed to by @req_info. @extack is used for error
+ * reporting. If req_info->dev is not null on return, reference to it has
+ * been taken. If error is returned, *req_info is left untouched (callers
+ * pass it zero initialized, so req_info->dev stays null) and no reference
+ * is held.
+ *
+ * Return: 0 on success or negative error code
+ */
+int ethnl_parse_header(struct ethnl_req_info *req_info,
+ const struct nlattr *header, struct net *net,
+ struct netlink_ext_ack *extack, bool require_dev)
+{
+ struct nlattr *tb[ETHTOOL_A_HEADER_MAX + 1];
+ const struct nlattr *devname_attr;
+ struct net_device *dev = NULL;
+ int ret;
+
+ if (!header) {
+ NL_SET_ERR_MSG(extack, "request header missing");
+ return -EINVAL;
+ }
+ ret = nla_parse_nested(tb, ETHTOOL_A_HEADER_MAX, header,
+ ethnl_header_policy, extack);
+ if (ret < 0)
+ return ret;
+ devname_attr = tb[ETHTOOL_A_HEADER_DEV_NAME];
+
+ /* ifindex takes precedence; name is only used for lookup without it */
+ if (tb[ETHTOOL_A_HEADER_DEV_INDEX]) {
+ u32 ifindex = nla_get_u32(tb[ETHTOOL_A_HEADER_DEV_INDEX]);
+
+ dev = dev_get_by_index(net, ifindex);
+ if (!dev) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ tb[ETHTOOL_A_HEADER_DEV_INDEX],
+ "no device matches ifindex");
+ return -ENODEV;
+ }
+ /* if both ifindex and ifname are passed, they must match */
+ if (devname_attr &&
+ strncmp(dev->name, nla_data(devname_attr), IFNAMSIZ)) {
+ dev_put(dev);
+ NL_SET_ERR_MSG_ATTR(extack, header,
+ "ifindex and name do not match");
+ return -ENODEV;
+ }
+ } else if (devname_attr) {
+ dev = dev_get_by_name(net, nla_data(devname_attr));
+ if (!dev) {
+ NL_SET_ERR_MSG_ATTR(extack, devname_attr,
+ "no device matches name");
+ return -ENODEV;
+ }
+ } else if (require_dev) {
+ NL_SET_ERR_MSG_ATTR(extack, header,
+ "neither ifindex nor name specified");
+ return -EINVAL;
+ }
+
+ if (dev && !netif_device_present(dev)) {
+ dev_put(dev);
+ NL_SET_ERR_MSG(extack, "device not present");
+ return -ENODEV;
+ }
+
+ /* success: the reference (if any) is handed over to the caller */
+ req_info->dev = dev;
+ if (tb[ETHTOOL_A_HEADER_FLAGS])
+ req_info->flags = nla_get_u32(tb[ETHTOOL_A_HEADER_FLAGS]);
+
+ return 0;
+}
+
+/**
+ * ethnl_fill_reply_header() - Put common header into a reply message
+ * @skb:      skb with the message
+ * @dev:      network device to describe in header
+ * @attrtype: attribute type to use for the nest
+ *
+ * Create a nested attribute with device index and name of @dev. Nothing is
+ * added if @dev is null.
+ *
+ * Return: 0 on success, error value (-EMSGSIZE only) on error
+ */
+int ethnl_fill_reply_header(struct sk_buff *skb, struct net_device *dev,
+			    u16 attrtype)
+{
+	struct nlattr *hdr_nest;
+
+	if (!dev)
+		return 0;
+
+	hdr_nest = nla_nest_start(skb, attrtype);
+	if (!hdr_nest)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(skb, ETHTOOL_A_HEADER_DEV_INDEX, (u32)dev->ifindex) ||
+	    nla_put_string(skb, ETHTOOL_A_HEADER_DEV_NAME, dev->name)) {
+		nla_nest_cancel(skb, hdr_nest);
+		return -EMSGSIZE;
+	}
+	/* If more attributes are put into reply header, ethnl_header_size()
+	 * must be updated to account for them.
+	 */
+
+	nla_nest_end(skb, hdr_nest);
+	return 0;
+}
+
+/**
+ * ethnl_reply_init() - Create skb for a reply and fill device identification
+ * @payload: payload length (without netlink and genetlink header)
+ * @dev: device the reply is about (may be null)
+ * @cmd: ETHTOOL_MSG_* message type for reply
+ * @hdr_attrtype: attribute type for common header
+ * @info: genetlink info of the received packet we respond to
+ * @ehdrp: place to store payload pointer returned by genlmsg_put_reply()
+ *
+ * On failure, the skb (if already allocated) is freed and an error message
+ * is set in extack of @info.
+ *
+ * Return: pointer to allocated skb on success, NULL on error
+ */
+struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd,
+ u16 hdr_attrtype, struct genl_info *info,
+ void **ehdrp)
+{
+ struct sk_buff *skb;
+
+ skb = genlmsg_new(payload, GFP_KERNEL);
+ if (!skb)
+ goto err;
+ *ehdrp = genlmsg_put_reply(skb, info, &ethtool_genl_family, 0, cmd);
+ if (!*ehdrp)
+ goto err_free;
+
+ if (dev) {
+ int ret;
+
+ ret = ethnl_fill_reply_header(skb, dev, hdr_attrtype);
+ if (ret < 0)
+ goto err_free;
+ }
+ return skb;
+
+err_free:
+ nlmsg_free(skb);
+err:
+ if (info)
+ GENL_SET_ERR_MSG(info, "failed to setup reply message");
+ return NULL;
+}
+
+/* Start a broadcast (notification) message of type @cmd in @skb using the
+ * next broadcast sequence number; returns what genlmsg_put() returns.
+ */
+static void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd)
+{
+	u32 seq = ++ethnl_bcast_seq;
+
+	return genlmsg_put(skb, 0, seq, &ethtool_genl_family, 0, cmd);
+}
+
+/* Send @skb to the monitor multicast group in the netns of @dev */
+static int ethnl_multicast(struct sk_buff *skb, struct net_device *dev)
+{
+	struct net *net = dev_net(dev);
+
+	return genlmsg_multicast_netns(&ethtool_genl_family, net, skb, 0,
+				       ETHNL_MCGRP_MONITOR, GFP_KERNEL);
+}
+
+/* GET request helpers */
+
+/**
+ * struct ethnl_dump_ctx - context structure for generic dumpit() callback
+ * @ops: request ops of currently processed message type
+ * @req_info: parsed request header of processed request
+ * @reply_data: data needed to compose the reply
+ * @pos_hash: saved iteration position - hashbucket
+ * @pos_idx: saved iteration position - index
+ *
+ * These parameters are kept in struct netlink_callback as context preserved
+ * between iterations. They are initialized by ethnl_default_start() and used
+ * in ethnl_default_dumpit() and ethnl_default_done(). @req_info and
+ * @reply_data are allocated by ethnl_default_start() and freed by
+ * ethnl_default_done().
+ */
+struct ethnl_dump_ctx {
+ const struct ethnl_request_ops *ops;
+ struct ethnl_req_info *req_info;
+ struct ethnl_reply_data *reply_data;
+ int pos_hash; /* index into net->dev_index_head[] */
+ int pos_idx; /* position within that hash chain */
+};
+
+/* Request ops for GET requests handled by the default handlers, indexed by
+ * message type; looked up in ethnl_default_doit() and ethnl_default_start().
+ */
+static const struct ethnl_request_ops *
+ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
+ [ETHTOOL_MSG_STRSET_GET] = &ethnl_strset_request_ops,
+ [ETHTOOL_MSG_LINKINFO_GET] = &ethnl_linkinfo_request_ops,
+ [ETHTOOL_MSG_LINKMODES_GET] = &ethnl_linkmodes_request_ops,
+ [ETHTOOL_MSG_LINKSTATE_GET] = &ethnl_linkstate_request_ops,
+ [ETHTOOL_MSG_DEBUG_GET] = &ethnl_debug_request_ops,
+ [ETHTOOL_MSG_WOL_GET] = &ethnl_wol_request_ops,
+};
+
+/* Dump context is stored directly in cb->ctx; ethnl_default_start() checks at
+ * build time that it fits.
+ */
+static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
+{
+ return (struct ethnl_dump_ctx *)cb->ctx;
+}
+
+/**
+ * ethnl_default_parse() - Parse request message
+ * @req_info: pointer to structure to put data into
+ * @nlhdr: pointer to request message header
+ * @net: request netns
+ * @request_ops: struct request_ops for request type
+ * @extack: netlink extack for error reporting
+ * @require_dev: fail if no device identified in header
+ *
+ * Parse universal request header and call request specific ->parse_request()
+ * callback (if defined) to parse the rest of the message.
+ *
+ * If ->parse_request() fails after the header was parsed successfully,
+ * req_info->dev may still hold a device reference; the caller has to drop
+ * it even when an error is returned.
+ *
+ * Return: 0 on success or negative error code
+ */
+static int ethnl_default_parse(struct ethnl_req_info *req_info,
+ const struct nlmsghdr *nlhdr, struct net *net,
+ const struct ethnl_request_ops *request_ops,
+ struct netlink_ext_ack *extack, bool require_dev)
+{
+ struct nlattr **tb;
+ int ret;
+
+ /* number of attributes depends on request type */
+ tb = kmalloc_array(request_ops->max_attr + 1, sizeof(tb[0]),
+ GFP_KERNEL);
+ if (!tb)
+ return -ENOMEM;
+
+ ret = nlmsg_parse(nlhdr, GENL_HDRLEN, tb, request_ops->max_attr,
+ request_ops->request_policy, extack);
+ if (ret < 0)
+ goto out;
+ ret = ethnl_parse_header(req_info, tb[request_ops->hdr_attr], net,
+ extack, require_dev);
+ if (ret < 0)
+ goto out;
+
+ if (request_ops->parse_request) {
+ ret = request_ops->parse_request(req_info, tb, extack);
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = 0;
+out:
+ kfree(tb);
+ return ret;
+}
+
+/**
+ * ethnl_init_reply_data() - Initialize reply data for GET request
+ * @reply_data: pointer to embedded struct ethnl_reply_data
+ * @ops: instance of struct ethnl_request_ops describing the layout
+ * @dev: network device to initialize the reply for
+ *
+ * Fills the whole reply data structure (ops->reply_data_size bytes, not only
+ * the embedded base) with zeros and sets the dev member. Must be called
+ * before calling the ->prepare_data() callback (for each iteration when
+ * handling dump requests).
+ */
+static void ethnl_init_reply_data(struct ethnl_reply_data *reply_data,
+ const struct ethnl_request_ops *ops,
+ struct net_device *dev)
+{
+ memset(reply_data, 0, ops->reply_data_size);
+ reply_data->dev = dev;
+}
+
+/* default ->doit() handler for GET type requests
+ *
+ * Parses the request, lets request specific ops gather the data under RTNL,
+ * allocates a reply of the size reported by ->reply_size(), fills it and
+ * sends it to the requester.
+ *
+ * Return: 0 or result of genlmsg_reply() on success, negative error code
+ * otherwise
+ */
+static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ethnl_reply_data *reply_data = NULL;
+ struct ethnl_req_info *req_info = NULL;
+ const u8 cmd = info->genlhdr->cmd;
+ const struct ethnl_request_ops *ops;
+ struct sk_buff *rskb;
+ void *reply_payload;
+ int reply_len;
+ int ret;
+
+ ops = ethnl_default_requests[cmd];
+ if (WARN_ONCE(!ops, "cmd %u has no ethnl_request_ops\n", cmd))
+ return -EOPNOTSUPP;
+ req_info = kzalloc(ops->req_info_size, GFP_KERNEL);
+ if (!req_info)
+ return -ENOMEM;
+ /* not zeroed here, ethnl_init_reply_data() below takes care of that */
+ reply_data = kmalloc(ops->reply_data_size, GFP_KERNEL);
+ if (!reply_data) {
+ kfree(req_info);
+ return -ENOMEM;
+ }
+
+ ret = ethnl_default_parse(req_info, info->nlhdr, genl_info_net(info), ops,
+ info->extack, !ops->allow_nodev_do);
+ if (ret < 0)
+ goto err_dev;
+ ethnl_init_reply_data(reply_data, ops, req_info->dev);
+
+ rtnl_lock();
+ ret = ops->prepare_data(req_info, reply_data, info);
+ rtnl_unlock();
+ if (ret < 0)
+ goto err_cleanup;
+ ret = ops->reply_size(req_info, reply_data);
+ if (ret < 0)
+ goto err_cleanup;
+ reply_len = ret;
+ /* ethnl_reply_init() only returns NULL on failure */
+ ret = -ENOMEM;
+ rskb = ethnl_reply_init(reply_len, req_info->dev, ops->reply_cmd,
+ ops->hdr_attr, info, &reply_payload);
+ if (!rskb)
+ goto err_cleanup;
+ ret = ops->fill_reply(rskb, req_info, reply_data);
+ if (ret < 0)
+ goto err_msg;
+ if (ops->cleanup_data)
+ ops->cleanup_data(reply_data);
+
+ genlmsg_end(rskb, reply_payload);
+ if (req_info->dev)
+ dev_put(req_info->dev);
+ kfree(reply_data);
+ kfree(req_info);
+ return genlmsg_reply(rskb, info);
+
+err_msg:
+ WARN_ONCE(ret == -EMSGSIZE, "calculated message payload length (%d) not sufficient\n", reply_len);
+ nlmsg_free(rskb);
+err_cleanup:
+ if (ops->cleanup_data)
+ ops->cleanup_data(reply_data);
+err_dev:
+ /* dev may be set even after a parse error, see ethnl_default_parse() */
+ if (req_info->dev)
+ dev_put(req_info->dev);
+ kfree(reply_data);
+ kfree(req_info);
+ return ret;
+}
+
+/* Gather data for one device and append reply header and attributes to the
+ * dump message already started in @skb. Takes RTNL around ->prepare_data();
+ * no genetlink info is available for a dump, so NULL is passed as info.
+ * ->cleanup_data() is called on both success and failure.
+ *
+ * Return: 0 on success or negative error code
+ */
+static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev,
+ const struct ethnl_dump_ctx *ctx)
+{
+ int ret;
+
+ ethnl_init_reply_data(ctx->reply_data, ctx->ops, dev);
+ rtnl_lock();
+ ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, NULL);
+ rtnl_unlock();
+ if (ret < 0)
+ goto out;
+ ret = ethnl_fill_reply_header(skb, dev, ctx->ops->hdr_attr);
+ if (ret < 0)
+ goto out;
+ ret = ctx->ops->fill_reply(skb, ctx->req_info, ctx->reply_data);
+
+out:
+ if (ctx->ops->cleanup_data)
+ ctx->ops->cleanup_data(ctx->reply_data);
+ /* reply data is reused for the next device; do not keep a stale pointer */
+ ctx->reply_data->dev = NULL;
+ return ret;
+}
+
+/* Default ->dumpit() handler for GET requests. Device iteration copied from
+ * rtnl_dump_ifinfo(); we have to be more careful about device hashtable
+ * persistence as we cannot guarantee to hold RTNL lock through the whole
+ * function as rtnetlink does.
+ *
+ * RTNL is dropped while a message for one device is composed (the device is
+ * pinned with dev_hold() meanwhile); if net->dev_base_seq changed in between,
+ * the current hash chain is walked again from the following index.
+ *
+ * Devices for which the request is not supported (-EOPNOTSUPP) are silently
+ * skipped. When the skb cannot take another message - including the case
+ * when not even the message header fits - the messages collected so far are
+ * returned and the dump resumes with the same device on next call.
+ *
+ * Return: skb->len if the dump is to be continued, 0 when all devices were
+ * processed, negative error code on failure with no message in the skb
+ */
+static int ethnl_default_dumpit(struct sk_buff *skb,
+				struct netlink_callback *cb)
+{
+	struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb);
+	struct net *net = sock_net(skb->sk);
+	int s_idx = ctx->pos_idx;
+	int h, idx = 0;
+	int ret = 0;
+	void *ehdr;
+
+	rtnl_lock();
+	for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+		struct hlist_head *head;
+		struct net_device *dev;
+		unsigned int seq;
+
+		head = &net->dev_index_head[h];
+
+restart_chain:
+		seq = net->dev_base_seq;
+		cb->seq = seq;
+		idx = 0;
+		hlist_for_each_entry(dev, head, index_hlist) {
+			if (idx < s_idx)
+				goto cont;
+			dev_hold(dev);
+			rtnl_unlock();
+
+			ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+					   cb->nlh->nlmsg_seq,
+					   &ethtool_genl_family, 0,
+					   ctx->ops->reply_cmd);
+			if (!ehdr) {
+				dev_put(dev);
+				/* do not throw away messages already in skb */
+				ret = -EMSGSIZE;
+				if (likely(skb->len))
+					ret = skb->len;
+				goto out;
+			}
+			ret = ethnl_default_dump_one(skb, dev, ctx);
+			dev_put(dev);
+			if (ret < 0) {
+				genlmsg_cancel(skb, ehdr);
+				if (ret == -EOPNOTSUPP) {
+					/* skipped device is not a dump error */
+					ret = 0;
+					goto lock_and_cont;
+				}
+				if (likely(skb->len))
+					ret = skb->len;
+				goto out;
+			}
+			genlmsg_end(skb, ehdr);
+lock_and_cont:
+			rtnl_lock();
+			if (net->dev_base_seq != seq) {
+				s_idx = idx + 1;
+				goto restart_chain;
+			}
+cont:
+			idx++;
+		}
+	}
+	rtnl_unlock();
+
+out:
+	/* RTNL is not held here on any path */
+	ctx->pos_hash = h;
+	ctx->pos_idx = idx;
+	nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+
+	return ret;
+}
+
+/* generic ->start() handler for GET requests
+ *
+ * Allocates request info and reply data for the request type, parses the
+ * request and stores everything in the dump context kept in @cb. On success
+ * the allocations are released by ethnl_default_done().
+ *
+ * Return: 0 on success or negative error code
+ */
+static int ethnl_default_start(struct netlink_callback *cb)
+{
+ struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb);
+ struct ethnl_reply_data *reply_data;
+ const struct ethnl_request_ops *ops;
+ struct ethnl_req_info *req_info;
+ struct genlmsghdr *ghdr;
+ int ret;
+
+ BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
+
+ ghdr = nlmsg_data(cb->nlh);
+ ops = ethnl_default_requests[ghdr->cmd];
+ if (WARN_ONCE(!ops, "cmd %u has no ethnl_request_ops\n", ghdr->cmd))
+ return -EOPNOTSUPP;
+ req_info = kzalloc(ops->req_info_size, GFP_KERNEL);
+ if (!req_info)
+ return -ENOMEM;
+ reply_data = kmalloc(ops->reply_data_size, GFP_KERNEL);
+ if (!reply_data) {
+ ret = -ENOMEM;
+ goto free_req_info;
+ }
+
+ ret = ethnl_default_parse(req_info, cb->nlh, sock_net(cb->skb->sk), ops,
+ cb->extack, false);
+ /* checked before ret: the parser may have taken the reference even if
+ * it returned an error
+ */
+ if (req_info->dev) {
+ /* We ignore device specification in dump requests but as the
+ * same parser as for non-dump (doit) requests is used, it
+ * would take reference to the device if it finds one
+ */
+ dev_put(req_info->dev);
+ req_info->dev = NULL;
+ }
+ if (ret < 0)
+ goto free_reply_data;
+
+ ctx->ops = ops;
+ ctx->req_info = req_info;
+ ctx->reply_data = reply_data;
+ ctx->pos_hash = 0;
+ ctx->pos_idx = 0;
+
+ return 0;
+
+free_reply_data:
+ kfree(reply_data);
+free_req_info:
+ kfree(req_info);
+
+ return ret;
+}
+
+/* default ->done() handler for GET requests; frees what
+ * ethnl_default_start() allocated
+ */
+static int ethnl_default_done(struct netlink_callback *cb)
+{
+ struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb);
+
+ kfree(ctx->reply_data);
+ kfree(ctx->req_info);
+
+ return 0;
+}
+
+/* Request ops used by ethnl_default_notify() to compose a notification,
+ * indexed by notification message type; the ops of the corresponding GET
+ * request are reused.
+ */
+static const struct ethnl_request_ops *
+ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = {
+ [ETHTOOL_MSG_LINKINFO_NTF] = &ethnl_linkinfo_request_ops,
+ [ETHTOOL_MSG_LINKMODES_NTF] = &ethnl_linkmodes_request_ops,
+ [ETHTOOL_MSG_DEBUG_NTF] = &ethnl_debug_request_ops,
+ [ETHTOOL_MSG_WOL_NTF] = &ethnl_wol_request_ops,
+};
+
+/* default notification handler
+ *
+ * Composes a message like the reply to the corresponding GET request would
+ * be (always with compact bitsets) and sends it to the monitor multicast
+ * group. @data is not used. Failures are not reported to the caller; the
+ * notification is just not sent.
+ */
+static void ethnl_default_notify(struct net_device *dev, unsigned int cmd,
+ const void *data)
+{
+ struct ethnl_reply_data *reply_data;
+ const struct ethnl_request_ops *ops;
+ struct ethnl_req_info *req_info;
+ struct sk_buff *skb;
+ void *reply_payload;
+ int reply_len;
+ int ret;
+
+ if (WARN_ONCE(cmd > ETHTOOL_MSG_KERNEL_MAX ||
+ !ethnl_default_notify_ops[cmd],
+ "unexpected notification type %u\n", cmd))
+ return;
+ ops = ethnl_default_notify_ops[cmd];
+ req_info = kzalloc(ops->req_info_size, GFP_KERNEL);
+ if (!req_info)
+ return;
+ reply_data = kmalloc(ops->reply_data_size, GFP_KERNEL);
+ if (!reply_data) {
+ kfree(req_info);
+ return;
+ }
+
+ /* no reference is taken here, so none is dropped on exit either */
+ req_info->dev = dev;
+ req_info->flags |= ETHTOOL_FLAG_COMPACT_BITSETS;
+
+ ethnl_init_reply_data(reply_data, ops, dev);
+ /* unlike in the GET handlers, RTNL is not taken here; ethtool_notify()
+ * asserts that it is already held
+ */
+ ret = ops->prepare_data(req_info, reply_data, NULL);
+ if (ret < 0)
+ goto err_cleanup;
+ ret = ops->reply_size(req_info, reply_data);
+ if (ret < 0)
+ goto err_cleanup;
+ reply_len = ret;
+ ret = -ENOMEM;
+ skb = genlmsg_new(reply_len, GFP_KERNEL);
+ if (!skb)
+ goto err_cleanup;
+ reply_payload = ethnl_bcastmsg_put(skb, cmd);
+ if (!reply_payload)
+ goto err_skb;
+ ret = ethnl_fill_reply_header(skb, dev, ops->hdr_attr);
+ if (ret < 0)
+ goto err_msg;
+ ret = ops->fill_reply(skb, req_info, reply_data);
+ if (ret < 0)
+ goto err_msg;
+ if (ops->cleanup_data)
+ ops->cleanup_data(reply_data);
+
+ genlmsg_end(skb, reply_payload);
+ kfree(reply_data);
+ kfree(req_info);
+ ethnl_multicast(skb, dev);
+ return;
+
+err_msg:
+ WARN_ONCE(ret == -EMSGSIZE,
+ "calculated message payload length (%d) not sufficient\n",
+ reply_len);
+err_skb:
+ nlmsg_free(skb);
+err_cleanup:
+ if (ops->cleanup_data)
+ ops->cleanup_data(reply_data);
+ kfree(reply_data);
+ kfree(req_info);
+ return;
+}
+
+/* notifications */
+
+/* Notification handler; gets the device, notification message type and the
+ * opaque data pointer passed to ethtool_notify()
+ */
+typedef void (*ethnl_notify_handler_t)(struct net_device *dev, unsigned int cmd,
+ const void *data);
+
+/* Handlers indexed by notification message type; a type without an entry is
+ * reported as not implemented by ethtool_notify()
+ */
+static const ethnl_notify_handler_t ethnl_notify_handlers[] = {
+ [ETHTOOL_MSG_LINKINFO_NTF] = ethnl_default_notify,
+ [ETHTOOL_MSG_LINKMODES_NTF] = ethnl_default_notify,
+ [ETHTOOL_MSG_DEBUG_NTF] = ethnl_default_notify,
+ [ETHTOOL_MSG_WOL_NTF] = ethnl_default_notify,
+};
+
+/* Send an ethtool netlink notification.
+ * @dev:  device the notification is about
+ * @cmd:  ETHTOOL_MSG_*_NTF notification type
+ * @data: passed to the notification handler
+ *
+ * Does nothing until the genetlink family is registered. Must be called
+ * with RTNL held. Unknown notification types trigger a one time warning.
+ */
+void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data)
+{
+ if (unlikely(!ethnl_ok))
+ return;
+ ASSERT_RTNL();
+
+ if (likely(cmd < ARRAY_SIZE(ethnl_notify_handlers) &&
+ ethnl_notify_handlers[cmd]))
+ ethnl_notify_handlers[cmd](dev, cmd, data);
+ else
+ WARN_ONCE(1, "notification %u not implemented (dev=%s)\n",
+ cmd, netdev_name(dev));
+}
+EXPORT_SYMBOL(ethtool_notify);
+
+/* genetlink setup */
+
+/* GET requests use the default doit/start/dumpit/done handlers, SET
+ * requests have their own ->doit() and carry GENL_UNS_ADMIN_PERM.
+ */
+static const struct genl_ops ethtool_genl_ops[] = {
+ {
+ .cmd = ETHTOOL_MSG_STRSET_GET,
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ },
+ {
+ .cmd = ETHTOOL_MSG_LINKINFO_GET,
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ },
+ {
+ .cmd = ETHTOOL_MSG_LINKINFO_SET,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = ethnl_set_linkinfo,
+ },
+ {
+ .cmd = ETHTOOL_MSG_LINKMODES_GET,
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ },
+ {
+ .cmd = ETHTOOL_MSG_LINKMODES_SET,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = ethnl_set_linkmodes,
+ },
+ {
+ .cmd = ETHTOOL_MSG_LINKSTATE_GET,
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ },
+ {
+ .cmd = ETHTOOL_MSG_DEBUG_GET,
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ },
+ {
+ .cmd = ETHTOOL_MSG_DEBUG_SET,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = ethnl_set_debug,
+ },
+ {
+ .cmd = ETHTOOL_MSG_WOL_GET,
+ .flags = GENL_UNS_ADMIN_PERM, /* the only GET request with this flag */
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ },
+ {
+ .cmd = ETHTOOL_MSG_WOL_SET,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = ethnl_set_wol,
+ },
+};
+
+/* the only multicast group; notifications are sent to it by ethnl_multicast() */
+static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
+ [ETHNL_MCGRP_MONITOR] = { .name = ETHTOOL_MCGRP_MONITOR_NAME },
+};
+
+/* genetlink family definition, registered in ethnl_init() */
+static struct genl_family ethtool_genl_family = {
+ .name = ETHTOOL_GENL_NAME,
+ .version = ETHTOOL_GENL_VERSION,
+ .netnsok = true,
+ .parallel_ops = true,
+ .ops = ethtool_genl_ops,
+ .n_ops = ARRAY_SIZE(ethtool_genl_ops),
+ .mcgrps = ethtool_nl_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(ethtool_nl_mcgrps),
+};
+
+/* module setup */
+
+/* Register the ethtool genetlink family; notifications are enabled
+ * (ethnl_ok) only after the registration succeeded.
+ */
+static int __init ethnl_init(void)
+{
+	int err = genl_register_family(&ethtool_genl_family);
+
+	if (WARN(err < 0, "ethtool: genetlink family registration failed"))
+		return err;
+
+	ethnl_ok = true;
+	return 0;
+}
+
+subsys_initcall(ethnl_init);
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
new file mode 100644
index 000000000000..60efd87686ad
--- /dev/null
+++ b/net/ethtool/netlink.h
@@ -0,0 +1,345 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _NET_ETHTOOL_NETLINK_H
+#define _NET_ETHTOOL_NETLINK_H
+
+#include <linux/ethtool_netlink.h>
+#include <linux/netdevice.h>
+#include <net/genetlink.h>
+#include <net/sock.h>
+
+struct ethnl_req_info;
+
+int ethnl_parse_header(struct ethnl_req_info *req_info,
+ const struct nlattr *nest, struct net *net,
+ struct netlink_ext_ack *extack, bool require_dev);
+int ethnl_fill_reply_header(struct sk_buff *skb, struct net_device *dev,
+ u16 attrtype);
+struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd,
+ u16 hdr_attrtype, struct genl_info *info,
+ void **ehdrp);
+
+/**
+ * ethnl_strz_size() - attribute size needed for a fixed size string
+ * @s: string stored in an ETH_GSTRING_LEN sized buffer (need not be null
+ *     terminated)
+ *
+ * Return: total size of a netlink attribute holding @s with a terminating
+ * null byte appended
+ */
+static inline int ethnl_strz_size(const char *s)
+{
+	/* at most ETH_GSTRING_LEN characters plus the terminating null */
+	size_t payload = strnlen(s, ETH_GSTRING_LEN) + 1;
+
+	return nla_total_size(payload);
+}
+
+/**
+ * ethnl_put_strz() - put a fixed size string into a message as an attribute
+ * @skb: skb with the message
+ * @attrtype: attribute type
+ * @s: string stored in an ETH_GSTRING_LEN sized buffer (need not be null
+ *     terminated)
+ *
+ * Appends an attribute whose payload is the string from @s (at most
+ * ETH_GSTRING_LEN characters) followed by a terminating null byte.
+ *
+ * Return: 0 on success, -EMSGSIZE if the attribute does not fit into @skb
+ */
+static inline int ethnl_put_strz(struct sk_buff *skb, u16 attrtype,
+				 const char *s)
+{
+	unsigned int len = strnlen(s, ETH_GSTRING_LEN);
+	struct nlattr *attr = nla_reserve(skb, attrtype, len + 1);
+	char *payload;
+
+	if (!attr)
+		return -EMSGSIZE;
+
+	payload = nla_data(attr);
+	memcpy(payload, s, len);
+	/* source may lack a terminator, always add our own */
+	payload[len] = '\0';
+	return 0;
+}
+
+/**
+ * ethnl_update_u32() - update u32 value from NLA_U32 attribute
+ * @dst: value to update
+ * @attr: netlink attribute with the new value, or null
+ * @mod: pointer to bool for modification tracking
+ *
+ * If @attr is not null, store its u32 payload into *@dst. *@mod is set to
+ * true only when the stored value actually differs from the previous one;
+ * it is never reset to false here.
+ */
+static inline void ethnl_update_u32(u32 *dst, const struct nlattr *attr,
+				    bool *mod)
+{
+	u32 requested;
+
+	if (!attr)
+		return;
+
+	requested = nla_get_u32(attr);
+	if (requested != *dst) {
+		*dst = requested;
+		*mod = true;
+	}
+}
+
+/**
+ * ethnl_update_u8() - update u8 value from NLA_U8 attribute
+ * @dst: value to update
+ * @attr: netlink attribute with the new value, or null
+ * @mod: pointer to bool for modification tracking
+ *
+ * If @attr is not null, store its u8 payload into *@dst. *@mod is set to
+ * true only when the stored value actually differs from the previous one;
+ * it is never reset to false here.
+ */
+static inline void ethnl_update_u8(u8 *dst, const struct nlattr *attr,
+				   bool *mod)
+{
+	u8 requested;
+
+	if (!attr)
+		return;
+
+	requested = nla_get_u8(attr);
+	if (requested != *dst) {
+		*dst = requested;
+		*mod = true;
+	}
+}
+
+/**
+ * ethnl_update_bool32() - update u32 used as bool from NLA_U8 attribute
+ * @dst: value to update
+ * @attr: netlink attribute with the new value, or null
+ * @mod: pointer to bool for modification tracking
+ *
+ * If @attr is not null, interpret its u8 payload as a boolean (zero or
+ * nonzero). When the logical value of *@dst differs from it, *@dst is set
+ * to 0 or 1 accordingly and *@mod is set to true. If the logical value is
+ * already the same, neither *@dst nor *@mod is touched.
+ */
+static inline void ethnl_update_bool32(u32 *dst, const struct nlattr *attr,
+				       bool *mod)
+{
+	u8 requested;
+
+	if (!attr)
+		return;
+
+	requested = nla_get_u8(attr) ? 1 : 0;
+	/* compare logical values only, *dst may hold any nonzero "true" */
+	if ((*dst ? 1 : 0) == requested)
+		return;
+
+	*dst = requested;
+	*mod = true;
+}
+
+/**
+ * ethnl_update_binary() - update binary data from NLA_BINARY attribute
+ * @dst: buffer to update
+ * @len: destination buffer length
+ * @attr: netlink attribute with the new contents, or null
+ * @mod: pointer to bool for modification tracking
+ *
+ * If @attr is not null, overwrite the data block at @dst with the attribute
+ * payload. At most @len bytes are used; if the payload is shorter than @len,
+ * only the leading part of @dst of the payload length is compared and
+ * rewritten and the rest is left untouched. *@mod is set to true if the
+ * contents of @dst changed, otherwise it is left as is.
+ */
+static inline void ethnl_update_binary(void *dst, unsigned int len,
+				       const struct nlattr *attr, bool *mod)
+{
+	const void *src;
+
+	if (!attr)
+		return;
+
+	/* never read past the end of a short payload */
+	if (nla_len(attr) < len)
+		len = nla_len(attr);
+	src = nla_data(attr);
+
+	if (memcmp(dst, src, len) != 0) {
+		memcpy(dst, src, len);
+		*mod = true;
+	}
+}
+
+/**
+ * ethnl_update_bitfield32() - update u32 value from NLA_BITFIELD32 attribute
+ * @dst: value to update
+ * @attr: netlink attribute with the requested change, or null
+ * @mod: pointer to bool for modification tracking
+ *
+ * Bits selected by the attribute's selector are replaced by the matching
+ * bits of the attribute's value; bits outside the selector are preserved.
+ * Nothing happens if @attr is null or the result equals the current value;
+ * otherwise *@dst is rewritten and *@mod is set to true.
+ */
+static inline void ethnl_update_bitfield32(u32 *dst, const struct nlattr *attr,
+					   bool *mod)
+{
+	struct nla_bitfield32 change;
+	u32 preserved, updated;
+
+	if (!attr)
+		return;
+
+	change = nla_get_bitfield32(attr);
+	preserved = *dst & ~change.selector;
+	updated = preserved | (change.value & change.selector);
+	if (updated != *dst) {
+		*dst = updated;
+		*mod = true;
+	}
+}
+
+/**
+ * ethnl_reply_header_size() - total size of reply header
+ *
+ * Upper estimate of the reply header nest: one u32 attribute (device
+ * ifindex) and one attribute of IFNAMSIZ bytes (device name). Using the
+ * maximum name length means the RTNL lock need not be held longer than
+ * necessary to guard against a rename between the size estimate and
+ * composing the message.
+ *
+ * Return: size of the header nest including its own attribute header
+ */
+static inline unsigned int ethnl_reply_header_size(void)
+{
+	unsigned int ifindex_attr = nla_total_size(sizeof(u32));
+	unsigned int ifname_attr = nla_total_size(IFNAMSIZ);
+
+	return nla_total_size(ifindex_attr + ifname_attr);
+}
+
+/* GET request handling */
+
+/* Unified processing of GET requests uses two data structures: request info
+ * and reply data. Request info holds information parsed from client request
+ * and it stays constant through all request processing. Reply data holds data
+ * retrieved from ethtool_ops callbacks or other internal sources which is used
+ * to compose the reply. When processing a dump request, request info is filled
+ * only once (when the request message is parsed) but reply data is filled for
+ * each reply message.
+ *
+ * Both structures consist of part common for all request types (struct
+ * ethnl_req_info and struct ethnl_reply_data defined below) and optional
+ * parts specific for each request type. Common part always starts at offset 0.
+ */
+
+/**
+ * struct ethnl_req_info - base type of request information for GET requests
+ * @dev: network device the request is for (may be null)
+ * @flags: request flags common for all request types (e.g.
+ *         ETHTOOL_FLAG_COMPACT_BITSETS is tested by the WoL reply code)
+ *
+ * This is a common base for request specific structures holding data from
+ * parsed userspace request. These always embed struct ethnl_req_info at
+ * zero offset.
+ */
+struct ethnl_req_info {
+ struct net_device *dev;
+ u32 flags;
+};
+
+/**
+ * struct ethnl_reply_data - base type of reply data for GET requests
+ * @dev: device for current reply message; in single shot requests it is
+ * equal to &ethnl_req_info.dev; in dumps it's different for each
+ * reply message
+ *
+ * This is a common base for request specific structures holding data for
+ * kernel reply message. These always embed struct ethnl_reply_data at zero
+ * offset. The ->prepare_data() callbacks take the device to query from
+ * here rather than from the request info.
+ */
+struct ethnl_reply_data {
+ struct net_device *dev;
+};
+
+/* Call the optional ethtool_ops->begin() callback of @dev before a series of
+ * ethtool_ops calls. Returns the callback's result, or 0 when @dev is null
+ * or the device does not provide the callback.
+ */
+static inline int ethnl_ops_begin(struct net_device *dev)
+{
+	if (!dev || !dev->ethtool_ops->begin)
+		return 0;
+
+	return dev->ethtool_ops->begin(dev);
+}
+
+/* Counterpart of ethnl_ops_begin(): call the optional ethtool_ops->complete()
+ * callback of @dev; does nothing when @dev is null or the callback is absent.
+ */
+static inline void ethnl_ops_complete(struct net_device *dev)
+{
+	if (!dev || !dev->ethtool_ops->complete)
+		return;
+
+	dev->ethtool_ops->complete(dev);
+}
+
+/**
+ * struct ethnl_request_ops - unified handling of GET requests
+ * @request_cmd: command id for request (GET)
+ * @reply_cmd: command id for reply (GET_REPLY)
+ * @hdr_attr: attribute type for request header
+ * @max_attr: maximum (top level) attribute type
+ * @req_info_size: size of request info
+ * @reply_data_size: size of reply data
+ * @request_policy: netlink policy for message contents
+ * @allow_nodev_do: allow non-dump request with no device identification
+ * @parse_request:
+ * Parse request except common header (struct ethnl_req_info). Common
+ * header is already filled on entry, the type specific rest of the
+ * request info is zero initialized. This callback should only modify
+ * type specific request info by parsed attributes from request message.
+ * @prepare_data:
+ * Retrieve and prepare data needed to compose a reply message. Calls to
+ * ethtool_ops handlers are limited to this callback. Common reply data
+ * (struct ethnl_reply_data) is filled on entry, type specific part after
+ * it is zero initialized. This callback should only modify the type
+ * specific part of reply data. Device identification from struct
+ * ethnl_reply_data is to be used as for dump requests, it iterates
+ * through network devices while dev member of struct ethnl_req_info
+ * points to the device from client request.
+ * @reply_size:
+ * Estimate reply message size. Returned value must be sufficient for
+ * message payload without common reply header. The callback may return
+ * an estimate higher than actual message size if exact calculation would
+ * not be worth the saved memory space.
+ * @fill_reply:
+ * Fill reply message payload (except for common header) from reply data.
+ * The callback must not generate more payload than previously called
+ * ->reply_size() estimated.
+ * @cleanup_data:
+ * Optional cleanup called when reply data is no longer needed. Can be
+ * used e.g. to free any additional data structures outside the main
+ * structure which were allocated by ->prepare_data(). When processing
+ * dump requests, ->cleanup_data() is called for each message.
+ *
+ * Description of variable parts of GET request handling when using the
+ * unified infrastructure. When used, a pointer to an instance of this
+ * structure is to be added to &ethnl_default_requests array and generic
+ * handlers ethnl_default_doit(), ethnl_default_dumpit(),
+ * ethnl_default_start() and ethnl_default_done() used in @ethtool_genl_ops;
+ * ethnl_default_notify() can be used in @ethnl_notify_handlers to send
+ * notifications of the corresponding type.
+ */
+struct ethnl_request_ops {
+ u8 request_cmd;
+ u8 reply_cmd;
+ u16 hdr_attr;
+ unsigned int max_attr;
+ unsigned int req_info_size;
+ unsigned int reply_data_size;
+ const struct nla_policy *request_policy;
+ bool allow_nodev_do;
+
+ int (*parse_request)(struct ethnl_req_info *req_info,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack);
+ int (*prepare_data)(const struct ethnl_req_info *req_info,
+ struct ethnl_reply_data *reply_data,
+ struct genl_info *info);
+ int (*reply_size)(const struct ethnl_req_info *req_info,
+ const struct ethnl_reply_data *reply_data);
+ int (*fill_reply)(struct sk_buff *skb,
+ const struct ethnl_req_info *req_info,
+ const struct ethnl_reply_data *reply_data);
+ void (*cleanup_data)(struct ethnl_reply_data *reply_data);
+};
+
+/* request handlers */
+
+extern const struct ethnl_request_ops ethnl_strset_request_ops;
+extern const struct ethnl_request_ops ethnl_linkinfo_request_ops;
+extern const struct ethnl_request_ops ethnl_linkmodes_request_ops;
+extern const struct ethnl_request_ops ethnl_linkstate_request_ops;
+extern const struct ethnl_request_ops ethnl_debug_request_ops;
+extern const struct ethnl_request_ops ethnl_wol_request_ops;
+
+int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);
+int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);
+int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info);
+int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info);
+
+#endif /* _NET_ETHTOOL_NETLINK_H */
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
new file mode 100644
index 000000000000..8e5911887b4c
--- /dev/null
+++ b/net/ethtool/strset.c
@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/ethtool.h>
+#include <linux/phy.h>
+#include "netlink.h"
+#include "common.h"
+
+/**
+ * struct strset_info - description of one string set
+ * @per_dev: set is device specific (count and strings are retrieved from
+ *           the device) rather than a global table
+ * @free_strings: @strings was allocated for this reply and is to be freed
+ *                by strset_cleanup_data()
+ * @count: number of strings in the set
+ * @strings: array of @count strings, ETH_GSTRING_LEN bytes each
+ */
+struct strset_info {
+ bool per_dev;
+ bool free_strings;
+ unsigned int count;
+ const char (*strings)[ETH_GSTRING_LEN];
+};
+
+/* Template of string set descriptions indexed by ETH_SS_* id. It is copied
+ * into reply data by strset_prepare_data(). Global sets (per_dev = false)
+ * carry their string table and count here; per device sets only have the
+ * per_dev flag set and get count and strings filled in by
+ * strset_prepare_set().
+ */
+static const struct strset_info info_template[] = {
+ [ETH_SS_TEST] = {
+ .per_dev = true,
+ },
+ [ETH_SS_STATS] = {
+ .per_dev = true,
+ },
+ [ETH_SS_PRIV_FLAGS] = {
+ .per_dev = true,
+ },
+ [ETH_SS_FEATURES] = {
+ .per_dev = false,
+ .count = ARRAY_SIZE(netdev_features_strings),
+ .strings = netdev_features_strings,
+ },
+ [ETH_SS_RSS_HASH_FUNCS] = {
+ .per_dev = false,
+ .count = ARRAY_SIZE(rss_hash_func_strings),
+ .strings = rss_hash_func_strings,
+ },
+ [ETH_SS_TUNABLES] = {
+ .per_dev = false,
+ .count = ARRAY_SIZE(tunable_strings),
+ .strings = tunable_strings,
+ },
+ [ETH_SS_PHY_STATS] = {
+ .per_dev = true,
+ },
+ [ETH_SS_PHY_TUNABLES] = {
+ .per_dev = false,
+ .count = ARRAY_SIZE(phy_tunable_strings),
+ .strings = phy_tunable_strings,
+ },
+ [ETH_SS_LINK_MODES] = {
+ .per_dev = false,
+ .count = __ETHTOOL_LINK_MODE_MASK_NBITS,
+ .strings = link_mode_names,
+ },
+ [ETH_SS_MSG_CLASSES] = {
+ .per_dev = false,
+ .count = NETIF_MSG_CLASS_COUNT,
+ .strings = netif_msg_class_names,
+ },
+ [ETH_SS_WOL_MODES] = {
+ .per_dev = false,
+ .count = WOL_MODE_COUNT,
+ .strings = wol_mode_names,
+ },
+};
+
+/**
+ * struct strset_req_info - parsed STRSET_GET request
+ * @base: common request info (embedded first)
+ * @req_ids: bitmap of explicitly requested string set ids (bit N set for
+ *           ETH_SS_* id N); zero means no explicit selection
+ * @counts_only: client asked only for string counts, not the strings
+ */
+struct strset_req_info {
+ struct ethnl_req_info base;
+ u32 req_ids;
+ bool counts_only;
+};
+
+/* get struct strset_req_info from pointer to its embedded common part */
+#define STRSET_REQINFO(__req_base) \
+ container_of(__req_base, struct strset_req_info, base)
+
+/**
+ * struct strset_reply_data - data for STRSET_GET reply
+ * @base: common reply data (embedded first)
+ * @sets: string set descriptions indexed by ETH_SS_* id; initialized from
+ *        info_template by strset_prepare_data()
+ */
+struct strset_reply_data {
+ struct ethnl_reply_data base;
+ struct strset_info sets[ETH_SS_COUNT];
+};
+
+/* get struct strset_reply_data from pointer to its embedded common part */
+#define STRSET_REPDATA(__reply_base) \
+ container_of(__reply_base, struct strset_reply_data, base)
+
+/* Policy for top level attributes of a STRSET_GET request.
+ * ETHTOOL_A_STRSET_COUNTS_ONLY is a flag: strset_parse_request() only tests
+ * its presence. It must be listed here, as an attribute type without a
+ * policy entry is refused by strict validation and the flag could then
+ * never be used.
+ */
+static const struct nla_policy strset_get_policy[ETHTOOL_A_STRSET_MAX + 1] = {
+	[ETHTOOL_A_STRSET_UNSPEC]	= { .type = NLA_REJECT },
+	[ETHTOOL_A_STRSET_HEADER]	= { .type = NLA_NESTED },
+	[ETHTOOL_A_STRSET_STRINGSETS]	= { .type = NLA_NESTED },
+	[ETHTOOL_A_STRSET_COUNTS_ONLY]	= { .type = NLA_FLAG },
+};
+
+/* Policy for one ETHTOOL_A_STRINGSETS_STRINGSET nest in a request: only the
+ * set id may be given, count and strings are rejected.
+ */
+static const struct nla_policy
+get_stringset_policy[ETHTOOL_A_STRINGSET_MAX + 1] = {
+ [ETHTOOL_A_STRINGSET_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_STRINGSET_ID] = { .type = NLA_U32 },
+ [ETHTOOL_A_STRINGSET_COUNT] = { .type = NLA_REJECT },
+ [ETHTOOL_A_STRINGSET_STRINGS] = { .type = NLA_REJECT },
+};
+
+/**
+ * strset_include() - test if a string set should be included in reply
+ * @info: parsed client request
+ * @data: pointer to request data structure
+ * @id: id of string set to check (ETH_SS_* constants)
+ *
+ * If the client listed string sets explicitly, only those are included.
+ * Otherwise global sets without a string table are never included and the
+ * rest is selected by scope: per device sets when the reply is for
+ * a device, global sets when it is not.
+ *
+ * Return: true if the set is to be part of the reply
+ */
+static bool strset_include(const struct strset_req_info *info,
+			   const struct strset_reply_data *data, u32 id)
+{
+	const struct strset_info *set;
+
+	/* every string set id must have its bit in req_ids */
+	BUILD_BUG_ON(ETH_SS_COUNT >= BITS_PER_BYTE * sizeof(info->req_ids));
+
+	if (info->req_ids)
+		return info->req_ids & (1U << id);
+
+	set = &data->sets[id];
+	if (!set->per_dev && !set->strings)
+		return false;
+
+	if (data->base.dev)
+		return set->per_dev;
+	return !set->per_dev;
+}
+
+/* Parse one ETHTOOL_A_STRINGSETS_STRINGSET nest and store the requested
+ * string set id into *val. Returns 0 on success, a negative error code if
+ * the nest fails to parse, or -EINVAL if it has no ETHTOOL_A_STRINGSET_ID.
+ */
+static int strset_get_id(const struct nlattr *nest, u32 *val,
+			 struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[ETHTOOL_A_STRINGSET_MAX + 1];
+	const struct nlattr *id_attr;
+	int err;
+
+	err = nla_parse_nested(tb, ETHTOOL_A_STRINGSET_MAX, nest,
+			       get_stringset_policy, extack);
+	if (err < 0)
+		return err;
+
+	id_attr = tb[ETHTOOL_A_STRINGSET_ID];
+	if (!id_attr)
+		return -EINVAL;
+
+	*val = nla_get_u32(id_attr);
+	return 0;
+}
+
+/* Policy for contents of the ETHTOOL_A_STRSET_STRINGSETS nest: a list of
+ * nested ETHTOOL_A_STRINGSETS_STRINGSET entries.
+ */
+static const struct nla_policy
+strset_stringsets_policy[ETHTOOL_A_STRINGSETS_MAX + 1] = {
+ [ETHTOOL_A_STRINGSETS_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_STRINGSETS_STRINGSET] = { .type = NLA_NESTED },
+};
+
+/**
+ * strset_parse_request() - parse type specific part of a STRSET_GET request
+ * @req_base: common part of request info embedded in struct strset_req_info
+ * @tb: parsed top level attributes of the request
+ * @extack: extended ack for error reporting
+ *
+ * Collect ids of requested string sets into the req_ids bitmap and record
+ * the counts_only flag. A request without ETHTOOL_A_STRSET_STRINGSETS is
+ * accepted and leaves the request info untouched.
+ *
+ * Return: 0 on success, -EOPNOTSUPP for a string set id which is not below
+ * ETH_SS_COUNT, -EINVAL or another negative error code for a malformed
+ * request
+ */
+static int strset_parse_request(struct ethnl_req_info *req_base,
+				struct nlattr **tb,
+				struct netlink_ext_ack *extack)
+{
+	struct strset_req_info *req_info = STRSET_REQINFO(req_base);
+	struct nlattr *nest = tb[ETHTOOL_A_STRSET_STRINGSETS];
+	struct nlattr *attr;
+	int rem, ret;
+
+	if (!nest)
+		return 0;
+	ret = nla_validate_nested(nest, ETHTOOL_A_STRINGSETS_MAX,
+				  strset_stringsets_policy, extack);
+	if (ret < 0)
+		return ret;
+
+	req_info->counts_only = tb[ETHTOOL_A_STRSET_COUNTS_ONLY];
+	nla_for_each_nested(attr, nest, rem) {
+		u32 id;
+
+		if (WARN_ONCE(nla_type(attr) != ETHTOOL_A_STRINGSETS_STRINGSET,
+			      "unexpected attrtype %u in ETHTOOL_A_STRSET_STRINGSETS\n",
+			      nla_type(attr)))
+			return -EINVAL;
+
+		ret = strset_get_id(attr, &id, extack);
+		if (ret < 0)
+			return ret;
+		/* Check the id supplied by userspace, not the return value
+		 * (which is always zero here); an unchecked id would make
+		 * the shift below undefined for id >= 32.
+		 */
+		if (id >= ETH_SS_COUNT) {
+			NL_SET_ERR_MSG_ATTR(extack, attr,
+					    "unknown string set id");
+			return -EOPNOTSUPP;
+		}
+
+		req_info->req_ids |= (1U << id);
+	}
+
+	return 0;
+}
+
+/* Free string tables which were allocated for this reply (marked by
+ * free_strings) and reset their descriptions so that a repeated call is
+ * harmless. Tables not marked by free_strings are left alone.
+ */
+static void strset_cleanup_data(struct ethnl_reply_data *reply_base)
+{
+	struct strset_reply_data *data = STRSET_REPDATA(reply_base);
+	unsigned int id;
+
+	for (id = 0; id < ETH_SS_COUNT; id++) {
+		struct strset_info *set = &data->sets[id];
+
+		if (!set->free_strings)
+			continue;
+
+		kfree(set->strings);
+		set->strings = NULL;
+		set->free_strings = false;
+	}
+}
+
+/* Retrieve count and (unless counts_only is set) strings of one per device
+ * string set. PHY statistics names are taken from the attached PHY device
+ * when the driver has no get_ethtool_phy_stats() callback; everything else
+ * comes from the get_sset_count() and get_strings() ethtool_ops callbacks.
+ * A set which is unsupported, empty or fails to report its count is stored
+ * with zero count and is not treated as an error.
+ *
+ * Returns 0 on success or -ENOMEM if the string table cannot be allocated.
+ */
+static int strset_prepare_set(struct strset_info *info, struct net_device *dev,
+			      unsigned int id, bool counts_only)
+{
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	bool from_phy = id == ETH_SS_PHY_STATS && dev->phydev &&
+			!ops->get_ethtool_phy_stats;
+	void *table;
+	int n;
+
+	if (from_phy)
+		n = phy_ethtool_get_sset_count(dev->phydev);
+	else if (ops->get_sset_count && ops->get_strings)
+		n = ops->get_sset_count(dev, id);
+	else
+		n = -EOPNOTSUPP;
+
+	if (n <= 0) {
+		info->count = 0;
+		return 0;
+	}
+	if (counts_only) {
+		info->count = n;
+		return 0;
+	}
+
+	table = kcalloc(n, ETH_GSTRING_LEN, GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
+
+	if (from_phy)
+		phy_ethtool_get_strings(dev->phydev, table);
+	else
+		ops->get_strings(dev, id, table);
+
+	/* table is owned by reply data, strset_cleanup_data() frees it */
+	info->strings = table;
+	info->free_strings = true;
+	info->count = n;
+
+	return 0;
+}
+
+/* Prepare string set descriptions for one reply message.
+ *
+ * Reply data starts as a copy of info_template. Without a device, the only
+ * thing to do is to refuse requests which explicitly ask for a per device
+ * set. With a device, every included per device set is filled by
+ * strset_prepare_set() between ethnl_ops_begin() and ethnl_ops_complete().
+ *
+ * Returns 0 on success or a negative error code; on failure, tables
+ * allocated so far are released.
+ */
+static int strset_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base,
+ struct genl_info *info)
+{
+ const struct strset_req_info *req_info = STRSET_REQINFO(req_base);
+ struct strset_reply_data *data = STRSET_REPDATA(reply_base);
+ struct net_device *dev = reply_base->dev;
+ unsigned int i;
+ int ret;
+
+ /* the template must describe every string set id */
+ BUILD_BUG_ON(ARRAY_SIZE(info_template) != ETH_SS_COUNT);
+ memcpy(&data->sets, &info_template, sizeof(data->sets));
+
+ if (!dev) {
+ for (i = 0; i < ETH_SS_COUNT; i++) {
+ if ((req_info->req_ids & (1U << i)) &&
+ data->sets[i].per_dev) {
+ /* info may be null, report only if there is one */
+ if (info)
+ GENL_SET_ERR_MSG(info, "requested per device strings without dev");
+ return -EINVAL;
+ }
+ }
+ return 0;
+ }
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ goto err_strset;
+ for (i = 0; i < ETH_SS_COUNT; i++) {
+ /* global sets are already complete from the template */
+ if (!strset_include(req_info, data, i) ||
+ !data->sets[i].per_dev)
+ continue;
+
+ ret = strset_prepare_set(&data->sets[i], dev, i,
+ req_info->counts_only);
+ if (ret < 0)
+ goto err_ops;
+ }
+ ethnl_ops_complete(dev);
+
+ return 0;
+err_ops:
+ ethnl_ops_complete(dev);
+err_strset:
+ /* drop string tables allocated before the failure */
+ strset_cleanup_data(reply_base);
+ return ret;
+}
+
+/* Calculate size of the nest describing one string set in the reply. An
+ * empty set takes no space; with counts_only, the nest holds only the id
+ * and count attributes; otherwise a nest with one entry (index and null
+ * terminated value) per string is added.
+ */
+static int strset_set_size(const struct strset_info *info, bool counts_only)
+{
+	/* size of one u32 attribute: set id, set count, string index */
+	const unsigned int u32_attr = nla_total_size(sizeof(u32));
+	unsigned int strings_len = 0;
+	unsigned int idx;
+
+	if (!info->count)
+		return 0;
+	if (counts_only)
+		return nla_total_size(2 * u32_attr);
+
+	/* one nest with index and value per string */
+	for (idx = 0; idx < info->count; idx++)
+		strings_len += nla_total_size(u32_attr +
+					      ethnl_strz_size(info->strings[idx]));
+
+	/* id, count and the nest with all strings */
+	return nla_total_size(2 * u32_attr + nla_total_size(strings_len));
+}
+
+/* Estimate size of STRSET_GET reply payload: ethnl_reply_header_size() plus
+ * the sizes of all string sets included in the reply. Returns the estimate
+ * or a negative error code passed on from strset_set_size().
+ */
+static int strset_reply_size(const struct ethnl_req_info *req_base,
+			     const struct ethnl_reply_data *reply_base)
+{
+	const struct strset_req_info *req_info = STRSET_REQINFO(req_base);
+	const struct strset_reply_data *data = STRSET_REPDATA(reply_base);
+	int total = ethnl_reply_header_size();
+	unsigned int id;
+
+	for (id = 0; id < ETH_SS_COUNT; id++) {
+		int set_len;
+
+		if (!strset_include(req_info, data, id))
+			continue;
+
+		set_len = strset_set_size(&data->sets[id],
+					  req_info->counts_only);
+		if (set_len < 0)
+			return set_len;
+		total += set_len;
+	}
+
+	return total;
+}
+
+/* Put one string of a set into the reply as an ETHTOOL_A_STRINGS_STRING nest
+ * holding its index and null terminated value. Returns 0 on success or
+ * -EMSGSIZE if the message is full, in which case the partial nest is
+ * cancelled.
+ */
+static int strset_fill_string(struct sk_buff *skb,
+			      const struct strset_info *set_info, u32 idx)
+{
+	const char *value = set_info->strings[idx];
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, ETHTOOL_A_STRINGS_STRING);
+	if (!nest)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(skb, ETHTOOL_A_STRING_INDEX, idx))
+		goto err_cancel;
+	if (ethnl_put_strz(skb, ETHTOOL_A_STRING_VALUE, value))
+		goto err_cancel;
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+err_cancel:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+/* Put one string set into the reply as an ETHTOOL_A_STRINGSETS_STRINGSET
+ * nest with its id, count and, unless counts_only is set, all its strings.
+ * A global set without a string table yields -EOPNOTSUPP, an empty set is
+ * silently omitted. Returns 0 on success or -EMSGSIZE if the message is
+ * full, in which case the partial nest is cancelled.
+ */
+static int strset_fill_set(struct sk_buff *skb,
+			   const struct strset_info *set_info, u32 id,
+			   bool counts_only)
+{
+	struct nlattr *set_nest;
+	struct nlattr *strings_nest;
+	unsigned int idx;
+
+	if (!set_info->per_dev && !set_info->strings)
+		return -EOPNOTSUPP;
+	if (!set_info->count)
+		return 0;
+
+	set_nest = nla_nest_start(skb, ETHTOOL_A_STRINGSETS_STRINGSET);
+	if (!set_nest)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(skb, ETHTOOL_A_STRINGSET_ID, id))
+		goto err_cancel;
+	if (nla_put_u32(skb, ETHTOOL_A_STRINGSET_COUNT, set_info->count))
+		goto err_cancel;
+
+	if (!counts_only) {
+		strings_nest = nla_nest_start(skb, ETHTOOL_A_STRINGSET_STRINGS);
+		if (!strings_nest)
+			goto err_cancel;
+
+		for (idx = 0; idx < set_info->count; idx++)
+			if (strset_fill_string(skb, set_info, idx) < 0)
+				goto err_cancel;
+
+		nla_nest_end(skb, strings_nest);
+	}
+
+	nla_nest_end(skb, set_nest);
+	return 0;
+
+err_cancel:
+	/* cancelling the outer nest trims the inner one as well */
+	nla_nest_cancel(skb, set_nest);
+	return -EMSGSIZE;
+}
+
+/* Fill STRSET_GET reply payload: an ETHTOOL_A_STRSET_STRINGSETS nest with
+ * one entry for each included string set. Returns 0 on success, -EMSGSIZE
+ * if the nest cannot be started, or the error from strset_fill_set(); on
+ * such failure the whole nest is cancelled.
+ */
+static int strset_fill_reply(struct sk_buff *skb,
+			     const struct ethnl_req_info *req_base,
+			     const struct ethnl_reply_data *reply_base)
+{
+	const struct strset_req_info *req_info = STRSET_REQINFO(req_base);
+	const struct strset_reply_data *data = STRSET_REPDATA(reply_base);
+	struct nlattr *sets_nest;
+	unsigned int id;
+	int err;
+
+	sets_nest = nla_nest_start(skb, ETHTOOL_A_STRSET_STRINGSETS);
+	if (!sets_nest)
+		return -EMSGSIZE;
+
+	for (id = 0; id < ETH_SS_COUNT; id++) {
+		if (!strset_include(req_info, data, id))
+			continue;
+
+		err = strset_fill_set(skb, &data->sets[id], id,
+				      req_info->counts_only);
+		if (err < 0) {
+			nla_nest_cancel(skb, sets_nest);
+			return err;
+		}
+	}
+
+	nla_nest_end(skb, sets_nest);
+	return 0;
+}
+
+/* Description of STRSET_GET for the unified GET request processing.
+ * Requests without a device are allowed (allow_nodev_do); when no device is
+ * given, strset_prepare_data() refuses explicitly requested per device sets.
+ */
+const struct ethnl_request_ops ethnl_strset_request_ops = {
+ .request_cmd = ETHTOOL_MSG_STRSET_GET,
+ .reply_cmd = ETHTOOL_MSG_STRSET_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_STRSET_HEADER,
+ .max_attr = ETHTOOL_A_STRSET_MAX,
+ .req_info_size = sizeof(struct strset_req_info),
+ .reply_data_size = sizeof(struct strset_reply_data),
+ .request_policy = strset_get_policy,
+ .allow_nodev_do = true,
+
+ .parse_request = strset_parse_request,
+ .prepare_data = strset_prepare_data,
+ .reply_size = strset_reply_size,
+ .fill_reply = strset_fill_reply,
+ .cleanup_data = strset_cleanup_data,
+};
diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c
new file mode 100644
index 000000000000..e1b8a65b64c4
--- /dev/null
+++ b/net/ethtool/wol.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "netlink.h"
+#include "common.h"
+#include "bitset.h"
+
+/* WOL_GET request info: nothing beyond the common part */
+struct wol_req_info {
+ struct ethnl_req_info base;
+};
+
+/**
+ * struct wol_reply_data - data for WOL_GET reply
+ * @base: common reply data (embedded first)
+ * @wol: settings retrieved by the get_wol() ethtool_ops callback
+ * @show_sopass: put the SecureOn password into the message; set by
+ *               wol_prepare_data()
+ */
+struct wol_reply_data {
+ struct ethnl_reply_data base;
+ struct ethtool_wolinfo wol;
+ bool show_sopass;
+};
+
+/* get struct wol_reply_data from pointer to its embedded common part */
+#define WOL_REPDATA(__reply_base) \
+ container_of(__reply_base, struct wol_reply_data, base)
+
+/* Policy for WOL_GET request: only the request header is accepted, modes and
+ * password are rejected.
+ */
+static const struct nla_policy
+wol_get_policy[ETHTOOL_A_WOL_MAX + 1] = {
+ [ETHTOOL_A_WOL_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_WOL_HEADER] = { .type = NLA_NESTED },
+ [ETHTOOL_A_WOL_MODES] = { .type = NLA_REJECT },
+ [ETHTOOL_A_WOL_SOPASS] = { .type = NLA_REJECT },
+};
+
+/* Retrieve Wake-on-LAN settings of the device through the get_wol()
+ * ethtool_ops callback. Returns 0 on success, -EOPNOTSUPP if the device has
+ * no get_wol() callback, or the error returned by ethnl_ops_begin().
+ */
+static int wol_prepare_data(const struct ethnl_req_info *req_base,
+			    struct ethnl_reply_data *reply_base,
+			    struct genl_info *info)
+{
+	struct wol_reply_data *data = WOL_REPDATA(reply_base);
+	struct net_device *dev = reply_base->dev;
+	int err;
+
+	if (!dev->ethtool_ops->get_wol)
+		return -EOPNOTSUPP;
+
+	err = ethnl_ops_begin(dev);
+	if (err < 0)
+		return err;
+	dev->ethtool_ops->get_wol(dev, &data->wol);
+	ethnl_ops_complete(dev);
+
+	/* do not include password in notifications (no genl_info there) */
+	data->show_sopass = false;
+	if (info && (data->wol.supported & WAKE_MAGICSECURE))
+		data->show_sopass = true;
+
+	return 0;
+}
+
+/* Estimate size of WOL_GET reply payload: the bitset of WoL modes plus,
+ * when it is to be shown, the SecureOn password attribute. Returns the
+ * estimate or a negative error code from ethnl_bitset32_size().
+ */
+static int wol_reply_size(const struct ethnl_req_info *req_base,
+			  const struct ethnl_reply_data *reply_base)
+{
+	const struct wol_reply_data *data = WOL_REPDATA(reply_base);
+	bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
+	int size;
+
+	size = ethnl_bitset32_size(&data->wol.wolopts, &data->wol.supported,
+				   WOL_MODE_COUNT, wol_mode_names, compact);
+	if (size < 0)
+		return size;
+
+	if (!data->show_sopass)
+		return size;
+
+	return size + nla_total_size(sizeof(data->wol.sopass));
+}
+
+/* Fill WOL_GET reply payload: bitset of WoL modes (enabled modes as value,
+ * supported modes as mask) and, when it is to be shown, the SecureOn
+ * password. Returns 0 on success, the error from ethnl_put_bitset32(), or
+ * -EMSGSIZE if the password does not fit.
+ */
+static int wol_fill_reply(struct sk_buff *skb,
+			  const struct ethnl_req_info *req_base,
+			  const struct ethnl_reply_data *reply_base)
+{
+	const struct wol_reply_data *data = WOL_REPDATA(reply_base);
+	bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
+	int err;
+
+	err = ethnl_put_bitset32(skb, ETHTOOL_A_WOL_MODES, &data->wol.wolopts,
+				 &data->wol.supported, WOL_MODE_COUNT,
+				 wol_mode_names, compact);
+	if (err < 0)
+		return err;
+
+	if (!data->show_sopass)
+		return 0;
+
+	if (nla_put(skb, ETHTOOL_A_WOL_SOPASS, sizeof(data->wol.sopass),
+		    data->wol.sopass))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+/* Description of WOL_GET for the unified GET request processing. There is
+ * no ->parse_request() (the request has no type specific attributes) and no
+ * ->cleanup_data() callback.
+ */
+const struct ethnl_request_ops ethnl_wol_request_ops = {
+ .request_cmd = ETHTOOL_MSG_WOL_GET,
+ .reply_cmd = ETHTOOL_MSG_WOL_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_WOL_HEADER,
+ .max_attr = ETHTOOL_A_WOL_MAX,
+ .req_info_size = sizeof(struct wol_req_info),
+ .reply_data_size = sizeof(struct wol_reply_data),
+ .request_policy = wol_get_policy,
+
+ .prepare_data = wol_prepare_data,
+ .reply_size = wol_reply_size,
+ .fill_reply = wol_fill_reply,
+};
+
+/* WOL_SET */
+
+/* Policy for WOL_SET request: header, nested bitset of modes and binary
+ * SecureOn password limited to SOPASS_MAX bytes.
+ */
+static const struct nla_policy
+wol_set_policy[ETHTOOL_A_WOL_MAX + 1] = {
+ [ETHTOOL_A_WOL_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_WOL_HEADER] = { .type = NLA_NESTED },
+ [ETHTOOL_A_WOL_MODES] = { .type = NLA_NESTED },
+ [ETHTOOL_A_WOL_SOPASS] = { .type = NLA_BINARY,
+ .len = SOPASS_MAX },
+};
+
+/**
+ * ethnl_set_wol() - handler for ETHTOOL_MSG_WOL_SET request
+ * @skb: request message
+ * @info: genetlink info of the request
+ *
+ * Read current Wake-on-LAN settings of the device, apply changes requested
+ * by ETHTOOL_A_WOL_MODES and ETHTOOL_A_WOL_SOPASS and, if anything changed,
+ * pass the result to the set_wol() ethtool_ops callback and send
+ * ETHTOOL_MSG_WOL_NTF notification.
+ *
+ * Return: 0 on success (including a request which changes nothing),
+ * -EOPNOTSUPP if the device lacks get_wol() or set_wol(), -EINVAL for
+ * an attempt to enable an unsupported mode or to set a password without
+ * WAKE_MAGICSECURE support, or another negative error code
+ */
+int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
+	struct nlattr *tb[ETHTOOL_A_WOL_MAX + 1];
+	struct ethnl_req_info req_info = {};
+	struct net_device *dev;
+	bool mod = false;
+	int ret;
+
+	ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb, ETHTOOL_A_WOL_MAX,
+			  wol_set_policy, info->extack);
+	if (ret < 0)
+		return ret;
+	ret = ethnl_parse_header(&req_info, tb[ETHTOOL_A_WOL_HEADER],
+				 genl_info_net(info), info->extack, true);
+	if (ret < 0)
+		return ret;
+	dev = req_info.dev;
+	/* From here on we hold a reference to dev which must be dropped on
+	 * every exit path, including this early one.
+	 */
+	ret = -EOPNOTSUPP;
+	if (!dev->ethtool_ops->get_wol || !dev->ethtool_ops->set_wol)
+		goto out_dev;
+
+	rtnl_lock();
+	ret = ethnl_ops_begin(dev);
+	if (ret < 0)
+		goto out_rtnl;
+
+	dev->ethtool_ops->get_wol(dev, &wol);
+	ret = ethnl_update_bitset32(&wol.wolopts, WOL_MODE_COUNT,
+				    tb[ETHTOOL_A_WOL_MODES], wol_mode_names,
+				    info->extack, &mod);
+	if (ret < 0)
+		goto out_ops;
+	if (wol.wolopts & ~wol.supported) {
+		NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_WOL_MODES],
+				    "cannot enable unsupported WoL mode");
+		ret = -EINVAL;
+		goto out_ops;
+	}
+	if (tb[ETHTOOL_A_WOL_SOPASS]) {
+		if (!(wol.supported & WAKE_MAGICSECURE)) {
+			NL_SET_ERR_MSG_ATTR(info->extack,
+					    tb[ETHTOOL_A_WOL_SOPASS],
+					    "magicsecure not supported, cannot set password");
+			ret = -EINVAL;
+			goto out_ops;
+		}
+		ethnl_update_binary(wol.sopass, sizeof(wol.sopass),
+				    tb[ETHTOOL_A_WOL_SOPASS], &mod);
+	}
+
+	/* nothing changed: no need to bother the driver or notify */
+	if (!mod)
+		goto out_ops;
+	ret = dev->ethtool_ops->set_wol(dev, &wol);
+	if (ret)
+		goto out_ops;
+	dev->wol_enabled = !!wol.wolopts;
+	ethtool_notify(dev, ETHTOOL_MSG_WOL_NTF, NULL);
+
+out_ops:
+	ethnl_ops_complete(dev);
+out_rtnl:
+	rtnl_unlock();
+out_dev:
+	dev_put(dev);
+	return ret;
+}
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 27dc65d7de67..3ba7f61be107 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -35,7 +35,6 @@ static bool seq_nr_after(u16 a, u16 b)
}
#define seq_nr_before(a, b) seq_nr_after((b), (a))
-#define seq_nr_after_or_eq(a, b) (!seq_nr_before((a), (b)))
#define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b)))
bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr)
@@ -156,7 +155,8 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
new_node->seq_out[i] = seq_out;
spin_lock_bh(&hsr->list_lock);
- list_for_each_entry_rcu(node, node_db, mac_list) {
+ list_for_each_entry_rcu(node, node_db, mac_list,
+ lockdep_is_held(&hsr->list_lock)) {
if (ether_addr_equal(node->macaddress_A, addr))
goto out;
if (ether_addr_equal(node->macaddress_B, addr))
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index d40de84a637f..754d84b217f0 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -191,7 +191,7 @@ void hsr_debugfs_term(struct hsr_priv *priv);
void hsr_debugfs_create_root(void);
void hsr_debugfs_remove_root(void);
#else
-static inline void void hsr_debugfs_rename(struct net_device *dev)
+static inline void hsr_debugfs_rename(struct net_device *dev)
{
}
static inline void hsr_debugfs_init(struct hsr_priv *priv,
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index ee561297d8a7..fbfd0db182b7 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -27,6 +27,8 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
rcu_read_lock(); /* hsr->node_db, hsr->ports */
port = hsr_port_get_rcu(skb->dev);
+ if (!port)
+ goto finish_pass;
if (hsr_addr_is_self(port->hsr, eth_hdr(skb)->h_source)) {
/* Directly kill frames sent by ourselves */
diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c
index 2c7a38d76a3a..0672b2f01586 100644
--- a/net/ieee802154/nl_policy.c
+++ b/net/ieee802154/nl_policy.c
@@ -21,7 +21,13 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = {
[IEEE802154_ATTR_HW_ADDR] = { .type = NLA_HW_ADDR, },
[IEEE802154_ATTR_PAN_ID] = { .type = NLA_U16, },
[IEEE802154_ATTR_CHANNEL] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_BCN_ORD] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_SF_ORD] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_PAN_COORD] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_BAT_EXT] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_COORD_REALIGN] = { .type = NLA_U8, },
[IEEE802154_ATTR_PAGE] = { .type = NLA_U8, },
+ [IEEE802154_ATTR_DEV_TYPE] = { .type = NLA_U8, },
[IEEE802154_ATTR_COORD_SHORT_ADDR] = { .type = NLA_U16, },
[IEEE802154_ATTR_COORD_HW_ADDR] = { .type = NLA_HW_ADDR, },
[IEEE802154_ATTR_COORD_PAN_ID] = { .type = NLA_U16, },
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index fc816b187170..f96bd489b362 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -378,6 +378,17 @@ config INET_ESP_OFFLOAD
If unsure, say N.
+config INET_ESPINTCP
+ bool "IP: ESP in TCP encapsulation (RFC 8229)"
+ depends on XFRM && INET_ESP
+ select STREAM_PARSER
+ select NET_SOCK_MSG
+ help
+ Support for RFC 8229 encapsulation of ESP and IKE over
+ TCP/IPv4 sockets.
+
+ If unsure, say N.
+
config INET_IPCOMP
tristate "IP: IPComp transformation"
select INET_XFRM_TUNNEL
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index d57ecfaf89d4..9d97bace13c8 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -65,3 +65,7 @@ obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o xfrm4_protocol.o
+
+ifeq ($(CONFIG_BPF_JIT),y)
+obj-$(CONFIG_BPF_SYSCALL) += bpf_tcp_ca.o
+endif
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
new file mode 100644
index 000000000000..574972bc7299
--- /dev/null
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <linux/types.h>
+#include <linux/bpf_verifier.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/filter.h>
+#include <net/tcp.h>
+
+/* Byte offsets of the tcp_congestion_ops members that is_optional()
+ * reports as optional.
+ */
+static u32 optional_ops[] = {
+ offsetof(struct tcp_congestion_ops, init),
+ offsetof(struct tcp_congestion_ops, release),
+ offsetof(struct tcp_congestion_ops, set_state),
+ offsetof(struct tcp_congestion_ops, cwnd_event),
+ offsetof(struct tcp_congestion_ops, in_ack_event),
+ offsetof(struct tcp_congestion_ops, pkts_acked),
+ offsetof(struct tcp_congestion_ops, min_tso_segs),
+ offsetof(struct tcp_congestion_ops, sndbuf_expand),
+ offsetof(struct tcp_congestion_ops, cong_control),
+};
+
+/* Byte offsets of the tcp_congestion_ops members that is_unsupported()
+ * reports as unsupported.
+ */
+static u32 unsupported_ops[] = {
+ offsetof(struct tcp_congestion_ops, get_info),
+};
+
+/* Filled in by bpf_tcp_ca_init(): the BTF type of "struct tcp_sock" and the
+ * BTF ids of "struct tcp_sock" and "struct sock".
+ */
+static const struct btf_type *tcp_sock_type;
+static u32 tcp_sock_id, sock_id;
+
+/* Resolve the BTF ids of "struct sock" and "struct tcp_sock" in @btf and
+ * cache them (plus the tcp_sock type itself) in the file-level variables.
+ *
+ * Returns 0 on success or -EINVAL when either struct cannot be found.
+ */
+static int bpf_tcp_ca_init(struct btf *btf)
+{
+ s32 type_id;
+
+ type_id = btf_find_by_name_kind(btf, "sock", BTF_KIND_STRUCT);
+ if (type_id < 0)
+ return -EINVAL;
+ sock_id = type_id;
+
+ type_id = btf_find_by_name_kind(btf, "tcp_sock", BTF_KIND_STRUCT);
+ if (type_id < 0)
+ return -EINVAL;
+ tcp_sock_id = type_id;
+ tcp_sock_type = btf_type_by_id(btf, tcp_sock_id);
+
+ return 0;
+}
+
+/* Tell whether the tcp_congestion_ops member located at @member_offset
+ * is one of the entries of optional_ops[].
+ */
+static bool is_optional(u32 member_offset)
+{
+	const u32 *op = optional_ops;
+	const u32 *end = optional_ops + ARRAY_SIZE(optional_ops);
+
+	while (op < end) {
+		if (*op++ == member_offset)
+			return true;
+	}
+
+	return false;
+}
+
+/* Tell whether the tcp_congestion_ops member located at @member_offset
+ * is one of the entries of unsupported_ops[].
+ */
+static bool is_unsupported(u32 member_offset)
+{
+	unsigned int remaining = ARRAY_SIZE(unsupported_ops);
+
+	/* Scan order does not matter for a pure membership test. */
+	while (remaining--) {
+		if (unsupported_ops[remaining] == member_offset)
+			return true;
+	}
+
+	return false;
+}
+
+extern struct btf *btf_vmlinux;
+
+/* Verifier callback validating a context access of @size bytes at @off.
+ *
+ * Only aligned reads inside the argument area are accepted, and only if
+ * btf_ctx_access() agrees.  A resulting pointer to "struct sock" is
+ * retyped as a pointer to "struct tcp_sock".
+ */
+static bool bpf_tcp_ca_is_valid_access(int off, int size,
+				       enum bpf_access_type type,
+				       const struct bpf_prog *prog,
+				       struct bpf_insn_access_aux *info)
+{
+	/* Range, access type and alignment, tested in that order. */
+	if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS ||
+	    type != BPF_READ || off % size != 0)
+		return false;
+
+	if (!btf_ctx_access(off, size, type, prog, info))
+		return false;
+
+	/* A "struct sock" pointer is promoted to "struct tcp_sock". */
+	if (info->reg_type == PTR_TO_BTF_ID && info->btf_id == sock_id)
+		info->btf_id = tcp_sock_id;
+
+	return true;
+}
+
+/* Check a BPF program's access of @size bytes at offset @off into the
+ * kernel struct described by @t.
+ *
+ * Reads are passed straight to btf_struct_access().  Any other access is
+ * refused unless @t is the cached tcp_sock type, and then only when @off
+ * selects one of the members in the switch below and the access does not
+ * run past the end of that member.
+ *
+ * Returns the btf_struct_access() result for reads, NOT_INIT for an
+ * accepted write, or -EACCES after logging the reason to @log.
+ */
+static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
+ const struct btf_type *t, int off,
+ int size, enum bpf_access_type atype,
+ u32 *next_btf_id)
+{
+ size_t end;
+
+ if (atype == BPF_READ)
+ return btf_struct_access(log, t, off, size, atype, next_btf_id);
+
+ if (t != tcp_sock_type) {
+ bpf_log(log, "only read is supported\n");
+ return -EACCES;
+ }
+
+ /* Pick the end offset of the member being written; anything not listed
+ * is not writable.  NOTE(review): bpf_ctx_range() presumably expands to
+ * a case range covering the whole member - confirm.
+ */
+ switch (off) {
+ case bpf_ctx_range(struct inet_connection_sock, icsk_ca_priv):
+ end = offsetofend(struct inet_connection_sock, icsk_ca_priv);
+ break;
+ case offsetof(struct inet_connection_sock, icsk_ack.pending):
+ end = offsetofend(struct inet_connection_sock,
+ icsk_ack.pending);
+ break;
+ case offsetof(struct tcp_sock, snd_cwnd):
+ end = offsetofend(struct tcp_sock, snd_cwnd);
+ break;
+ case offsetof(struct tcp_sock, snd_cwnd_cnt):
+ end = offsetofend(struct tcp_sock, snd_cwnd_cnt);
+ break;
+ case offsetof(struct tcp_sock, snd_ssthresh):
+ end = offsetofend(struct tcp_sock, snd_ssthresh);
+ break;
+ case offsetof(struct tcp_sock, ecn_flags):
+ end = offsetofend(struct tcp_sock, ecn_flags);
+ break;
+ default:
+ bpf_log(log, "no write support to tcp_sock at off %d\n", off);
+ return -EACCES;
+ }
+
+ /* The write must stay inside the selected member. */
+ if (off + size > end) {
+ bpf_log(log,
+ "write access at off %d with size %d beyond the member of tcp_sock ended at %zu\n",
+ off, size, end);
+ return -EACCES;
+ }
+
+ return NOT_INIT;
+}
+
+/* BPF helper body: call __tcp_send_ack() on the socket behind @tp, passing
+ * @rcv_nxt through.  Always returns 0.
+ */
+BPF_CALL_2(bpf_tcp_send_ack, struct tcp_sock *, tp, u32, rcv_nxt)
+{
+ /* bpf_tcp_ca prog cannot have NULL tp */
+ __tcp_send_ack((struct sock *)tp, rcv_nxt);
+ return 0;
+}
+
+/* Prototype of bpf_tcp_send_ack(): the first argument is a BTF pointer
+ * whose type id is tcp_sock_id, the second is unconstrained, and the
+ * return value is an integer.
+ */
+static const struct bpf_func_proto bpf_tcp_send_ack_proto = {
+ .func = bpf_tcp_send_ack,
+ .gpl_only = false,
+ /* In case we want to report error later */
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg2_type = ARG_ANYTHING,
+ .btf_id = &tcp_sock_id,
+};
+
+/* Map @func_id to the helper prototype usable by this program type:
+ * bpf_tcp_send_ack has its own prototype, every other id is resolved by
+ * bpf_base_func_proto().
+ */
+static const struct bpf_func_proto *
+bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
+			  const struct bpf_prog *prog)
+{
+	if (func_id == BPF_FUNC_tcp_send_ack)
+		return &bpf_tcp_send_ack_proto;
+
+	return bpf_base_func_proto(func_id);
+}
+
+/* Verifier callbacks for this program type: helper lookup, context access
+ * checking and struct member access checking.
+ */
+static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = {
+ .get_func_proto = bpf_tcp_ca_get_func_proto,
+ .is_valid_access = bpf_tcp_ca_is_valid_access,
+ .btf_struct_access = bpf_tcp_ca_btf_struct_access,
+};
+
+/* Initialise one member of the kernel tcp_congestion_ops (@kdata) from the
+ * user supplied copy (@udata).  @member, described within type @t, selects
+ * which member is being handled.
+ *
+ * "flags" and "name" are validated and copied here, and 1 is returned for
+ * them.  Every other member yields 0; function pointer members are first
+ * checked so that a zero value is only accepted when the member is listed
+ * as optional or unsupported.
+ *
+ * Returns 1 or 0 as described above, -EINVAL for flag bits outside
+ * TCP_CONG_MASK, an empty or unterminated name or a missing compulsory
+ * function, and -EEXIST when tcp_ca_find() already knows the name.
+ */
+static int bpf_tcp_ca_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ const struct tcp_congestion_ops *utcp_ca;
+ struct tcp_congestion_ops *tcp_ca;
+ size_t tcp_ca_name_len;
+ int prog_fd;
+ u32 moff;
+
+ utcp_ca = (const struct tcp_congestion_ops *)udata;
+ tcp_ca = (struct tcp_congestion_ops *)kdata;
+
+ /* Byte offset of the member inside struct tcp_congestion_ops. */
+ moff = btf_member_bit_offset(t, member) / 8;
+ switch (moff) {
+ case offsetof(struct tcp_congestion_ops, flags):
+ if (utcp_ca->flags & ~TCP_CONG_MASK)
+ return -EINVAL;
+ tcp_ca->flags = utcp_ca->flags;
+ return 1;
+ case offsetof(struct tcp_congestion_ops, name):
+ /* The name must be non-empty and NUL-terminated within the array. */
+ tcp_ca_name_len = strnlen(utcp_ca->name, sizeof(utcp_ca->name));
+ if (!tcp_ca_name_len ||
+ tcp_ca_name_len == sizeof(utcp_ca->name))
+ return -EINVAL;
+ if (tcp_ca_find(utcp_ca->name))
+ return -EEXIST;
+ memcpy(tcp_ca->name, utcp_ca->name, sizeof(tcp_ca->name));
+ return 1;
+ }
+
+ /* Members that are not function pointers need no further checking. */
+ if (!btf_type_resolve_func_ptr(btf_vmlinux, member->type, NULL))
+ return 0;
+
+ /* Ensure bpf_prog is provided for compulsory func ptr */
+ prog_fd = (int)(*(unsigned long *)(udata + moff));
+ if (!prog_fd && !is_optional(moff) && !is_unsupported(moff))
+ return -EINVAL;
+
+ return 0;
+}
+
+/* Reject members of tcp_congestion_ops that are listed as unsupported.
+ *
+ * Returns -ENOTSUPP for such a member and 0 for every other one.
+ */
+static int bpf_tcp_ca_check_member(const struct btf_type *t,
+				   const struct btf_member *member)
+{
+	u32 moff = btf_member_bit_offset(t, member) / 8;
+
+	return is_unsupported(moff) ? -ENOTSUPP : 0;
+}
+
+/* Registration hook: hand @kdata to tcp_register_congestion_control() and
+ * return its result.
+ */
+static int bpf_tcp_ca_reg(void *kdata)
+{
+ return tcp_register_congestion_control(kdata);
+}
+
+/* Unregistration hook: hand @kdata to
+ * tcp_unregister_congestion_control().
+ */
+static void bpf_tcp_ca_unreg(void *kdata)
+{
+ tcp_unregister_congestion_control(kdata);
+}
+
+/* Avoid sparse warning. It is only used in bpf_struct_ops.c. */
+extern struct bpf_struct_ops bpf_tcp_congestion_ops;
+
+/* struct_ops descriptor named "tcp_congestion_ops", wiring up the verifier
+ * ops and the init/check/register/unregister callbacks of this file.
+ */
+struct bpf_struct_ops bpf_tcp_congestion_ops = {
+ .verifier_ops = &bpf_tcp_ca_verifier_ops,
+ .reg = bpf_tcp_ca_reg,
+ .unreg = bpf_tcp_ca_unreg,
+ .check_member = bpf_tcp_ca_check_member,
+ .init_member = bpf_tcp_ca_init_member,
+ .init = bpf_tcp_ca_init,
+ .name = "tcp_congestion_ops",
+};
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 376882215919..0bd10a1f477f 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1724,6 +1724,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
{
unsigned char optbuf[sizeof(struct ip_options) + 40];
struct ip_options *opt = (struct ip_options *)optbuf;
+ int res;
if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
return;
@@ -1735,7 +1736,11 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
memset(opt, 0, sizeof(struct ip_options));
opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
- if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL))
+ rcu_read_lock();
+ res = __ip_options_compile(dev_net(skb->dev), opt, skb, NULL);
+ rcu_read_unlock();
+
+ if (res)
return;
if (gateway)
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 5c967764041f..103c7d599a3c 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -18,6 +18,8 @@
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/espintcp.h>
#include <linux/highmem.h>
@@ -117,6 +119,132 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
put_page(sg_page(sg));
}
+#ifdef CONFIG_INET_ESPINTCP
+/* Carrier used to drop a socket reference from an RCU callback: @sk is the
+ * socket to sock_put(), @rcu is the head handed to call_rcu().
+ */
+struct esp_tcp_sk {
+ struct sock *sk;
+ struct rcu_head rcu;
+};
+
+/* RCU callback queued by esp_find_tcp_sk(): drop the reference on the
+ * socket held in the enclosing esp_tcp_sk and free that carrier.
+ */
+static void esp_free_tcp_sk(struct rcu_head *head)
+{
+ struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
+
+ sock_put(esk->sk);
+ kfree(esk);
+}
+
+/* Find the TCP socket that carries ESP-in-TCP traffic for state @x.
+ *
+ * x->encap_sk is read with rcu_dereference(), so the caller is expected to
+ * hold rcu_read_lock() (both callers in this file do).
+ *
+ * The cached x->encap_sk is returned directly while it is in
+ * TCP_ESTABLISHED.  Otherwise a stale cached socket is cleared and handed
+ * to esp_free_tcp_sk() through call_rcu(), and a new socket is looked up
+ * among the established sockets using the state's addresses and the encap
+ * ports sampled under x->lock.
+ *
+ * Returns the socket, or ERR_PTR() of -ENOMEM, -ENOENT, -EINVAL or
+ * -EREMCHG.
+ */
+static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
+{
+ struct xfrm_encap_tmpl *encap = x->encap;
+ struct esp_tcp_sk *esk;
+ __be16 sport, dport;
+ struct sock *nsk;
+ struct sock *sk;
+
+ sk = rcu_dereference(x->encap_sk);
+ if (sk && sk->sk_state == TCP_ESTABLISHED)
+ return sk;
+
+ spin_lock_bh(&x->lock);
+ sport = encap->encap_sport;
+ dport = encap->encap_dport;
+ nsk = rcu_dereference_protected(x->encap_sk,
+ lockdep_is_held(&x->lock));
+ /* Only retire the cached socket if nobody replaced it meanwhile. */
+ if (sk && sk == nsk) {
+ esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
+ if (!esk) {
+ spin_unlock_bh(&x->lock);
+ return ERR_PTR(-ENOMEM);
+ }
+ RCU_INIT_POINTER(x->encap_sk, NULL);
+ esk->sk = sk;
+ call_rcu(&esk->rcu, esp_free_tcp_sk);
+ }
+ spin_unlock_bh(&x->lock);
+
+ sk = inet_lookup_established(xs_net(x), &tcp_hashinfo, x->id.daddr.a4,
+ dport, x->props.saddr.a4, sport, 0);
+ if (!sk)
+ return ERR_PTR(-ENOENT);
+
+ if (!tcp_is_ulp_esp(sk)) {
+ sock_put(sk);
+ return ERR_PTR(-EINVAL);
+ }
+
+ spin_lock_bh(&x->lock);
+ nsk = rcu_dereference_protected(x->encap_sk,
+ lockdep_is_held(&x->lock));
+ if (encap->encap_sport != sport ||
+ encap->encap_dport != dport) {
+ /* Ports changed while unlocked: the socket just found is for the
+ * old mapping, so drop it and use whatever is cached now, if any.
+ */
+ sock_put(sk);
+ sk = nsk ?: ERR_PTR(-EREMCHG);
+ } else if (sk == nsk) {
+ /* Already cached: drop the extra reference taken for the lookup. */
+ sock_put(sk);
+ } else {
+ rcu_assign_pointer(x->encap_sk, sk);
+ }
+ spin_unlock_bh(&x->lock);
+
+ return sk;
+}
+
+/* Hand an ESP-in-TCP encapsulated @skb to the TCP socket of state @x.
+ *
+ * The encapsulation socket is looked up under rcu_read_lock().  With the
+ * socket bh-locked, the skb is passed to espintcp_queue_out() when the
+ * socket is owned by user context and to espintcp_push_skb() otherwise.
+ *
+ * This function owns @skb: esp_output_tail_tcp() reports it as consumed
+ * once it has been queued for this callback, so nobody else will free it.
+ * It is therefore dropped here when no socket can be found.
+ *
+ * Returns the socket lookup error, or whatever espintcp_queue_out() /
+ * espintcp_push_skb() returned.
+ */
+static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct sock *sk;
+	int err;
+
+	rcu_read_lock();
+
+	sk = esp_find_tcp_sk(x);
+	err = PTR_ERR_OR_ZERO(sk);
+	if (err) {
+		/* No socket to deliver to: free the skb instead of leaking it. */
+		kfree_skb(skb);
+		goto out;
+	}
+
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk))
+		err = espintcp_queue_out(sk, skb);
+	else
+		err = espintcp_push_skb(sk, skb);
+	bh_unlock_sock(sk);
+
+out:
+	rcu_read_unlock();
+	return err;
+}
+
+/* Callback given to xfrm_trans_queue_net(): recover the xfrm state from
+ * the skb's dst entry and finish the TCP transmit.  @net and @sk are
+ * unused.
+ */
+static int esp_output_tcp_encap_cb(struct net *net, struct sock *sk,
+				   struct sk_buff *skb)
+{
+	return esp_output_tcp_finish(skb_dst(skb)->xfrm, skb);
+}
+
+/* Queue @skb through xfrm_trans_queue_net(), with bottom halves disabled,
+ * so that esp_output_tcp_encap_cb() is invoked on it.
+ *
+ * Returns the queueing error, or -EINPROGRESS when queueing succeeded.
+ */
+static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err;
+
+ local_bh_disable();
+ err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb);
+ local_bh_enable();
+
+ /* EINPROGRESS just happens to do the right thing. It
+ * actually means that the skb has been consumed and
+ * isn't coming back.
+ */
+ return err ?: -EINPROGRESS;
+}
+#else
+/* Variant built when CONFIG_INET_ESPINTCP is not set: free @skb and
+ * report -EOPNOTSUPP.
+ */
+static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
+{
+ kfree_skb(skb);
+
+ return -EOPNOTSUPP;
+}
+#endif
+
static void esp_output_done(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
@@ -147,7 +275,11 @@ static void esp_output_done(struct crypto_async_request *base, int err)
secpath_reset(skb);
xfrm_dev_resume(skb);
} else {
- xfrm_output_resume(skb, err);
+ if (!err &&
+ x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
+ esp_output_tail_tcp(x, skb);
+ else
+ xfrm_output_resume(skb, err);
}
}
@@ -225,45 +357,100 @@ static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto)
tail[plen - 1] = proto;
}
-static int esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
+static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb,
+ int encap_type,
+ struct esp_info *esp,
+ __be16 sport,
+ __be16 dport)
{
- int encap_type;
struct udphdr *uh;
__be32 *udpdata32;
- __be16 sport, dport;
- struct xfrm_encap_tmpl *encap = x->encap;
- struct ip_esp_hdr *esph = esp->esph;
unsigned int len;
- spin_lock_bh(&x->lock);
- sport = encap->encap_sport;
- dport = encap->encap_dport;
- encap_type = encap->encap_type;
- spin_unlock_bh(&x->lock);
-
len = skb->len + esp->tailen - skb_transport_offset(skb);
- if (len + sizeof(struct iphdr) >= IP_MAX_MTU)
- return -EMSGSIZE;
+ if (len + sizeof(struct iphdr) > IP_MAX_MTU)
+ return ERR_PTR(-EMSGSIZE);
- uh = (struct udphdr *)esph;
+ uh = (struct udphdr *)esp->esph;
uh->source = sport;
uh->dest = dport;
uh->len = htons(len);
uh->check = 0;
+ *skb_mac_header(skb) = IPPROTO_UDP;
+
+ if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) {
+ udpdata32 = (__be32 *)(uh + 1);
+ udpdata32[0] = udpdata32[1] = 0;
+ return (struct ip_esp_hdr *)(udpdata32 + 2);
+ }
+
+ return (struct ip_esp_hdr *)(uh + 1);
+}
+
+#ifdef CONFIG_INET_ESPINTCP
+/* Write the 16-bit length prefix used for ESP-in-TCP at esp->esph and
+ * return the position of the ESP header that follows it.
+ *
+ * Returns ERR_PTR(-EMSGSIZE) when the computed length exceeds IP_MAX_MTU,
+ * or the error from esp_find_tcp_sk() when no usable socket exists; the
+ * socket itself is only looked up here, not used.
+ */
+static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x,
+ struct sk_buff *skb,
+ struct esp_info *esp)
+{
+ __be16 *lenp = (void *)esp->esph;
+ struct ip_esp_hdr *esph;
+ unsigned int len;
+ struct sock *sk;
+
+ len = skb->len + esp->tailen - skb_transport_offset(skb);
+ if (len > IP_MAX_MTU)
+ return ERR_PTR(-EMSGSIZE);
+
+ rcu_read_lock();
+ sk = esp_find_tcp_sk(x);
+ rcu_read_unlock();
+
+ if (IS_ERR(sk))
+ return ERR_CAST(sk);
+
+ *lenp = htons(len);
+ esph = (struct ip_esp_hdr *)(lenp + 1);
+
+ return esph;
+}
+#else
+/* Variant built when CONFIG_INET_ESPINTCP is not set: always fails with
+ * ERR_PTR(-EOPNOTSUPP).
+ */
+static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x,
+ struct sk_buff *skb,
+ struct esp_info *esp)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+#endif
+
+static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb,
+ struct esp_info *esp)
+{
+ struct xfrm_encap_tmpl *encap = x->encap;
+ struct ip_esp_hdr *esph;
+ __be16 sport, dport;
+ int encap_type;
+
+ spin_lock_bh(&x->lock);
+ sport = encap->encap_sport;
+ dport = encap->encap_dport;
+ encap_type = encap->encap_type;
+ spin_unlock_bh(&x->lock);
+
switch (encap_type) {
default:
case UDP_ENCAP_ESPINUDP:
- esph = (struct ip_esp_hdr *)(uh + 1);
- break;
case UDP_ENCAP_ESPINUDP_NON_IKE:
- udpdata32 = (__be32 *)(uh + 1);
- udpdata32[0] = udpdata32[1] = 0;
- esph = (struct ip_esp_hdr *)(udpdata32 + 2);
+ esph = esp_output_udp_encap(skb, encap_type, esp, sport, dport);
+ break;
+ case TCP_ENCAP_ESPINTCP:
+ esph = esp_output_tcp_encap(x, skb, esp);
break;
}
- *skb_mac_header(skb) = IPPROTO_UDP;
+ if (IS_ERR(esph))
+ return PTR_ERR(esph);
+
esp->esph = esph;
return 0;
@@ -279,9 +466,9 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
struct sk_buff *trailer;
int tailen = esp->tailen;
- /* this is non-NULL only with UDP Encapsulation */
+ /* this is non-NULL only with TCP/UDP Encapsulation */
if (x->encap) {
- int err = esp_output_udp_encap(x, skb, esp);
+ int err = esp_output_encap(x, skb, esp);
if (err < 0)
return err;
@@ -474,6 +661,9 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
if (sg != dsg)
esp_ssg_unref(x, tmp);
+ if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
+ err = esp_output_tail_tcp(x, skb);
+
error_free:
kfree(tmp);
error:
@@ -600,7 +790,23 @@ int esp_input_done2(struct sk_buff *skb, int err)
if (x->encap) {
struct xfrm_encap_tmpl *encap = x->encap;
+ struct tcphdr *th = (void *)(skb_network_header(skb) + ihl);
struct udphdr *uh = (void *)(skb_network_header(skb) + ihl);
+ __be16 source;
+
+ switch (x->encap->encap_type) {
+ case TCP_ENCAP_ESPINTCP:
+ source = th->source;
+ break;
+ case UDP_ENCAP_ESPINUDP:
+ case UDP_ENCAP_ESPINUDP_NON_IKE:
+ source = uh->source;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ err = -EINVAL;
+ goto out;
+ }
/*
* 1) if the NAT-T peer's IP or port changed then
@@ -609,11 +815,11 @@ int esp_input_done2(struct sk_buff *skb, int err)
* SRC ports.
*/
if (iph->saddr != x->props.saddr.a4 ||
- uh->source != encap->encap_sport) {
+ source != encap->encap_sport) {
xfrm_address_t ipaddr;
ipaddr.a4 = iph->saddr;
- km_new_mapping(x, &ipaddr, uh->source);
+ km_new_mapping(x, &ipaddr, source);
/* XXX: perhaps add an extra
* policy check here, to see
@@ -988,6 +1194,14 @@ static int esp_init_state(struct xfrm_state *x)
case UDP_ENCAP_ESPINUDP_NON_IKE:
x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32);
break;
+#ifdef CONFIG_INET_ESPINTCP
+ case TCP_ENCAP_ESPINTCP:
+ /* only the length field, TCP encap is done by
+ * the socket
+ */
+ x->props.header_len += 2;
+ break;
+#endif
}
}
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 0e4a7cf6bc87..e2e219c7854a 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -57,6 +57,8 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
if (!x)
goto out_reset;
+ skb->mark = xfrm_smark_get(skb->mark, x);
+
sp->xvec[sp->len++] = x;
sp->olen++;
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index a68b5e21ec51..c092e9a55790 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -16,6 +16,9 @@ struct fib_alias {
u8 fa_slen;
u32 tb_id;
s16 fa_default;
+ u8 offload:1,
+ trap:1,
+ unused:6;
struct rcu_head rcu;
};
@@ -35,9 +38,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
struct netlink_ext_ack *extack);
bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi);
-int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id,
- u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi,
- unsigned int);
+int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
+ struct fib_rt_info *fri, unsigned int flags);
void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len,
u32 tb_id, const struct nl_info *info, unsigned int nlm_flags);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f1888c683426..a803cdd9400a 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -504,6 +504,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
int dst_len, u32 tb_id, const struct nl_info *info,
unsigned int nlm_flags)
{
+ struct fib_rt_info fri;
struct sk_buff *skb;
u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
int err = -ENOBUFS;
@@ -512,9 +513,15 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
if (!skb)
goto errout;
- err = fib_dump_info(skb, info->portid, seq, event, tb_id,
- fa->fa_type, key, dst_len,
- fa->fa_tos, fa->fa_info, nlm_flags);
+ fri.fi = fa->fa_info;
+ fri.tb_id = tb_id;
+ fri.dst = key;
+ fri.dst_len = dst_len;
+ fri.tos = fa->fa_tos;
+ fri.type = fa->fa_type;
+ fri.offload = fa->offload;
+ fri.trap = fa->trap;
+ err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -1725,10 +1732,11 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
#endif
int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
- u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
- struct fib_info *fi, unsigned int flags)
+ struct fib_rt_info *fri, unsigned int flags)
{
- unsigned int nhs = fib_info_num_path(fi);
+ unsigned int nhs = fib_info_num_path(fri->fi);
+ struct fib_info *fi = fri->fi;
+ u32 tb_id = fri->tb_id;
struct nlmsghdr *nlh;
struct rtmsg *rtm;
@@ -1738,22 +1746,22 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
rtm = nlmsg_data(nlh);
rtm->rtm_family = AF_INET;
- rtm->rtm_dst_len = dst_len;
+ rtm->rtm_dst_len = fri->dst_len;
rtm->rtm_src_len = 0;
- rtm->rtm_tos = tos;
+ rtm->rtm_tos = fri->tos;
if (tb_id < 256)
rtm->rtm_table = tb_id;
else
rtm->rtm_table = RT_TABLE_COMPAT;
if (nla_put_u32(skb, RTA_TABLE, tb_id))
goto nla_put_failure;
- rtm->rtm_type = type;
+ rtm->rtm_type = fri->type;
rtm->rtm_flags = fi->fib_flags;
rtm->rtm_scope = fi->fib_scope;
rtm->rtm_protocol = fi->fib_protocol;
if (rtm->rtm_dst_len &&
- nla_put_in_addr(skb, RTA_DST, dst))
+ nla_put_in_addr(skb, RTA_DST, fri->dst))
goto nla_put_failure;
if (fi->fib_priority &&
nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
@@ -1795,6 +1803,11 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
goto nla_put_failure;
}
+ if (fri->offload)
+ rtm->rtm_flags |= RTM_F_OFFLOAD;
+ if (fri->trap)
+ rtm->rtm_flags |= RTM_F_TRAP;
+
nlmsg_end(skb, nlh);
return 0;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 195469a13371..ff0c24371e33 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -980,9 +980,12 @@ static struct key_vector *fib_find_node(struct trie *t,
/* Return the first fib alias matching TOS with
* priority less than or equal to PRIO.
+ * If 'find_first' is set, return the first matching
+ * fib alias, regardless of TOS and priority.
*/
static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
- u8 tos, u32 prio, u32 tb_id)
+ u8 tos, u32 prio, u32 tb_id,
+ bool find_first)
{
struct fib_alias *fa;
@@ -998,6 +1001,8 @@ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
continue;
if (fa->tb_id != tb_id)
break;
+ if (find_first)
+ return fa;
if (fa->fa_tos > tos)
continue;
if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos)
@@ -1007,6 +1012,52 @@ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
return NULL;
}
+/* Find the alias described by @fri in @net: look up table fri->tb_id,
+ * locate the leaf for fri->dst and return the alias whose suffix length,
+ * table id, TOS, fib_info and type all match.
+ *
+ * The leaf is walked with hlist_for_each_entry_rcu(); the caller visible
+ * in this file holds rcu_read_lock().
+ *
+ * Returns NULL when the table, the leaf or a matching alias is missing.
+ */
+static struct fib_alias *
+fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
+{
+ u8 slen = KEYLENGTH - fri->dst_len;
+ struct key_vector *l, *tp;
+ struct fib_table *tb;
+ struct fib_alias *fa;
+ struct trie *t;
+
+ tb = fib_get_table(net, fri->tb_id);
+ if (!tb)
+ return NULL;
+
+ t = (struct trie *)tb->tb_data;
+ l = fib_find_node(t, &tp, be32_to_cpu(fri->dst));
+ if (!l)
+ return NULL;
+
+ hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
+ if (fa->fa_slen == slen && fa->tb_id == fri->tb_id &&
+ fa->fa_tos == fri->tos && fa->fa_info == fri->fi &&
+ fa->fa_type == fri->type)
+ return fa;
+ }
+
+ return NULL;
+}
+
+/* Copy the offload and trap flags of @fri onto the FIB alias it describes.
+ * Does nothing when no such alias exists in @net.
+ */
+void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
+{
+	struct fib_alias *fa;
+
+	rcu_read_lock();
+
+	fa = fib_find_matching_alias(net, fri);
+	if (fa) {
+		fa->offload = fri->offload;
+		fa->trap = fri->trap;
+	}
+
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(fib_alias_hw_flags_set);
+
static void trie_rebalance(struct trie *t, struct key_vector *tn)
{
while (!IS_TRIE(tn))
@@ -1063,9 +1114,6 @@ noleaf:
return -ENOMEM;
}
-/* fib notifier for ADD is sent before calling fib_insert_alias with
- * the expectation that the only possible failure ENOMEM
- */
static int fib_insert_alias(struct trie *t, struct key_vector *tp,
struct key_vector *l, struct fib_alias *new,
struct fib_alias *fa, t_key key)
@@ -1118,11 +1166,13 @@ static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack)
return true;
}
+static void fib_remove_alias(struct trie *t, struct key_vector *tp,
+ struct key_vector *l, struct fib_alias *old);
+
/* Caller must hold RTNL. */
int fib_table_insert(struct net *net, struct fib_table *tb,
struct fib_config *cfg, struct netlink_ext_ack *extack)
{
- enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
struct trie *t = (struct trie *)tb->tb_data;
struct fib_alias *fa, *new_fa;
struct key_vector *l, *tp;
@@ -1149,7 +1199,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
l = fib_find_node(t, &tp, key);
fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority,
- tb->tb_id) : NULL;
+ tb->tb_id, false) : NULL;
/* Now fa, if non-NULL, points to the first fib alias
* with the same keys [prefix,tos,priority], if such key already
@@ -1216,19 +1266,29 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->fa_slen = fa->fa_slen;
new_fa->tb_id = tb->tb_id;
new_fa->fa_default = -1;
+ new_fa->offload = 0;
+ new_fa->trap = 0;
- err = call_fib_entry_notifiers(net,
- FIB_EVENT_ENTRY_REPLACE,
- key, plen, new_fa,
- extack);
- if (err)
- goto out_free_new_fa;
+ hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
+
+ if (fib_find_alias(&l->leaf, fa->fa_slen, 0, 0,
+ tb->tb_id, true) == new_fa) {
+ enum fib_event_type fib_event;
+
+ fib_event = FIB_EVENT_ENTRY_REPLACE;
+ err = call_fib_entry_notifiers(net, fib_event,
+ key, plen,
+ new_fa, extack);
+ if (err) {
+ hlist_replace_rcu(&new_fa->fa_list,
+ &fa->fa_list);
+ goto out_free_new_fa;
+ }
+ }
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
tb->tb_id, &cfg->fc_nlinfo, nlflags);
- hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
-
alias_free_mem_rcu(fa);
fib_release_info(fi_drop);
@@ -1244,12 +1304,10 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
if (fa_match)
goto out;
- if (cfg->fc_nlflags & NLM_F_APPEND) {
- event = FIB_EVENT_ENTRY_APPEND;
+ if (cfg->fc_nlflags & NLM_F_APPEND)
nlflags |= NLM_F_APPEND;
- } else {
+ else
fa = fa_first;
- }
}
err = -ENOENT;
if (!(cfg->fc_nlflags & NLM_F_CREATE))
@@ -1268,15 +1326,29 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->fa_slen = slen;
new_fa->tb_id = tb->tb_id;
new_fa->fa_default = -1;
-
- err = call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
- if (err)
- goto out_free_new_fa;
+ new_fa->offload = 0;
+ new_fa->trap = 0;
/* Insert new entry to the list. */
err = fib_insert_alias(t, tp, l, new_fa, fa, key);
if (err)
- goto out_fib_notif;
+ goto out_free_new_fa;
+
+ /* The alias was already inserted, so the node must exist. */
+ l = l ? l : fib_find_node(t, &tp, key);
+ if (WARN_ON_ONCE(!l))
+ goto out_free_new_fa;
+
+ if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) ==
+ new_fa) {
+ enum fib_event_type fib_event;
+
+ fib_event = FIB_EVENT_ENTRY_REPLACE;
+ err = call_fib_entry_notifiers(net, fib_event, key, plen,
+ new_fa, extack);
+ if (err)
+ goto out_remove_new_fa;
+ }
if (!plen)
tb->tb_num_default++;
@@ -1287,14 +1359,8 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
succeeded:
return 0;
-out_fib_notif:
- /* notifier was sent that entry would be added to trie, but
- * the add failed and need to recover. Only failure for
- * fib_insert_alias is ENOMEM.
- */
- NL_SET_ERR_MSG(extack, "Failed to insert route into trie");
- call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key,
- plen, new_fa, NULL);
+out_remove_new_fa:
+ fib_remove_alias(t, tp, l, new_fa);
out_free_new_fa:
kmem_cache_free(fn_alias_kmem, new_fa);
out:
@@ -1545,6 +1611,36 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp,
node_pull_suffix(tp, fa->fa_slen);
}
+/* Tell FIB notifier listeners that @fa_to_delete, a member of list @fah
+ * for prefix @key, is going away.
+ *
+ * Nothing is sent unless @fa_to_delete is the first alias in @fah for its
+ * suffix length and table.  If the next list entry shares that suffix
+ * length and table, a FIB_EVENT_ENTRY_REPLACE carrying that next entry is
+ * sent; otherwise a FIB_EVENT_ENTRY_DEL carrying @fa_to_delete is sent.
+ * The notifier result is not checked.
+ */
+static void fib_notify_alias_delete(struct net *net, u32 key,
+ struct hlist_head *fah,
+ struct fib_alias *fa_to_delete,
+ struct netlink_ext_ack *extack)
+{
+ struct fib_alias *fa_next, *fa_to_notify;
+ u32 tb_id = fa_to_delete->tb_id;
+ u8 slen = fa_to_delete->fa_slen;
+ enum fib_event_type fib_event;
+
+ /* Do not notify if we do not care about the route. */
+ if (fib_find_alias(fah, slen, 0, 0, tb_id, true) != fa_to_delete)
+ return;
+
+ /* Determine if the route should be replaced by the next route in the
+ * list.
+ */
+ fa_next = hlist_entry_safe(fa_to_delete->fa_list.next,
+ struct fib_alias, fa_list);
+ if (fa_next && fa_next->fa_slen == slen && fa_next->tb_id == tb_id) {
+ fib_event = FIB_EVENT_ENTRY_REPLACE;
+ fa_to_notify = fa_next;
+ } else {
+ fib_event = FIB_EVENT_ENTRY_DEL;
+ fa_to_notify = fa_to_delete;
+ }
+ call_fib_entry_notifiers(net, fib_event, key, KEYLENGTH - slen,
+ fa_to_notify, extack);
+}
+
/* Caller must hold RTNL. */
int fib_table_delete(struct net *net, struct fib_table *tb,
struct fib_config *cfg, struct netlink_ext_ack *extack)
@@ -1566,7 +1662,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
if (!l)
return -ESRCH;
- fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id);
+ fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id, false);
if (!fa)
return -ESRCH;
@@ -1598,8 +1694,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
if (!fa_to_delete)
return -ESRCH;
- call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen,
- fa_to_delete, extack);
+ fib_notify_alias_delete(net, key, &l->leaf, fa_to_delete, extack);
rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
&cfg->fc_nlinfo, 0);
@@ -1923,10 +2018,8 @@ int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
continue;
}
- call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
- n->key,
- KEYLENGTH - fa->fa_slen, fa,
- NULL);
+ fib_notify_alias_delete(net, n->key, &n->leaf, fa,
+ NULL);
hlist_del_rcu(&fa->fa_list);
fib_release_info(fa->fa_info);
alias_free_mem_rcu(fa);
@@ -2022,6 +2115,7 @@ static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb,
struct netlink_ext_ack *extack)
{
struct fib_alias *fa;
+ int last_slen = -1;
int err;
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
@@ -2036,8 +2130,12 @@ static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb,
if (tb->tb_id != fa->tb_id)
continue;
- err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_ADD, l->key,
- KEYLENGTH - fa->fa_slen,
+ if (fa->fa_slen == last_slen)
+ continue;
+
+ last_slen = fa->fa_slen;
+ err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_REPLACE,
+ l->key, KEYLENGTH - fa->fa_slen,
fa, extack);
if (err)
return err;
@@ -2146,14 +2244,20 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
if (filter->dump_routes) {
if (!s_fa) {
+ struct fib_rt_info fri;
+
+ fri.fi = fi;
+ fri.tb_id = tb->tb_id;
+ fri.dst = xkey;
+ fri.dst_len = KEYLENGTH - fa->fa_slen;
+ fri.tos = fa->fa_tos;
+ fri.type = fa->fa_type;
+ fri.offload = fa->offload;
+ fri.trap = fa->trap;
err = fib_dump_info(skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- RTM_NEWROUTE,
- tb->tb_id, fa->fa_type,
- xkey,
- KEYLENGTH - fa->fa_slen,
- fa->fa_tos, fi, flags);
+ RTM_NEWROUTE, &fri, flags);
if (err < 0)
goto stop;
}
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 30fa771d382a..dcc79ff54b41 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -662,8 +662,8 @@ static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
[FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, },
[FOU_ATTR_LOCAL_V4] = { .type = NLA_U32, },
[FOU_ATTR_PEER_V4] = { .type = NLA_U32, },
- [FOU_ATTR_LOCAL_V6] = { .type = sizeof(struct in6_addr), },
- [FOU_ATTR_PEER_V6] = { .type = sizeof(struct in6_addr), },
+ [FOU_ATTR_LOCAL_V6] = { .len = sizeof(struct in6_addr), },
+ [FOU_ATTR_PEER_V6] = { .len = sizeof(struct in6_addr), },
[FOU_ATTR_PEER_PORT] = { .type = NLA_U16, },
[FOU_ATTR_IFINDEX] = { .type = NLA_S32, },
};
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 5fd6e8ed02b5..66fdbfe5447c 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -56,7 +56,9 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version)
}
EXPORT_SYMBOL_GPL(gre_del_protocol);
-/* Fills in tpi and returns header length to be pulled. */
+/* Fills in tpi and returns header length to be pulled.
+ * Note that caller must use pskb_may_pull() before pulling GRE header.
+ */
int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
bool *csum_err, __be16 proto, int nhs)
{
@@ -110,8 +112,14 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
* - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
*/
if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
+ u8 _val, *val;
+
+ val = skb_header_pointer(skb, nhs + hdr_len,
+ sizeof(_val), &_val);
+ if (!val)
+ return -EINVAL;
tpi->proto = proto;
- if ((*(u8 *)options & 0xF0) != 0x40)
+ if ((*val & 0xF0) != 0x40)
hdr_len += 4;
}
tpi->hdr_len = hdr_len;
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 4de7e962d3da..2e6d1b7a7bc9 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -174,7 +174,7 @@ static struct sk_buff *gre_gro_receive(struct list_head *head,
if (skb_gro_checksum_simple_validate(skb))
goto out_unlock;
- skb_gro_checksum_try_convert(skb, IPPROTO_GRE, 0,
+ skb_gro_checksum_try_convert(skb, IPPROTO_GRE,
null_compute_pseudo);
}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 18068ed42f25..f369e7ce685b 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -748,6 +748,39 @@ out:;
}
EXPORT_SYMBOL(__icmp_send);
+#if IS_ENABLED(CONFIG_NF_NAT)
+#include <net/netfilter/nf_conntrack.h>
+void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+{ /* icmp_send() wrapper that temporarily swaps skb_in's IPv4 source
+ * address when conntrack flags the flow as source-NATed (IPS_SRC_NAT).
+ * @type, @code and @info are passed through to icmp_send().
+ */
+ struct sk_buff *cloned_skb = NULL;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ __be32 orig_ip;
+ /* No conntrack entry, or no source NAT on it: plain icmp_send(). */
+ ct = nf_ct_get(skb_in, &ctinfo);
+ if (!ct || !(ct->status & IPS_SRC_NAT)) {
+ icmp_send(skb_in, type, code, info);
+ return;
+ }
+ /* A shared skb is cloned first and the clone is used from here on.
+ * NOTE(review): presumably so the header edit below is not done on
+ * an skb other users hold -- confirm.
+ */
+ if (skb_shared(skb_in))
+ skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
+ /* Send nothing if the clone failed, if the network header does not
+ * lie between head and tail with room for a struct iphdr, or if
+ * skb_ensure_writable() returns non-zero for the IP header span.
+ */
+ if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head ||
+ (skb_network_header(skb_in) + sizeof(struct iphdr)) >
+ skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in,
+ skb_network_offset(skb_in) + sizeof(struct iphdr))))
+ goto out;
+ /* Send with saddr taken from ct->tuplehash[0] (presumably the
+ * original-direction tuple, i.e. the pre-NAT source -- confirm),
+ * then put the saved saddr back.
+ */
+ orig_ip = ip_hdr(skb_in)->saddr;
+ ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
+ icmp_send(skb_in, type, code, info);
+ ip_hdr(skb_in)->saddr = orig_ip;
+out:
+ consume_skb(cloned_skb); /* cloned_skb is NULL unless a clone was taken above */
+}
+EXPORT_SYMBOL(icmp_ndo_send);
+#endif
static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
{
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 18c0d5bffe12..d545fb99a8a1 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -482,8 +482,28 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
}
spin_unlock_bh(&queue->fastopenq.lock);
}
+
out:
release_sock(sk);
+ if (newsk && mem_cgroup_sockets_enabled) {
+ int amt;
+
+ /* atomically get the memory usage, set and charge the
+ * newsk->sk_memcg.
+ */
+ lock_sock(newsk);
+
+ /* The socket has not been accepted yet, no need to look at
+ * newsk->sk_wmem_queued.
+ */
+ amt = sk_mem_pages(newsk->sk_forward_alloc +
+ atomic_read(&newsk->sk_rmem_alloc));
+ mem_cgroup_sk_alloc(newsk);
+ if (newsk->sk_memcg && amt)
+ mem_cgroup_charge_skmem(newsk->sk_memcg, amt);
+
+ release_sock(newsk);
+ }
if (req)
reqsk_put(req);
return newsk;
@@ -610,12 +630,6 @@ no_route:
}
EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
-#if IS_ENABLED(CONFIG_IPV6)
-#define AF_INET_FAMILY(fam) ((fam) == AF_INET)
-#else
-#define AF_INET_FAMILY(fam) true
-#endif
-
/* Decide when to expire the request and when to resend SYN-ACK */
static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
const int max_retries,
@@ -770,6 +784,18 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
}
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
+static void inet_clone_ulp(const struct request_sock *req, struct sock *newsk,
+ const gfp_t priority)
+{ /* Run the optional ->clone() hook of the ULP ops attached to @newsk,
+ * forwarding @req, @newsk and @priority. Does nothing when @newsk has
+ * no ULP ops or when those ops provide no ->clone().
+ */
+ struct inet_connection_sock *icsk = inet_csk(newsk);
+
+ if (!icsk->icsk_ulp_ops)
+ return;
+
+ if (icsk->icsk_ulp_ops->clone)
+ icsk->icsk_ulp_ops->clone(req, newsk, priority);
+}
+
/**
* inet_csk_clone_lock - clone an inet socket, and lock its clone
* @sk: the socket to clone
@@ -810,6 +836,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
/* Deinitialize accept_queue to trap illegal accesses. */
memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
+ inet_clone_ulp(req, newsk, priority);
+
security_inet_csk_clone(newsk, req);
}
return newsk;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index f11e997e517b..8c8377568a78 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -100,13 +100,9 @@ static size_t inet_sk_attr_size(struct sock *sk,
aux = handler->idiag_get_aux_size(sk, net_admin);
return nla_total_size(sizeof(struct tcp_info))
- + nla_total_size(1) /* INET_DIAG_SHUTDOWN */
- + nla_total_size(1) /* INET_DIAG_TOS */
- + nla_total_size(1) /* INET_DIAG_TCLASS */
- + nla_total_size(4) /* INET_DIAG_MARK */
- + nla_total_size(4) /* INET_DIAG_CLASS_ID */
- + nla_total_size(sizeof(struct inet_diag_meminfo))
+ nla_total_size(sizeof(struct inet_diag_msg))
+ + inet_diag_msg_attrs_size()
+ + nla_total_size(sizeof(struct inet_diag_meminfo))
+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
+ nla_total_size(TCP_CA_NAME_MAX)
+ nla_total_size(sizeof(struct tcpvegas_info))
@@ -147,6 +143,24 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark))
goto errout;
+ if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
+ ext & (1 << (INET_DIAG_TCLASS - 1))) {
+ u32 classid = 0;
+
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ classid = sock_cgroup_classid(&sk->sk_cgrp_data);
+#endif
+ /* Fallback to socket priority if class id isn't set.
+ * Classful qdiscs use it as direct reference to class.
+ * For cgroup2 classid is always zero.
+ */
+ if (!classid)
+ classid = sk->sk_priority;
+
+ if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
+ goto errout;
+ }
+
r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
r->idiag_inode = sock_i_ino(sk);
@@ -284,24 +298,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
goto errout;
}
- if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
- ext & (1 << (INET_DIAG_TCLASS - 1))) {
- u32 classid = 0;
-
-#ifdef CONFIG_SOCK_CGROUP_DATA
- classid = sock_cgroup_classid(&sk->sk_cgrp_data);
-#endif
- /* Fallback to socket priority if class id isn't set.
- * Classful qdiscs use it as direct reference to class.
- * For cgroup2 classid is always zero.
- */
- if (!classid)
- classid = sk->sk_priority;
-
- if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
- goto errout;
- }
-
out:
nlmsg_end(skb, nlh);
return 0;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 14db1e0b8a6e..d84819893db9 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -240,8 +240,8 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
static int ip_finish_output_gso(struct net *net, struct sock *sk,
struct sk_buff *skb, unsigned int mtu)
{
+ struct sk_buff *segs, *nskb;
netdev_features_t features;
- struct sk_buff *segs;
int ret = 0;
/* common case: seglen is <= mtu
@@ -272,8 +272,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
consume_skb(skb);
- do {
- struct sk_buff *nskb = segs->next;
+ skb_list_walk_safe(segs, segs, nskb) {
int err;
skb_mark_not_on_list(segs);
@@ -281,8 +280,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
if (err && ret == 0)
ret = err;
- segs = nskb;
- } while (segs);
+ }
return ret;
}
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 0fe2a5d3e258..74e1d964a615 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -1236,10 +1236,8 @@ int ip_tunnel_init(struct net_device *dev)
iph->version = 4;
iph->ihl = 5;
- if (tunnel->collect_md) {
- dev->features |= NETIF_F_NETNS_LOCAL;
+ if (tunnel->collect_md)
netif_keep_dst(dev);
- }
return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index e90b600c7a25..37cddd18f282 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -187,8 +187,17 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
int mtu;
if (!dst) {
- dev->stats.tx_carrier_errors++;
- goto tx_error_icmp;
+ struct rtable *rt;
+
+ fl->u.ip4.flowi4_oif = dev->ifindex;
+ fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC;
+ rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4);
+ if (IS_ERR(rt)) {
+ dev->stats.tx_carrier_errors++;
+ goto tx_error_icmp;
+ }
+ dst = &rt->dst;
+ skb_dst_set(skb, dst);
}
dst_hold(dst);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index f35308ff84c3..4438f6b12335 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1334,7 +1334,7 @@ static int __init ipconfig_proc_net_init(void)
/* Create a new file under /proc/net/ipconfig */
static int ipconfig_proc_net_create(const char *name,
- const struct file_operations *fops)
+ const struct proc_ops *proc_ops)
{
char *pname;
struct proc_dir_entry *p;
@@ -1346,7 +1346,7 @@ static int ipconfig_proc_net_create(const char *name,
if (!pname)
return -ENOMEM;
- p = proc_create(pname, 0444, init_net.proc_net, fops);
+ p = proc_create(pname, 0444, init_net.proc_net, proc_ops);
kfree(pname);
if (!p)
return -ENOMEM;
@@ -1355,7 +1355,7 @@ static int ipconfig_proc_net_create(const char *name,
}
/* Write NTP server IP addresses to /proc/net/ipconfig/ntp_servers */
-static int ntp_servers_seq_show(struct seq_file *seq, void *v)
+static int ntp_servers_show(struct seq_file *seq, void *v)
{
int i;
@@ -1365,7 +1365,7 @@ static int ntp_servers_seq_show(struct seq_file *seq, void *v)
}
return 0;
}
-DEFINE_SHOW_ATTRIBUTE(ntp_servers_seq);
+DEFINE_PROC_SHOW_ATTRIBUTE(ntp_servers);
#endif /* CONFIG_PROC_FS */
/*
@@ -1456,7 +1456,7 @@ static int __init ip_auto_config(void)
proc_create_single("pnp", 0444, init_net.proc_net, pnp_seq_show);
if (ipconfig_proc_net_init() == 0)
- ipconfig_proc_net_create("ntp_servers", &ntp_servers_seq_fops);
+ ipconfig_proc_net_create("ntp_servers", &ntp_servers_proc_ops);
#endif /* CONFIG_PROC_FS */
if (!ic_enable)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 6bdb1ab8af61..f8755a4ae9d4 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -58,7 +58,7 @@ struct clusterip_config {
};
#ifdef CONFIG_PROC_FS
-static const struct file_operations clusterip_proc_fops;
+static const struct proc_ops clusterip_proc_ops;
#endif
struct clusterip_net {
@@ -280,7 +280,7 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
mutex_lock(&cn->mutex);
c->pde = proc_create_data(buffer, 0600,
cn->procdir,
- &clusterip_proc_fops, c);
+ &clusterip_proc_ops, c);
mutex_unlock(&cn->mutex);
if (!c->pde) {
err = -ENOMEM;
@@ -804,12 +804,12 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
return size;
}
-static const struct file_operations clusterip_proc_fops = {
- .open = clusterip_proc_open,
- .read = seq_read,
- .write = clusterip_proc_write,
- .llseek = seq_lseek,
- .release = clusterip_proc_release,
+static const struct proc_ops clusterip_proc_ops = {
+ .proc_open = clusterip_proc_open,
+ .proc_read = seq_read,
+ .proc_write = clusterip_proc_write,
+ .proc_lseek = seq_lseek,
+ .proc_release = clusterip_proc_release,
};
#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 511eaa94e2d1..d072c326dd64 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -321,7 +321,9 @@ static size_t nh_nlmsg_size_single(struct nexthop *nh)
static size_t nh_nlmsg_size(struct nexthop *nh)
{
- size_t sz = nla_total_size(4); /* NHA_ID */
+ size_t sz = NLMSG_ALIGN(sizeof(struct nhmsg));
+
+ sz += nla_total_size(4); /* NHA_ID */
if (nh->is_group)
sz += nh_nlmsg_size_grp(nh);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index cc90243ccf76..2580303249e2 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -289,6 +289,8 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG),
SNMP_MIB_ITEM("TCPFastOpenPassiveAltKey", LINUX_MIB_TCPFASTOPENPASSIVEALTKEY),
+ SNMP_MIB_ITEM("TcpTimeoutRehash", LINUX_MIB_TCPTIMEOUTREHASH),
+ SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index e35736b99300..a93e7d1e1251 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -100,8 +100,9 @@ static int raw_diag_dump_one(struct sk_buff *in_skb,
if (IS_ERR(sk))
return PTR_ERR(sk);
- rep = nlmsg_new(sizeof(struct inet_diag_msg) +
- sizeof(struct inet_diag_meminfo) + 64,
+ rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) +
+ inet_diag_msg_attrs_size() +
+ nla_total_size(sizeof(struct inet_diag_meminfo)) + 64,
GFP_KERNEL);
if (!rep) {
sock_put(sk);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 87e979f2b74a..ebe7060d0fc9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -237,11 +237,11 @@ static int rt_cache_seq_open(struct inode *inode, struct file *file)
return seq_open(file, &rt_cache_seq_ops);
}
-static const struct file_operations rt_cache_seq_fops = {
- .open = rt_cache_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
+static const struct proc_ops rt_cache_proc_ops = {
+ .proc_open = rt_cache_seq_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = seq_release,
};
@@ -271,6 +271,7 @@ static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
*pos = cpu+1;
return &per_cpu(rt_cache_stat, cpu);
}
+ (*pos)++;
return NULL;
}
@@ -327,11 +328,11 @@ static int rt_cpu_seq_open(struct inode *inode, struct file *file)
return seq_open(file, &rt_cpu_seq_ops);
}
-static const struct file_operations rt_cpu_seq_fops = {
- .open = rt_cpu_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
+static const struct proc_ops rt_cpu_proc_ops = {
+ .proc_open = rt_cpu_seq_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = seq_release,
};
#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -365,12 +366,12 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
struct proc_dir_entry *pde;
pde = proc_create("rt_cache", 0444, net->proc_net,
- &rt_cache_seq_fops);
+ &rt_cache_proc_ops);
if (!pde)
goto err1;
pde = proc_create("rt_cache", 0444,
- net->proc_net_stat, &rt_cpu_seq_fops);
+ net->proc_net_stat, &rt_cpu_proc_ops);
if (!pde)
goto err2;
@@ -3223,16 +3224,41 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
skb_reset_mac_header(skb);
if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
+ struct fib_rt_info fri;
+
if (!res.fi) {
err = fib_props[res.type].error;
if (!err)
err = -EHOSTUNREACH;
goto errout_rcu;
}
+ fri.fi = res.fi;
+ fri.tb_id = table_id;
+ fri.dst = res.prefix;
+ fri.dst_len = res.prefixlen;
+ fri.tos = fl4.flowi4_tos;
+ fri.type = rt->rt_type;
+ fri.offload = 0;
+ fri.trap = 0;
+ if (res.fa_head) {
+ struct fib_alias *fa;
+
+ hlist_for_each_entry_rcu(fa, res.fa_head, fa_list) {
+ u8 slen = 32 - fri.dst_len;
+
+ if (fa->fa_slen == slen &&
+ fa->tb_id == fri.tb_id &&
+ fa->fa_tos == fri.tos &&
+ fa->fa_info == res.fi &&
+ fa->fa_type == fri.type) {
+ fri.offload = fa->offload;
+ fri.trap = fa->trap;
+ break;
+ }
+ }
+ }
err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
- rt->rt_type, res.prefix, res.prefixlen,
- fl4.flowi4_tos, res.fi, 0);
+ nlh->nlmsg_seq, RTM_NEWROUTE, &fri, 0);
} else {
err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb,
NETLINK_CB(in_skb).portid,
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 345b2b0ff618..9a4f6b16c9bc 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -349,6 +349,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
treq->snt_synack = 0;
treq->tfo_listener = false;
+
+ if (IS_ENABLED(CONFIG_MPTCP))
+ treq->is_mptcp = 0;
+
if (IS_ENABLED(CONFIG_SMC))
ireq->smc_ok = 0;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index fcb2cd167f64..9684af02e0a5 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1193,6 +1193,15 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "tcp_no_ssthresh_metrics_save",
+ .data = &init_net.ipv4.sysctl_tcp_no_ssthresh_metrics_save,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
.procname = "tcp_moderate_rcvbuf",
.data = &init_net.ipv4.sysctl_tcp_moderate_rcvbuf,
.maxlen = sizeof(int),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d885ba868822..eb2d80519f8e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,6 +271,7 @@
#include <net/icmp.h>
#include <net/inet_common.h>
#include <net/tcp.h>
+#include <net/mptcp.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/sock.h>
@@ -443,8 +444,6 @@ void tcp_init_sock(struct sock *sk)
tp->tsoffset = 0;
tp->rack.reo_wnd_steps = 1;
- sk->sk_state = TCP_CLOSE;
-
sk->sk_write_space = sk_stream_write_space;
sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
@@ -692,8 +691,8 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
}
-static void tcp_push(struct sock *sk, int flags, int mss_now,
- int nonagle, int size_goal)
+void tcp_push(struct sock *sk, int flags, int mss_now,
+ int nonagle, int size_goal)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
@@ -927,7 +926,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
return max(size_goal, mss_now);
}
-static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
+int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
{
int mss_now;
@@ -1778,6 +1777,8 @@ static int tcp_zerocopy_receive(struct sock *sk,
while (length + PAGE_SIZE <= zc->length) {
if (zc->recv_skip_hint < PAGE_SIZE) {
if (skb) {
+ if (zc->recv_skip_hint > 0)
+ break;
skb = skb->next;
offset = seq - TCP_SKB_CB(skb)->seq;
} else {
@@ -2524,6 +2525,7 @@ static void tcp_rtx_queue_purge(struct sock *sk)
{
struct rb_node *p = rb_first(&sk->tcp_rtx_queue);
+ tcp_sk(sk)->highest_sack = NULL;
while (p) {
struct sk_buff *skb = rb_to_skb(p);
@@ -2614,17 +2616,18 @@ int tcp_disconnect(struct sock *sk, int flags)
WRITE_ONCE(tp->write_seq, seq);
icsk->icsk_backoff = 0;
- tp->snd_cwnd = 2;
icsk->icsk_probes_out = 0;
icsk->icsk_rto = TCP_TIMEOUT_INIT;
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tp->snd_cwnd = TCP_INIT_CWND;
tp->snd_cwnd_cnt = 0;
tp->window_clamp = 0;
+ tp->delivered = 0;
tp->delivered_ce = 0;
tcp_set_ca_state(sk, TCP_CA_Open);
tp->is_sack_reneg = 0;
tcp_clear_retrans(tp);
+ tp->total_retrans = 0;
inet_csk_delack_init(sk);
/* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
* issue in __tcp_select_window()
@@ -2636,10 +2639,14 @@ int tcp_disconnect(struct sock *sk, int flags)
sk->sk_rx_dst = NULL;
tcp_saved_syn_free(tp);
tp->compressed_ack = 0;
+ tp->segs_in = 0;
+ tp->segs_out = 0;
tp->bytes_sent = 0;
tp->bytes_acked = 0;
tp->bytes_received = 0;
tp->bytes_retrans = 0;
+ tp->data_segs_in = 0;
+ tp->data_segs_out = 0;
tp->duplicate_sack[0].start_seq = 0;
tp->duplicate_sack[0].end_seq = 0;
tp->dsack_dups = 0;
@@ -3336,6 +3343,7 @@ static size_t tcp_opt_stats_get_size(void)
nla_total_size(sizeof(u32)) + /* TCP_NLA_DSACK_DUPS */
nla_total_size(sizeof(u32)) + /* TCP_NLA_REORD_SEEN */
nla_total_size(sizeof(u32)) + /* TCP_NLA_SRTT */
+ nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */
0;
}
@@ -3390,6 +3398,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups);
nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen);
nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3);
+ nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash);
return stats;
}
@@ -4021,4 +4030,5 @@ void __init tcp_init(void)
tcp_metrics_init();
BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0);
tcp_tasklet_init();
+ mptcp_init();
}
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index a6545ef0d27b..6c4d79baff26 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -779,8 +779,7 @@ static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs)
* bandwidth sample. Delivered is in packets and interval_us in uS and
* ratio will be <<1 for most connections. So delivered is first scaled.
*/
- bw = (u64)rs->delivered * BW_UNIT;
- do_div(bw, rs->interval_us);
+ bw = div64_long((u64)rs->delivered * BW_UNIT, rs->interval_us);
/* If this sample is application-limited, it is likely to have a very
* low delivered count that represents application behavior rather than
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 3737ec096650..3172e31987be 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -21,7 +21,7 @@ static DEFINE_SPINLOCK(tcp_cong_list_lock);
static LIST_HEAD(tcp_cong_list);
/* Simple linear search, don't expect many entries! */
-static struct tcp_congestion_ops *tcp_ca_find(const char *name)
+struct tcp_congestion_ops *tcp_ca_find(const char *name)
{
struct tcp_congestion_ops *e;
@@ -162,7 +162,7 @@ void tcp_assign_congestion_control(struct sock *sk)
rcu_read_lock();
ca = rcu_dereference(net->ipv4.tcp_congestion_control);
- if (unlikely(!try_module_get(ca->owner)))
+ if (unlikely(!bpf_try_module_get(ca, ca->owner)))
ca = &tcp_reno;
icsk->icsk_ca_ops = ca;
rcu_read_unlock();
@@ -208,7 +208,7 @@ void tcp_cleanup_congestion_control(struct sock *sk)
if (icsk->icsk_ca_ops->release)
icsk->icsk_ca_ops->release(sk);
- module_put(icsk->icsk_ca_ops->owner);
+ bpf_module_put(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner);
}
/* Used by sysctl to change default congestion control */
@@ -222,12 +222,12 @@ int tcp_set_default_congestion_control(struct net *net, const char *name)
ca = tcp_ca_find_autoload(net, name);
if (!ca) {
ret = -ENOENT;
- } else if (!try_module_get(ca->owner)) {
+ } else if (!bpf_try_module_get(ca, ca->owner)) {
ret = -EBUSY;
} else {
prev = xchg(&net->ipv4.tcp_congestion_control, ca);
if (prev)
- module_put(prev->owner);
+ bpf_module_put(prev, prev->owner);
ca->flags |= TCP_CONG_NON_RESTRICTED;
ret = 0;
@@ -366,19 +366,19 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
} else if (!load) {
const struct tcp_congestion_ops *old_ca = icsk->icsk_ca_ops;
- if (try_module_get(ca->owner)) {
+ if (bpf_try_module_get(ca, ca->owner)) {
if (reinit) {
tcp_reinit_congestion_control(sk, ca);
} else {
icsk->icsk_ca_ops = ca;
- module_put(old_ca->owner);
+ bpf_module_put(old_ca, old_ca->owner);
}
} else {
err = -EBUSY;
}
} else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin)) {
err = -EPERM;
- } else if (!try_module_get(ca->owner)) {
+ } else if (!bpf_try_module_get(ca, ca->owner)) {
err = -EBUSY;
} else {
tcp_reinit_congestion_control(sk, ca);
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 1b3d032a4df2..8f8eefd3a3ce 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -40,8 +40,8 @@
/* Number of delay samples for detecting the increase of delay */
#define HYSTART_MIN_SAMPLES 8
-#define HYSTART_DELAY_MIN (4U<<3)
-#define HYSTART_DELAY_MAX (16U<<3)
+#define HYSTART_DELAY_MIN (4000U) /* 4 ms */
+#define HYSTART_DELAY_MAX (16000U) /* 16 ms */
#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
static int fast_convergence __read_mostly = 1;
@@ -53,7 +53,7 @@ static int tcp_friendliness __read_mostly = 1;
static int hystart __read_mostly = 1;
static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
static int hystart_low_window __read_mostly = 16;
-static int hystart_ack_delta __read_mostly = 2;
+static int hystart_ack_delta_us __read_mostly = 2000;
static u32 cube_rtt_scale __read_mostly;
static u32 beta_scale __read_mostly;
@@ -77,8 +77,8 @@ MODULE_PARM_DESC(hystart_detect, "hybrid slow start detection mechanisms"
" 1: packet-train 2: delay 3: both packet-train and delay");
module_param(hystart_low_window, int, 0644);
MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
-module_param(hystart_ack_delta, int, 0644);
-MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (msecs)");
+module_param(hystart_ack_delta_us, int, 0644);
+MODULE_PARM_DESC(hystart_ack_delta_us, "spacing between ack's indicating train (usecs)");
/* BIC TCP Parameters */
struct bictcp {
@@ -89,7 +89,7 @@ struct bictcp {
u32 bic_origin_point;/* origin point of bic function */
u32 bic_K; /* time to origin point
from the beginning of the current epoch */
- u32 delay_min; /* min delay (msec << 3) */
+ u32 delay_min; /* min delay (usec) */
u32 epoch_start; /* beginning of an epoch */
u32 ack_cnt; /* number of acks */
u32 tcp_cwnd; /* estimated tcp cwnd */
@@ -117,13 +117,9 @@ static inline void bictcp_reset(struct bictcp *ca)
ca->found = 0;
}
-static inline u32 bictcp_clock(void)
+static inline u32 bictcp_clock_us(const struct sock *sk)
{
-#if HZ < 1000
- return ktime_to_ms(ktime_get_real());
-#else
- return jiffies_to_msecs(jiffies);
-#endif
+ return tcp_sk(sk)->tcp_mstamp;
}
static inline void bictcp_hystart_reset(struct sock *sk)
@@ -131,9 +127,9 @@ static inline void bictcp_hystart_reset(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
- ca->round_start = ca->last_ack = bictcp_clock();
+ ca->round_start = ca->last_ack = bictcp_clock_us(sk);
ca->end_seq = tp->snd_nxt;
- ca->curr_rtt = 0;
+ ca->curr_rtt = ~0U;
ca->sample_cnt = 0;
}
@@ -276,7 +272,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked)
*/
t = (s32)(tcp_jiffies32 - ca->epoch_start);
- t += msecs_to_jiffies(ca->delay_min >> 3);
+ t += usecs_to_jiffies(ca->delay_min);
/* change the unit from HZ to bictcp_HZ */
t <<= BICTCP_HZ;
do_div(t, HZ);
@@ -376,22 +372,54 @@ static void bictcp_state(struct sock *sk, u8 new_state)
}
}
+/* Account for TSO/GRO delays.
+ * Otherwise short RTT flows could get too small ssthresh, since during
+ * slow start we begin with small TSO packets and ca->delay_min would
+ * not account for long aggregation delay when TSO packets get bigger.
+ * Ideally even with a very small RTT we would like to have at least one
+ * TSO packet being sent and received by GRO, and another one in qdisc layer.
+ * We apply another 100% factor because @rate is doubled at this point.
+ * We cap the cushion to 1ms.
+ *
+ * Return: the cushion, computed as GSO_MAX_SIZE * 4 * USEC_PER_SEC divided
+ * by sk->sk_pacing_rate and capped at USEC_PER_MSEC, or 0 when the pacing
+ * rate reads as zero.
+ * NOTE(review): the result is in microseconds only if sk_pacing_rate is
+ * expressed in bytes per second -- confirm.
+ */
+static u32 hystart_ack_delay(struct sock *sk)
+{
+ unsigned long rate;
+
+ rate = READ_ONCE(sk->sk_pacing_rate);
+ if (!rate)
+ return 0;
+ return min_t(u64, USEC_PER_MSEC,
+ div64_ul((u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate));
+}
+
static void hystart_update(struct sock *sk, u32 delay)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
-
- if (ca->found & hystart_detect)
- return;
+ u32 threshold;
if (hystart_detect & HYSTART_ACK_TRAIN) {
- u32 now = bictcp_clock();
+ u32 now = bictcp_clock_us(sk);
/* first detection parameter - ack-train detection */
- if ((s32)(now - ca->last_ack) <= hystart_ack_delta) {
+ if ((s32)(now - ca->last_ack) <= hystart_ack_delta_us) {
ca->last_ack = now;
- if ((s32)(now - ca->round_start) > ca->delay_min >> 4) {
- ca->found |= HYSTART_ACK_TRAIN;
+
+ threshold = ca->delay_min + hystart_ack_delay(sk);
+
+ /* Hystart ack train triggers if we get ack past
+ * ca->delay_min/2.
+ * Pacing might have delayed packets up to RTT/2
+ * during slow start.
+ */
+ if (sk->sk_pacing_status == SK_PACING_NONE)
+ threshold >>= 1;
+
+ if ((s32)(now - ca->round_start) > threshold) {
+ ca->found = 1;
+ pr_debug("hystart_ack_train (%u > %u) delay_min %u (+ ack_delay %u) cwnd %u\n",
+ now - ca->round_start, threshold,
+ ca->delay_min, hystart_ack_delay(sk), tp->snd_cwnd);
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTTRAINDETECT);
NET_ADD_STATS(sock_net(sk),
@@ -405,14 +433,14 @@ static void hystart_update(struct sock *sk, u32 delay)
if (hystart_detect & HYSTART_DELAY) {
/* obtain the minimum delay of more than sampling packets */
if (ca->sample_cnt < HYSTART_MIN_SAMPLES) {
- if (ca->curr_rtt == 0 || ca->curr_rtt > delay)
+ if (ca->curr_rtt > delay)
ca->curr_rtt = delay;
ca->sample_cnt++;
} else {
if (ca->curr_rtt > ca->delay_min +
HYSTART_DELAY_THRESH(ca->delay_min >> 3)) {
- ca->found |= HYSTART_DELAY;
+ ca->found = 1;
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTDELAYDETECT);
NET_ADD_STATS(sock_net(sk),
@@ -424,9 +452,6 @@ static void hystart_update(struct sock *sk, u32 delay)
}
}
-/* Track delayed acknowledgment ratio using sliding window
- * ratio = (15*ratio + sample) / 16
- */
static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
{
const struct tcp_sock *tp = tcp_sk(sk);
@@ -441,7 +466,7 @@ static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
if (ca->epoch_start && (s32)(tcp_jiffies32 - ca->epoch_start) < HZ)
return;
- delay = (sample->rtt_us << 3) / USEC_PER_MSEC;
+ delay = sample->rtt_us;
if (delay == 0)
delay = 1;
@@ -450,7 +475,7 @@ static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
ca->delay_min = delay;
/* hystart triggers when cwnd is larger than some threshold */
- if (hystart && tcp_in_slow_start(tp) &&
+ if (!ca->found && tcp_in_slow_start(tp) && hystart &&
tp->snd_cwnd >= hystart_low_window)
hystart_update(sk, delay);
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5347ab2c9c58..6b6b57000dad 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -79,6 +79,7 @@
#include <trace/events/tcp.h>
#include <linux/jump_label_ratelimit.h>
#include <net/busy_poll.h>
+#include <net/mptcp.h>
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
@@ -1423,7 +1424,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
goto fallback;
- if (!tcp_skb_can_collapse_to(prev))
+ if (!tcp_skb_can_collapse(prev, skb))
goto fallback;
in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
@@ -3164,6 +3165,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
tp->retransmit_skb_hint = NULL;
if (unlikely(skb == tp->lost_skb_hint))
tp->lost_skb_hint = NULL;
+ tcp_highest_sack_replace(sk, skb, next);
tcp_rtx_queue_unlink_and_free(skb, sk);
}
@@ -3554,7 +3556,7 @@ static void tcp_xmit_recovery(struct sock *sk, int rexmit)
if (rexmit == REXMIT_NONE || sk->sk_state == TCP_SYN_SENT)
return;
- if (unlikely(rexmit == 2)) {
+ if (unlikely(rexmit == REXMIT_NEW)) {
__tcp_push_pending_frames(sk, tcp_current_mss(sk),
TCP_NAGLE_OFF);
if (after(tp->snd_nxt, tp->high_seq))
@@ -3924,6 +3926,10 @@ void tcp_parse_options(const struct net *net,
*/
break;
#endif
+ case TCPOPT_MPTCP:
+ mptcp_parse_option(skb, ptr, opsize, opt_rx);
+ break;
+
case TCPOPT_FASTOPEN:
tcp_parse_fastopen_option(
opsize - TCPOLEN_FASTOPEN_BASE,
@@ -4265,8 +4271,10 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
* The receiver remembers and reflects via DSACKs. Leverage the
* DSACK state and change the txhash to re-route speculatively.
*/
- if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq)
+ if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq) {
sk_rethink_txhash(sk);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
+ }
}
static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
@@ -4424,6 +4432,9 @@ static bool tcp_try_coalesce(struct sock *sk,
if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
return false;
+ if (!mptcp_skb_can_collapse(to, from))
+ return false;
+
#ifdef CONFIG_TLS_DEVICE
if (from->decrypted != to->decrypted)
return false;
@@ -4762,6 +4773,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
bool fragstolen;
int eaten;
+ if (sk_is_mptcp(sk))
+ mptcp_incoming_options(sk, skb, &tp->rx_opt);
+
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
__kfree_skb(skb);
return;
@@ -4933,7 +4947,7 @@ restart:
/* The first skb to collapse is:
* - not SYN/FIN and
* - bloated or contains data before "start" or
- * overlaps to the next one.
+ * overlaps to the next one and MPTCP allows collapsing.
*/
if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
(tcp_win_from_space(sk, skb->truesize) > skb->len ||
@@ -4942,7 +4956,7 @@ restart:
break;
}
- if (n && n != tail &&
+ if (n && n != tail && mptcp_skb_can_collapse(skb, n) &&
TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
end_of_skbs = false;
break;
@@ -4975,6 +4989,7 @@ restart:
else
__skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */
skb_set_owner_r(nskb, sk);
+ mptcp_skb_ext_move(nskb, skb);
/* Copy data, releasing collapsed skbs. */
while (copy > 0) {
@@ -4994,6 +5009,7 @@ restart:
skb = tcp_collapse_one(sk, skb, list, root);
if (!skb ||
skb == tail ||
+ !mptcp_skb_can_collapse(nskb, skb) ||
(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
goto end;
#ifdef CONFIG_TLS_DEVICE
@@ -5892,8 +5908,14 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* the segment and return)"
*/
if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) ||
- after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt))
+ after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
+ /* Previous FIN/ACK or RST/ACK might be ignored. */
+ if (icsk->icsk_retransmits == 0)
+ inet_csk_reset_xmit_timer(sk,
+ ICSK_TIME_RETRANS,
+ TCP_TIMEOUT_MIN, TCP_RTO_MAX);
goto reset_and_undo;
+ }
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
!between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
@@ -5968,6 +5990,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
+ if (sk_is_mptcp(sk))
+ mptcp_rcv_synsent(sk);
+
/* Remember, tcp_poll() does not lock socket!
* Change state from SYN-SENT only after copied_seq
* is initialized. */
@@ -6099,7 +6124,11 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
{
struct request_sock *req;
- tcp_try_undo_loss(sk, false);
+ /* If we are still handling the SYNACK RTO, see if timestamp ECR allows
+ * undo. If peer SACKs triggered fast recovery, we can't undo here.
+ */
+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
+ tcp_try_undo_loss(sk, false);
/* Reset rtx states to prevent spurious retransmits_timed_out() */
tcp_sk(sk)->retrans_stamp = 0;
@@ -6333,8 +6362,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_CLOSE_WAIT:
case TCP_CLOSING:
case TCP_LAST_ACK:
- if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
+ if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
+ if (sk_is_mptcp(sk))
+ mptcp_incoming_options(sk, skb, &tp->rx_opt);
break;
+ }
/* fall through */
case TCP_FIN_WAIT1:
case TCP_FIN_WAIT2:
@@ -6590,6 +6622,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_rsk(req)->af_specific = af_ops;
tcp_rsk(req)->ts_off = 0;
+#if IS_ENABLED(CONFIG_MPTCP)
+ tcp_rsk(req)->is_mptcp = 0;
+#endif
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = af_ops->mss_clamp;
@@ -6612,6 +6647,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
af_ops->init_req(req, sk, skb);
+ if (IS_ENABLED(CONFIG_MPTCP) && want_cookie)
+ tcp_rsk(req)->is_mptcp = 0;
+
if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1c7326e04f9b..df1166b76126 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -701,9 +701,21 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
rcu_read_lock();
hash_location = tcp_parse_md5sig_option(th);
if (sk && sk_fullsock(sk)) {
- key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
- &ip_hdr(skb)->saddr, AF_INET);
+ const union tcp_md5_addr *addr;
+ int l3index;
+
+ /* sdif set, means packet ingressed via a device
+ * in an L3 domain and inet_iif is set to it.
+ */
+ l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0;
+ addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
+ key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
} else if (hash_location) {
+ const union tcp_md5_addr *addr;
+ int sdif = tcp_v4_sdif(skb);
+ int dif = inet_iif(skb);
+ int l3index;
+
/*
* active side is lost. Try to find listening socket through
* source port, and then find md5 key through listening socket.
@@ -714,14 +726,17 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
ip_hdr(skb)->saddr,
th->source, ip_hdr(skb)->daddr,
- ntohs(th->source), inet_iif(skb),
- tcp_v4_sdif(skb));
+ ntohs(th->source), dif, sdif);
/* don't send rst if it can't find key */
if (!sk1)
goto out;
- key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
- &ip_hdr(skb)->saddr, AF_INET);
+ /* sdif set, means packet ingressed via a device
+ * in an L3 domain and dif is set to it.
+ */
+ l3index = sdif ? dif : 0;
+ addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
+ key = tcp_md5_do_lookup(sk1, l3index, addr, AF_INET);
if (!key)
goto out;
@@ -905,6 +920,9 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
+ const union tcp_md5_addr *addr;
+ int l3index;
+
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
*/
@@ -916,14 +934,15 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
* exception of <SYN> segments, MUST be right-shifted by
* Rcv.Wind.Shift bits:
*/
+ addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
+ l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0;
tcp_v4_send_ack(sk, skb, seq,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent,
0,
- tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
- AF_INET),
+ tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
ip_hdr(skb)->tos);
}
@@ -983,7 +1002,7 @@ DEFINE_STATIC_KEY_FALSE(tcp_md5_needed);
EXPORT_SYMBOL(tcp_md5_needed);
/* Find the Key structure for an address. */
-struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk,
+struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
const union tcp_md5_addr *addr,
int family)
{
@@ -1003,7 +1022,8 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk,
hlist_for_each_entry_rcu(key, &md5sig->head, node) {
if (key->family != family)
continue;
-
+ if (key->l3index && key->l3index != l3index)
+ continue;
if (family == AF_INET) {
mask = inet_make_mask(key->prefixlen);
match = (key->addr.a4.s_addr & mask) ==
@@ -1027,7 +1047,8 @@ EXPORT_SYMBOL(__tcp_md5_do_lookup);
static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
const union tcp_md5_addr *addr,
- int family, u8 prefixlen)
+ int family, u8 prefixlen,
+ int l3index)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
@@ -1046,6 +1067,8 @@ static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
hlist_for_each_entry_rcu(key, &md5sig->head, node) {
if (key->family != family)
continue;
+ if (key->l3index && key->l3index != l3index)
+ continue;
if (!memcmp(&key->addr, addr, size) &&
key->prefixlen == prefixlen)
return key;
@@ -1057,23 +1080,26 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
const struct sock *addr_sk)
{
const union tcp_md5_addr *addr;
+ int l3index;
+ l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
+ addr_sk->sk_bound_dev_if);
addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
- return tcp_md5_do_lookup(sk, addr, AF_INET);
+ return tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);
/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
- gfp_t gfp)
+ int family, u8 prefixlen, int l3index,
+ const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
/* Add Key to the list */
struct tcp_md5sig_key *key;
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_info *md5sig;
- key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
+ key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index);
if (key) {
/* Pre-existing entry - just update that one. */
memcpy(key->key, newkey, newkeylen);
@@ -1105,6 +1131,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
key->keylen = newkeylen;
key->family = family;
key->prefixlen = prefixlen;
+ key->l3index = l3index;
memcpy(&key->addr, addr,
(family == AF_INET6) ? sizeof(struct in6_addr) :
sizeof(struct in_addr));
@@ -1114,11 +1141,11 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
EXPORT_SYMBOL(tcp_md5_do_add);
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
- u8 prefixlen)
+ u8 prefixlen, int l3index)
{
struct tcp_md5sig_key *key;
- key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
+ key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index);
if (!key)
return -ENOENT;
hlist_del_rcu(&key->node);
@@ -1149,7 +1176,9 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
{
struct tcp_md5sig cmd;
struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
+ const union tcp_md5_addr *addr;
u8 prefixlen = 32;
+ int l3index = 0;
if (optlen < sizeof(cmd))
return -EINVAL;
@@ -1167,16 +1196,34 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
return -EINVAL;
}
+ if (optname == TCP_MD5SIG_EXT &&
+ cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
+ if (dev && netif_is_l3_master(dev))
+ l3index = dev->ifindex;
+
+ rcu_read_unlock();
+
+ /* ok to reference set/not set outside of rcu;
+ * right now device MUST be an L3 master
+ */
+ if (!dev || !l3index)
+ return -EINVAL;
+ }
+
+ addr = (union tcp_md5_addr *)&sin->sin_addr.s_addr;
+
if (!cmd.tcpm_keylen)
- return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
- AF_INET, prefixlen);
+ return tcp_md5_do_del(sk, addr, AF_INET, prefixlen, l3index);
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
return -EINVAL;
- return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
- AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
- GFP_KERNEL);
+ return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index,
+ cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
@@ -1286,7 +1333,8 @@ EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
/* Called with rcu_read_lock() */
static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb)
+ const struct sk_buff *skb,
+ int dif, int sdif)
{
#ifdef CONFIG_TCP_MD5SIG
/*
@@ -1301,11 +1349,17 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
struct tcp_md5sig_key *hash_expected;
const struct iphdr *iph = ip_hdr(skb);
const struct tcphdr *th = tcp_hdr(skb);
- int genhash;
+ const union tcp_md5_addr *addr;
unsigned char newhash[16];
+ int genhash, l3index;
- hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
- AF_INET);
+ /* sdif set, means packet ingressed via a device
+ * in an L3 domain and dif is set to the l3mdev
+ */
+ l3index = sdif ? dif : 0;
+
+ addr = (union tcp_md5_addr *)&iph->saddr;
+ hash_expected = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
hash_location = tcp_parse_md5sig_option(th);
/* We've parsed the options - do we have a hash? */
@@ -1331,11 +1385,11 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
- net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
+ net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s L3 index %d\n",
&iph->saddr, ntohs(th->source),
&iph->daddr, ntohs(th->dest),
genhash ? " tcp_v4_calc_md5_hash failed"
- : "");
+ : "", l3index);
return true;
}
return false;
@@ -1372,7 +1426,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = {
.syn_ack_timeout = tcp_syn_ack_timeout,
};
-static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
+const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
.mss_clamp = TCP_MSS_DEFAULT,
#ifdef CONFIG_TCP_MD5SIG
.req_md5_lookup = tcp_v4_md5_lookup,
@@ -1419,7 +1473,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
struct tcp_sock *newtp;
struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
+ const union tcp_md5_addr *addr;
struct tcp_md5sig_key *key;
+ int l3index;
#endif
struct ip_options_rcu *inet_opt;
@@ -1467,9 +1523,10 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
tcp_initialize_rcv_mss(newsk);
#ifdef CONFIG_TCP_MD5SIG
+ l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
/* Copy over the MD5 key from the original socket */
- key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
- AF_INET);
+ addr = (union tcp_md5_addr *)&newinet->inet_daddr;
+ key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
if (key) {
/*
* We're using one, so create a matching key
@@ -1477,8 +1534,8 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
* memory, then we end up not copying the key
* across. Shucks.
*/
- tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
- AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
+ tcp_md5_do_add(newsk, addr, AF_INET, 32, l3index,
+ key->key, key->keylen, GFP_ATOMIC);
sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
}
#endif
@@ -1808,6 +1865,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
struct net *net = dev_net(skb->dev);
struct sk_buff *skb_to_free;
int sdif = inet_sdif(skb);
+ int dif = inet_iif(skb);
const struct iphdr *iph;
const struct tcphdr *th;
bool refcounted;
@@ -1856,7 +1914,7 @@ process:
struct sock *nsk;
sk = req->rsk_listener;
- if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
+ if (unlikely(tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))) {
sk_drops_add(sk, skb);
reqsk_put(req);
goto discard_it;
@@ -1914,7 +1972,7 @@ process:
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
- if (tcp_v4_inbound_md5_hash(sk, skb))
+ if (tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))
goto discard_and_relse;
nf_reset_ct(skb);
@@ -2620,7 +2678,8 @@ static void __net_exit tcp_sk_exit(struct net *net)
int cpu;
if (net->ipv4.tcp_congestion_control)
- module_put(net->ipv4.tcp_congestion_control->owner);
+ bpf_module_put(net->ipv4.tcp_congestion_control,
+ net->ipv4.tcp_congestion_control->owner);
for_each_possible_cpu(cpu)
inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
@@ -2675,6 +2734,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
net->ipv4.sysctl_tcp_tw_reuse = 2;
+ net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1;
cnt = tcp_hashinfo.ehash_mask + 1;
net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
@@ -2726,7 +2786,8 @@ static int __net_init tcp_sk_init(struct net *net)
/* Reno is always built in */
if (!net_eq(net, &init_net) &&
- try_module_get(init_net.ipv4.tcp_congestion_control->owner))
+ bpf_try_module_get(init_net.ipv4.tcp_congestion_control,
+ init_net.ipv4.tcp_congestion_control->owner))
net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
else
net->ipv4.tcp_congestion_control = &tcp_reno;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index c4848e7a0aad..279db8822439 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -385,7 +385,8 @@ void tcp_update_metrics(struct sock *sk)
if (tcp_in_initial_slowstart(tp)) {
/* Slow start still did not finish. */
- if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+ if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val && (tp->snd_cwnd >> 1) > val)
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
@@ -400,7 +401,8 @@ void tcp_update_metrics(struct sock *sk)
} else if (!tcp_in_slow_start(tp) &&
icsk->icsk_ca_state == TCP_CA_Open) {
/* Cong. avoidance phase, cwnd is reliable. */
- if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
+ if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
@@ -416,7 +418,8 @@ void tcp_update_metrics(struct sock *sk)
tcp_metric_set(tm, TCP_METRIC_CWND,
(val + tp->snd_ssthresh) >> 1);
}
- if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+ if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val && tp->snd_ssthresh > val)
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
@@ -441,6 +444,7 @@ void tcp_init_metrics(struct sock *sk)
{
struct dst_entry *dst = __sk_dst_get(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
struct tcp_metrics_block *tm;
u32 val, crtt = 0; /* cached RTT scaled by 8 */
@@ -458,7 +462,8 @@ void tcp_init_metrics(struct sock *sk)
if (tcp_metric_locked(tm, TCP_METRIC_CWND))
tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);
- val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+ val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ?
+ 0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val) {
tp->snd_ssthresh = val;
if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index c802bc80c400..ad3b56d9fa71 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -414,7 +414,7 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
rcu_read_lock();
ca = tcp_ca_find_key(ca_key);
- if (likely(ca && try_module_get(ca->owner))) {
+ if (likely(ca && bpf_try_module_get(ca, ca->owner))) {
icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
icsk->icsk_ca_ops = ca;
ca_got_dst = true;
@@ -425,7 +425,7 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
/* If no valid choice made yet, assign current system default ca. */
if (!ca_got_dst &&
(!icsk->icsk_ca_setsockopt ||
- !try_module_get(icsk->icsk_ca_ops->owner)))
+ !bpf_try_module_get(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner)))
tcp_assign_congestion_control(sk);
tcp_set_ca_state(sk, TCP_CA_Open);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 58c92a7d671c..306e25d743e8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -38,6 +38,7 @@
#define pr_fmt(fmt) "TCP: " fmt
#include <net/tcp.h>
+#include <net/mptcp.h>
#include <linux/compiler.h>
#include <linux/gfp.h>
@@ -414,6 +415,7 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
#define OPTION_WSCALE (1 << 3)
#define OPTION_FAST_OPEN_COOKIE (1 << 8)
#define OPTION_SMC (1 << 9)
+#define OPTION_MPTCP (1 << 10)
static void smc_options_write(__be32 *ptr, u16 *options)
{
@@ -439,8 +441,17 @@ struct tcp_out_options {
__u8 *hash_location; /* temporary pointer, overloaded */
__u32 tsval, tsecr; /* need to include OPTION_TS */
struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
+ struct mptcp_out_options mptcp;
};
+/* Emit the MPTCP option into the TCP option area at @ptr when
+ * OPTION_MPTCP was selected in @opts while the options were computed.
+ * The body is compiled out entirely when CONFIG_MPTCP is disabled.
+ */
+static void mptcp_options_write(__be32 *ptr, struct tcp_out_options *opts)
+{
+#if IS_ENABLED(CONFIG_MPTCP)
+	const bool has_mptcp = opts->options & OPTION_MPTCP;
+
+	if (unlikely(has_mptcp))
+		mptcp_write_options(ptr, &opts->mptcp);
+#endif
+}
+
/* Write previously computed TCP options to the packet.
*
* Beware: Something in the Internet is very sensitive to the ordering of
@@ -549,6 +560,8 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
}
smc_options_write(ptr, &options);
+
+ mptcp_options_write(ptr, opts);
}
static void smc_set_option(const struct tcp_sock *tp,
@@ -584,6 +597,22 @@ static void smc_set_option_cond(const struct tcp_sock *tp,
#endif
}
+/* Reserve room for the MPTCP option on a SYN-ACK.
+ *
+ * Nothing happens unless @req is an MPTCP request and
+ * mptcp_synack_options() reports an option to send. The option is then
+ * flagged in @opts and its size deducted from @remaining, but only if it
+ * fits in the space that is still available.
+ */
+static void mptcp_set_option_cond(const struct request_sock *req,
+				  struct tcp_out_options *opts,
+				  unsigned int *remaining)
+{
+	unsigned int opt_len;
+
+	if (!rsk_is_mptcp(req))
+		return;
+
+	if (!mptcp_synack_options(req, &opt_len, &opts->mptcp))
+		return;
+
+	/* Not enough option space left: silently skip the option. */
+	if (*remaining < opt_len)
+		return;
+
+	opts->options |= OPTION_MPTCP;
+	*remaining -= opt_len;
+}
+
/* Compute TCP options for SYN packets. This is not the final
* network wire format yet.
*/
@@ -653,6 +682,15 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
smc_set_option(tp, opts, &remaining);
+	if (sk_is_mptcp(sk)) {
+		unsigned int size;
+
+		/* Only advertise the MPTCP option when it fits in what is
+		 * left of the SYN option space, mirroring the check done
+		 * for SYN-ACKs in mptcp_set_option_cond(). Subtracting a
+		 * size larger than "remaining" would make the length
+		 * returned below exceed MAX_TCP_OPTION_SPACE.
+		 */
+		if (mptcp_syn_options(sk, skb, &size, &opts->mptcp) &&
+		    remaining >= size) {
+			opts->options |= OPTION_MPTCP;
+			remaining -= size;
+		}
+	}
+
return MAX_TCP_OPTION_SPACE - remaining;
}
@@ -714,6 +752,8 @@ static unsigned int tcp_synack_options(const struct sock *sk,
}
}
+ mptcp_set_option_cond(req, opts, &remaining);
+
smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
return MAX_TCP_OPTION_SPACE - remaining;
@@ -751,16 +791,37 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
size += TCPOLEN_TSTAMP_ALIGNED;
}
+ /* MPTCP options have precedence over SACK for the limited TCP
+ * option space because a MPTCP connection would be forced to
+ * fall back to regular TCP if a required multipath option is
+ * missing. SACK still gets a chance to use whatever space is
+ * left.
+ */
+ if (sk_is_mptcp(sk)) {
+ unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
+ unsigned int opt_size = 0;
+
+ if (mptcp_established_options(sk, skb, &opt_size, remaining,
+ &opts->mptcp)) {
+ opts->options |= OPTION_MPTCP;
+ size += opt_size;
+ }
+ }
+
eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
if (unlikely(eff_sacks)) {
const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
+ if (unlikely(remaining < TCPOLEN_SACK_BASE_ALIGNED +
+ TCPOLEN_SACK_PERBLOCK))
+ return size;
+
opts->num_sack_blocks =
min_t(unsigned int, eff_sacks,
(remaining - TCPOLEN_SACK_BASE_ALIGNED) /
TCPOLEN_SACK_PERBLOCK);
- if (likely(opts->num_sack_blocks))
- size += TCPOLEN_SACK_BASE_ALIGNED +
- opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
+
+ size += TCPOLEN_SACK_BASE_ALIGNED +
+ opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
}
return size;
@@ -2865,7 +2926,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
if (!tcp_can_collapse(sk, skb))
break;
- if (!tcp_skb_can_collapse_to(to))
+ if (!tcp_skb_can_collapse(to, skb))
break;
space -= skb->len;
@@ -3232,6 +3293,7 @@ int tcp_send_synack(struct sock *sk)
if (!nskb)
return -ENOMEM;
INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor);
+ tcp_highest_sack_replace(sk, skb, nskb);
tcp_rtx_queue_unlink_and_free(skb, sk);
__skb_header_release(nskb);
tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
@@ -3368,8 +3430,8 @@ static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst)
rcu_read_lock();
ca = tcp_ca_find_key(ca_key);
- if (likely(ca && try_module_get(ca->owner))) {
- module_put(icsk->icsk_ca_ops->owner);
+ if (likely(ca && bpf_try_module_get(ca, ca->owner))) {
+ bpf_module_put(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner);
icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
icsk->icsk_ca_ops = ca;
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 1097b438befe..c3f26dcd6704 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -223,6 +223,9 @@ static int tcp_write_timeout(struct sock *sk)
dst_negative_advice(sk);
} else {
sk_rethink_txhash(sk);
+ tp->timeout_rehash++;
+ __NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPTIMEOUTREHASH);
}
retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
expired = icsk->icsk_retransmits >= retry_until;
@@ -234,6 +237,9 @@ static int tcp_write_timeout(struct sock *sk)
dst_negative_advice(sk);
} else {
sk_rethink_txhash(sk);
+ tp->timeout_rehash++;
+ __NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPTIMEOUTREHASH);
}
retry_until = net->ipv4.sysctl_tcp_retries2;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 93a355b6b092..08a41f1e1cd2 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1368,7 +1368,8 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
if (likely(partial)) {
up->forward_deficit += size;
size = up->forward_deficit;
- if (size < (sk->sk_rcvbuf >> 2))
+ if (size < (sk->sk_rcvbuf >> 2) &&
+ !skb_queue_empty(&up->reader_queue))
return;
} else {
size += up->forward_deficit;
@@ -1708,7 +1709,8 @@ busy_check:
/* sk_queue is empty, reader_queue may contain peeked packets */
} while (timeo &&
- !__skb_wait_for_more_packets(sk, &error, &timeo,
+ !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
+ &error, &timeo,
(struct sk_buff *)sk_queue));
*err = error;
@@ -1855,8 +1857,12 @@ int __udp_disconnect(struct sock *sk, int flags)
inet->inet_dport = 0;
sock_rps_reset_rxhash(sk);
sk->sk_bound_dev_if = 0;
- if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) {
inet_reset_saddr(sk);
+ if (sk->sk_prot->rehash &&
+ (sk->sk_userlocks & SOCK_BINDPORT_LOCK))
+ sk->sk_prot->rehash(sk);
+ }
if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
sk->sk_prot->unhash(sk);
@@ -2104,8 +2110,7 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_SGO_CB_OFFSET);
__skb_push(skb, -skb_mac_offset(skb));
segs = udp_rcv_segment(sk, skb, true);
- for (skb = segs; skb; skb = next) {
- next = skb->next;
+ skb_list_walk_safe(segs, skb, next) {
__skb_pull(skb, skb_transport_offset(skb));
ret = udp_queue_rcv_one_skb(sk, skb);
if (ret > 0)
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 910555a4d9fe..dccd2286bc28 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -64,8 +64,9 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
goto out;
err = -ENOMEM;
- rep = nlmsg_new(sizeof(struct inet_diag_msg) +
- sizeof(struct inet_diag_meminfo) + 64,
+ rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) +
+ inet_diag_msg_attrs_size() +
+ nla_total_size(sizeof(struct inet_diag_meminfo)) + 64,
GFP_KERNEL);
if (!rep)
goto out;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index a3908e55ed89..1a98583a79f4 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -184,6 +184,20 @@ out_unlock:
}
EXPORT_SYMBOL(skb_udp_tunnel_segment);
+/* Segment a fraglist GSO skb via skb_segment_list().
+ *
+ * The gso_size is sampled before segmentation. On success the UDP
+ * length of the returned skb is rewritten to one UDP header plus that
+ * gso_size. An ERR_PTR from skb_segment_list() is handed back to the
+ * caller untouched.
+ */
+static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
+					      netdev_features_t features)
+{
+	const unsigned int gso_size = skb_shinfo(skb)->gso_size;
+	struct sk_buff *segs;
+
+	segs = skb_segment_list(skb, features, skb_mac_header_len(skb));
+	if (!IS_ERR(segs))
+		udp_hdr(segs)->len = htons(sizeof(struct udphdr) + gso_size);
+
+	return segs;
+}
+
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
netdev_features_t features)
{
@@ -196,6 +210,9 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
__sum16 check;
__be16 newlen;
+ if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
+ return __udp_gso_segment_list(gso_skb, features);
+
mss = skb_shinfo(gso_skb)->gso_size;
if (gso_skb->len <= sizeof(*uh) + mss)
return ERR_PTR(-EINVAL);
@@ -354,6 +371,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
struct udphdr *uh2;
struct sk_buff *p;
unsigned int ulen;
+ int ret = 0;
/* requires non zero csum, for symmetry with GSO */
if (!uh->check) {
@@ -369,7 +387,6 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
}
/* pull encapsulating udp header */
skb_gro_pull(skb, sizeof(struct udphdr));
- skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
list_for_each_entry(p, head, list) {
if (!NAPI_GRO_CB(p)->same_flow)
@@ -383,14 +400,40 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
continue;
}
+ if (NAPI_GRO_CB(skb)->is_flist != NAPI_GRO_CB(p)->is_flist) {
+ NAPI_GRO_CB(skb)->flush = 1;
+ return p;
+ }
+
/* Terminate the flow on len mismatch or if it grow "too much".
* Under small packet flood GRO count could elsewhere grow a lot
* leading to excessive truesize values.
* On len mismatch merge the first packet shorter than gso_size,
* otherwise complete the GRO packet.
*/
- if (ulen > ntohs(uh2->len) || skb_gro_receive(p, skb) ||
- ulen != ntohs(uh2->len) ||
+ if (ulen > ntohs(uh2->len)) {
+ pp = p;
+ } else {
+ if (NAPI_GRO_CB(skb)->is_flist) {
+ if (!pskb_may_pull(skb, skb_gro_offset(skb))) {
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+ }
+ if ((skb->ip_summed != p->ip_summed) ||
+ (skb->csum_level != p->csum_level)) {
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+ }
+ ret = skb_gro_receive_list(p, skb);
+ } else {
+ skb_gro_postpull_rcsum(skb, uh,
+ sizeof(struct udphdr));
+
+ ret = skb_gro_receive(p, skb);
+ }
+ }
+
+ if (ret || ulen != ntohs(uh2->len) ||
NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
pp = p;
@@ -401,36 +444,29 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
return NULL;
}
-INDIRECT_CALLABLE_DECLARE(struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
- __be16 sport, __be16 dport));
struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
- struct udphdr *uh, udp_lookup_t lookup)
+ struct udphdr *uh, struct sock *sk)
{
struct sk_buff *pp = NULL;
struct sk_buff *p;
struct udphdr *uh2;
unsigned int off = skb_gro_offset(skb);
int flush = 1;
- struct sock *sk;
- rcu_read_lock();
- sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
- udp4_lib_lookup_skb, skb, uh->source, uh->dest);
- if (!sk)
- goto out_unlock;
+ if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
+ NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;
- if (udp_sk(sk)->gro_enabled) {
+ if ((sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) {
pp = call_gro_receive(udp_gro_receive_segment, head, skb);
- rcu_read_unlock();
return pp;
}
- if (NAPI_GRO_CB(skb)->encap_mark ||
+ if (!sk || NAPI_GRO_CB(skb)->encap_mark ||
(skb->ip_summed != CHECKSUM_PARTIAL &&
NAPI_GRO_CB(skb)->csum_cnt == 0 &&
!NAPI_GRO_CB(skb)->csum_valid) ||
!udp_sk(sk)->gro_receive)
- goto out_unlock;
+ goto out;
/* mark that this skb passed once through the tunnel gro layer */
NAPI_GRO_CB(skb)->encap_mark = 1;
@@ -457,8 +493,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb);
-out_unlock:
- rcu_read_unlock();
+out:
skb_gro_flush_final(skb, pp, flush);
return pp;
}
@@ -468,8 +503,10 @@ INDIRECT_CALLABLE_SCOPE
struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
{
struct udphdr *uh = udp_gro_udphdr(skb);
+ struct sk_buff *pp;
+ struct sock *sk;
- if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
+ if (unlikely(!uh))
goto flush;
/* Don't bother verifying checksum if we're going to flush anyway. */
@@ -480,11 +517,15 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
inet_gro_compute_pseudo))
goto flush;
else if (uh->check)
- skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
+ skb_gro_checksum_try_convert(skb, IPPROTO_UDP,
inet_gro_compute_pseudo);
skip:
NAPI_GRO_CB(skb)->is_ipv6 = 0;
- return udp_gro_receive(head, skb, uh, udp4_lib_lookup_skb);
+ rcu_read_lock();
+ sk = static_branch_unlikely(&udp_encap_needed_key) ? udp4_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
+ pp = udp_gro_receive(head, skb, uh, sk);
+ rcu_read_unlock();
+ return pp;
flush:
NAPI_GRO_CB(skb)->flush = 1;
@@ -517,9 +558,7 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
rcu_read_lock();
sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
udp4_lib_lookup_skb, skb, uh->source, uh->dest);
- if (sk && udp_sk(sk)->gro_enabled) {
- err = udp_gro_complete_segment(skb);
- } else if (sk && udp_sk(sk)->gro_complete) {
+ if (sk && udp_sk(sk)->gro_complete) {
skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
: SKB_GSO_UDP_TUNNEL;
@@ -529,6 +568,8 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
skb->encapsulation = 1;
err = udp_sk(sk)->gro_complete(sk, skb,
nhoff + sizeof(struct udphdr));
+ } else {
+ err = udp_gro_complete_segment(skb);
}
rcu_read_unlock();
@@ -544,6 +585,23 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
const struct iphdr *iph = ip_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
+ if (NAPI_GRO_CB(skb)->is_flist) {
+ uh->len = htons(skb->len - nhoff);
+
+ skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+ if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
+ skb->csum_level++;
+ } else {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->csum_level = 0;
+ }
+
+ return 0;
+ }
+
if (uh->check)
uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
iph->daddr, 0);
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index 8a4285712808..ea595c8549c7 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -72,6 +72,14 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
if (!head)
goto out;
+ if (!skb_dst(skb)) {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ if (ip_route_input_noref(skb, iph->daddr, iph->saddr,
+ iph->tos, skb->dev))
+ goto drop;
+ }
+
for_each_protocol_rcu(*head, handler)
if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL)
return ret;
@@ -79,6 +87,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
out:
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+drop:
kfree_skb(skb);
return 0;
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 39d861d00377..46d614b611db 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1226,11 +1226,13 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires)
}
static void
-cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt)
+cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
+ bool del_rt, bool del_peer)
{
struct fib6_info *f6i;
- f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len,
+ f6i = addrconf_get_prefix_route(del_peer ? &ifp->peer_addr : &ifp->addr,
+ ifp->prefix_len,
ifp->idev->dev, 0, RTF_DEFAULT, true);
if (f6i) {
if (del_rt)
@@ -1293,7 +1295,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
if (action != CLEANUP_PREFIX_RT_NOP) {
cleanup_prefix_route(ifp, expires,
- action == CLEANUP_PREFIX_RT_DEL);
+ action == CLEANUP_PREFIX_RT_DEL, false);
}
/* clean up prefsrc entries */
@@ -3345,6 +3347,10 @@ static void addrconf_dev_config(struct net_device *dev)
(dev->type != ARPHRD_NONE) &&
(dev->type != ARPHRD_RAWIP)) {
/* Alas, we support only Ethernet autoconfiguration. */
+ idev = __in6_dev_get(dev);
+ if (!IS_ERR_OR_NULL(idev) && dev->flags & IFF_UP &&
+ dev->flags & IFF_MULTICAST)
+ ipv6_mc_up(idev);
return;
}
@@ -4586,12 +4592,14 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
}
static int modify_prefix_route(struct inet6_ifaddr *ifp,
- unsigned long expires, u32 flags)
+ unsigned long expires, u32 flags,
+ bool modify_peer)
{
struct fib6_info *f6i;
u32 prio;
- f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len,
+ f6i = addrconf_get_prefix_route(modify_peer ? &ifp->peer_addr : &ifp->addr,
+ ifp->prefix_len,
ifp->idev->dev, 0, RTF_DEFAULT, true);
if (!f6i)
return -ENOENT;
@@ -4602,7 +4610,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
ip6_del_rt(dev_net(ifp->idev->dev), f6i);
/* add new one */
- addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
+ addrconf_prefix_route(modify_peer ? &ifp->peer_addr : &ifp->addr,
+ ifp->prefix_len,
ifp->rt_priority, ifp->idev->dev,
expires, flags, GFP_KERNEL);
} else {
@@ -4624,6 +4633,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
unsigned long timeout;
bool was_managetempaddr;
bool had_prefixroute;
+ bool new_peer = false;
ASSERT_RTNL();
@@ -4655,6 +4665,13 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
cfg->preferred_lft = timeout;
}
+ if (cfg->peer_pfx &&
+ memcmp(&ifp->peer_addr, cfg->peer_pfx, sizeof(struct in6_addr))) {
+ if (!ipv6_addr_any(&ifp->peer_addr))
+ cleanup_prefix_route(ifp, expires, true, true);
+ new_peer = true;
+ }
+
spin_lock_bh(&ifp->lock);
was_managetempaddr = ifp->flags & IFA_F_MANAGETEMPADDR;
had_prefixroute = ifp->flags & IFA_F_PERMANENT &&
@@ -4670,6 +4687,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
if (cfg->rt_priority && cfg->rt_priority != ifp->rt_priority)
ifp->rt_priority = cfg->rt_priority;
+ if (new_peer)
+ ifp->peer_addr = *cfg->peer_pfx;
+
spin_unlock_bh(&ifp->lock);
if (!(ifp->flags&IFA_F_TENTATIVE))
ipv6_ifa_notify(0, ifp);
@@ -4678,7 +4698,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
int rc = -ENOENT;
if (had_prefixroute)
- rc = modify_prefix_route(ifp, expires, flags);
+ rc = modify_prefix_route(ifp, expires, flags, false);
/* prefix route could have been deleted; if so restore it */
if (rc == -ENOENT) {
@@ -4686,6 +4706,15 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
ifp->rt_priority, ifp->idev->dev,
expires, flags, GFP_KERNEL);
}
+
+ if (had_prefixroute && !ipv6_addr_any(&ifp->peer_addr))
+ rc = modify_prefix_route(ifp, expires, flags, true);
+
+ if (rc == -ENOENT && !ipv6_addr_any(&ifp->peer_addr)) {
+ addrconf_prefix_route(&ifp->peer_addr, ifp->prefix_len,
+ ifp->rt_priority, ifp->idev->dev,
+ expires, flags, GFP_KERNEL);
+ }
} else if (had_prefixroute) {
enum cleanup_prefix_rt_t action;
unsigned long rt_expires;
@@ -4696,7 +4725,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
if (action != CLEANUP_PREFIX_RT_NOP) {
cleanup_prefix_route(ifp, rt_expires,
- action == CLEANUP_PREFIX_RT_DEL);
+ action == CLEANUP_PREFIX_RT_DEL, false);
}
}
@@ -5718,6 +5747,9 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
struct nlattr *tb[IFLA_INET6_MAX + 1];
int err;
+ if (!idev)
+ return -EAFNOSUPPORT;
+
if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0)
BUG();
@@ -5980,9 +6012,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
if (ifp->idev->cnf.forwarding)
addrconf_join_anycast(ifp);
if (!ipv6_addr_any(&ifp->peer_addr))
- addrconf_prefix_route(&ifp->peer_addr, 128, 0,
- ifp->idev->dev, 0, 0,
- GFP_ATOMIC);
+ addrconf_prefix_route(&ifp->peer_addr, 128,
+ ifp->rt_priority, ifp->idev->dev,
+ 0, 0, GFP_ATOMIC);
break;
case RTM_DELADDR:
if (ifp->idev->cnf.forwarding)
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index e31626ffccd1..fd535053245b 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -79,6 +79,8 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
if (!x)
goto out_reset;
+ skb->mark = xfrm_smark_get(skb->mark, x);
+
sp->xvec[sp->len++] = x;
sp->olen++;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 7bae6a91b487..72abf892302f 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -370,6 +370,21 @@ static int call_fib6_entry_notifier(struct notifier_block *nb,
return call_fib6_notifier(nb, event_type, &info.info);
}
+static int call_fib6_multipath_entry_notifier(struct notifier_block *nb,
+ enum fib_event_type event_type,
+ struct fib6_info *rt,
+ unsigned int nsiblings,
+ struct netlink_ext_ack *extack)
+{
+ struct fib6_entry_notifier_info info = {
+ .info.extack = extack,
+ .rt = rt,
+ .nsiblings = nsiblings,
+ };
+
+ return call_fib6_notifier(nb, event_type, &info.info);
+}
+
int call_fib6_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct fib6_info *rt,
@@ -400,6 +415,17 @@ int call_fib6_multipath_entry_notifiers(struct net *net,
return call_fib6_notifiers(net, event_type, &info.info);
}
+int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt)
+{
+ struct fib6_entry_notifier_info info = {
+ .rt = rt,
+ .nsiblings = rt->fib6_nsiblings,
+ };
+
+ rt->fib6_table->fib_seq++;
+ return call_fib6_notifiers(net, FIB_EVENT_ENTRY_REPLACE, &info.info);
+}
+
struct fib6_dump_arg {
struct net *net;
struct notifier_block *nb;
@@ -408,22 +434,29 @@ struct fib6_dump_arg {
static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
{
- if (rt == arg->net->ipv6.fib6_null_entry)
+ enum fib_event_type fib_event = FIB_EVENT_ENTRY_REPLACE;
+ int err;
+
+ if (!rt || rt == arg->net->ipv6.fib6_null_entry)
return 0;
- return call_fib6_entry_notifier(arg->nb, FIB_EVENT_ENTRY_ADD,
- rt, arg->extack);
+
+ if (rt->fib6_nsiblings)
+ err = call_fib6_multipath_entry_notifier(arg->nb, fib_event,
+ rt,
+ rt->fib6_nsiblings,
+ arg->extack);
+ else
+ err = call_fib6_entry_notifier(arg->nb, fib_event, rt,
+ arg->extack);
+
+ return err;
}
static int fib6_node_dump(struct fib6_walker *w)
{
- struct fib6_info *rt;
- int err = 0;
+ int err;
- for_each_fib6_walker_rt(w) {
- err = fib6_rt_dump(rt, w->args);
- if (err)
- break;
- }
+ err = fib6_rt_dump(w->leaf, w->args);
w->leaf = NULL;
return err;
}
@@ -1039,6 +1072,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
(info->nlh->nlmsg_flags & NLM_F_CREATE));
int found = 0;
bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
+ bool notify_sibling_rt = false;
u16 nlflags = NLM_F_EXCL;
int err;
@@ -1068,8 +1102,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
found++;
break;
}
- if (rt_can_ecmp)
- fallback_ins = fallback_ins ?: ins;
+ fallback_ins = fallback_ins ?: ins;
goto next_iter;
}
@@ -1112,7 +1145,9 @@ next_iter:
}
if (fallback_ins && !found) {
- /* No ECMP-able route found, replace first non-ECMP one */
+ /* No matching route with same ecmp-able-ness found, replace
+ * first matching route
+ */
ins = fallback_ins;
iter = rcu_dereference_protected(*ins,
lockdep_is_held(&rt->fib6_table->tb6_lock));
@@ -1130,6 +1165,7 @@ next_iter:
/* Find the first route that have the same metric */
sibling = leaf;
+ notify_sibling_rt = true;
while (sibling) {
if (sibling->fib6_metric == rt->fib6_metric &&
rt6_qualify_for_ecmp(sibling)) {
@@ -1139,6 +1175,7 @@ next_iter:
}
sibling = rcu_dereference_protected(sibling->fib6_next,
lockdep_is_held(&rt->fib6_table->tb6_lock));
+ notify_sibling_rt = false;
}
/* For each sibling in the list, increment the counter of
* siblings. BUG() if counters does not match, list of siblings
@@ -1165,10 +1202,21 @@ next_iter:
add:
nlflags |= NLM_F_CREATE;
- if (!info->skip_notify_kernel) {
+ /* The route should only be notified if it is the first
+ * route in the node or if it is added as a sibling
+ * route to the first route in the node.
+ */
+ if (!info->skip_notify_kernel &&
+ (notify_sibling_rt || ins == &fn->leaf)) {
+ enum fib_event_type fib_event;
+
+ if (notify_sibling_rt)
+ fib_event = FIB_EVENT_ENTRY_APPEND;
+ else
+ fib_event = FIB_EVENT_ENTRY_REPLACE;
err = call_fib6_entry_notifiers(info->nl_net,
- FIB_EVENT_ENTRY_ADD,
- rt, extack);
+ fib_event, rt,
+ extack);
if (err) {
struct fib6_info *sibling, *next_sibling;
@@ -1212,7 +1260,7 @@ add:
return -ENOENT;
}
- if (!info->skip_notify_kernel) {
+ if (!info->skip_notify_kernel && ins == &fn->leaf) {
err = call_fib6_entry_notifiers(info->nl_net,
FIB_EVENT_ENTRY_REPLACE,
rt, extack);
@@ -1845,13 +1893,29 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
struct fib6_info __rcu **rtp, struct nl_info *info)
{
+ struct fib6_info *leaf, *replace_rt = NULL;
struct fib6_walker *w;
struct fib6_info *rt = rcu_dereference_protected(*rtp,
lockdep_is_held(&table->tb6_lock));
struct net *net = info->nl_net;
+ bool notify_del = false;
RT6_TRACE("fib6_del_route\n");
+ /* If the deleted route is the first in the node and it is not part of
+ * a multipath route, then we need to replace it with the next route
+ * in the node, if exists.
+ */
+ leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ if (leaf == rt && !rt->fib6_nsiblings) {
+ if (rcu_access_pointer(rt->fib6_next))
+ replace_rt = rcu_dereference_protected(rt->fib6_next,
+ lockdep_is_held(&table->tb6_lock));
+ else
+ notify_del = true;
+ }
+
/* Unlink it */
*rtp = rt->fib6_next;
rt->fib6_node = NULL;
@@ -1869,6 +1933,14 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
if (rt->fib6_nsiblings) {
struct fib6_info *sibling, *next_sibling;
+ /* The route is deleted from a multipath route. If this
+ * multipath route is the first route in the node, then we need
+ * to emit a delete notification. Otherwise, we need to skip
+ * the notification.
+ */
+ if (rt->fib6_metric == leaf->fib6_metric &&
+ rt6_qualify_for_ecmp(leaf))
+ notify_del = true;
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, fib6_siblings)
sibling->fib6_nsiblings--;
@@ -1904,8 +1976,13 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
fib6_purge_rt(rt, fn, net);
- if (!info->skip_notify_kernel)
- call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
+ if (!info->skip_notify_kernel) {
+ if (notify_del)
+ call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
+ rt, NULL);
+ else if (replace_rt)
+ call_fib6_entry_notifiers_replace(net, replace_rt);
+ }
if (!info->skip_notify)
inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
@@ -2495,14 +2572,13 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
struct net *net = seq_file_net(seq);
struct ipv6_route_iter *iter = seq->private;
+ ++(*pos);
if (!v)
goto iter_table;
n = rcu_dereference_bh(((struct fib6_info *)v)->fib6_next);
- if (n) {
- ++*pos;
+ if (n)
return n;
- }
iter_table:
ipv6_route_check_sernum(iter);
@@ -2510,8 +2586,6 @@ iter_table:
r = fib6_walk_continue(&iter->w);
spin_unlock_bh(&iter->tbl->tb6_lock);
if (r > 0) {
- if (v)
- ++*pos;
return iter->w.leaf;
} else if (r < 0) {
fib6_walker_unlink(net, &iter->w);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index ee968d980746..781ca8c07a0d 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -437,8 +437,6 @@ static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return -ENOENT;
switch (type) {
- struct ipv6_tlv_tnl_enc_lim *tel;
- __u32 teli;
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
@@ -452,7 +450,10 @@ static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
break;
}
return 0;
- case ICMPV6_PARAMPROB:
+ case ICMPV6_PARAMPROB: {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ __u32 teli;
+
teli = 0;
if (code == ICMPV6_HDR_FIELD)
teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
@@ -468,6 +469,7 @@ static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
t->parms.name);
}
return 0;
+ }
case ICMPV6_PKT_TOOBIG:
ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
return 0;
@@ -1466,7 +1468,6 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
dev->mtu -= 8;
if (tunnel->parms.collect_md) {
- dev->features |= NETIF_F_NETNS_LOCAL;
netif_keep_dst(dev);
}
ip6gre_tnl_init_features(dev);
@@ -1894,7 +1895,6 @@ static void ip6gre_tap_setup(struct net_device *dev)
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
netif_keep_dst(dev);
@@ -2197,7 +2197,6 @@ static void ip6erspan_tap_setup(struct net_device *dev)
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
netif_keep_dst(dev);
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index 02045494c24c..e0086758b6ee 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -45,4 +45,38 @@ out:
rcu_read_unlock();
}
EXPORT_SYMBOL(icmpv6_send);
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+#include <net/netfilter/nf_conntrack.h>
+void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
+{
+ struct sk_buff *cloned_skb = NULL;
+ enum ip_conntrack_info ctinfo;
+ struct in6_addr orig_ip;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb_in, &ctinfo);
+ if (!ct || !(ct->status & IPS_SRC_NAT)) {
+ icmpv6_send(skb_in, type, code, info);
+ return;
+ }
+
+ if (skb_shared(skb_in))
+ skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
+
+ if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head ||
+ (skb_network_header(skb_in) + sizeof(struct ipv6hdr)) >
+ skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in,
+ skb_network_offset(skb_in) + sizeof(struct ipv6hdr))))
+ goto out;
+
+ orig_ip = ipv6_hdr(skb_in)->saddr;
+ ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6;
+ icmpv6_send(skb_in, type, code, info);
+ ipv6_hdr(skb_in)->saddr = orig_ip;
+out:
+ consume_skb(cloned_skb);
+}
+EXPORT_SYMBOL(icmpv6_ndo_send);
+#endif
#endif
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 2f376dbc37d5..4703b09808d0 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -121,6 +121,7 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
/**
* ip6_tnl_lookup - fetch tunnel matching the end-point addresses
+ * @link: ifindex of underlying interface
* @remote: the address of the tunnel exit-point
* @local: the address of the tunnel entry-point
*
@@ -134,37 +135,56 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
static struct ip6_tnl *
-ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
+ip6_tnl_lookup(struct net *net, int link,
+ const struct in6_addr *remote, const struct in6_addr *local)
{
unsigned int hash = HASH(remote, local);
- struct ip6_tnl *t;
+ struct ip6_tnl *t, *cand = NULL;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
struct in6_addr any;
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr) &&
- (t->dev->flags & IFF_UP))
+ if (!ipv6_addr_equal(local, &t->parms.laddr) ||
+ !ipv6_addr_equal(remote, &t->parms.raddr) ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (link == t->parms.link)
return t;
+ else
+ cand = t;
}
memset(&any, 0, sizeof(any));
hash = HASH(&any, local);
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_any(&t->parms.raddr) &&
- (t->dev->flags & IFF_UP))
+ if (!ipv6_addr_equal(local, &t->parms.laddr) ||
+ !ipv6_addr_any(&t->parms.raddr) ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (link == t->parms.link)
return t;
+ else if (!cand)
+ cand = t;
}
hash = HASH(remote, &any);
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
- if (ipv6_addr_equal(remote, &t->parms.raddr) &&
- ipv6_addr_any(&t->parms.laddr) &&
- (t->dev->flags & IFF_UP))
+ if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
+ !ipv6_addr_any(&t->parms.laddr) ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (link == t->parms.link)
return t;
+ else if (!cand)
+ cand = t;
}
+ if (cand)
+ return cand;
+
t = rcu_dereference(ip6n->collect_md_tun);
if (t && t->dev->flags & IFF_UP)
return t;
@@ -351,7 +371,8 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net,
(t = rtnl_dereference(*tp)) != NULL;
tp = &t->next) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
- ipv6_addr_equal(remote, &t->parms.raddr)) {
+ ipv6_addr_equal(remote, &t->parms.raddr) &&
+ p->link == t->parms.link) {
if (create)
return ERR_PTR(-EEXIST);
@@ -485,7 +506,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
processing of the error. */
rcu_read_lock();
- t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr);
+ t = ip6_tnl_lookup(dev_net(skb->dev), skb->dev->ifindex, &ipv6h->daddr, &ipv6h->saddr);
if (!t)
goto out;
@@ -496,8 +517,6 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
err = 0;
switch (*type) {
- struct ipv6_tlv_tnl_enc_lim *tel;
- __u32 mtu, teli;
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
@@ -510,7 +529,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
rel_msg = 1;
}
break;
- case ICMPV6_PARAMPROB:
+ case ICMPV6_PARAMPROB: {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ __u32 teli;
+
teli = 0;
if ((*code) == ICMPV6_HDR_FIELD)
teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
@@ -527,7 +549,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
t->parms.name);
}
break;
- case ICMPV6_PKT_TOOBIG:
+ }
+ case ICMPV6_PKT_TOOBIG: {
+ __u32 mtu;
+
ip6_update_pmtu(skb, net, htonl(*i